-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | src/Makefile.am | 12 | ||||
-rw-r--r-- | src/aggregator.c | 30 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 152 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.h | 13 |
5 files changed, 208 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..af437a6 --- a/dev/null +++ b/Makefile.am | |||
@@ -0,0 +1 @@ | |||
SUBDIRS = src | |||
diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..7cb4282 --- a/dev/null +++ b/src/Makefile.am | |||
@@ -0,0 +1,12 @@ | |||
1 | bin_PROGRAMS = aggregator | ||
2 | |||
3 | aggregator_SOURCES = \ | ||
4 | aggregator.c \ | ||
5 | load_influenza_aa_dat.c | ||
6 | |||
7 | aggregator_LDADD = -lhdf5 | ||
8 | |||
9 | noinst_HEADERS = \ | ||
10 | load_influenza_aa_dat.h | ||
11 | |||
12 | AM_CFLAGS = -Wall -std=gnu99 -ggdb | ||
diff --git a/src/aggregator.c b/src/aggregator.c new file mode 100644 index 0000000..ae5aa60 --- a/dev/null +++ b/src/aggregator.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* | ||
2 | * Aggregate the collected influenza data into a single HDF5 | ||
3 | * container. | ||
4 | */ | ||
5 | |||
6 | #include <hdf5.h> | ||
7 | #include "load_influenza_aa_dat.h" | ||
8 | |||
9 | #define FILE "influenza.h5" | ||
10 | |||
11 | int | ||
12 | main() | ||
13 | { | ||
14 | /* | ||
15 | * Create the HDF5 file. | ||
16 | */ | ||
17 | hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); | ||
18 | |||
19 | /* | ||
20 | * Load the supplementary protein data file. | ||
21 | */ | ||
22 | load_influenza_aa_dat (file_id); | ||
23 | |||
24 | /* | ||
25 | * Close the HD5 file. | ||
26 | */ | ||
27 | herr_t status = H5Fclose (file_id); | ||
28 | |||
29 | return 0; | ||
30 | } | ||
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c new file mode 100644 index 0000000..72aacb5 --- a/dev/null +++ b/src/load_influenza_aa_dat.c | |||
@@ -0,0 +1,152 @@ | |||
1 | #include "load_influenza_aa_dat.h" | ||
2 | #include "hdf5_hl.h" | ||
3 | |||
4 | #define NFIELDS (hsize_t) 11 | ||
5 | //#define NRECORDS (hsize_t) 138052 | ||
6 | #define NRECORDS (hsize_t) 1 | ||
7 | #define TABLE_NAME "influenza_aa.dat" | ||
8 | |||
9 | void | ||
10 | load_influenza_aa_dat (hid_t file_id) | ||
11 | { | ||
12 | /* | ||
13 | * Model the data using native types. | ||
14 | */ | ||
15 | typedef struct supplementary_data | ||
16 | { | ||
17 | char genbank_accession_number[9]; | ||
18 | char host[15]; | ||
19 | int genome_segment_number; | ||
20 | char subtype[7]; | ||
21 | char country[25]; | ||
22 | int year; | ||
23 | int sequence_length; | ||
24 | char virus_name[196]; | ||
25 | char age[17]; | ||
26 | char gender[6]; | ||
27 | char full_length_indicator[4]; | ||
28 | } supplementary_data; | ||
29 | |||
30 | /* | ||
31 | * Use an HDF5 Table for storage. | ||
32 | * http://www.hdfgroup.org/HDF5/Tutor/h5table.html | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | * "Calculate the size and the offsets of our struct members in | ||
37 | * memory." | ||
38 | */ | ||
39 | size_t dst_size = sizeof (supplementary_data); | ||
40 | size_t dst_offset[NFIELDS] = { HOFFSET ( supplementary_data, genbank_accession_number ), | ||
41 | HOFFSET ( supplementary_data, host ), | ||
42 | HOFFSET ( supplementary_data, genome_segment_number ), | ||
43 | HOFFSET ( supplementary_data, subtype ), | ||
44 | HOFFSET ( supplementary_data, country ), | ||
45 | HOFFSET ( supplementary_data, year ), | ||
46 | HOFFSET ( supplementary_data, sequence_length ), | ||
47 | HOFFSET ( supplementary_data, virus_name ), | ||
48 | HOFFSET ( supplementary_data, age ), | ||
49 | HOFFSET ( supplementary_data, gender ), | ||
50 | HOFFSET ( supplementary_data, full_length_indicator )}; | ||
51 | |||
52 | /* | ||
53 | |||
54 | Only needed for reading? | ||
55 | |||
56 | supplementary_data dst_buf[NRECORDS]; | ||
57 | |||
58 | size_t dst_sizes[NFIELDS] = { sizeof ( dst_buf[0].genbank_accession_number ), | ||
59 | sizeof ( dst_buf[0].host ), | ||
60 | sizeof ( dst_buf[0].genome_segment_number ), | ||
61 | sizeof ( dst_buf[0].subtype ), | ||
62 | sizeof ( dst_buf[0].country ), | ||
63 | sizeof ( dst_buf[0].year ), | ||
64 | sizeof ( dst_buf[0].sequence_length ), | ||
65 | sizeof ( dst_buf[0].virus_name ), | ||
66 | sizeof ( dst_buf[0].age ), | ||
67 | sizeof ( dst_buf[0].gender ), | ||
68 | sizeof ( dst_buf[0].full_length_indicator)}; | ||
69 | */ | ||
70 | |||
71 | /* | ||
72 | * "Define field information." | ||
73 | */ | ||
74 | const char *field_names[NFIELDS] = | ||
75 | { "GenBank accession number", | ||
76 | "Host", | ||
77 | "Genome segment number", | ||
78 | "Subtype", | ||
79 | "Country", | ||
80 | "Year", | ||
81 | "Sequence length", | ||
82 | "Virus name", | ||
83 | "Age", | ||
84 | "Gender", | ||
85 | "Full-length Indicator" }; | ||
86 | hsize_t chunk_size = 10; | ||
87 | int *fill_data = NULL; | ||
88 | int compress = 0; | ||
89 | |||
90 | /* | ||
91 | * "Initialize field type." | ||
92 | */ | ||
93 | hid_t field_type[NFIELDS]; | ||
94 | |||
95 | hid_t genbank_accession_number_type = H5Tcopy ( H5T_C_S1 ); | ||
96 | H5Tset_size ( genbank_accession_number_type, 9 ); | ||
97 | field_type[0] = genbank_accession_number_type; | ||
98 | |||
99 | hid_t host_type = H5Tcopy ( H5T_C_S1 ); | ||
100 | H5Tset_size ( host_type, 15 ); | ||
101 | field_type[1] = host_type; | ||
102 | |||
103 | field_type[2] = H5T_NATIVE_INT; | ||
104 | |||
105 | hid_t subtype_type = H5Tcopy ( H5T_C_S1 ); | ||
106 | H5Tset_size (subtype_type, 7 ); | ||
107 | field_type[3] = subtype_type; | ||
108 | |||
109 | hid_t country_type = H5Tcopy ( H5T_C_S1 ); | ||
110 | H5Tset_size (country_type, 25 ); | ||
111 | field_type[4] = country_type; | ||
112 | |||
113 | field_type[5] = H5T_NATIVE_INT; | ||
114 | |||
115 | field_type[6] = H5T_NATIVE_INT; | ||
116 | |||
117 | hid_t virus_name_type = H5Tcopy ( H5T_C_S1 ); | ||
118 | H5Tset_size (virus_name_type, 196); | ||
119 | field_type[7] = virus_name_type; | ||
120 | |||
121 | hid_t age_type = H5Tcopy (H5T_C_S1); | ||
122 | H5Tset_size (age_type, 17); | ||
123 | field_type[8] = age_type; | ||
124 | |||
125 | hid_t gender_type = H5Tcopy (H5T_C_S1); | ||
126 | H5Tset_size (gender_type, 6); | ||
127 | field_type[9] = gender_type; | ||
128 | |||
129 | hid_t full_length_indicator_type = H5Tcopy (H5T_C_S1); | ||
130 | H5Tset_size (full_length_indicator_type, 4); | ||
131 | field_type[10] = full_length_indicator_type; | ||
132 | |||
133 | supplementary_data p_data[NRECORDS] = { | ||
134 | {"BAC53999", "Human", 7, "", "Zambia", 1999, 109, "Influenza B virus (B/Lusaka/270/99)", | ||
135 | "", "", "yes"} | ||
136 | }; | ||
137 | |||
138 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, TABLE_NAME,NFIELDS,NRECORDS, | ||
139 | dst_size,field_names, dst_offset, field_type, | ||
140 | chunk_size, fill_data, compress, p_data); | ||
141 | |||
142 | H5Tclose (genbank_accession_number_type); | ||
143 | H5Tclose (host_type); | ||
144 | H5Tclose (subtype_type); | ||
145 | H5Tclose (country_type); | ||
146 | H5Tclose (virus_name_type); | ||
147 | H5Tclose (age_type); | ||
148 | H5Tclose (gender_type); | ||
149 | H5Tclose (full_length_indicator_type); | ||
150 | |||
151 | return; | ||
152 | } | ||
diff --git a/src/load_influenza_aa_dat.h b/src/load_influenza_aa_dat.h new file mode 100644 index 0000000..c431e67 --- a/dev/null +++ b/src/load_influenza_aa_dat.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef LOAD_INFLUENZA_AA_DAT_H | ||
2 | #define LOAD_INFLUENZA_AA_DAT_H | ||
3 | |||
4 | #include <hdf5.h> | ||
5 | |||
6 | /* | ||
7 | * Load the supplementary protein data from the NCBI influenza_aa.dat | ||
8 | * file. | ||
9 | */ | ||
10 | void | ||
11 | load_influenza_aa_dat (hid_t file_id); | ||
12 | |||
13 | #endif // LOAD_INFLUENZA_AA_DAT_H | ||