summary | refs | log | tree | commit | diff | stats
Unidiff
-rw-r--r-- src/aggregator.c 2
-rw-r--r-- src/assign_protein_type.c 7
-rw-r--r-- src/assign_protein_type.h 3
-rw-r--r-- src/check_error.c 2
-rw-r--r-- src/check_error.h 3
-rw-r--r-- src/check_h5_error.c 2
-rw-r--r-- src/check_h5_error.h 2
-rw-r--r-- src/check_ncbi_error.c 3
-rw-r--r-- src/check_ncbi_error.h 2
-rw-r--r-- src/load_influenza_aa_dat.c 259
-rw-r--r-- src/load_influenza_aa_dat.h 3
-rw-r--r-- src/load_influenza_faa.c 2
-rw-r--r-- src/load_influenza_faa.h 3
13 files changed, 145 insertions, 148 deletions
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 91ef415..9ee3c46 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -46,60 +46,63 @@ load_influenza_aa_dat (hid_t file_id)
46 * memory."46 * memory."
47 */47 */
48 size_t dst_size = sizeof (supplementary_data);48 size_t dst_size = sizeof (supplementary_data);
49 size_t dst_offset[NFIELDS] = { HOFFSET ( supplementary_data, genbank_accession_number ),49 size_t dst_offset[NFIELDS] =
50 HOFFSET ( supplementary_data, host ),50 { HOFFSET (supplementary_data, genbank_accession_number),
51 HOFFSET ( supplementary_data, genome_segment_number ),51 HOFFSET (supplementary_data, host),
52 HOFFSET ( supplementary_data, subtype ),52 HOFFSET (supplementary_data, genome_segment_number),
53 HOFFSET ( supplementary_data, country ),53 HOFFSET (supplementary_data, subtype),
54 HOFFSET ( supplementary_data, year ),54 HOFFSET (supplementary_data, country),
55 HOFFSET ( supplementary_data, sequence_length ),55 HOFFSET (supplementary_data, year),
56 HOFFSET ( supplementary_data, virus_name ),56 HOFFSET (supplementary_data, sequence_length),
57 HOFFSET ( supplementary_data, age ),57 HOFFSET (supplementary_data, virus_name),
58 HOFFSET ( supplementary_data, gender ),58 HOFFSET (supplementary_data, age),
59 HOFFSET ( supplementary_data, full_length_indicator )};59 HOFFSET (supplementary_data, gender),
60 HOFFSET (supplementary_data, full_length_indicator)
61 };
6062
61 supplementary_data dst_buf[1];63 supplementary_data dst_buf[1];
6264
63 size_t dst_sizes[NFIELDS] = { sizeof ( dst_buf[0].genbank_accession_number ),65 size_t dst_sizes[NFIELDS] = { sizeof (dst_buf[0].genbank_accession_number),
64 sizeof ( dst_buf[0].host ),66 sizeof (dst_buf[0].host),
65 sizeof ( dst_buf[0].genome_segment_number ),67 sizeof (dst_buf[0].genome_segment_number),
66 sizeof ( dst_buf[0].subtype ),68 sizeof (dst_buf[0].subtype),
67 sizeof ( dst_buf[0].country ),69 sizeof (dst_buf[0].country),
68 sizeof ( dst_buf[0].year ),70 sizeof (dst_buf[0].year),
69 sizeof ( dst_buf[0].sequence_length ),71 sizeof (dst_buf[0].sequence_length),
70 sizeof ( dst_buf[0].virus_name ),72 sizeof (dst_buf[0].virus_name),
71 sizeof ( dst_buf[0].age ),73 sizeof (dst_buf[0].age),
72 sizeof ( dst_buf[0].gender ),74 sizeof (dst_buf[0].gender),
73 sizeof ( dst_buf[0].full_length_indicator)};75 sizeof (dst_buf[0].full_length_indicator)
76 };
7477
75 /*78 /*
76 * Map the native types to HDF5 types for each field.79 * Map the native types to HDF5 types for each field.
77 */80 */
78 hid_t field_type[NFIELDS];81 hid_t field_type[NFIELDS];
7982
80 hid_t genbank_accession_number_type = H5Tcopy ( H5T_C_S1 );83 hid_t genbank_accession_number_type = H5Tcopy (H5T_C_S1);
81 H5Tset_size ( genbank_accession_number_type, 9 );84 H5Tset_size (genbank_accession_number_type, 9);
82 field_type[0] = genbank_accession_number_type;85 field_type[0] = genbank_accession_number_type;
8386
84 hid_t host_type = H5Tcopy ( H5T_C_S1 );87 hid_t host_type = H5Tcopy (H5T_C_S1);
85 H5Tset_size ( host_type, 15 );88 H5Tset_size (host_type, 15);
86 field_type[1] = host_type;89 field_type[1] = host_type;
87 90
88 field_type[2] = H5T_NATIVE_INT;91 field_type[2] = H5T_NATIVE_INT;
8992
90 hid_t subtype_type = H5Tcopy ( H5T_C_S1 );93 hid_t subtype_type = H5Tcopy (H5T_C_S1);
91 H5Tset_size (subtype_type, 7 );94 H5Tset_size (subtype_type, 7);
92 field_type[3] = subtype_type;95 field_type[3] = subtype_type;
9396
94 hid_t country_type = H5Tcopy ( H5T_C_S1 );97 hid_t country_type = H5Tcopy (H5T_C_S1);
95 H5Tset_size (country_type, 25 );98 H5Tset_size (country_type, 25);
96 field_type[4] = country_type;99 field_type[4] = country_type;
97100
98 field_type[5] = H5T_NATIVE_INT;101 field_type[5] = H5T_NATIVE_INT;
99102
100 field_type[6] = H5T_NATIVE_INT;103 field_type[6] = H5T_NATIVE_INT;
101104
102 hid_t virus_name_type = H5Tcopy ( H5T_C_S1 );105 hid_t virus_name_type = H5Tcopy (H5T_C_S1);
103 H5Tset_size (virus_name_type, 196);106 H5Tset_size (virus_name_type, 196);
104 field_type[7] = virus_name_type;107 field_type[7] = virus_name_type;
105108
@@ -118,18 +121,18 @@ load_influenza_aa_dat (hid_t file_id)
118 /*121 /*
119 * Labels used for the fields in the table.122 * Labels used for the fields in the table.
120 */123 */
121 const char *field_names[NFIELDS] =124 const char *field_names[NFIELDS] = { "GenBank accession number",
122 { "GenBank accession number",125 "Host",
123 "Host",126 "Genome segment number",
124 "Genome segment number",127 "Subtype",
125 "Subtype",128 "Country",
126 "Country",129 "Year",
127 "Year",130 "Sequence length",
128 "Sequence length",131 "Virus name",
129 "Virus name",132 "Age",
130 "Age",133 "Gender",
131 "Gender",134 "Full-length Indicator"
132 "Full-length Indicator" };135 };
133136
134 /*137 /*
135 * Table storage options.138 * Table storage options.
@@ -142,7 +145,7 @@ load_influenza_aa_dat (hid_t file_id)
142 * Insert the records.145 * Insert the records.
143 */146 */
144 supplementary_data p_data;147 supplementary_data p_data;
145 FILE* dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat",148 FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat",
146 "r");149 "r");
147 if (dat == NULL)150 if (dat == NULL)
148 check_error (__FILE__, __LINE__);151 check_error (__FILE__, __LINE__);
@@ -150,89 +153,89 @@ load_influenza_aa_dat (hid_t file_id)
150 size_t len = 0;153 size_t len = 0;
151 int current_line = 0;154 int current_line = 0;
152155
153 while (getline (&line, &len, dat) != -1) {156 while (getline (&line, &len, dat) != -1)
154157 {
155 current_line++;158
156 char *running = strdup (line);159 current_line++;
157 char *token;160 char *running = strdup (line);
158 161 char *token;
159 /*162
160 * Parse the line, handling the case of empty fields represented163 /*
161 * by sequential delimiters.164 * Parse the line, handling the case of empty fields represented
162 */165 * by sequential delimiters.
163 strncpy(p_data.genbank_accession_number, strsep (&running, "\t"),166 */
164 sizeof(p_data.genbank_accession_number));167 strncpy (p_data.genbank_accession_number, strsep (&running, "\t"),
165 168 sizeof (p_data.genbank_accession_number));
166 strncpy(p_data.host, strsep (&running, "\t"),169
167 sizeof(p_data.host));170 strncpy (p_data.host, strsep (&running, "\t"), sizeof (p_data.host));
168 171
169 token = strsep (&running, "\t");172 token = strsep (&running, "\t");
170 if (strcmp (token, "\0") == 0)173 if (strcmp (token, "\0") == 0)
171 p_data.genome_segment_number = 0;174 p_data.genome_segment_number = 0;
172 else175 else
173 p_data.genome_segment_number = atoi(token);176 p_data.genome_segment_number = atoi (token);
174 177
175 strncpy(p_data.subtype, strsep (&running, "\t"),178 strncpy (p_data.subtype, strsep (&running, "\t"),
176 sizeof(p_data.subtype));179 sizeof (p_data.subtype));
177 180
178 strncpy(p_data.country, strsep (&running, "\t"),181 strncpy (p_data.country, strsep (&running, "\t"),
179 sizeof(p_data.country));182 sizeof (p_data.country));
180 183
181 /*184 /*
182 * Convert the year field from text to numeric. Unknown and empty185 * Convert the year field from text to numeric. Unknown and empty
183 * values are assigned a numeric value of zero.186 * values are assigned a numeric value of zero.
184 */187 */
185 token = strsep (&running, "\t");188 token = strsep (&running, "\t");
186 if (strcmp (token, "\0") == 0)189 if (strcmp (token, "\0") == 0)
187 p_data.year = 0;190 p_data.year = 0;
188 else if (strcmp (token, "unknown") == 0)191 else if (strcmp (token, "unknown") == 0)
189 p_data.year = 0;192 p_data.year = 0;
190 else if (strcmp (token, "NON") == 0)193 else if (strcmp (token, "NON") == 0)
191 p_data.year = 0;194 p_data.year = 0;
192 else195 else
193 p_data.year = atoi(token);196 p_data.year = atoi (token);
194197
195 token = strsep (&running, "\t");198 token = strsep (&running, "\t");
196 if (strcmp (token, "\0") == 0)199 if (strcmp (token, "\0") == 0)
197 p_data.sequence_length = 0;200 p_data.sequence_length = 0;
198 else201 else
199 p_data.sequence_length = atoi(token);202 p_data.sequence_length = atoi (token);
200 203
201 strncpy(p_data.virus_name, strsep (&running, "\t"),204 strncpy (p_data.virus_name, strsep (&running, "\t"),
202 sizeof(p_data.virus_name));205 sizeof (p_data.virus_name));
203 206
204 strncpy(p_data.age, strsep (&running, "\t"),207 strncpy (p_data.age, strsep (&running, "\t"), sizeof (p_data.age));
205 sizeof(p_data.age));208
206 209 strncpy (p_data.gender, strsep (&running, "\t"),
207 strncpy(p_data.gender, strsep (&running, "\t"),210 sizeof (p_data.gender));
208 sizeof(p_data.gender));211
209 212 strncpy (p_data.full_length_indicator, strsep (&running, "\t"),
210 strncpy(p_data.full_length_indicator, strsep (&running, "\t"),213 sizeof (p_data.full_length_indicator));
211 sizeof(p_data.full_length_indicator));214
212215 if (current_line == 1)
213 if (current_line == 1) 216 {
214 {217 herr_t status = H5TBmake_table ("influenza_aa.dat", file_id,
215 herr_t status = H5TBmake_table ("influenza_aa.dat", file_id,218 TABLE_NAME, NFIELDS, 1, dst_size,
216 TABLE_NAME, NFIELDS, 1,dst_size,219 field_names, dst_offset, field_type,
217 field_names, dst_offset, field_type,220 chunk_size, fill_data, compress,
218 chunk_size, fill_data, compress,221 &p_data);
219 &p_data);222 if (status < 0)
220 if (status < 0)223 check_h5_error (status, __FILE__, __LINE__);
221 check_h5_error (status, __FILE__, __LINE__);224 }
222 }225 else
223 else226 {
224 {227 herr_t status =
225 herr_t status = H5TBappend_records (file_id, TABLE_NAME, 1, dst_size,228 H5TBappend_records (file_id, TABLE_NAME, 1, dst_size,
226 dst_offset, dst_sizes, &p_data);229 dst_offset, dst_sizes, &p_data);
227 if (status < 0)230 if (status < 0)
228 check_h5_error (status, __FILE__, __LINE__);231 check_h5_error (status, __FILE__, __LINE__);
229 }232 }
230233
231 if (running)234 if (running)
232 free (running);235 free (running);
233 236
234 }237 }
235 238
236 if (line)239 if (line)
237 free (line);240 free (line);
238241

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.