-rw-r--r-- | src/aggregator.c | 3 | ||||
-rw-r--r-- | src/assign/assign_protein_type.c | 110 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.c | 13 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.h | 2 | ||||
-rw-r--r-- | src/load/load_influenza_faa.c | 30 | ||||
-rw-r--r-- | src/load/load_influenza_faa.h | 2 | ||||
-rw-r--r-- | src/model/gi_type_data_init.c | 4 | ||||
-rw-r--r-- | src/model/gi_type_data_init.h | 4 | ||||
-rw-r--r-- | src/model/sequence_data_init.c | 18 | ||||
-rw-r--r-- | src/model/sequence_data_init.h | 6 |
10 files changed, 99 insertions, 93 deletions
diff --git a/src/aggregator.c b/src/aggregator.c index c9a03b5..4c4ca8e 100644 --- a/src/aggregator.c +++ b/src/aggregator.c | |||
@@ -45,8 +45,7 @@ main () | |||
45 | /* | 45 | /* |
46 | * Load the FASTA protein sequence data file. | 46 | * Load the FASTA protein sequence data file. |
47 | */ | 47 | */ |
48 | printf ("Loading \"influenza.faa\" with contents of %s.\n", | 48 | printf ("Loading \"influenza.faa\" with contents of %s.\n", INFLUENZA_FAA); |
49 | INFLUENZA_FAA); | ||
50 | load_influenza_faa (file_id, INFLUENZA_FAA); | 49 | load_influenza_faa (file_id, INFLUENZA_FAA); |
51 | 50 | ||
52 | /* | 51 | /* |
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c index 9a0717b..1df4c8d 100644 --- a/src/assign/assign_protein_type.c +++ b/src/assign/assign_protein_type.c | |||
@@ -70,7 +70,7 @@ assign_protein_type (hid_t file_id) | |||
70 | if (status < 0) | 70 | if (status < 0) |
71 | check_h5_error (status, __FILE__, __LINE__); | 71 | check_h5_error (status, __FILE__, __LINE__); |
72 | 72 | ||
73 | sequence_data* faa_buf = malloc (sizeof(sequence_data) * faa_nrecords); | 73 | sequence_data *faa_buf = malloc (sizeof (sequence_data) * faa_nrecords); |
74 | 74 | ||
75 | size_t faa_size; | 75 | size_t faa_size; |
76 | size_t faa_offset[SEQUENCE_DATA_FIELD_NUM]; | 76 | size_t faa_offset[SEQUENCE_DATA_FIELD_NUM]; |
@@ -86,7 +86,7 @@ assign_protein_type (hid_t file_id) | |||
86 | /* | 86 | /* |
87 | * Allocate memory for the new table. | 87 | * Allocate memory for the new table. |
88 | */ | 88 | */ |
89 | gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); | 89 | gi_type_data *new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); |
90 | if (new_buf == NULL) | 90 | if (new_buf == NULL) |
91 | check_error (__FILE__, __LINE__); | 91 | check_error (__FILE__, __LINE__); |
92 | 92 | ||
@@ -101,7 +101,7 @@ assign_protein_type (hid_t file_id) | |||
101 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; | 101 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; |
102 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); | 102 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); |
103 | 103 | ||
104 | gi_type_data* old_buf = NULL; | 104 | gi_type_data *old_buf = NULL; |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * If the table is already present read the values into memory and | 107 | * If the table is already present read the values into memory and |
@@ -117,10 +117,11 @@ assign_protein_type (hid_t file_id) | |||
117 | if (status < 0) | 117 | if (status < 0) |
118 | check_h5_error (status, __FILE__, __LINE__); | 118 | check_h5_error (status, __FILE__, __LINE__); |
119 | 119 | ||
120 | printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords); | 120 | printf (" Using gi_type_data cache of %i records.\n", |
121 | 121 | (int) gi_nrecords); | |
122 | old_buf = malloc (sizeof(gi_type_data) * gi_nrecords); | 122 | |
123 | 123 | old_buf = malloc (sizeof (gi_type_data) * gi_nrecords); | |
124 | |||
124 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, | 125 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, |
125 | gi_sizes, old_buf); | 126 | gi_sizes, old_buf); |
126 | if (status < 0) | 127 | if (status < 0) |
@@ -129,18 +130,18 @@ assign_protein_type (hid_t file_id) | |||
129 | status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords); | 130 | status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords); |
130 | if (status < 0) | 131 | if (status < 0) |
131 | check_h5_error (status, __FILE__, __LINE__); | 132 | check_h5_error (status, __FILE__, __LINE__); |
132 | 133 | ||
133 | } | 134 | } |
134 | 135 | ||
135 | /* | 136 | /* |
136 | * If the table is not already present create it. | 137 | * If the table is not already present create it. |
137 | */ | 138 | */ |
138 | else | 139 | else |
139 | { | 140 | { |
140 | 141 | ||
141 | printf ("Creating gi_type_data.\n"); | 142 | printf ("Creating gi_type_data.\n"); |
142 | 143 | ||
143 | const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = | 144 | const char *gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = |
144 | GI_TYPE_DATA_FIELD_NAMES; | 145 | GI_TYPE_DATA_FIELD_NAMES; |
145 | 146 | ||
146 | hsize_t chunk_size = 10; | 147 | hsize_t chunk_size = 10; |
@@ -152,8 +153,7 @@ assign_protein_type (hid_t file_id) | |||
152 | GI_TYPE_DATA_FIELD_NUM, 0, | 153 | GI_TYPE_DATA_FIELD_NUM, 0, |
153 | gi_size, gi_type_data_field_names, | 154 | gi_size, gi_type_data_field_names, |
154 | gi_offset, gi_field_type, | 155 | gi_offset, gi_field_type, |
155 | chunk_size, fill_data, compress, | 156 | chunk_size, fill_data, compress, NULL); |
156 | NULL); | ||
157 | if (status < 0) | 157 | if (status < 0) |
158 | check_h5_error (status, __FILE__, __LINE__); | 158 | check_h5_error (status, __FILE__, __LINE__); |
159 | 159 | ||
@@ -169,7 +169,7 @@ assign_protein_type (hid_t file_id) | |||
169 | "Allocation of cache failed."); | 169 | "Allocation of cache failed."); |
170 | ENTRY e, *ep; | 170 | ENTRY e, *ep; |
171 | 171 | ||
172 | for (int i = 0; i < (int)gi_nrecords; i++) | 172 | for (int i = 0; i < (int) gi_nrecords; i++) |
173 | { | 173 | { |
174 | char gi_chr[25]; | 174 | char gi_chr[25]; |
175 | snprintf (gi_chr, 25, "%i", old_buf[i].gi); | 175 | snprintf (gi_chr, 25, "%i", old_buf[i].gi); |
@@ -183,14 +183,14 @@ assign_protein_type (hid_t file_id) | |||
183 | /* | 183 | /* |
184 | * Assign protein types to records for which the field is empty. | 184 | * Assign protein types to records for which the field is empty. |
185 | */ | 185 | */ |
186 | printf ("Records to process: %i\n", (int)faa_nrecords); | 186 | printf ("Records to process: %i\n", (int) faa_nrecords); |
187 | int written = 0; | 187 | int written = 0; |
188 | for (int i = 0; i < (int)faa_nrecords; i++) | 188 | for (int i = 0; i < (int) faa_nrecords; i++) |
189 | { | 189 | { |
190 | new_buf[i].gi = faa_buf[i].gi; | 190 | new_buf[i].gi = faa_buf[i].gi; |
191 | strncpy (new_buf[i].type, "", sizeof (new_buf[i].type)); | 191 | strncpy (new_buf[i].type, "", sizeof (new_buf[i].type)); |
192 | strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein)); | 192 | strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein)); |
193 | 193 | ||
194 | char gi_chr[25]; | 194 | char gi_chr[25]; |
195 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); | 195 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); |
196 | e.key = gi_chr; | 196 | e.key = gi_chr; |
@@ -199,24 +199,23 @@ assign_protein_type (hid_t file_id) | |||
199 | /* | 199 | /* |
200 | * A record was not found in the cache for this gi. | 200 | * A record was not found in the cache for this gi. |
201 | */ | 201 | */ |
202 | if (hsearch_r (e, FIND, &ep, &htab) == 0) | 202 | if (hsearch_r (e, FIND, &ep, &htab) == 0) |
203 | { | 203 | { |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * Read the sequence from the database by GI. | 206 | * Read the sequence from the database by GI. |
207 | */ | 207 | */ |
208 | Int4 sequence_number = readdb_gi2seq (seqdb, faa_buf[i].gi, NULL); | 208 | Int4 sequence_number = readdb_gi2seq (seqdb, faa_buf[i].gi, NULL); |
209 | BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); | 209 | BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); |
210 | if (bsp == NULL) | 210 | if (bsp == NULL) |
211 | { | 211 | { |
212 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | 212 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, |
213 | "Unable to find BLAST record for gi|%i. Ensure " | 213 | "Unable to find BLAST record for gi|%i. Ensure " |
214 | "the BLAST database is up-to-date with the HDF5 " | 214 | "the BLAST database is up-to-date with the HDF5 " |
215 | "record set. See the BLAST formatdb.log file " | 215 | "record set. See the BLAST formatdb.log file " |
216 | "for details.\n", | 216 | "for details.\n", faa_buf[i].gi); |
217 | faa_buf[i].gi); | ||
218 | } | 217 | } |
219 | 218 | ||
220 | SeqAlignPtr seqalign = BioseqBlastEngine (bsp, | 219 | SeqAlignPtr seqalign = BioseqBlastEngine (bsp, |
221 | "blastp", | 220 | "blastp", |
222 | REFDB, | 221 | REFDB, |
@@ -224,19 +223,19 @@ assign_protein_type (hid_t file_id) | |||
224 | NULL, | 223 | NULL, |
225 | &error_returns, | 224 | &error_returns, |
226 | NULL); | 225 | NULL); |
227 | 226 | ||
228 | /* | 227 | /* |
229 | * BLAST reported an error. Write it out and continue processing. | 228 | * BLAST reported an error. Write it out and continue processing. |
230 | */ | 229 | */ |
231 | if (error_returns != NULL) | 230 | if (error_returns != NULL) |
232 | { | 231 | { |
233 | CharPtr msg = BlastErrorToString (error_returns); | 232 | CharPtr msg = BlastErrorToString (error_returns); |
234 | printf ("Warning: An error has been reported by the NCBI Toolkit " | 233 | printf |
235 | "API for sequence gi|%i: %s", | 234 | ("Warning: An error has been reported by the NCBI Toolkit " |
236 | faa_buf[i].gi, msg); | 235 | "API for sequence gi|%i: %s", faa_buf[i].gi, msg); |
237 | free (msg); | 236 | free (msg); |
238 | } | 237 | } |
239 | 238 | ||
240 | /* | 239 | /* |
241 | * A hit was found. Record the first hit as the protein type. | 240 | * A hit was found. Record the first hit as the protein type. |
242 | * Skip the first 4 characters and eat the "lcl|". | 241 | * Skip the first 4 characters and eat the "lcl|". |
@@ -245,18 +244,18 @@ assign_protein_type (hid_t file_id) | |||
245 | { | 244 | { |
246 | Char target_id_buf[BUFFER_LEN + 1]; | 245 | Char target_id_buf[BUFFER_LEN + 1]; |
247 | SeqIdPtr target_id = SeqAlignId (seqalign, 1); | 246 | SeqIdPtr target_id = SeqAlignId (seqalign, 1); |
248 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, | 247 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, |
249 | BUFFER_LEN); | 248 | BUFFER_LEN); |
250 | 249 | ||
251 | // Species Type | 250 | // Species Type |
252 | new_buf[i].type[0] = target_id_buf[4]; | 251 | new_buf[i].type[0] = target_id_buf[4]; |
253 | new_buf[i].type[1] = '\0'; | 252 | new_buf[i].type[1] = '\0'; |
254 | 253 | ||
255 | // Protein Type (Skip the underscore in the string). | 254 | // Protein Type (Skip the underscore in the string). |
256 | strncpy (new_buf[i].protein, &target_id_buf[6], | 255 | strncpy (new_buf[i].protein, &target_id_buf[6], |
257 | sizeof (new_buf[i].protein)); | 256 | sizeof (new_buf[i].protein)); |
258 | } | 257 | } |
259 | 258 | ||
260 | /* | 259 | /* |
261 | * BLAST did not find any hits. | 260 | * BLAST did not find any hits. |
262 | */ | 261 | */ |
@@ -265,73 +264,74 @@ assign_protein_type (hid_t file_id) | |||
265 | printf ("Warning: Unable to identify protein type for sequence " | 264 | printf ("Warning: Unable to identify protein type for sequence " |
266 | "gi|%i\n", faa_buf[i].gi); | 265 | "gi|%i\n", faa_buf[i].gi); |
267 | } | 266 | } |
268 | 267 | ||
269 | /* | 268 | /* |
270 | * Clean up memory for the next iteration. | 269 | * Clean up memory for the next iteration. |
271 | */ | 270 | */ |
272 | seqalign = SeqAlignSetFree (seqalign); | 271 | seqalign = SeqAlignSetFree (seqalign); |
273 | bsp = BioseqFree (bsp); | 272 | bsp = BioseqFree (bsp); |
274 | 273 | ||
275 | } // End existing entry not found. | 274 | } // End existing entry not found. |
276 | 275 | ||
277 | /* | 276 | /* |
278 | * Hash table entry found. Keep the old value. | 277 | * Hash table entry found. Keep the old value. |
279 | */ | 278 | */ |
280 | else | 279 | else |
281 | { | 280 | { |
282 | gi_type_data* old_value = (gi_type_data*)ep->data; | 281 | gi_type_data *old_value = (gi_type_data *) ep->data; |
283 | new_buf[i].gi = old_value->gi; | 282 | new_buf[i].gi = old_value->gi; |
284 | strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type)); | 283 | strncpy (new_buf[i].type, old_value->type, |
285 | strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein)); | 284 | sizeof (new_buf[i].type)); |
285 | strncpy (new_buf[i].protein, old_value->protein, | ||
286 | sizeof (new_buf[i].protein)); | ||
286 | } | 287 | } |
287 | 288 | ||
288 | /* | 289 | /* |
289 | * Write the data out to the file. | 290 | * Write the data out to the file. |
290 | */ | 291 | */ |
291 | if ( (i % 1000 == 0) && (i > 0) ) | 292 | if ((i % 1000 == 0) && (i > 0)) |
292 | { | 293 | { |
293 | status = H5TBappend_records (file_id, "gi_type_data", 1000, | 294 | status = H5TBappend_records (file_id, "gi_type_data", 1000, |
294 | gi_size, gi_offset, gi_sizes, | 295 | gi_size, gi_offset, gi_sizes, |
295 | &new_buf[i-1000]); | 296 | &new_buf[i - 1000]); |
296 | if (status < 0) | 297 | if (status < 0) |
297 | check_h5_error (status, __FILE__, __LINE__); | 298 | check_h5_error (status, __FILE__, __LINE__); |
298 | 299 | ||
299 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | 300 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); |
300 | if (status < 0) | 301 | if (status < 0) |
301 | check_h5_error (status, __FILE__, __LINE__); | 302 | check_h5_error (status, __FILE__, __LINE__); |
302 | 303 | ||
303 | written = i; | 304 | written = i; |
304 | 305 | ||
305 | printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); | 306 | printf ("Processed %i of %i records.\n", i, (int) faa_nrecords); |
306 | } | 307 | } |
307 | 308 | ||
308 | } | 309 | } |
309 | 310 | ||
310 | /* | 311 | /* |
311 | * Write out records from the last bin if it was less than 1000 | 312 | * Write out records from the last bin if it was less than 1000 |
312 | * records in size. | 313 | * records in size. |
313 | */ | 314 | */ |
314 | if ((int)faa_nrecords < 1000) | 315 | if ((int) faa_nrecords < 1000) |
315 | { | 316 | { |
316 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords, | 317 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords, |
317 | gi_size, gi_offset, gi_sizes, | 318 | gi_size, gi_offset, gi_sizes, new_buf); |
318 | new_buf); | ||
319 | } | 319 | } |
320 | 320 | ||
321 | else | 321 | else |
322 | { | 322 | { |
323 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, | 323 | status = |
324 | gi_size, gi_offset, gi_sizes, | 324 | H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, |
325 | &new_buf[written]); | 325 | gi_size, gi_offset, gi_sizes, &new_buf[written]); |
326 | } | 326 | } |
327 | 327 | ||
328 | if (status < 0) | 328 | if (status < 0) |
329 | check_h5_error (status, __FILE__, __LINE__); | 329 | check_h5_error (status, __FILE__, __LINE__); |
330 | 330 | ||
331 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | 331 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); |
332 | if (status < 0) | 332 | if (status < 0) |
333 | check_h5_error (status, __FILE__, __LINE__); | 333 | check_h5_error (status, __FILE__, __LINE__); |
334 | 334 | ||
335 | free (faa_buf); | 335 | free (faa_buf); |
336 | free (old_buf); | 336 | free (old_buf); |
337 | free (new_buf); | 337 | free (new_buf); |
@@ -339,6 +339,6 @@ assign_protein_type (hid_t file_id) | |||
339 | 339 | ||
340 | options = BLASTOptionDelete (options); | 340 | options = BLASTOptionDelete (options); |
341 | readdb_destruct (seqdb); | 341 | readdb_destruct (seqdb); |
342 | 342 | ||
343 | return; | 343 | return; |
344 | } | 344 | } |
diff --git a/src/load/load_influenza_aa_dat.c b/src/load/load_influenza_aa_dat.c index 3826349..b50fcab 100644 --- a/src/load/load_influenza_aa_dat.c +++ b/src/load/load_influenza_aa_dat.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #define NFIELDS (hsize_t) 11 | 15 | #define NFIELDS (hsize_t) 11 |
16 | 16 | ||
17 | void | 17 | void |
18 | load_influenza_aa_dat (hid_t file_id, const char* file_name) | 18 | load_influenza_aa_dat (hid_t file_id, const char *file_name) |
19 | { | 19 | { |
20 | /* | 20 | /* |
21 | * Model the data using native types. | 21 | * Model the data using native types. |
@@ -225,7 +225,8 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name) | |||
225 | if (status < 0) | 225 | if (status < 0) |
226 | check_h5_error (status, __FILE__, __LINE__); | 226 | check_h5_error (status, __FILE__, __LINE__); |
227 | 227 | ||
228 | status = H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords); | 228 | status = |
229 | H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords); | ||
229 | if (status < 0) | 230 | if (status < 0) |
230 | check_h5_error (status, __FILE__, __LINE__); | 231 | check_h5_error (status, __FILE__, __LINE__); |
231 | 232 | ||
@@ -233,7 +234,7 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name) | |||
233 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, | 234 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, |
234 | dst_offset, dst_sizes, &p_data); | 235 | dst_offset, dst_sizes, &p_data); |
235 | if (status < 0) | 236 | if (status < 0) |
236 | check_h5_error (status, __FILE__, __LINE__); | 237 | check_h5_error (status, __FILE__, __LINE__); |
237 | } | 238 | } |
238 | 239 | ||
239 | /* | 240 | /* |
@@ -242,8 +243,10 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name) | |||
242 | else | 243 | else |
243 | { | 244 | { |
244 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, | 245 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, |
245 | "influenza_aa.dat", NFIELDS, 1, dst_size, | 246 | "influenza_aa.dat", NFIELDS, 1, |
246 | field_names, dst_offset, field_type, | 247 | dst_size, |
248 | field_names, dst_offset, | ||
249 | field_type, | ||
247 | chunk_size, fill_data, compress, | 250 | chunk_size, fill_data, compress, |
248 | &p_data); | 251 | &p_data); |
249 | if (status < 0) | 252 | if (status < 0) |
diff --git a/src/load/load_influenza_aa_dat.h b/src/load/load_influenza_aa_dat.h index 97e36f8..c4ce152 100644 --- a/src/load/load_influenza_aa_dat.h +++ b/src/load/load_influenza_aa_dat.h | |||
@@ -7,6 +7,6 @@ | |||
7 | * Load the supplementary protein data from the NCBI influenza_aa.dat | 7 | * Load the supplementary protein data from the NCBI influenza_aa.dat |
8 | * file. | 8 | * file. |
9 | */ | 9 | */ |
10 | void load_influenza_aa_dat (hid_t file_id, const char* file_name); | 10 | void load_influenza_aa_dat (hid_t file_id, const char *file_name); |
11 | 11 | ||
12 | #endif // LOAD_INFLUENZA_AA_DAT_H | 12 | #endif // LOAD_INFLUENZA_AA_DAT_H |
diff --git a/src/load/load_influenza_faa.c b/src/load/load_influenza_faa.c index 04bf05b..80eb3ae 100644 --- a/src/load/load_influenza_faa.c +++ b/src/load/load_influenza_faa.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <stdlib.h> | 8 | #include <stdlib.h> |
9 | 9 | ||
10 | void | 10 | void |
11 | load_influenza_faa (hid_t file_id, const char* file_name) | 11 | load_influenza_faa (hid_t file_id, const char *file_name) |
12 | { | 12 | { |
13 | size_t dst_size; | 13 | size_t dst_size; |
14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; | 14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; |
@@ -16,7 +16,7 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
16 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; | 16 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; |
17 | 17 | ||
18 | sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type); | 18 | sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type); |
19 | 19 | ||
20 | hsize_t chunk_size = 10; | 20 | hsize_t chunk_size = 10; |
21 | int *fill_data = NULL; | 21 | int *fill_data = NULL; |
22 | int compress = 0; | 22 | int compress = 0; |
@@ -31,17 +31,17 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
31 | 31 | ||
32 | while (getline (&line, &len, dat) != -1) | 32 | while (getline (&line, &len, dat) != -1) |
33 | { | 33 | { |
34 | current_line++; | 34 | current_line++; |
35 | 35 | ||
36 | // Header line. | 36 | // Header line. |
37 | if (line[0] == '>') | 37 | if (line[0] == '>') |
38 | { | 38 | { |
39 | char *running = strdup (line); | 39 | char *running = strdup (line); |
40 | char *token = NULL; | 40 | char *token = NULL; |
41 | 41 | ||
42 | // Eat the ">gi". | 42 | // Eat the ">gi". |
43 | strsep (&running, "|"); | 43 | strsep (&running, "|"); |
44 | 44 | ||
45 | // GI value. | 45 | // GI value. |
46 | token = strsep (&running, "|"); | 46 | token = strsep (&running, "|"); |
47 | p_data.gi = atoi (token); | 47 | p_data.gi = atoi (token); |
@@ -50,13 +50,13 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
50 | strsep (&running, "|"); | 50 | strsep (&running, "|"); |
51 | 51 | ||
52 | // GB value. | 52 | // GB value. |
53 | strncpy (p_data.gb, strsep(&running, "|"), sizeof (p_data.gb)); | 53 | strncpy (p_data.gb, strsep (&running, "|"), sizeof (p_data.gb)); |
54 | 54 | ||
55 | // Description value. | 55 | // Description value. |
56 | strncpy (p_data.description, strsep (&running, "|"), | 56 | strncpy (p_data.description, strsep (&running, "|"), |
57 | sizeof (p_data.description)); | 57 | sizeof (p_data.description)); |
58 | 58 | ||
59 | const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = | 59 | const char *sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = |
60 | SEQUENCE_DATA_FIELD_NAMES; | 60 | SEQUENCE_DATA_FIELD_NAMES; |
61 | 61 | ||
62 | if (current_line == 1) | 62 | if (current_line == 1) |
@@ -68,12 +68,14 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
68 | { | 68 | { |
69 | hsize_t nfields = 0; | 69 | hsize_t nfields = 0; |
70 | hsize_t nrecords = 0; | 70 | hsize_t nrecords = 0; |
71 | herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, | 71 | herr_t status = |
72 | &nrecords); | 72 | H5TBget_table_info (file_id, "influenza.faa", &nfields, |
73 | &nrecords); | ||
73 | if (status < 0) | 74 | if (status < 0) |
74 | check_h5_error (status, __FILE__, __LINE__); | 75 | check_h5_error (status, __FILE__, __LINE__); |
75 | 76 | ||
76 | status = H5TBdelete_record (file_id, "influenza.faa", 0, nrecords); | 77 | status = |
78 | H5TBdelete_record (file_id, "influenza.faa", 0, nrecords); | ||
77 | if (status < 0) | 79 | if (status < 0) |
78 | check_h5_error (status, __FILE__, __LINE__); | 80 | check_h5_error (status, __FILE__, __LINE__); |
79 | 81 | ||
@@ -92,9 +94,11 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
92 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | 94 | herr_t status = H5TBmake_table ("influenza.faa", file_id, |
93 | "influenza.faa", | 95 | "influenza.faa", |
94 | SEQUENCE_DATA_FIELD_NUM, 1, | 96 | SEQUENCE_DATA_FIELD_NUM, 1, |
95 | dst_size, sequence_data_field_names, | 97 | dst_size, |
98 | sequence_data_field_names, | ||
96 | dst_offset, field_type, | 99 | dst_offset, field_type, |
97 | chunk_size, fill_data, compress, | 100 | chunk_size, fill_data, |
101 | compress, | ||
98 | &p_data); | 102 | &p_data); |
99 | if (status < 0) | 103 | if (status < 0) |
100 | check_h5_error (status, __FILE__, __LINE__); | 104 | check_h5_error (status, __FILE__, __LINE__); |
@@ -114,7 +118,7 @@ load_influenza_faa (hid_t file_id, const char* file_name) | |||
114 | free (running); | 118 | free (running); |
115 | 119 | ||
116 | } | 120 | } |
117 | 121 | ||
118 | } | 122 | } |
119 | 123 | ||
120 | if (line) | 124 | if (line) |
diff --git a/src/load/load_influenza_faa.h b/src/load/load_influenza_faa.h index 1ad5797..070bdea 100644 --- a/src/load/load_influenza_faa.h +++ b/src/load/load_influenza_faa.h | |||
@@ -6,6 +6,6 @@ | |||
6 | /* | 6 | /* |
7 | * Load the protein sequence data from the NCBI influenza.faa file. | 7 | * Load the protein sequence data from the NCBI influenza.faa file. |
8 | */ | 8 | */ |
9 | void load_influenza_faa (hid_t file_id, const char* file_name); | 9 | void load_influenza_faa (hid_t file_id, const char *file_name); |
10 | 10 | ||
11 | #endif // LOAD_INFLUENZA_FAA_H | 11 | #endif // LOAD_INFLUENZA_FAA_H |
diff --git a/src/model/gi_type_data_init.c b/src/model/gi_type_data_init.c index 4a161c7..54f47a7 100644 --- a/src/model/gi_type_data_init.c +++ b/src/model/gi_type_data_init.c | |||
@@ -7,8 +7,8 @@ | |||
7 | * struct. Perhaps an HDF5 precompiler could do such a thing. | 7 | * struct. Perhaps an HDF5 precompiler could do such a thing. |
8 | */ | 8 | */ |
9 | void | 9 | void |
10 | gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | 10 | gi_type_data_init (size_t * dst_size, size_t * dst_offset, size_t * dst_sizes, |
11 | hid_t *field_type) | 11 | hid_t * field_type) |
12 | { | 12 | { |
13 | *dst_size = sizeof (gi_type_data); | 13 | *dst_size = sizeof (gi_type_data); |
14 | 14 | ||
diff --git a/src/model/gi_type_data_init.h b/src/model/gi_type_data_init.h index 5c45cba..080f035 100644 --- a/src/model/gi_type_data_init.h +++ b/src/model/gi_type_data_init.h | |||
@@ -8,7 +8,7 @@ | |||
8 | * structures are used by the HDF5 API. | 8 | * structures are used by the HDF5 API. |
9 | */ | 9 | */ |
10 | void | 10 | void |
11 | gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | 11 | gi_type_data_init (size_t * dst_size, size_t * dst_offset, size_t * dst_sizes, |
12 | hid_t *field_type); | 12 | hid_t * field_type); |
13 | 13 | ||
14 | #endif // GI_TYPE_DATA_INIT_H | 14 | #endif // GI_TYPE_DATA_INIT_H |
diff --git a/src/model/sequence_data_init.c b/src/model/sequence_data_init.c index f6b3b1f..21881bf 100644 --- a/src/model/sequence_data_init.c +++ b/src/model/sequence_data_init.c | |||
@@ -1,28 +1,28 @@ | |||
1 | #include "sequence_data_init.h" | 1 | #include "sequence_data_init.h" |
2 | #include "sequence_data.h" | 2 | #include "sequence_data.h" |
3 | 3 | ||
4 | void | 4 | void |
5 | sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | 5 | sequence_data_init (size_t * dst_size, size_t * dst_offset, |
6 | hid_t *field_type) | 6 | size_t * dst_sizes, hid_t * field_type) |
7 | { | 7 | { |
8 | *dst_size = sizeof (sequence_data); | 8 | *dst_size = sizeof (sequence_data); |
9 | 9 | ||
10 | dst_offset[0] = HOFFSET (sequence_data, gi); | 10 | dst_offset[0] = HOFFSET (sequence_data, gi); |
11 | dst_offset[1] = HOFFSET (sequence_data, gb); | 11 | dst_offset[1] = HOFFSET (sequence_data, gb); |
12 | dst_offset[2] = HOFFSET (sequence_data, description); | 12 | dst_offset[2] = HOFFSET (sequence_data, description); |
13 | 13 | ||
14 | sequence_data dst_buf[1]; | 14 | sequence_data dst_buf[1]; |
15 | 15 | ||
16 | dst_sizes[0] = sizeof (dst_buf[0].gi); | 16 | dst_sizes[0] = sizeof (dst_buf[0].gi); |
17 | dst_sizes[1] = sizeof (dst_buf[0].gb); | 17 | dst_sizes[1] = sizeof (dst_buf[0].gb); |
18 | dst_sizes[2] = sizeof (dst_buf[0].description); | 18 | dst_sizes[2] = sizeof (dst_buf[0].description); |
19 | 19 | ||
20 | field_type[0] = H5T_NATIVE_INT; | 20 | field_type[0] = H5T_NATIVE_INT; |
21 | 21 | ||
22 | hid_t gb_type = H5Tcopy (H5T_C_S1); | 22 | hid_t gb_type = H5Tcopy (H5T_C_S1); |
23 | H5Tset_size (gb_type, 9); | 23 | H5Tset_size (gb_type, 9); |
24 | field_type[1] = gb_type; | 24 | field_type[1] = gb_type; |
25 | 25 | ||
26 | hid_t description_type = H5Tcopy (H5T_C_S1); | 26 | hid_t description_type = H5Tcopy (H5T_C_S1); |
27 | H5Tset_size (description_type, 196); | 27 | H5Tset_size (description_type, 196); |
28 | field_type[2] = description_type; | 28 | field_type[2] = description_type; |
diff --git a/src/model/sequence_data_init.h b/src/model/sequence_data_init.h index c87e7e6..1a2c75c 100644 --- a/src/model/sequence_data_init.h +++ b/src/model/sequence_data_init.h | |||
@@ -7,8 +7,8 @@ | |||
7 | * Initialize the structures describing sequence_data. These | 7 | * Initialize the structures describing sequence_data. These |
8 | * descriptive structures are used by the HDF5 API. | 8 | * descriptive structures are used by the HDF5 API. |
9 | */ | 9 | */ |
10 | void | 10 | void |
11 | sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | 11 | sequence_data_init (size_t * dst_size, size_t * dst_offset, |
12 | hid_t *field_type); | 12 | size_t * dst_sizes, hid_t * field_type); |
13 | 13 | ||
14 | #endif // SEQUENCE_DATA_INIT_H | 14 | #endif // SEQUENCE_DATA_INIT_H |