-rw-r--r-- | src/aggregator.c | 34 | ||||
-rw-r--r-- | src/assign/assign_protein_type.c | 141 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.c | 53 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.h | 2 | ||||
-rw-r--r-- | src/load/load_influenza_faa.c | 53 | ||||
-rw-r--r-- | src/load/load_influenza_faa.h | 2 | ||||
-rw-r--r-- | src/updator.c | 2 |
7 files changed, 201 insertions, 86 deletions
diff --git a/src/aggregator.c b/src/aggregator.c index c00d912..c9a03b5 100644 --- a/src/aggregator.c +++ b/src/aggregator.c | |||
@@ -6,28 +6,48 @@ | |||
6 | #include "error/check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | #include "load/load_influenza_aa_dat.h" | 7 | #include "load/load_influenza_aa_dat.h" |
8 | #include "load/load_influenza_faa.h" | 8 | #include "load/load_influenza_faa.h" |
9 | #include <stdio.h> | ||
9 | 10 | ||
10 | #define FILE "influenza.h5" | 11 | #define H5FILE "influenza.h5" |
12 | #define INFLUENZA_AA_DAT "/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat" | ||
13 | #define INFLUENZA_FAA "/home/don/exp004/genomes/INFLUENZA/influenza.faa" | ||
11 | 14 | ||
12 | int | 15 | int |
13 | main () | 16 | main () |
14 | { | 17 | { |
15 | /* | 18 | /* |
16 | * Create the HDF5 file. | 19 | * Create a new HDF5 file if it does not already exist. If an |
20 | * existing file is found then open it. | ||
17 | */ | 21 | */ |
18 | hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); | 22 | hid_t file_id = 0; |
19 | if (file_id < 0) | 23 | FILE *f = fopen (H5FILE, "r+"); |
20 | check_h5_error (file_id, __FILE__, __LINE__); | 24 | if (f == NULL) |
25 | { | ||
26 | file_id = H5Fcreate (H5FILE, H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT); | ||
27 | if (file_id < 0) | ||
28 | check_h5_error (file_id, __FILE__, __LINE__); | ||
29 | } | ||
30 | else | ||
31 | { | ||
32 | fclose (f); | ||
33 | file_id = H5Fopen (H5FILE, H5F_ACC_RDWR, H5P_DEFAULT); | ||
34 | if (file_id < 0) | ||
35 | check_h5_error (file_id, __FILE__, __LINE__); | ||
36 | } | ||
21 | 37 | ||
22 | /* | 38 | /* |
23 | * Load the supplementary protein data file. | 39 | * Load the supplementary protein data file. |
24 | */ | 40 | */ |
25 | load_influenza_aa_dat (file_id); | 41 | printf ("Loading \"influenza_aa.dat\" with contents of %s.\n", |
42 | INFLUENZA_AA_DAT); | ||
43 | load_influenza_aa_dat (file_id, INFLUENZA_AA_DAT); | ||
26 | 44 | ||
27 | /* | 45 | /* |
28 | * Load the FASTA protein sequence data file. | 46 | * Load the FASTA protein sequence data file. |
29 | */ | 47 | */ |
30 | load_influenza_faa (file_id); | 48 | printf ("Loading \"influenza.faa\" with contents of %s.\n", |
49 | INFLUENZA_FAA); | ||
50 | load_influenza_faa (file_id, INFLUENZA_FAA); | ||
31 | 51 | ||
32 | /* | 52 | /* |
33 | * Close the HDF5 file. | 53 | * Close the HDF5 file. |
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c index 73685bb..3947800 100644 --- a/src/assign/assign_protein_type.c +++ b/src/assign/assign_protein_type.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #define _GNU_SOURCE | 1 | #define _GNU_SOURCE |
2 | #include "assign_protein_type.h" | 2 | #include "assign_protein_type.h" |
3 | #include "error/check_error.h" | ||
3 | #include "error/check_h5_error.h" | 4 | #include "error/check_h5_error.h" |
4 | #include "error/check_ncbi_error.h" | 5 | #include "error/check_ncbi_error.h" |
5 | #include "model/gi_type_data.h" | 6 | #include "model/gi_type_data.h" |
@@ -84,6 +85,13 @@ assign_protein_type (hid_t file_id) | |||
84 | check_h5_error (status, __FILE__, __LINE__); | 85 | check_h5_error (status, __FILE__, __LINE__); |
85 | 86 | ||
86 | /* | 87 | /* |
88 | * Allocate memory for the new table. | ||
89 | */ | ||
90 | gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); | ||
91 | if (new_buf == NULL) | ||
92 | check_error (__FILE__, __LINE__); | ||
93 | |||
94 | /* | ||
87 | * Read the data from HDF5 gi_type_data. | 95 | * Read the data from HDF5 gi_type_data. |
88 | */ | 96 | */ |
89 | hsize_t gi_nfields = 0; | 97 | hsize_t gi_nfields = 0; |
@@ -94,8 +102,12 @@ assign_protein_type (hid_t file_id) | |||
94 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; | 102 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; |
95 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); | 103 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); |
96 | 104 | ||
97 | gi_type_data* gi_buf = NULL; | 105 | gi_type_data* old_buf = NULL; |
98 | 106 | ||
107 | /* | ||
108 | * If the table is already present read the values into memory and | ||
109 | * then clear the table. | ||
110 | */ | ||
99 | if (H5LTfind_dataset (file_id, "gi_type_data") == 1) | 111 | if (H5LTfind_dataset (file_id, "gi_type_data") == 1) |
100 | { | 112 | { |
101 | 113 | ||
@@ -105,22 +117,30 @@ assign_protein_type (hid_t file_id) | |||
105 | &gi_nrecords); | 117 | &gi_nrecords); |
106 | if (status < 0) | 118 | if (status < 0) |
107 | check_h5_error (status, __FILE__, __LINE__); | 119 | check_h5_error (status, __FILE__, __LINE__); |
120 | |||
121 | printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords); | ||
108 | 122 | ||
109 | gi_buf = malloc (sizeof(gi_type_data) * gi_nrecords); | 123 | old_buf = malloc (sizeof(gi_type_data) * gi_nrecords); |
110 | 124 | ||
111 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, | 125 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, |
112 | gi_sizes, gi_buf); | 126 | gi_sizes, old_buf); |
127 | if (status < 0) | ||
128 | check_h5_error (status, __FILE__, __LINE__); | ||
129 | |||
130 | status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords); | ||
113 | if (status < 0) | 131 | if (status < 0) |
114 | check_h5_error (status, __FILE__, __LINE__); | 132 | check_h5_error (status, __FILE__, __LINE__); |
115 | 133 | ||
116 | } | 134 | } |
135 | |||
136 | /* | ||
137 | * If the table is not already present create it. | ||
138 | */ | ||
117 | else | 139 | else |
118 | { | 140 | { |
119 | 141 | ||
120 | printf ("Creating gi_type_data.\n"); | 142 | printf ("Creating gi_type_data.\n"); |
121 | 143 | ||
122 | gi_buf = malloc (sizeof(gi_type_data) * faa_nrecords); | ||
123 | |||
124 | const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = | 144 | const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = |
125 | GI_TYPE_DATA_FIELD_NAMES; | 145 | GI_TYPE_DATA_FIELD_NAMES; |
126 | 146 | ||
@@ -130,7 +150,7 @@ assign_protein_type (hid_t file_id) | |||
130 | 150 | ||
131 | status = H5TBmake_table ("gi_type_data", file_id, | 151 | status = H5TBmake_table ("gi_type_data", file_id, |
132 | "gi_type_data", | 152 | "gi_type_data", |
133 | GI_TYPE_DATA_FIELD_NUM, faa_nrecords, | 153 | GI_TYPE_DATA_FIELD_NUM, 0, |
134 | gi_size, gi_type_data_field_names, | 154 | gi_size, gi_type_data_field_names, |
135 | gi_offset, gi_field_type, | 155 | gi_offset, gi_field_type, |
136 | chunk_size, fill_data, compress, | 156 | chunk_size, fill_data, compress, |
@@ -140,17 +160,22 @@ assign_protein_type (hid_t file_id) | |||
140 | 160 | ||
141 | } | 161 | } |
142 | 162 | ||
163 | /* | ||
164 | * Copy the contents of the old table into a hash. | ||
165 | */ | ||
143 | struct hsearch_data htab; | 166 | struct hsearch_data htab; |
144 | bzero (&htab, sizeof (htab)); | 167 | bzero (&htab, sizeof (htab)); |
145 | hcreate_r (gi_nrecords * 2, &htab); | 168 | if (hcreate_r (gi_nrecords * 2, &htab) == 0) |
169 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | ||
170 | "Allocation of cache failed."); | ||
146 | ENTRY e, *ep; | 171 | ENTRY e, *ep; |
147 | 172 | ||
148 | for (int i = 0; i < gi_nrecords; i++) | 173 | for (int i = 0; i < (int)gi_nrecords; i++) |
149 | { | 174 | { |
150 | char gi_chr[25]; | 175 | char gi_chr[25]; |
151 | snprintf (gi_chr, 25, "%i", gi_buf[i].gi); | 176 | snprintf (gi_chr, 25, "%i", old_buf[i].gi); |
152 | e.key = gi_chr; | 177 | e.key = strdup (gi_chr); |
153 | e.data = &gi_buf[i]; | 178 | e.data = &old_buf[i]; |
154 | if (hsearch_r (e, ENTER, &ep, &htab) == 0) | 179 | if (hsearch_r (e, ENTER, &ep, &htab) == 0) |
155 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | 180 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, |
156 | "Allocation failed."); | 181 | "Allocation failed."); |
@@ -160,19 +185,23 @@ assign_protein_type (hid_t file_id) | |||
160 | * Assign protein types to records for which the field is empty. | 185 | * Assign protein types to records for which the field is empty. |
161 | */ | 186 | */ |
162 | printf ("Records to process: %i\n", (int)faa_nrecords); | 187 | printf ("Records to process: %i\n", (int)faa_nrecords); |
163 | bool updates_pending = false; | 188 | int written = 0; |
164 | for (int i = 0; i < faa_nrecords; i++) | 189 | for (int i = 0; i < (int)faa_nrecords; i++) |
165 | { | 190 | { |
191 | new_buf[i].gi = faa_buf[i].gi; | ||
192 | strncpy (new_buf[i].type, "", sizeof (new_buf[i].type)); | ||
193 | strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein)); | ||
166 | 194 | ||
167 | char gi_chr[25]; | 195 | char gi_chr[25]; |
168 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); | 196 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); |
169 | e.key = gi_chr; | 197 | e.key = gi_chr; |
198 | e.data = NULL; | ||
199 | |||
200 | /* | ||
201 | * A record was not found in the cache for this gi. | ||
202 | */ | ||
170 | if (hsearch_r (e, FIND, &ep, &htab) == 0) | 203 | if (hsearch_r (e, FIND, &ep, &htab) == 0) |
171 | { | 204 | { |
172 | |||
173 | gi_buf[i].gi = faa_buf[i].gi; | ||
174 | gi_buf[i].type[0] = '\0'; | ||
175 | gi_buf[i].protein[0] = '\0'; | ||
176 | 205 | ||
177 | /* | 206 | /* |
178 | * Read the sequence from the database by GI. | 207 | * Read the sequence from the database by GI. |
@@ -202,7 +231,7 @@ assign_protein_type (hid_t file_id) | |||
202 | */ | 231 | */ |
203 | if (error_returns != NULL) | 232 | if (error_returns != NULL) |
204 | { | 233 | { |
205 | char *msg = BlastErrorToString (error_returns); | 234 | CharPtr msg = BlastErrorToString (error_returns); |
206 | printf ("Warning: An error has been reported by the NCBI Toolkit " | 235 | printf ("Warning: An error has been reported by the NCBI Toolkit " |
207 | "API for sequence gi|%i: %s", | 236 | "API for sequence gi|%i: %s", |
208 | faa_buf[i].gi, msg); | 237 | faa_buf[i].gi, msg); |
@@ -221,14 +250,12 @@ assign_protein_type (hid_t file_id) | |||
221 | BUFFER_LEN); | 250 | BUFFER_LEN); |
222 | 251 | ||
223 | // Species Type | 252 | // Species Type |
224 | gi_buf[i].type[0] = target_id_buf[4]; | 253 | new_buf[i].type[0] = target_id_buf[4]; |
225 | gi_buf[i].type[1] = '\0'; | 254 | new_buf[i].type[1] = '\0'; |
226 | 255 | ||
227 | // Protein Type | 256 | // Protein Type |
228 | strncpy (gi_buf[i].protein, &target_id_buf[6], | 257 | strncpy (new_buf[i].protein, &target_id_buf[6], |
229 | sizeof (gi_buf[i].protein)); | 258 | sizeof (new_buf[i].protein)); |
230 | |||
231 | updates_pending = true; | ||
232 | } | 259 | } |
233 | 260 | ||
234 | /* | 261 | /* |
@@ -246,16 +273,27 @@ assign_protein_type (hid_t file_id) | |||
246 | seqalign = SeqAlignSetFree (seqalign); | 273 | seqalign = SeqAlignSetFree (seqalign); |
247 | bsp = BioseqFree (bsp); | 274 | bsp = BioseqFree (bsp); |
248 | 275 | ||
276 | } // End existing entry not found. | ||
277 | |||
278 | /* | ||
279 | * Hash table entry found. Keep the old value. | ||
280 | */ | ||
281 | else | ||
282 | { | ||
283 | gi_type_data* old_value = (gi_type_data*)ep->data; | ||
284 | new_buf[i].gi = old_value->gi; | ||
285 | strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type)); | ||
286 | strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein)); | ||
249 | } | 287 | } |
250 | 288 | ||
251 | /* | 289 | /* |
252 | * Write the data out to the file. | 290 | * Write the data out to the file. |
253 | */ | 291 | */ |
254 | if ( (i % 1000 == 0) && (i > 0) && updates_pending) | 292 | if ( (i % 1000 == 0) && (i > 0) ) |
255 | { | 293 | { |
256 | status = H5TBwrite_records (file_id, "gi_type_data", i - 1000, 1000, | 294 | status = H5TBappend_records (file_id, "gi_type_data", 1000, |
257 | gi_size, gi_offset, gi_sizes, | 295 | gi_size, gi_offset, gi_sizes, |
258 | &gi_buf[i-1000]); | 296 | &new_buf[i-1000]); |
259 | if (status < 0) | 297 | if (status < 0) |
260 | check_h5_error (status, __FILE__, __LINE__); | 298 | check_h5_error (status, __FILE__, __LINE__); |
261 | 299 | ||
@@ -263,7 +301,7 @@ assign_protein_type (hid_t file_id) | |||
263 | if (status < 0) | 301 | if (status < 0) |
264 | check_h5_error (status, __FILE__, __LINE__); | 302 | check_h5_error (status, __FILE__, __LINE__); |
265 | 303 | ||
266 | updates_pending = false; | 304 | written = i; |
267 | 305 | ||
268 | printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); | 306 | printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); |
269 | } | 307 | } |
@@ -274,37 +312,34 @@ assign_protein_type (hid_t file_id) | |||
274 | * Write out records from the last bin if it was less than 1000 | 312 | * Write out records from the last bin if it was less than 1000 |
275 | * records in size. | 313 | * records in size. |
276 | */ | 314 | */ |
277 | if (updates_pending) | 315 | if ((int)faa_nrecords < 1000) |
278 | { | 316 | { |
279 | /* | 317 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords, |
280 | if ((int)faa_nrecords < 1000) | 318 | gi_size, gi_offset, gi_sizes, |
281 | { | 319 | new_buf); |
282 | status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords, | 320 | } |
283 | dst_size, dst_offset, dst_sizes, | 321 | |
284 | dst_buf); | 322 | else |
285 | } | 323 | { |
286 | else | 324 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, |
287 | { | 325 | gi_size, gi_offset, gi_sizes, |
288 | status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000, | 326 | &new_buf[written]); |
289 | dst_size, dst_offset, dst_sizes, | ||
290 | &dst_buf[nrecords-1000]); | ||
291 | } | ||
292 | if (status < 0) | ||
293 | check_h5_error (status, __FILE__, __LINE__); | ||
294 | |||
295 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
296 | if (status < 0) | ||
297 | check_h5_error (status, __FILE__, __LINE__); | ||
298 | |||
299 | updates_pending = false; | ||
300 | */ | ||
301 | } | 327 | } |
328 | |||
329 | if (status < 0) | ||
330 | check_h5_error (status, __FILE__, __LINE__); | ||
331 | |||
332 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
333 | if (status < 0) | ||
334 | check_h5_error (status, __FILE__, __LINE__); | ||
302 | 335 | ||
303 | free (faa_buf); | 336 | free (faa_buf); |
304 | free (gi_buf); | 337 | free (old_buf); |
338 | free (new_buf); | ||
305 | hdestroy_r (&htab); | 339 | hdestroy_r (&htab); |
306 | 340 | ||
307 | options = BLASTOptionDelete (options); | 341 | options = BLASTOptionDelete (options); |
342 | readdb_destruct (seqdb); | ||
308 | 343 | ||
309 | return; | 344 | return; |
310 | } | 345 | } |
diff --git a/src/load/load_influenza_aa_dat.c b/src/load/load_influenza_aa_dat.c index 8bf47aa..3826349 100644 --- a/src/load/load_influenza_aa_dat.c +++ b/src/load/load_influenza_aa_dat.c | |||
@@ -13,10 +13,9 @@ | |||
13 | #include <stdlib.h> | 13 | #include <stdlib.h> |
14 | 14 | ||
15 | #define NFIELDS (hsize_t) 11 | 15 | #define NFIELDS (hsize_t) 11 |
16 | #define TABLE_NAME "influenza_aa.dat" | ||
17 | 16 | ||
18 | void | 17 | void |
19 | load_influenza_aa_dat (hid_t file_id) | 18 | load_influenza_aa_dat (hid_t file_id, const char* file_name) |
20 | { | 19 | { |
21 | /* | 20 | /* |
22 | * Model the data using native types. | 21 | * Model the data using native types. |
@@ -145,8 +144,7 @@ load_influenza_aa_dat (hid_t file_id) | |||
145 | * Insert the records. | 144 | * Insert the records. |
146 | */ | 145 | */ |
147 | supplementary_data p_data; | 146 | supplementary_data p_data; |
148 | FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", | 147 | FILE *dat = fopen (file_name, "r"); |
149 | "r"); | ||
150 | if (dat == NULL) | 148 | if (dat == NULL) |
151 | check_error (__FILE__, __LINE__); | 149 | check_error (__FILE__, __LINE__); |
152 | char *line = NULL; | 150 | char *line = NULL; |
@@ -214,18 +212,49 @@ load_influenza_aa_dat (hid_t file_id) | |||
214 | 212 | ||
215 | if (current_line == 1) | 213 | if (current_line == 1) |
216 | { | 214 | { |
217 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, | 215 | |
218 | TABLE_NAME, NFIELDS, 1, dst_size, | 216 | /* |
219 | field_names, dst_offset, field_type, | 217 | * Dataset already exists. Purge it. |
220 | chunk_size, fill_data, compress, | 218 | */ |
221 | &p_data); | 219 | if (H5LTfind_dataset (file_id, "influenza_aa.dat") == 1) |
222 | if (status < 0) | 220 | { |
223 | check_h5_error (status, __FILE__, __LINE__); | 221 | hsize_t nfields = 0; |
222 | hsize_t nrecords = 0; | ||
223 | herr_t status = H5TBget_table_info (file_id, "influenza_aa.dat", | ||
224 | &nfields, &nrecords); | ||
225 | if (status < 0) | ||
226 | check_h5_error (status, __FILE__, __LINE__); | ||
227 | |||
228 | status = H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords); | ||
229 | if (status < 0) | ||
230 | check_h5_error (status, __FILE__, __LINE__); | ||
231 | |||
232 | status = | ||
233 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, | ||
234 | dst_offset, dst_sizes, &p_data); | ||
235 | if (status < 0) | ||
236 | check_h5_error (status, __FILE__, __LINE__); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Dataset does not exist. Create it. | ||
241 | */ | ||
242 | else | ||
243 | { | ||
244 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, | ||
245 | "influenza_aa.dat", NFIELDS, 1, dst_size, | ||
246 | field_names, dst_offset, field_type, | ||
247 | chunk_size, fill_data, compress, | ||
248 | &p_data); | ||
249 | if (status < 0) | ||
250 | check_h5_error (status, __FILE__, __LINE__); | ||
251 | } | ||
224 | } | 252 | } |
253 | |||
225 | else | 254 | else |
226 | { | 255 | { |
227 | herr_t status = | 256 | herr_t status = |
228 | H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, | 257 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, |
229 | dst_offset, dst_sizes, &p_data); | 258 | dst_offset, dst_sizes, &p_data); |
230 | if (status < 0) | 259 | if (status < 0) |
231 | check_h5_error (status, __FILE__, __LINE__); | 260 | check_h5_error (status, __FILE__, __LINE__); |
diff --git a/src/load/load_influenza_aa_dat.h b/src/load/load_influenza_aa_dat.h index f6c60be..97e36f8 100644 --- a/src/load/load_influenza_aa_dat.h +++ b/src/load/load_influenza_aa_dat.h | |||
@@ -7,6 +7,6 @@ | |||
7 | * Load the supplementary protein data from the NCBI influenza_aa.dat | 7 | * Load the supplementary protein data from the NCBI influenza_aa.dat |
8 | * file. | 8 | * file. |
9 | */ | 9 | */ |
10 | void load_influenza_aa_dat (hid_t file_id); | 10 | void load_influenza_aa_dat (hid_t file_id, const char* file_name); |
11 | 11 | ||
12 | #endif // LOAD_INFLUENZA_AA_DAT_H | 12 | #endif // LOAD_INFLUENZA_AA_DAT_H |
diff --git a/src/load/load_influenza_faa.c b/src/load/load_influenza_faa.c index a217989..04bf05b 100644 --- a/src/load/load_influenza_faa.c +++ b/src/load/load_influenza_faa.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <stdlib.h> | 8 | #include <stdlib.h> |
9 | 9 | ||
10 | void | 10 | void |
11 | load_influenza_faa (hid_t file_id) | 11 | load_influenza_faa (hid_t file_id, const char* file_name) |
12 | { | 12 | { |
13 | size_t dst_size; | 13 | size_t dst_size; |
14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; | 14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; |
@@ -22,8 +22,7 @@ load_influenza_faa (hid_t file_id) | |||
22 | int compress = 0; | 22 | int compress = 0; |
23 | 23 | ||
24 | sequence_data p_data; | 24 | sequence_data p_data; |
25 | FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa", | 25 | FILE *dat = fopen (file_name, "r"); |
26 | "r"); | ||
27 | if (dat == NULL) | 26 | if (dat == NULL) |
28 | check_error (__FILE__, __LINE__); | 27 | check_error (__FILE__, __LINE__); |
29 | char *line = NULL; | 28 | char *line = NULL; |
@@ -62,16 +61,46 @@ load_influenza_faa (hid_t file_id) | |||
62 | 61 | ||
63 | if (current_line == 1) | 62 | if (current_line == 1) |
64 | { | 63 | { |
65 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | 64 | /* |
66 | "influenza.faa", | 65 | * Dataset already exists. Purge it. |
67 | SEQUENCE_DATA_FIELD_NUM, 1, | 66 | */ |
68 | dst_size, sequence_data_field_names, | 67 | if (H5LTfind_dataset (file_id, "influenza.faa") == 1) |
69 | dst_offset, field_type, | 68 | { |
70 | chunk_size, fill_data, compress, | 69 | hsize_t nfields = 0; |
71 | &p_data); | 70 | hsize_t nrecords = 0; |
72 | if (status < 0) | 71 | herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, |
73 | check_h5_error (status, __FILE__, __LINE__); | 72 | &nrecords); |
73 | if (status < 0) | ||
74 | check_h5_error (status, __FILE__, __LINE__); | ||
75 | |||
76 | status = H5TBdelete_record (file_id, "influenza.faa", 0, nrecords); | ||
77 | if (status < 0) | ||
78 | check_h5_error (status, __FILE__, __LINE__); | ||
79 | |||
80 | status = | ||
81 | H5TBappend_records (file_id, "influenza.faa", 1, dst_size, | ||
82 | dst_offset, dst_sizes, &p_data); | ||
83 | if (status < 0) | ||
84 | check_h5_error (status, __FILE__, __LINE__); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Dataset does not exist. Create it. | ||
89 | */ | ||
90 | else | ||
91 | { | ||
92 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | ||
93 | "influenza.faa", | ||
94 | SEQUENCE_DATA_FIELD_NUM, 1, | ||
95 | dst_size, sequence_data_field_names, | ||
96 | dst_offset, field_type, | ||
97 | chunk_size, fill_data, compress, | ||
98 | &p_data); | ||
99 | if (status < 0) | ||
100 | check_h5_error (status, __FILE__, __LINE__); | ||
101 | } | ||
74 | } | 102 | } |
103 | |||
75 | else | 104 | else |
76 | { | 105 | { |
77 | herr_t status = | 106 | herr_t status = |
diff --git a/src/load/load_influenza_faa.h b/src/load/load_influenza_faa.h index 569c411..1ad5797 100644 --- a/src/load/load_influenza_faa.h +++ b/src/load/load_influenza_faa.h | |||
@@ -6,6 +6,6 @@ | |||
6 | /* | 6 | /* |
7 | * Load the protein sequence data from the NCBI influenza.faa file. | 7 | * Load the protein sequence data from the NCBI influenza.faa file. |
8 | */ | 8 | */ |
9 | void load_influenza_faa (hid_t file_id); | 9 | void load_influenza_faa (hid_t file_id, const char* file_name); |
10 | 10 | ||
11 | #endif // LOAD_INFLUENZA_FAA_H | 11 | #endif // LOAD_INFLUENZA_FAA_H |
diff --git a/src/updator.c b/src/updator.c index 591d2f6..9a5ad18 100644 --- a/src/updator.c +++ b/src/updator.c | |||
@@ -4,6 +4,8 @@ | |||
4 | 4 | ||
5 | #include "assign/assign_protein_type.h" | 5 | #include "assign/assign_protein_type.h" |
6 | #include "error/check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | #include <stdio.h> | ||
8 | #include <signal.h> | ||
7 | 9 | ||
8 | #define FILE "influenza.h5" | 10 | #define FILE "influenza.h5" |
9 | 11 | ||