author | Don Pellegrino <don@drexel.edu> | 2010-01-19 20:27:10 (GMT) |
---|---|---|
committer | Don Pellegrino <don@drexel.edu> | 2010-01-19 20:27:10 (GMT) |
commit | 9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8 (patch) (unidiff) | |
tree | 05ad69a1255a77ca0739420345a540a03cd304e1 | |
parent | 9cbe92a3f1ea871cb23818e32a1b0f85734d9453 (diff) | |
download | exp007-9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8.zip exp007-9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8.tar.gz exp007-9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8.tar.bz2 |
The program now reuses an existing HDF5 file instead of always creating
a new one. The load functions check for their target tables: an existing
table is purged before loading, and a missing table is created. The
assign function reuses existing type values, looked up by gi, and writes
a new table that is aligned by gi with influenza.faa.
-rw-r--r-- | src/aggregator.c | 34 | ||||
-rw-r--r-- | src/assign/assign_protein_type.c | 141 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.c | 53 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.h | 2 | ||||
-rw-r--r-- | src/load/load_influenza_faa.c | 53 | ||||
-rw-r--r-- | src/load/load_influenza_faa.h | 2 | ||||
-rw-r--r-- | src/updator.c | 2 |
7 files changed, 201 insertions, 86 deletions
diff --git a/src/aggregator.c b/src/aggregator.c index c00d912..c9a03b5 100644 --- a/src/aggregator.c +++ b/src/aggregator.c | |||
@@ -6,28 +6,48 @@ | |||
6 | #include "error/check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | #include "load/load_influenza_aa_dat.h" | 7 | #include "load/load_influenza_aa_dat.h" |
8 | #include "load/load_influenza_faa.h" | 8 | #include "load/load_influenza_faa.h" |
9 | #include <stdio.h> | ||
9 | 10 | ||
10 | #define FILE "influenza.h5" | 11 | #define H5FILE "influenza.h5" |
12 | #define INFLUENZA_AA_DAT "/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat" | ||
13 | #define INFLUENZA_FAA "/home/don/exp004/genomes/INFLUENZA/influenza.faa" | ||
11 | 14 | ||
12 | int | 15 | int |
13 | main () | 16 | main () |
14 | { | 17 | { |
15 | /* | 18 | /* |
16 | * Create the HDF5 file. | 19 | * Create a new HDF5 file if it does not already exist. If an |
20 | * existing file is found then open it. | ||
17 | */ | 21 | */ |
18 | hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); | 22 | hid_t file_id = 0; |
19 | if (file_id < 0) | 23 | FILE *f = fopen (H5FILE, "r+"); |
20 | check_h5_error (file_id, __FILE__, __LINE__); | 24 | if (f == NULL) |
25 | { | ||
26 | file_id = H5Fcreate (H5FILE, H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT); | ||
27 | if (file_id < 0) | ||
28 | check_h5_error (file_id, __FILE__, __LINE__); | ||
29 | } | ||
30 | else | ||
31 | { | ||
32 | fclose (f); | ||
33 | file_id = H5Fopen (H5FILE, H5F_ACC_RDWR, H5P_DEFAULT); | ||
34 | if (file_id < 0) | ||
35 | check_h5_error (file_id, __FILE__, __LINE__); | ||
36 | } | ||
21 | 37 | ||
22 | /* | 38 | /* |
23 | * Load the supplementary protein data file. | 39 | * Load the supplementary protein data file. |
24 | */ | 40 | */ |
25 | load_influenza_aa_dat (file_id); | 41 | printf ("Loading \"influenza_aa.dat\" with contents of %s.\n", |
42 | INFLUENZA_AA_DAT); | ||
43 | load_influenza_aa_dat (file_id, INFLUENZA_AA_DAT); | ||
26 | 44 | ||
27 | /* | 45 | /* |
28 | * Load the FASTA protein sequence data file. | 46 | * Load the FASTA protein sequence data file. |
29 | */ | 47 | */ |
30 | load_influenza_faa (file_id); | 48 | printf ("Loading \"influenza.faa\" with contents of %s.\n", |
49 | INFLUENZA_FAA); | ||
50 | load_influenza_faa (file_id, INFLUENZA_FAA); | ||
31 | 51 | ||
32 | /* | 52 | /* |
33 | * Close the HDF5 file. | 53 | * Close the HDF5 file. |
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c index 73685bb..3947800 100644 --- a/src/assign/assign_protein_type.c +++ b/src/assign/assign_protein_type.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #define _GNU_SOURCE | 1 | #define _GNU_SOURCE |
2 | #include "assign_protein_type.h" | 2 | #include "assign_protein_type.h" |
3 | #include "error/check_error.h" | ||
3 | #include "error/check_h5_error.h" | 4 | #include "error/check_h5_error.h" |
4 | #include "error/check_ncbi_error.h" | 5 | #include "error/check_ncbi_error.h" |
5 | #include "model/gi_type_data.h" | 6 | #include "model/gi_type_data.h" |
@@ -84,6 +85,13 @@ assign_protein_type (hid_t file_id) | |||
84 | check_h5_error (status, __FILE__, __LINE__); | 85 | check_h5_error (status, __FILE__, __LINE__); |
85 | 86 | ||
86 | /* | 87 | /* |
88 | * Allocate memory for the new table. | ||
89 | */ | ||
90 | gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); | ||
91 | if (new_buf == NULL) | ||
92 | check_error (__FILE__, __LINE__); | ||
93 | |||
94 | /* | ||
87 | * Read the data from HDF5 gi_type_data. | 95 | * Read the data from HDF5 gi_type_data. |
88 | */ | 96 | */ |
89 | hsize_t gi_nfields = 0; | 97 | hsize_t gi_nfields = 0; |
@@ -94,8 +102,12 @@ assign_protein_type (hid_t file_id) | |||
94 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; | 102 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; |
95 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); | 103 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); |
96 | 104 | ||
97 | gi_type_data* gi_buf = NULL; | 105 | gi_type_data* old_buf = NULL; |
98 | 106 | ||
107 | /* | ||
108 | * If the table is already present read the values into memory and | ||
109 | * then clear the table. | ||
110 | */ | ||
99 | if (H5LTfind_dataset (file_id, "gi_type_data") == 1) | 111 | if (H5LTfind_dataset (file_id, "gi_type_data") == 1) |
100 | { | 112 | { |
101 | 113 | ||
@@ -105,22 +117,30 @@ assign_protein_type (hid_t file_id) | |||
105 | &gi_nrecords); | 117 | &gi_nrecords); |
106 | if (status < 0) | 118 | if (status < 0) |
107 | check_h5_error (status, __FILE__, __LINE__); | 119 | check_h5_error (status, __FILE__, __LINE__); |
120 | |||
121 | printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords); | ||
108 | 122 | ||
109 | gi_buf = malloc (sizeof(gi_type_data) * gi_nrecords); | 123 | old_buf = malloc (sizeof(gi_type_data) * gi_nrecords); |
110 | 124 | ||
111 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, | 125 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, |
112 | gi_sizes, gi_buf); | 126 | gi_sizes, old_buf); |
127 | if (status < 0) | ||
128 | check_h5_error (status, __FILE__, __LINE__); | ||
129 | |||
130 | status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords); | ||
113 | if (status < 0) | 131 | if (status < 0) |
114 | check_h5_error (status, __FILE__, __LINE__); | 132 | check_h5_error (status, __FILE__, __LINE__); |
115 | 133 | ||
116 | } | 134 | } |
135 | |||
136 | /* | ||
137 | * If the table is not already present create it. | ||
138 | */ | ||
117 | else | 139 | else |
118 | { | 140 | { |
119 | 141 | ||
120 | printf ("Creating gi_type_data.\n"); | 142 | printf ("Creating gi_type_data.\n"); |
121 | 143 | ||
122 | gi_buf = malloc (sizeof(gi_type_data) * faa_nrecords); | ||
123 | |||
124 | const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = | 144 | const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = |
125 | GI_TYPE_DATA_FIELD_NAMES; | 145 | GI_TYPE_DATA_FIELD_NAMES; |
126 | 146 | ||
@@ -130,7 +150,7 @@ assign_protein_type (hid_t file_id) | |||
130 | 150 | ||
131 | status = H5TBmake_table ("gi_type_data", file_id, | 151 | status = H5TBmake_table ("gi_type_data", file_id, |
132 | "gi_type_data", | 152 | "gi_type_data", |
133 | GI_TYPE_DATA_FIELD_NUM, faa_nrecords, | 153 | GI_TYPE_DATA_FIELD_NUM, 0, |
134 | gi_size, gi_type_data_field_names, | 154 | gi_size, gi_type_data_field_names, |
135 | gi_offset, gi_field_type, | 155 | gi_offset, gi_field_type, |
136 | chunk_size, fill_data, compress, | 156 | chunk_size, fill_data, compress, |
@@ -140,17 +160,22 @@ assign_protein_type (hid_t file_id) | |||
140 | 160 | ||
141 | } | 161 | } |
142 | 162 | ||
163 | /* | ||
164 | * Copy the contents of the old table into a hash. | ||
165 | */ | ||
143 | struct hsearch_data htab; | 166 | struct hsearch_data htab; |
144 | bzero (&htab, sizeof (htab)); | 167 | bzero (&htab, sizeof (htab)); |
145 | hcreate_r (gi_nrecords * 2, &htab); | 168 | if (hcreate_r (gi_nrecords * 2, &htab) == 0) |
169 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | ||
170 | "Allocation of cache failed."); | ||
146 | ENTRY e, *ep; | 171 | ENTRY e, *ep; |
147 | 172 | ||
148 | for (int i = 0; i < gi_nrecords; i++) | 173 | for (int i = 0; i < (int)gi_nrecords; i++) |
149 | { | 174 | { |
150 | char gi_chr[25]; | 175 | char gi_chr[25]; |
151 | snprintf (gi_chr, 25, "%i", gi_buf[i].gi); | 176 | snprintf (gi_chr, 25, "%i", old_buf[i].gi); |
152 | e.key = gi_chr; | 177 | e.key = strdup (gi_chr); |
153 | e.data = &gi_buf[i]; | 178 | e.data = &old_buf[i]; |
154 | if (hsearch_r (e, ENTER, &ep, &htab) == 0) | 179 | if (hsearch_r (e, ENTER, &ep, &htab) == 0) |
155 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | 180 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, |
156 | "Allocation failed."); | 181 | "Allocation failed."); |
@@ -160,19 +185,23 @@ assign_protein_type (hid_t file_id) | |||
160 | * Assign protein types to records for which the field is empty. | 185 | * Assign protein types to records for which the field is empty. |
161 | */ | 186 | */ |
162 | printf ("Records to process: %i\n", (int)faa_nrecords); | 187 | printf ("Records to process: %i\n", (int)faa_nrecords); |
163 | bool updates_pending = false; | 188 | int written = 0; |
164 | for (int i = 0; i < faa_nrecords; i++) | 189 | for (int i = 0; i < (int)faa_nrecords; i++) |
165 | { | 190 | { |
191 | new_buf[i].gi = faa_buf[i].gi; | ||
192 | strncpy (new_buf[i].type, "", sizeof (new_buf[i].type)); | ||
193 | strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein)); | ||
166 | 194 | ||
167 | char gi_chr[25]; | 195 | char gi_chr[25]; |
168 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); | 196 | snprintf (gi_chr, 25, "%i", faa_buf[i].gi); |
169 | e.key = gi_chr; | 197 | e.key = gi_chr; |
198 | e.data = NULL; | ||
199 | |||
200 | /* | ||
201 | * A record was not found in the cache for this gi. | ||
202 | */ | ||
170 | if (hsearch_r (e, FIND, &ep, &htab) == 0) | 203 | if (hsearch_r (e, FIND, &ep, &htab) == 0) |
171 | { | 204 | { |
172 | |||
173 | gi_buf[i].gi = faa_buf[i].gi; | ||
174 | gi_buf[i].type[0] = '\0'; | ||
175 | gi_buf[i].protein[0] = '\0'; | ||
176 | 205 | ||
177 | /* | 206 | /* |
178 | * Read the sequence from the database by GI. | 207 | * Read the sequence from the database by GI. |
@@ -202,7 +231,7 @@ assign_protein_type (hid_t file_id) | |||
202 | */ | 231 | */ |
203 | if (error_returns != NULL) | 232 | if (error_returns != NULL) |
204 | { | 233 | { |
205 | char *msg = BlastErrorToString (error_returns); | 234 | CharPtr msg = BlastErrorToString (error_returns); |
206 | printf ("Warning: An error has been reported by the NCBI Toolkit " | 235 | printf ("Warning: An error has been reported by the NCBI Toolkit " |
207 | "API for sequence gi|%i: %s", | 236 | "API for sequence gi|%i: %s", |
208 | faa_buf[i].gi, msg); | 237 | faa_buf[i].gi, msg); |
@@ -221,14 +250,12 @@ assign_protein_type (hid_t file_id) | |||
221 | BUFFER_LEN); | 250 | BUFFER_LEN); |
222 | 251 | ||
223 | // Species Type | 252 | // Species Type |
224 | gi_buf[i].type[0] = target_id_buf[4]; | 253 | new_buf[i].type[0] = target_id_buf[4]; |
225 | gi_buf[i].type[1] = '\0'; | 254 | new_buf[i].type[1] = '\0'; |
226 | 255 | ||
227 | // Protein Type | 256 | // Protein Type |
228 | strncpy (gi_buf[i].protein, &target_id_buf[6], | 257 | strncpy (new_buf[i].protein, &target_id_buf[6], |
229 | sizeof (gi_buf[i].protein)); | 258 | sizeof (new_buf[i].protein)); |
230 | |||
231 | updates_pending = true; | ||
232 | } | 259 | } |
233 | 260 | ||
234 | /* | 261 | /* |
@@ -246,16 +273,27 @@ assign_protein_type (hid_t file_id) | |||
246 | seqalign = SeqAlignSetFree (seqalign); | 273 | seqalign = SeqAlignSetFree (seqalign); |
247 | bsp = BioseqFree (bsp); | 274 | bsp = BioseqFree (bsp); |
248 | 275 | ||
276 | } // End existing entry not found. | ||
277 | |||
278 | /* | ||
279 | * Hash table entry found. Keep the old value. | ||
280 | */ | ||
281 | else | ||
282 | { | ||
283 | gi_type_data* old_value = (gi_type_data*)ep->data; | ||
284 | new_buf[i].gi = old_value->gi; | ||
285 | strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type)); | ||
286 | strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein)); | ||
249 | } | 287 | } |
250 | 288 | ||
251 | /* | 289 | /* |
252 | * Write the data out to the file. | 290 | * Write the data out to the file. |
253 | */ | 291 | */ |
254 | if ( (i % 1000 == 0) && (i > 0) && updates_pending) | 292 | if ( (i % 1000 == 0) && (i > 0) ) |
255 | { | 293 | { |
256 | status = H5TBwrite_records (file_id, "gi_type_data", i - 1000, 1000, | 294 | status = H5TBappend_records (file_id, "gi_type_data", 1000, |
257 | gi_size, gi_offset, gi_sizes, | 295 | gi_size, gi_offset, gi_sizes, |
258 | &gi_buf[i-1000]); | 296 | &new_buf[i-1000]); |
259 | if (status < 0) | 297 | if (status < 0) |
260 | check_h5_error (status, __FILE__, __LINE__); | 298 | check_h5_error (status, __FILE__, __LINE__); |
261 | 299 | ||
@@ -263,7 +301,7 @@ assign_protein_type (hid_t file_id) | |||
263 | if (status < 0) | 301 | if (status < 0) |
264 | check_h5_error (status, __FILE__, __LINE__); | 302 | check_h5_error (status, __FILE__, __LINE__); |
265 | 303 | ||
266 | updates_pending = false; | 304 | written = i; |
267 | 305 | ||
268 | printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); | 306 | printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); |
269 | } | 307 | } |
@@ -274,37 +312,34 @@ assign_protein_type (hid_t file_id) | |||
274 | * Write out records from the last bin if it was less than 1000 | 312 | * Write out records from the last bin if it was less than 1000 |
275 | * records in size. | 313 | * records in size. |
276 | */ | 314 | */ |
277 | if (updates_pending) | 315 | if ((int)faa_nrecords < 1000) |
278 | { | 316 | { |
279 | /* | 317 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords, |
280 | if ((int)faa_nrecords < 1000) | 318 | gi_size, gi_offset, gi_sizes, |
281 | { | 319 | new_buf); |
282 | status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords, | 320 | } |
283 | dst_size, dst_offset, dst_sizes, | 321 | |
284 | dst_buf); | 322 | else |
285 | } | 323 | { |
286 | else | 324 | status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, |
287 | { | 325 | gi_size, gi_offset, gi_sizes, |
288 | status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000, | 326 | &new_buf[written]); |
289 | dst_size, dst_offset, dst_sizes, | ||
290 | &dst_buf[nrecords-1000]); | ||
291 | } | ||
292 | if (status < 0) | ||
293 | check_h5_error (status, __FILE__, __LINE__); | ||
294 | |||
295 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
296 | if (status < 0) | ||
297 | check_h5_error (status, __FILE__, __LINE__); | ||
298 | |||
299 | updates_pending = false; | ||
300 | */ | ||
301 | } | 327 | } |
328 | |||
329 | if (status < 0) | ||
330 | check_h5_error (status, __FILE__, __LINE__); | ||
331 | |||
332 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
333 | if (status < 0) | ||
334 | check_h5_error (status, __FILE__, __LINE__); | ||
302 | 335 | ||
303 | free (faa_buf); | 336 | free (faa_buf); |
304 | free (gi_buf); | 337 | free (old_buf); |
338 | free (new_buf); | ||
305 | hdestroy_r (&htab); | 339 | hdestroy_r (&htab); |
306 | 340 | ||
307 | options = BLASTOptionDelete (options); | 341 | options = BLASTOptionDelete (options); |
342 | readdb_destruct (seqdb); | ||
308 | 343 | ||
309 | return; | 344 | return; |
310 | } | 345 | } |
diff --git a/src/load/load_influenza_aa_dat.c b/src/load/load_influenza_aa_dat.c index 8bf47aa..3826349 100644 --- a/src/load/load_influenza_aa_dat.c +++ b/src/load/load_influenza_aa_dat.c | |||
@@ -13,10 +13,9 @@ | |||
13 | #include <stdlib.h> | 13 | #include <stdlib.h> |
14 | 14 | ||
15 | #define NFIELDS (hsize_t) 11 | 15 | #define NFIELDS (hsize_t) 11 |
16 | #define TABLE_NAME "influenza_aa.dat" | ||
17 | 16 | ||
18 | void | 17 | void |
19 | load_influenza_aa_dat (hid_t file_id) | 18 | load_influenza_aa_dat (hid_t file_id, const char* file_name) |
20 | { | 19 | { |
21 | /* | 20 | /* |
22 | * Model the data using native types. | 21 | * Model the data using native types. |
@@ -145,8 +144,7 @@ load_influenza_aa_dat (hid_t file_id) | |||
145 | * Insert the records. | 144 | * Insert the records. |
146 | */ | 145 | */ |
147 | supplementary_data p_data; | 146 | supplementary_data p_data; |
148 | FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", | 147 | FILE *dat = fopen (file_name, "r"); |
149 | "r"); | ||
150 | if (dat == NULL) | 148 | if (dat == NULL) |
151 | check_error (__FILE__, __LINE__); | 149 | check_error (__FILE__, __LINE__); |
152 | char *line = NULL; | 150 | char *line = NULL; |
@@ -214,18 +212,49 @@ load_influenza_aa_dat (hid_t file_id) | |||
214 | 212 | ||
215 | if (current_line == 1) | 213 | if (current_line == 1) |
216 | { | 214 | { |
217 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, | 215 | |
218 | TABLE_NAME, NFIELDS, 1, dst_size, | 216 | /* |
219 | field_names, dst_offset, field_type, | 217 | * Dataset already exists. Purge it. |
220 | chunk_size, fill_data, compress, | 218 | */ |
221 | &p_data); | 219 | if (H5LTfind_dataset (file_id, "influenza_aa.dat") == 1) |
222 | if (status < 0) | 220 | { |
223 | check_h5_error (status, __FILE__, __LINE__); | 221 | hsize_t nfields = 0; |
222 | hsize_t nrecords = 0; | ||
223 | herr_t status = H5TBget_table_info (file_id, "influenza_aa.dat", | ||
224 | &nfields, &nrecords); | ||
225 | if (status < 0) | ||
226 | check_h5_error (status, __FILE__, __LINE__); | ||
227 | |||
228 | status = H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords); | ||
229 | if (status < 0) | ||
230 | check_h5_error (status, __FILE__, __LINE__); | ||
231 | |||
232 | status = | ||
233 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, | ||
234 | dst_offset, dst_sizes, &p_data); | ||
235 | if (status < 0) | ||
236 | check_h5_error (status, __FILE__, __LINE__); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * Dataset does not exist. Create it. | ||
241 | */ | ||
242 | else | ||
243 | { | ||
244 | herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, | ||
245 | "influenza_aa.dat", NFIELDS, 1, dst_size, | ||
246 | field_names, dst_offset, field_type, | ||
247 | chunk_size, fill_data, compress, | ||
248 | &p_data); | ||
249 | if (status < 0) | ||
250 | check_h5_error (status, __FILE__, __LINE__); | ||
251 | } | ||
224 | } | 252 | } |
253 | |||
225 | else | 254 | else |
226 | { | 255 | { |
227 | herr_t status = | 256 | herr_t status = |
228 | H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, | 257 | H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size, |
229 | dst_offset, dst_sizes, &p_data); | 258 | dst_offset, dst_sizes, &p_data); |
230 | if (status < 0) | 259 | if (status < 0) |
231 | check_h5_error (status, __FILE__, __LINE__); | 260 | check_h5_error (status, __FILE__, __LINE__); |
diff --git a/src/load/load_influenza_aa_dat.h b/src/load/load_influenza_aa_dat.h index f6c60be..97e36f8 100644 --- a/src/load/load_influenza_aa_dat.h +++ b/src/load/load_influenza_aa_dat.h | |||
@@ -7,6 +7,6 @@ | |||
7 | * Load the supplementary protein data from the NCBI influenza_aa.dat | 7 | * Load the supplementary protein data from the NCBI influenza_aa.dat |
8 | * file. | 8 | * file. |
9 | */ | 9 | */ |
10 | void load_influenza_aa_dat (hid_t file_id); | 10 | void load_influenza_aa_dat (hid_t file_id, const char* file_name); |
11 | 11 | ||
12 | #endif // LOAD_INFLUENZA_AA_DAT_H | 12 | #endif // LOAD_INFLUENZA_AA_DAT_H |
diff --git a/src/load/load_influenza_faa.c b/src/load/load_influenza_faa.c index a217989..04bf05b 100644 --- a/src/load/load_influenza_faa.c +++ b/src/load/load_influenza_faa.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <stdlib.h> | 8 | #include <stdlib.h> |
9 | 9 | ||
10 | void | 10 | void |
11 | load_influenza_faa (hid_t file_id) | 11 | load_influenza_faa (hid_t file_id, const char* file_name) |
12 | { | 12 | { |
13 | size_t dst_size; | 13 | size_t dst_size; |
14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; | 14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; |
@@ -22,8 +22,7 @@ load_influenza_faa (hid_t file_id) | |||
22 | int compress = 0; | 22 | int compress = 0; |
23 | 23 | ||
24 | sequence_data p_data; | 24 | sequence_data p_data; |
25 | FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa", | 25 | FILE *dat = fopen (file_name, "r"); |
26 | "r"); | ||
27 | if (dat == NULL) | 26 | if (dat == NULL) |
28 | check_error (__FILE__, __LINE__); | 27 | check_error (__FILE__, __LINE__); |
29 | char *line = NULL; | 28 | char *line = NULL; |
@@ -62,16 +61,46 @@ load_influenza_faa (hid_t file_id) | |||
62 | 61 | ||
63 | if (current_line == 1) | 62 | if (current_line == 1) |
64 | { | 63 | { |
65 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | 64 | /* |
66 | "influenza.faa", | 65 | * Dataset already exists. Purge it. |
67 | SEQUENCE_DATA_FIELD_NUM, 1, | 66 | */ |
68 | dst_size, sequence_data_field_names, | 67 | if (H5LTfind_dataset (file_id, "influenza.faa") == 1) |
69 | dst_offset, field_type, | 68 | { |
70 | chunk_size, fill_data, compress, | 69 | hsize_t nfields = 0; |
71 | &p_data); | 70 | hsize_t nrecords = 0; |
72 | if (status < 0) | 71 | herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, |
73 | check_h5_error (status, __FILE__, __LINE__); | 72 | &nrecords); |
73 | if (status < 0) | ||
74 | check_h5_error (status, __FILE__, __LINE__); | ||
75 | |||
76 | status = H5TBdelete_record (file_id, "influenza.faa", 0, nrecords); | ||
77 | if (status < 0) | ||
78 | check_h5_error (status, __FILE__, __LINE__); | ||
79 | |||
80 | status = | ||
81 | H5TBappend_records (file_id, "influenza.faa", 1, dst_size, | ||
82 | dst_offset, dst_sizes, &p_data); | ||
83 | if (status < 0) | ||
84 | check_h5_error (status, __FILE__, __LINE__); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Dataset does not exist. Create it. | ||
89 | */ | ||
90 | else | ||
91 | { | ||
92 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | ||
93 | "influenza.faa", | ||
94 | SEQUENCE_DATA_FIELD_NUM, 1, | ||
95 | dst_size, sequence_data_field_names, | ||
96 | dst_offset, field_type, | ||
97 | chunk_size, fill_data, compress, | ||
98 | &p_data); | ||
99 | if (status < 0) | ||
100 | check_h5_error (status, __FILE__, __LINE__); | ||
101 | } | ||
74 | } | 102 | } |
103 | |||
75 | else | 104 | else |
76 | { | 105 | { |
77 | herr_t status = | 106 | herr_t status = |
diff --git a/src/load/load_influenza_faa.h b/src/load/load_influenza_faa.h index 569c411..1ad5797 100644 --- a/src/load/load_influenza_faa.h +++ b/src/load/load_influenza_faa.h | |||
@@ -6,6 +6,6 @@ | |||
6 | /* | 6 | /* |
7 | * Load the protein sequence data from the NCBI influenza.faa file. | 7 | * Load the protein sequence data from the NCBI influenza.faa file. |
8 | */ | 8 | */ |
9 | void load_influenza_faa (hid_t file_id); | 9 | void load_influenza_faa (hid_t file_id, const char* file_name); |
10 | 10 | ||
11 | #endif // LOAD_INFLUENZA_FAA_H | 11 | #endif // LOAD_INFLUENZA_FAA_H |
diff --git a/src/updator.c b/src/updator.c index 591d2f6..9a5ad18 100644 --- a/src/updator.c +++ b/src/updator.c | |||
@@ -4,6 +4,8 @@ | |||
4 | 4 | ||
5 | #include "assign/assign_protein_type.h" | 5 | #include "assign/assign_protein_type.h" |
6 | #include "error/check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | #include <stdio.h> | ||
8 | #include <signal.h> | ||
7 | 9 | ||
8 | #define FILE "influenza.h5" | 10 | #define FILE "influenza.h5" |
9 | 11 | ||