summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-16 19:20:46 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-16 19:20:46 (GMT)
commit 1501b0b828fab86848cbfc242925964685da3e83 (patch) (unidiff)
tree ca58264c82f68fba0ec60e8a429ac5a840a46a9a
parent 7f8fe8f1c76afd369ef34955453cca386a1dd792 (diff)
download exp007-1501b0b828fab86848cbfc242925964685da3e83.zip
exp007-1501b0b828fab86848cbfc242925964685da3e83.tar.gz
exp007-1501b0b828fab86848cbfc242925964685da3e83.tar.bz2
Modified to convert the year field from text to numeric during the load.
-rw-r--r-- src/load_influenza_aa_dat.c 26
1 files changed, 17 insertions, 9 deletions
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 5af8a72..493c7db 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -11,7 +11,7 @@
11#include <stdlib.h>11#include <stdlib.h>
1212
13#define NFIELDS (hsize_t) 1113#define NFIELDS (hsize_t) 11
14#define TABLE_NAME "Protein Sequences"14#define TABLE_NAME "influenza_aa.dat"
1515
16void16void
17load_influenza_aa_dat (hid_t file_id)17load_influenza_aa_dat (hid_t file_id)
@@ -26,7 +26,7 @@ load_influenza_aa_dat (hid_t file_id)
26 int genome_segment_number;26 int genome_segment_number;
27 char subtype[7];27 char subtype[7];
28 char country[25];28 char country[25];
29 char year[8];29 int year;
30 int sequence_length;30 int sequence_length;
31 char virus_name[196];31 char virus_name[196];
32 char age[17];32 char age[17];
@@ -93,9 +93,7 @@ load_influenza_aa_dat (hid_t file_id)
93 H5Tset_size (country_type, 25 );93 H5Tset_size (country_type, 25 );
94 field_type[4] = country_type;94 field_type[4] = country_type;
9595
96 hid_t year_type = H5Tcopy ( H5T_C_S1 );96 field_type[5] = H5T_NATIVE_INT;
97 H5Tset_size (year_type, 8);
98 field_type[5] = year_type;
9997
100 field_type[6] = H5T_NATIVE_INT;98 field_type[6] = H5T_NATIVE_INT;
10199
@@ -175,8 +173,19 @@ load_influenza_aa_dat (hid_t file_id)
175 strncpy(p_data.country, strsep (&running, "\t"),173 strncpy(p_data.country, strsep (&running, "\t"),
176 sizeof(p_data.country));174 sizeof(p_data.country));
177 175
178 strncpy (p_data.year, strsep (&running, "\t"),176 /*
179 sizeof(p_data.year));177 * Convert the year field from text to numeric. Unknown and empty
178 * values are assigned a numeric value of zero.
179 */
180 token = strsep (&running, "\t");
181 if (strcmp (token, "\0") == 0)
182 p_data.year = 0;
183 else if (strcmp (token, "unknown") == 0)
184 p_data.year = 0;
185 else if (strcmp (token, "NON") == 0)
186 p_data.year = 0;
187 else
188 p_data.year = atoi(token);
180189
181 token = strsep (&running, "\t");190 token = strsep (&running, "\t");
182 if (strcmp (token, "\0") == 0)191 if (strcmp (token, "\0") == 0)
@@ -197,7 +206,7 @@ load_influenza_aa_dat (hid_t file_id)
197 sizeof(p_data.full_length_indicator));206 sizeof(p_data.full_length_indicator));
198207
199 if (current_line == 1) 208 if (current_line == 1)
200 H5TBmake_table ("Protein Sequences", file_id, TABLE_NAME,NFIELDS,1,209 H5TBmake_table ("influenza_aa.dat", file_id, TABLE_NAME,NFIELDS,1,
201 dst_size,field_names, dst_offset, field_type,210 dst_size,field_names, dst_offset, field_type,
202 chunk_size, fill_data, compress, &p_data);211 chunk_size, fill_data, compress, &p_data);
203 else 212 else
@@ -218,7 +227,6 @@ load_influenza_aa_dat (hid_t file_id)
218 H5Tclose (host_type);227 H5Tclose (host_type);
219 H5Tclose (subtype_type);228 H5Tclose (subtype_type);
220 H5Tclose (country_type);229 H5Tclose (country_type);
221 H5Tclose (year_type);
222 H5Tclose (virus_name_type);230 H5Tclose (virus_name_type);
223 H5Tclose (age_type);231 H5Tclose (age_type);
224 H5Tclose (gender_type);232 H5Tclose (gender_type);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.