summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu>2010-01-16 19:20:46 (GMT)
committer Don Pellegrino <don@drexel.edu>2010-01-16 19:20:46 (GMT)
commit 1501b0b828fab86848cbfc242925964685da3e83 (patch) (side-by-side diff)
tree ca58264c82f68fba0ec60e8a429ac5a840a46a9a
parent 7f8fe8f1c76afd369ef34955453cca386a1dd792 (diff)
download exp007-1501b0b828fab86848cbfc242925964685da3e83.zip
exp007-1501b0b828fab86848cbfc242925964685da3e83.tar.gz
exp007-1501b0b828fab86848cbfc242925964685da3e83.tar.bz2
Modified to convert the year field from text to numeric during the load.
-rw-r--r-- src/load_influenza_aa_dat.c 26
1 files changed, 17 insertions, 9 deletions
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 5af8a72..493c7db 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -11,7 +11,7 @@
#include <stdlib.h>
#define NFIELDS (hsize_t) 11
-#define TABLE_NAME "Protein Sequences"
+#define TABLE_NAME "influenza_aa.dat"
void
load_influenza_aa_dat (hid_t file_id)
@@ -26,7 +26,7 @@ load_influenza_aa_dat (hid_t file_id)
int genome_segment_number;
char subtype[7];
char country[25];
- char year[8];
+ int year;
int sequence_length;
char virus_name[196];
char age[17];
@@ -93,9 +93,7 @@ load_influenza_aa_dat (hid_t file_id)
H5Tset_size (country_type, 25 );
field_type[4] = country_type;
- hid_t year_type = H5Tcopy ( H5T_C_S1 );
- H5Tset_size (year_type, 8);
- field_type[5] = year_type;
+ field_type[5] = H5T_NATIVE_INT;
field_type[6] = H5T_NATIVE_INT;
@@ -175,8 +173,19 @@ load_influenza_aa_dat (hid_t file_id)
strncpy(p_data.country, strsep (&running, "\t"),
sizeof(p_data.country));
- strncpy (p_data.year, strsep (&running, "\t"),
- sizeof(p_data.year));
+ /*
+ * Convert the year field from text to numeric. Unknown and empty
+ * values are assigned a numeric value of zero.
+ */
+ token = strsep (&running, "\t");
+ if (strcmp (token, "\0") == 0)
+ p_data.year = 0;
+ else if (strcmp (token, "unknown") == 0)
+ p_data.year = 0;
+ else if (strcmp (token, "NON") == 0)
+ p_data.year = 0;
+ else
+ p_data.year = atoi(token);
token = strsep (&running, "\t");
if (strcmp (token, "\0") == 0)
@@ -197,7 +206,7 @@ load_influenza_aa_dat (hid_t file_id)
sizeof(p_data.full_length_indicator));
if (current_line == 1)
- H5TBmake_table ("Protein Sequences", file_id, TABLE_NAME,NFIELDS,1,
+ H5TBmake_table ("influenza_aa.dat", file_id, TABLE_NAME,NFIELDS,1,
dst_size,field_names, dst_offset, field_type,
chunk_size, fill_data, compress, &p_data);
else
@@ -218,7 +227,6 @@ load_influenza_aa_dat (hid_t file_id)
H5Tclose (host_type);
H5Tclose (subtype_type);
H5Tclose (country_type);
- H5Tclose (year_type);
H5Tclose (virus_name_type);
H5Tclose (age_type);
H5Tclose (gender_type);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.