-rw-r--r-- | src/aggregator.c | 2 | ||||
-rw-r--r-- | src/assign_protein_type.c | 7 | ||||
-rw-r--r-- | src/assign_protein_type.h | 3 | ||||
-rw-r--r-- | src/check_error.c | 2 | ||||
-rw-r--r-- | src/check_error.h | 3 | ||||
-rw-r--r-- | src/check_h5_error.c | 2 | ||||
-rw-r--r-- | src/check_h5_error.h | 2 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 3 | ||||
-rw-r--r-- | src/check_ncbi_error.h | 2 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 259 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.h | 3 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 2 | ||||
-rw-r--r-- | src/load_influenza_faa.h | 3 |
13 files changed, 145 insertions, 148 deletions
diff --git a/src/aggregator.c b/src/aggregator.c index 20da6df..5fb9d4a 100644 --- a/src/aggregator.c +++ b/src/aggregator.c @@ -11,7 +11,7 @@ #define FILE "influenza.h5" int -main() +main () { /* * Create the HDF5 file. diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 643ea3f..54db84e 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c @@ -48,8 +48,8 @@ assign_protein_type (hid_t file_id) * Read the sequence from the database by GI. */ Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL); - BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number); - + BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); + SeqAlignPtr seqalign = BioseqBlastEngine (bsp, "blastp", REFDB, @@ -66,8 +66,7 @@ assign_protein_type (hid_t file_id) Char target_id_buf[BUFFER_LEN + 1]; SeqIdPtr target_id = SeqAlignId (seqalign, 1); SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); - printf ("%s\n", - target_id_buf); + printf ("%s\n", target_id_buf); } // Clean up memory for the next ieration. diff --git a/src/assign_protein_type.h b/src/assign_protein_type.h index 1dfb8e6..221154f 100644 --- a/src/assign_protein_type.h +++ b/src/assign_protein_type.h @@ -10,7 +10,6 @@ * sequence is BLASTed against this database. The first hit is used * to assign a protein type to sequence. */ -void -assign_protein_type (hid_t file_id); +void assign_protein_type (hid_t file_id); #endif // ASSIGN_PROTEIN_TYPE_H diff --git a/src/check_error.c b/src/check_error.c index 70c62c4..4630b50 100644 --- a/src/check_error.c +++ b/src/check_error.c @@ -7,7 +7,7 @@ void check_error (const char *filename, const unsigned int linenum) { if (errno) - error_at_line (EXIT_FAILURE, errno, filename, linenum, + error_at_line (EXIT_FAILURE, errno, filename, linenum, "An error has been detected within the application."); return; diff --git a/src/check_error.h b/src/check_error.h index 33acc63..2250c59 100644 --- a/src/check_error.h +++ b/src/check_error.h @@ -5,7 +5,6 @@ * Check the error state. Reports and error message and exits if an * error has occured. */ -void -check_error (const char *filename, unsigned int linenum); +void check_error (const char *filename, unsigned int linenum); #endif // CHECK_ERROR_H diff --git a/src/check_h5_error.c b/src/check_h5_error.c index 30fc87c..d90b21f 100644 --- a/src/check_h5_error.c +++ b/src/check_h5_error.c @@ -3,7 +3,7 @@ #include <stdlib.h> void -check_h5_error (herr_t status, const char* filename, unsigned int linenum) +check_h5_error (herr_t status, const char *filename, unsigned int linenum) { error_at_line (EXIT_FAILURE, 0, filename, linenum, "An error has been reported by the HDF5 API."); diff --git a/src/check_h5_error.h b/src/check_h5_error.h index 74730cd..e460e97 100644 --- a/src/check_h5_error.h +++ b/src/check_h5_error.h @@ -7,6 +7,6 @@ * Handle errors from the HDF5 API. */ void -check_h5_error (herr_t status, const char* filename, unsigned int linenum); +check_h5_error (herr_t status, const char *filename, unsigned int linenum); #endif // CHECK_H5_ERROR_H diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c index 3caa7a9..8e1c3b2 100644 --- a/src/check_ncbi_error.c +++ b/src/check_ncbi_error.c @@ -2,8 +2,7 @@ void check_ncbi_error (ValNodePtr error_returns, - const char* filename, - unsigned int linenum) + const char *filename, unsigned int linenum) { error_at_line (EXIT_FAILURE, 0, filename, linenum, "An error has been reported by the NCBI Toolkit API: %s", diff --git a/src/check_ncbi_error.h b/src/check_ncbi_error.h index c27c56d..45ac0ca 100644 --- a/src/check_ncbi_error.h +++ b/src/check_ncbi_error.h @@ -8,6 +8,6 @@ */ void check_ncbi_error (ValNodePtr error_returns, - const char* filename, unsigned int linenum); + const char *filename, unsigned int linenum); #endif // CHECK_NCBI_ERROR_H diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c index 91ef415..9ee3c46 100644 --- a/src/load_influenza_aa_dat.c +++ b/src/load_influenza_aa_dat.c @@ -46,60 +46,63 @@ load_influenza_aa_dat (hid_t file_id) * memory." */ size_t dst_size = sizeof (supplementary_data); - size_t dst_offset[NFIELDS] = { HOFFSET ( supplementary_data, genbank_accession_number ), - HOFFSET ( supplementary_data, host ), - HOFFSET ( supplementary_data, genome_segment_number ), - HOFFSET ( supplementary_data, subtype ), - HOFFSET ( supplementary_data, country ), - HOFFSET ( supplementary_data, year ), - HOFFSET ( supplementary_data, sequence_length ), - HOFFSET ( supplementary_data, virus_name ), - HOFFSET ( supplementary_data, age ), - HOFFSET ( supplementary_data, gender ), - HOFFSET ( supplementary_data, full_length_indicator )}; + size_t dst_offset[NFIELDS] = + { HOFFSET (supplementary_data, genbank_accession_number), + HOFFSET (supplementary_data, host), + HOFFSET (supplementary_data, genome_segment_number), + HOFFSET (supplementary_data, subtype), + HOFFSET (supplementary_data, country), + HOFFSET (supplementary_data, year), + HOFFSET (supplementary_data, sequence_length), + HOFFSET (supplementary_data, virus_name), + HOFFSET (supplementary_data, age), + HOFFSET (supplementary_data, gender), + HOFFSET (supplementary_data, full_length_indicator) + }; supplementary_data dst_buf[1]; - size_t dst_sizes[NFIELDS] = { sizeof ( dst_buf[0].genbank_accession_number ), - sizeof ( dst_buf[0].host ), - sizeof ( dst_buf[0].genome_segment_number ), - sizeof ( dst_buf[0].subtype ), - sizeof ( dst_buf[0].country ), - sizeof ( dst_buf[0].year ), - sizeof ( dst_buf[0].sequence_length ), - sizeof ( dst_buf[0].virus_name ), - sizeof ( dst_buf[0].age ), - sizeof ( dst_buf[0].gender ), - sizeof ( dst_buf[0].full_length_indicator)}; + size_t dst_sizes[NFIELDS] = { sizeof (dst_buf[0].genbank_accession_number), + sizeof (dst_buf[0].host), + sizeof (dst_buf[0].genome_segment_number), + sizeof (dst_buf[0].subtype), + sizeof (dst_buf[0].country), + sizeof (dst_buf[0].year), + sizeof (dst_buf[0].sequence_length), + sizeof (dst_buf[0].virus_name), + sizeof (dst_buf[0].age), + sizeof (dst_buf[0].gender), + sizeof (dst_buf[0].full_length_indicator) + }; /* * Map the native types to HDF5 types for each field. */ hid_t field_type[NFIELDS]; - hid_t genbank_accession_number_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size ( genbank_accession_number_type, 9 ); + hid_t genbank_accession_number_type = H5Tcopy (H5T_C_S1); + H5Tset_size (genbank_accession_number_type, 9); field_type[0] = genbank_accession_number_type; - hid_t host_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size ( host_type, 15 ); + hid_t host_type = H5Tcopy (H5T_C_S1); + H5Tset_size (host_type, 15); field_type[1] = host_type; - + field_type[2] = H5T_NATIVE_INT; - hid_t subtype_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size (subtype_type, 7 ); + hid_t subtype_type = H5Tcopy (H5T_C_S1); + H5Tset_size (subtype_type, 7); field_type[3] = subtype_type; - hid_t country_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size (country_type, 25 ); + hid_t country_type = H5Tcopy (H5T_C_S1); + H5Tset_size (country_type, 25); field_type[4] = country_type; field_type[5] = H5T_NATIVE_INT; field_type[6] = H5T_NATIVE_INT; - hid_t virus_name_type = H5Tcopy ( H5T_C_S1 ); + hid_t virus_name_type = H5Tcopy (H5T_C_S1); H5Tset_size (virus_name_type, 196); field_type[7] = virus_name_type; @@ -118,18 +121,18 @@ load_influenza_aa_dat (hid_t file_id) /* * Labels used for the fields in the table. */ - const char *field_names[NFIELDS] = - { "GenBank accession number", - "Host", - "Genome segment number", - "Subtype", - "Country", - "Year", - "Sequence length", - "Virus name", - "Age", - "Gender", - "Full-length Indicator" }; + const char *field_names[NFIELDS] = { "GenBank accession number", + "Host", + "Genome segment number", + "Subtype", + "Country", + "Year", + "Sequence length", + "Virus name", + "Age", + "Gender", + "Full-length Indicator" + }; /* * Table storage options. @@ -142,7 +145,7 @@ load_influenza_aa_dat (hid_t file_id) * Insert the records. */ supplementary_data p_data; - FILE* dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", + FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", "r"); if (dat == NULL) check_error (__FILE__, __LINE__); @@ -150,89 +153,89 @@ load_influenza_aa_dat (hid_t file_id) size_t len = 0; int current_line = 0; - while (getline (&line, &len, dat) != -1) { - - current_line++; - char *running = strdup (line); - char *token; - - /* - * Parse the line, handling the case of empty fields represented - * by sequential delimiters. - */ - strncpy(p_data.genbank_accession_number, strsep (&running, "\t"), - sizeof(p_data.genbank_accession_number)); - - strncpy(p_data.host, strsep (&running, "\t"), - sizeof(p_data.host)); - - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.genome_segment_number = 0; - else - p_data.genome_segment_number = atoi(token); - - strncpy(p_data.subtype, strsep (&running, "\t"), - sizeof(p_data.subtype)); - - strncpy(p_data.country, strsep (&running, "\t"), - sizeof(p_data.country)); - - /* - * Convert the year field from text to numeric. Unknown and empty - * values are assigned a numeric value of zero. - */ - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.year = 0; - else if (strcmp (token, "unknown") == 0) - p_data.year = 0; - else if (strcmp (token, "NON") == 0) - p_data.year = 0; - else - p_data.year = atoi(token); - - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.sequence_length = 0; - else - p_data.sequence_length = atoi(token); - - strncpy(p_data.virus_name, strsep (&running, "\t"), - sizeof(p_data.virus_name)); - - strncpy(p_data.age, strsep (&running, "\t"), - sizeof(p_data.age)); - - strncpy(p_data.gender, strsep (&running, "\t"), - sizeof(p_data.gender)); - - strncpy(p_data.full_length_indicator, strsep (&running, "\t"), - sizeof(p_data.full_length_indicator)); - - if (current_line == 1) - { - herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, - TABLE_NAME, NFIELDS, 1, dst_size, - field_names, dst_offset, field_type, - chunk_size, fill_data, compress, - &p_data); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); - } - else - { - herr_t status = H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, - dst_offset, dst_sizes, &p_data); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); - } - - if (running) - free (running); - - } - + while (getline (&line, &len, dat) != -1) + { + + current_line++; + char *running = strdup (line); + char *token; + + /* + * Parse the line, handling the case of empty fields represented + * by sequential delimiters. + */ + strncpy (p_data.genbank_accession_number, strsep (&running, "\t"), + sizeof (p_data.genbank_accession_number)); + + strncpy (p_data.host, strsep (&running, "\t"), sizeof (p_data.host)); + + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.genome_segment_number = 0; + else + p_data.genome_segment_number = atoi (token); + + strncpy (p_data.subtype, strsep (&running, "\t"), + sizeof (p_data.subtype)); + + strncpy (p_data.country, strsep (&running, "\t"), + sizeof (p_data.country)); + + /* + * Convert the year field from text to numeric. Unknown and empty + * values are assigned a numeric value of zero. + */ + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.year = 0; + else if (strcmp (token, "unknown") == 0) + p_data.year = 0; + else if (strcmp (token, "NON") == 0) + p_data.year = 0; + else + p_data.year = atoi (token); + + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.sequence_length = 0; + else + p_data.sequence_length = atoi (token); + + strncpy (p_data.virus_name, strsep (&running, "\t"), + sizeof (p_data.virus_name)); + + strncpy (p_data.age, strsep (&running, "\t"), sizeof (p_data.age)); + + strncpy (p_data.gender, strsep (&running, "\t"), + sizeof (p_data.gender)); + + strncpy (p_data.full_length_indicator, strsep (&running, "\t"), + sizeof (p_data.full_length_indicator)); + + if (current_line == 1) + { + herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, + TABLE_NAME, NFIELDS, 1, dst_size, + field_names, dst_offset, field_type, + chunk_size, fill_data, compress, + &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + else + { + herr_t status = + H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, + dst_offset, dst_sizes, &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + + if (running) + free (running); + + } + if (line) free (line); diff --git a/src/load_influenza_aa_dat.h b/src/load_influenza_aa_dat.h index c431e67..f6c60be 100644 --- a/src/load_influenza_aa_dat.h +++ b/src/load_influenza_aa_dat.h @@ -7,7 +7,6 @@ * Load the supplementary protein data from the NCBI influenza_aa.dat * file. */ -void -load_influenza_aa_dat (hid_t file_id); +void load_influenza_aa_dat (hid_t file_id); #endif // LOAD_INFLUENZA_AA_DAT_H diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 8c43992..8fd0cd7 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c @@ -3,7 +3,7 @@ void load_influenza_faa (hid_t file_id) { - + return; } diff --git a/src/load_influenza_faa.h b/src/load_influenza_faa.h index 4b1dbdd..569c411 100644 --- a/src/load_influenza_faa.h +++ b/src/load_influenza_faa.h @@ -6,7 +6,6 @@ /* * Load the protein sequence data from the NCBI influenza.faa file. */ -void -load_influenza_faa (hid_t file_id); +void load_influenza_faa (hid_t file_id); #endif // LOAD_INFLUENZA_FAA_H |