summary refs log tree commit diff stats
Side-by-side diff
-rw-r--r-- src/aggregator.c 3
-rw-r--r-- src/assign/assign_protein_type.c 110
-rw-r--r-- src/load/load_influenza_aa_dat.c 13
-rw-r--r-- src/load/load_influenza_aa_dat.h 2
-rw-r--r-- src/load/load_influenza_faa.c 30
-rw-r--r-- src/load/load_influenza_faa.h 2
-rw-r--r-- src/model/gi_type_data_init.c 4
-rw-r--r-- src/model/gi_type_data_init.h 4
-rw-r--r-- src/model/sequence_data_init.c 18
-rw-r--r-- src/model/sequence_data_init.h 6
10 files changed, 99 insertions, 93 deletions
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c
index 9a0717b..1df4c8d 100644
--- a/src/assign/assign_protein_type.c
+++ b/src/assign/assign_protein_type.c
@@ -70,7 +70,7 @@ assign_protein_type (hid_t file_id)
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- sequence_data* faa_buf = malloc (sizeof(sequence_data) * faa_nrecords);
+ sequence_data *faa_buf = malloc (sizeof (sequence_data) * faa_nrecords);
size_t faa_size;
size_t faa_offset[SEQUENCE_DATA_FIELD_NUM];
@@ -86,7 +86,7 @@ assign_protein_type (hid_t file_id)
/*
* Allocate memory for the new table.
*/
- gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords);
+ gi_type_data *new_buf = malloc (sizeof (gi_type_data) * faa_nrecords);
if (new_buf == NULL)
check_error (__FILE__, __LINE__);
@@ -101,7 +101,7 @@ assign_protein_type (hid_t file_id)
hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM];
gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type);
- gi_type_data* old_buf = NULL;
+ gi_type_data *old_buf = NULL;
/*
* If the table is already present read the values into memory and
@@ -117,10 +117,11 @@ assign_protein_type (hid_t file_id)
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords);
-
- old_buf = malloc (sizeof(gi_type_data) * gi_nrecords);
-
+ printf (" Using gi_type_data cache of %i records.\n",
+ (int) gi_nrecords);
+
+ old_buf = malloc (sizeof (gi_type_data) * gi_nrecords);
+
status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset,
gi_sizes, old_buf);
if (status < 0)
@@ -129,18 +130,18 @@ assign_protein_type (hid_t file_id)
status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
}
/*
* If the table is not already present create it.
*/
else
- {
+ {
printf ("Creating gi_type_data.\n");
- const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] =
+ const char *gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] =
GI_TYPE_DATA_FIELD_NAMES;
hsize_t chunk_size = 10;
@@ -152,8 +153,7 @@ assign_protein_type (hid_t file_id)
GI_TYPE_DATA_FIELD_NUM, 0,
gi_size, gi_type_data_field_names,
gi_offset, gi_field_type,
- chunk_size, fill_data, compress,
- NULL);
+ chunk_size, fill_data, compress, NULL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
@@ -169,7 +169,7 @@ assign_protein_type (hid_t file_id)
"Allocation of cache failed.");
ENTRY e, *ep;
- for (int i = 0; i < (int)gi_nrecords; i++)
+ for (int i = 0; i < (int) gi_nrecords; i++)
{
char gi_chr[25];
snprintf (gi_chr, 25, "%i", old_buf[i].gi);
@@ -183,14 +183,14 @@ assign_protein_type (hid_t file_id)
/*
* Assign protein types to records for which the field is empty.
*/
- printf ("Records to process: %i\n", (int)faa_nrecords);
+ printf ("Records to process: %i\n", (int) faa_nrecords);
int written = 0;
- for (int i = 0; i < (int)faa_nrecords; i++)
+ for (int i = 0; i < (int) faa_nrecords; i++)
{
new_buf[i].gi = faa_buf[i].gi;
strncpy (new_buf[i].type, "", sizeof (new_buf[i].type));
strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein));
-
+
char gi_chr[25];
snprintf (gi_chr, 25, "%i", faa_buf[i].gi);
e.key = gi_chr;
@@ -199,24 +199,23 @@ assign_protein_type (hid_t file_id)
/*
* A record was not found in the cache for this gi.
*/
- if (hsearch_r (e, FIND, &ep, &htab) == 0)
+ if (hsearch_r (e, FIND, &ep, &htab) == 0)
{
-
+
/*
* Read the sequence from the database by GI.
*/
Int4 sequence_number = readdb_gi2seq (seqdb, faa_buf[i].gi, NULL);
BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number);
- if (bsp == NULL)
+ if (bsp == NULL)
{
error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__,
"Unable to find BLAST record for gi|%i. Ensure "
"the BLAST database is up-to-date with the HDF5 "
"record set. See the BLAST formatdb.log file "
- "for details.\n",
- faa_buf[i].gi);
+ "for details.\n", faa_buf[i].gi);
}
-
+
SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
"blastp",
REFDB,
@@ -224,19 +223,19 @@ assign_protein_type (hid_t file_id)
NULL,
&error_returns,
NULL);
-
+
/*
* BLAST reported an error. Write it out and continue processing.
*/
if (error_returns != NULL)
{
CharPtr msg = BlastErrorToString (error_returns);
- printf ("Warning: An error has been reported by the NCBI Toolkit "
- "API for sequence gi|%i: %s",
- faa_buf[i].gi, msg);
- free (msg);
+ printf
+ ("Warning: An error has been reported by the NCBI Toolkit "
+ "API for sequence gi|%i: %s", faa_buf[i].gi, msg);
+ free (msg);
}
-
+
/*
* A hit was found. Record the first hit as the protein type.
* Skip the first 4 characters and eat the "lcl|".
@@ -245,18 +244,18 @@ assign_protein_type (hid_t file_id)
{
Char target_id_buf[BUFFER_LEN + 1];
SeqIdPtr target_id = SeqAlignId (seqalign, 1);
- SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT,
+ SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT,
BUFFER_LEN);
// Species Type
new_buf[i].type[0] = target_id_buf[4];
new_buf[i].type[1] = '\0';
-
+
// Protein Type (Skip the underscore in the string).
- strncpy (new_buf[i].protein, &target_id_buf[6],
- sizeof (new_buf[i].protein));
+ strncpy (new_buf[i].protein, &target_id_buf[6],
+ sizeof (new_buf[i].protein));
}
-
+
/*
* BLAST did not find any hits.
*/
@@ -265,73 +264,74 @@ assign_protein_type (hid_t file_id)
printf ("Warning: Unable to identify protein type for sequence "
"gi|%i\n", faa_buf[i].gi);
}
-
+
/*
* Clean up memory for the next iteration.
*/
seqalign = SeqAlignSetFree (seqalign);
bsp = BioseqFree (bsp);
-
- } // End existing entry not found.
+
+ } // End existing entry not found.
/*
* Hash table entry found. Keep the old value.
*/
else
{
- gi_type_data* old_value = (gi_type_data*)ep->data;
+ gi_type_data *old_value = (gi_type_data *) ep->data;
new_buf[i].gi = old_value->gi;
- strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type));
- strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein));
+ strncpy (new_buf[i].type, old_value->type,
+ sizeof (new_buf[i].type));
+ strncpy (new_buf[i].protein, old_value->protein,
+ sizeof (new_buf[i].protein));
}
-
+
/*
* Write the data out to the file.
*/
- if ( (i % 1000 == 0) && (i > 0) )
+ if ((i % 1000 == 0) && (i > 0))
{
status = H5TBappend_records (file_id, "gi_type_data", 1000,
- gi_size, gi_offset, gi_sizes,
- &new_buf[i-1000]);
+ gi_size, gi_offset, gi_sizes,
+ &new_buf[i - 1000]);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
written = i;
- printf ("Processed %i of %i records.\n", i, (int)faa_nrecords);
+ printf ("Processed %i of %i records.\n", i, (int) faa_nrecords);
}
-
+
}
/*
* Write out records from the last bin if it was less than 1000
* records in size.
*/
- if ((int)faa_nrecords < 1000)
+ if ((int) faa_nrecords < 1000)
{
status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords,
- gi_size, gi_offset, gi_sizes,
- new_buf);
+ gi_size, gi_offset, gi_sizes, new_buf);
}
else
{
- status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written,
- gi_size, gi_offset, gi_sizes,
- &new_buf[written]);
+ status =
+ H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written,
+ gi_size, gi_offset, gi_sizes, &new_buf[written]);
}
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
free (faa_buf);
free (old_buf);
free (new_buf);
@@ -339,6 +339,6 @@ assign_protein_type (hid_t file_id)
options = BLASTOptionDelete (options);
readdb_destruct (seqdb);
-
+
return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.