summary refs log tree commit diff stats
Side-by-side diff
-rw-r--r-- src/aggregator.c 3
-rw-r--r-- src/assign/assign_protein_type.c 110
-rw-r--r-- src/load/load_influenza_aa_dat.c 13
-rw-r--r-- src/load/load_influenza_aa_dat.h 2
-rw-r--r-- src/load/load_influenza_faa.c 30
-rw-r--r-- src/load/load_influenza_faa.h 2
-rw-r--r-- src/model/gi_type_data_init.c 4
-rw-r--r-- src/model/gi_type_data_init.h 4
-rw-r--r-- src/model/sequence_data_init.c 18
-rw-r--r-- src/model/sequence_data_init.h 6
10 files changed, 99 insertions, 93 deletions
diff --git a/src/aggregator.c b/src/aggregator.c
index c9a03b5..4c4ca8e 100644
--- a/src/aggregator.c
+++ b/src/aggregator.c
@@ -45,8 +45,7 @@ main ()
/*
* Load the FASTA protein sequence data file.
*/
- printf ("Loading \"influenza.faa\" with contents of %s.\n",
- INFLUENZA_FAA);
+ printf ("Loading \"influenza.faa\" with contents of %s.\n", INFLUENZA_FAA);
load_influenza_faa (file_id, INFLUENZA_FAA);
/*
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c
index 9a0717b..1df4c8d 100644
--- a/src/assign/assign_protein_type.c
+++ b/src/assign/assign_protein_type.c
@@ -70,7 +70,7 @@ assign_protein_type (hid_t file_id)
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- sequence_data* faa_buf = malloc (sizeof(sequence_data) * faa_nrecords);
+ sequence_data *faa_buf = malloc (sizeof (sequence_data) * faa_nrecords);
size_t faa_size;
size_t faa_offset[SEQUENCE_DATA_FIELD_NUM];
@@ -86,7 +86,7 @@ assign_protein_type (hid_t file_id)
/*
* Allocate memory for the new table.
*/
- gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords);
+ gi_type_data *new_buf = malloc (sizeof (gi_type_data) * faa_nrecords);
if (new_buf == NULL)
check_error (__FILE__, __LINE__);
@@ -101,7 +101,7 @@ assign_protein_type (hid_t file_id)
hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM];
gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type);
- gi_type_data* old_buf = NULL;
+ gi_type_data *old_buf = NULL;
/*
* If the table is already present read the values into memory and
@@ -117,10 +117,11 @@ assign_protein_type (hid_t file_id)
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords);
-
- old_buf = malloc (sizeof(gi_type_data) * gi_nrecords);
-
+ printf (" Using gi_type_data cache of %i records.\n",
+ (int) gi_nrecords);
+
+ old_buf = malloc (sizeof (gi_type_data) * gi_nrecords);
+
status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset,
gi_sizes, old_buf);
if (status < 0)
@@ -129,18 +130,18 @@ assign_protein_type (hid_t file_id)
status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
}
/*
* If the table is not already present create it.
*/
else
- {
+ {
printf ("Creating gi_type_data.\n");
- const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] =
+ const char *gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] =
GI_TYPE_DATA_FIELD_NAMES;
hsize_t chunk_size = 10;
@@ -152,8 +153,7 @@ assign_protein_type (hid_t file_id)
GI_TYPE_DATA_FIELD_NUM, 0,
gi_size, gi_type_data_field_names,
gi_offset, gi_field_type,
- chunk_size, fill_data, compress,
- NULL);
+ chunk_size, fill_data, compress, NULL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
@@ -169,7 +169,7 @@ assign_protein_type (hid_t file_id)
"Allocation of cache failed.");
ENTRY e, *ep;
- for (int i = 0; i < (int)gi_nrecords; i++)
+ for (int i = 0; i < (int) gi_nrecords; i++)
{
char gi_chr[25];
snprintf (gi_chr, 25, "%i", old_buf[i].gi);
@@ -183,14 +183,14 @@ assign_protein_type (hid_t file_id)
/*
* Assign protein types to records for which the field is empty.
*/
- printf ("Records to process: %i\n", (int)faa_nrecords);
+ printf ("Records to process: %i\n", (int) faa_nrecords);
int written = 0;
- for (int i = 0; i < (int)faa_nrecords; i++)
+ for (int i = 0; i < (int) faa_nrecords; i++)
{
new_buf[i].gi = faa_buf[i].gi;
strncpy (new_buf[i].type, "", sizeof (new_buf[i].type));
strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein));
-
+
char gi_chr[25];
snprintf (gi_chr, 25, "%i", faa_buf[i].gi);
e.key = gi_chr;
@@ -199,24 +199,23 @@ assign_protein_type (hid_t file_id)
/*
* A record was not found in the cache for this gi.
*/
- if (hsearch_r (e, FIND, &ep, &htab) == 0)
+ if (hsearch_r (e, FIND, &ep, &htab) == 0)
{
-
+
/*
* Read the sequence from the database by GI.
*/
Int4 sequence_number = readdb_gi2seq (seqdb, faa_buf[i].gi, NULL);
BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number);
- if (bsp == NULL)
+ if (bsp == NULL)
{
error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__,
"Unable to find BLAST record for gi|%i. Ensure "
"the BLAST database is up-to-date with the HDF5 "
"record set. See the BLAST formatdb.log file "
- "for details.\n",
- faa_buf[i].gi);
+ "for details.\n", faa_buf[i].gi);
}
-
+
SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
"blastp",
REFDB,
@@ -224,19 +223,19 @@ assign_protein_type (hid_t file_id)
NULL,
&error_returns,
NULL);
-
+
/*
* BLAST reported an error. Write it out and continue processing.
*/
if (error_returns != NULL)
{
CharPtr msg = BlastErrorToString (error_returns);
- printf ("Warning: An error has been reported by the NCBI Toolkit "
- "API for sequence gi|%i: %s",
- faa_buf[i].gi, msg);
- free (msg);
+ printf
+ ("Warning: An error has been reported by the NCBI Toolkit "
+ "API for sequence gi|%i: %s", faa_buf[i].gi, msg);
+ free (msg);
}
-
+
/*
* A hit was found. Record the first hit as the protein type.
* Skip the first 4 characters and eat the "lcl|".
@@ -245,18 +244,18 @@ assign_protein_type (hid_t file_id)
{
Char target_id_buf[BUFFER_LEN + 1];
SeqIdPtr target_id = SeqAlignId (seqalign, 1);
- SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT,
+ SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT,
BUFFER_LEN);
// Species Type
new_buf[i].type[0] = target_id_buf[4];
new_buf[i].type[1] = '\0';
-
+
// Protein Type (Skip the underscore in the string).
- strncpy (new_buf[i].protein, &target_id_buf[6],
- sizeof (new_buf[i].protein));
+ strncpy (new_buf[i].protein, &target_id_buf[6],
+ sizeof (new_buf[i].protein));
}
-
+
/*
* BLAST did not find any hits.
*/
@@ -265,73 +264,74 @@ assign_protein_type (hid_t file_id)
printf ("Warning: Unable to identify protein type for sequence "
"gi|%i\n", faa_buf[i].gi);
}
-
+
/*
* Clean up memory for the next iteration.
*/
seqalign = SeqAlignSetFree (seqalign);
bsp = BioseqFree (bsp);
-
- } // End existing entry not found.
+
+ } // End existing entry not found.
/*
* Hash table entry found. Keep the old value.
*/
else
{
- gi_type_data* old_value = (gi_type_data*)ep->data;
+ gi_type_data *old_value = (gi_type_data *) ep->data;
new_buf[i].gi = old_value->gi;
- strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type));
- strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein));
+ strncpy (new_buf[i].type, old_value->type,
+ sizeof (new_buf[i].type));
+ strncpy (new_buf[i].protein, old_value->protein,
+ sizeof (new_buf[i].protein));
}
-
+
/*
* Write the data out to the file.
*/
- if ( (i % 1000 == 0) && (i > 0) )
+ if ((i % 1000 == 0) && (i > 0))
{
status = H5TBappend_records (file_id, "gi_type_data", 1000,
- gi_size, gi_offset, gi_sizes,
- &new_buf[i-1000]);
+ gi_size, gi_offset, gi_sizes,
+ &new_buf[i - 1000]);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
written = i;
- printf ("Processed %i of %i records.\n", i, (int)faa_nrecords);
+ printf ("Processed %i of %i records.\n", i, (int) faa_nrecords);
}
-
+
}
/*
* Write out records from the last bin if it was less than 1000
* records in size.
*/
- if ((int)faa_nrecords < 1000)
+ if ((int) faa_nrecords < 1000)
{
status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords,
- gi_size, gi_offset, gi_sizes,
- new_buf);
+ gi_size, gi_offset, gi_sizes, new_buf);
}
else
{
- status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written,
- gi_size, gi_offset, gi_sizes,
- &new_buf[written]);
+ status =
+ H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written,
+ gi_size, gi_offset, gi_sizes, &new_buf[written]);
}
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
-
+
free (faa_buf);
free (old_buf);
free (new_buf);
@@ -339,6 +339,6 @@ assign_protein_type (hid_t file_id)
options = BLASTOptionDelete (options);
readdb_destruct (seqdb);
-
+
return;
}
diff --git a/src/load/load_influenza_aa_dat.c b/src/load/load_influenza_aa_dat.c
index 3826349..b50fcab 100644
--- a/src/load/load_influenza_aa_dat.c
+++ b/src/load/load_influenza_aa_dat.c
@@ -15,7 +15,7 @@
#define NFIELDS (hsize_t) 11
void
-load_influenza_aa_dat (hid_t file_id, const char* file_name)
+load_influenza_aa_dat (hid_t file_id, const char *file_name)
{
/*
* Model the data using native types.
@@ -225,7 +225,8 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name)
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- status = H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords);
+ status =
+ H5TBdelete_record (file_id, "influenza_aa.dat", 0, nrecords);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
@@ -233,7 +234,7 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name)
H5TBappend_records (file_id, "influenza_aa.dat", 1, dst_size,
dst_offset, dst_sizes, &p_data);
if (status < 0)
- check_h5_error (status, __FILE__, __LINE__);
+ check_h5_error (status, __FILE__, __LINE__);
}
/*
@@ -242,8 +243,10 @@ load_influenza_aa_dat (hid_t file_id, const char* file_name)
else
{
herr_t status = H5TBmake_table ("influenza_aa.dat", file_id,
- "influenza_aa.dat", NFIELDS, 1, dst_size,
- field_names, dst_offset, field_type,
+ "influenza_aa.dat", NFIELDS, 1,
+ dst_size,
+ field_names, dst_offset,
+ field_type,
chunk_size, fill_data, compress,
&p_data);
if (status < 0)
diff --git a/src/load/load_influenza_aa_dat.h b/src/load/load_influenza_aa_dat.h
index 97e36f8..c4ce152 100644
--- a/src/load/load_influenza_aa_dat.h
+++ b/src/load/load_influenza_aa_dat.h
@@ -7,6 +7,6 @@
* Load the supplementary protein data from the NCBI influenza_aa.dat
* file.
*/
-void load_influenza_aa_dat (hid_t file_id, const char* file_name);
+void load_influenza_aa_dat (hid_t file_id, const char *file_name);
#endif // LOAD_INFLUENZA_AA_DAT_H
diff --git a/src/load/load_influenza_faa.c b/src/load/load_influenza_faa.c
index 04bf05b..80eb3ae 100644
--- a/src/load/load_influenza_faa.c
+++ b/src/load/load_influenza_faa.c
@@ -8,7 +8,7 @@
#include <stdlib.h>
void
-load_influenza_faa (hid_t file_id, const char* file_name)
+load_influenza_faa (hid_t file_id, const char *file_name)
{
size_t dst_size;
size_t dst_offset[SEQUENCE_DATA_FIELD_NUM];
@@ -16,7 +16,7 @@ load_influenza_faa (hid_t file_id, const char* file_name)
hid_t field_type[SEQUENCE_DATA_FIELD_NUM];
sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type);
-
+
hsize_t chunk_size = 10;
int *fill_data = NULL;
int compress = 0;
@@ -31,17 +31,17 @@ load_influenza_faa (hid_t file_id, const char* file_name)
while (getline (&line, &len, dat) != -1)
{
- current_line++;
+ current_line++;
// Header line.
if (line[0] == '>')
{
char *running = strdup (line);
char *token = NULL;
-
+
// Eat the ">gi".
strsep (&running, "|");
-
+
// GI value.
token = strsep (&running, "|");
p_data.gi = atoi (token);
@@ -50,13 +50,13 @@ load_influenza_faa (hid_t file_id, const char* file_name)
strsep (&running, "|");
// GB value.
- strncpy (p_data.gb, strsep(&running, "|"), sizeof (p_data.gb));
+ strncpy (p_data.gb, strsep (&running, "|"), sizeof (p_data.gb));
// Description value.
strncpy (p_data.description, strsep (&running, "|"),
sizeof (p_data.description));
- const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] =
+ const char *sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] =
SEQUENCE_DATA_FIELD_NAMES;
if (current_line == 1)
@@ -68,12 +68,14 @@ load_influenza_faa (hid_t file_id, const char* file_name)
{
hsize_t nfields = 0;
hsize_t nrecords = 0;
- herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields,
- &nrecords);
+ herr_t status =
+ H5TBget_table_info (file_id, "influenza.faa", &nfields,
+ &nrecords);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
- status = H5TBdelete_record (file_id, "influenza.faa", 0, nrecords);
+ status =
+ H5TBdelete_record (file_id, "influenza.faa", 0, nrecords);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
@@ -92,9 +94,11 @@ load_influenza_faa (hid_t file_id, const char* file_name)
herr_t status = H5TBmake_table ("influenza.faa", file_id,
"influenza.faa",
SEQUENCE_DATA_FIELD_NUM, 1,
- dst_size, sequence_data_field_names,
+ dst_size,
+ sequence_data_field_names,
dst_offset, field_type,
- chunk_size, fill_data, compress,
+ chunk_size, fill_data,
+ compress,
&p_data);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
@@ -114,7 +118,7 @@ load_influenza_faa (hid_t file_id, const char* file_name)
free (running);
}
-
+
}
if (line)
diff --git a/src/load/load_influenza_faa.h b/src/load/load_influenza_faa.h
index 1ad5797..070bdea 100644
--- a/src/load/load_influenza_faa.h
+++ b/src/load/load_influenza_faa.h
@@ -6,6 +6,6 @@
/*
* Load the protein sequence data from the NCBI influenza.faa file.
*/
-void load_influenza_faa (hid_t file_id, const char* file_name);
+void load_influenza_faa (hid_t file_id, const char *file_name);
#endif // LOAD_INFLUENZA_FAA_H
diff --git a/src/model/gi_type_data_init.c b/src/model/gi_type_data_init.c
index 4a161c7..54f47a7 100644
--- a/src/model/gi_type_data_init.c
+++ b/src/model/gi_type_data_init.c
@@ -7,8 +7,8 @@
* struct. Perhaps an HDF5 precompiler could do such a thing.
*/
void
-gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
- hid_t *field_type)
+gi_type_data_init (size_t * dst_size, size_t * dst_offset, size_t * dst_sizes,
+ hid_t * field_type)
{
*dst_size = sizeof (gi_type_data);
diff --git a/src/model/gi_type_data_init.h b/src/model/gi_type_data_init.h
index 5c45cba..080f035 100644
--- a/src/model/gi_type_data_init.h
+++ b/src/model/gi_type_data_init.h
@@ -8,7 +8,7 @@
* structures are used by the HDF5 API.
*/
void
-gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
- hid_t *field_type);
+gi_type_data_init (size_t * dst_size, size_t * dst_offset, size_t * dst_sizes,
+ hid_t * field_type);
#endif // GI_TYPE_DATA_INIT_H
diff --git a/src/model/sequence_data_init.c b/src/model/sequence_data_init.c
index f6b3b1f..21881bf 100644
--- a/src/model/sequence_data_init.c
+++ b/src/model/sequence_data_init.c
@@ -1,28 +1,28 @@
#include "sequence_data_init.h"
#include "sequence_data.h"
-void
-sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
- hid_t *field_type)
+void
+sequence_data_init (size_t * dst_size, size_t * dst_offset,
+ size_t * dst_sizes, hid_t * field_type)
{
*dst_size = sizeof (sequence_data);
-
+
dst_offset[0] = HOFFSET (sequence_data, gi);
dst_offset[1] = HOFFSET (sequence_data, gb);
dst_offset[2] = HOFFSET (sequence_data, description);
-
+
sequence_data dst_buf[1];
-
+
dst_sizes[0] = sizeof (dst_buf[0].gi);
dst_sizes[1] = sizeof (dst_buf[0].gb);
dst_sizes[2] = sizeof (dst_buf[0].description);
-
+
field_type[0] = H5T_NATIVE_INT;
-
+
hid_t gb_type = H5Tcopy (H5T_C_S1);
H5Tset_size (gb_type, 9);
field_type[1] = gb_type;
-
+
hid_t description_type = H5Tcopy (H5T_C_S1);
H5Tset_size (description_type, 196);
field_type[2] = description_type;
diff --git a/src/model/sequence_data_init.h b/src/model/sequence_data_init.h
index c87e7e6..1a2c75c 100644
--- a/src/model/sequence_data_init.h
+++ b/src/model/sequence_data_init.h
@@ -7,8 +7,8 @@
* Initialize the structures describing sequence_data. These
* descriptive structures are used by the HDF5 API.
*/
-void
-sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
- hid_t *field_type);
+void
+sequence_data_init (size_t * dst_size, size_t * dst_offset,
+ size_t * dst_sizes, hid_t * field_type);
#endif // SEQUENCE_DATA_INIT_H

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.