summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-18 18:46:10 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-18 18:46:10 (GMT)
commit 0871f6cf645c20673e45946c3ba3ddaa2ffb47aa (patch) (side-by-side diff)
tree f9bbbb4d0e555f2562308f4f0ec0f9d8e3ef39fa
parent a7cc532248146968a9296be7db42830e35525afa (diff)
download exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.zip
exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.tar.gz
exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.tar.bz2
Added error handling for the case where a GI record is not found in the BLAST database. Added writing of the last bin of record updates to the file.
-rw-r--r-- src/assign_protein_type.c 73
1 file changed, 57 insertions, 16 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 166e787..ec3a959 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -83,6 +83,7 @@ assign_protein_type (hid_t file_id)
* Assign protein types to records for which the field is empty.
*/
printf ("Records to process: %i\n", (int)nrecords);
+ bool updates_pending = false;
for (int i = 0; i < nrecords; i++)
{
@@ -92,6 +93,14 @@ assign_protein_type (hid_t file_id)
*/
Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL);
BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number);
+ if (bsp == NULL)
+ {
+ error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__,
+ "Unable to find BLAST record for gi|%i. Ensure the BLAST "
+ "database is up-to-date with the HDF5 record set. See the "
+ "BLAST formatdb.log file for details.\n",
+ dst_buf[i].gi);
+ }
SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
"blastp",
@@ -123,6 +132,7 @@ assign_protein_type (hid_t file_id)
SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);
strncpy (dst_buf[i].protein_type, &target_id_buf[6],
sizeof (dst_buf[i].protein_type));
+ updates_pending = true;
}
/*
@@ -140,25 +150,56 @@ assign_protein_type (hid_t file_id)
seqalign = SeqAlignSetFree (seqalign);
bsp = BioseqFree (bsp);
- /*
- * Write the data out to the file.
- */
- if ( (i % 1000 == 0) && (i > 0) )
- {
- status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000,
- dst_size, dst_offset, dst_sizes,
- &dst_buf[i-1000]);
- if (status < 0)
- check_h5_error (status, __FILE__, __LINE__);
+ }
- status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
- if (status < 0)
- check_h5_error (status, __FILE__, __LINE__);
+ /*
+ * Write the data out to the file.
+ */
+ if ( (i % 1000 == 0) && (i > 0) && updates_pending)
+ {
+ status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000,
+ dst_size, dst_offset, dst_sizes,
+ &dst_buf[i-1000]);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+
+ status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+
+ updates_pending = false;
+
+ printf ("Processed %i of %i records.\n", i, (int)nrecords);
+ }
+
+ }
- printf ("Processed %i of %i records.\n", i, (int)nrecords);
- }
- }
+ /*
+ * Write out records from the last bin if it was less than 1000
+ * records in size.
+ */
+ if (updates_pending)
+ {
+ if ((int)nrecords < 1000)
+ {
+ status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords,
+ dst_size, dst_offset, dst_sizes,
+ dst_buf);
+ }
+ else
+ {
+ status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000,
+ dst_size, dst_offset, dst_sizes,
+ &dst_buf[nrecords-1000]);
+ }
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+
+ status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+ updates_pending = false;
}
free (dst_buf);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.