author | Don Pellegrino <don@drexel.edu> | 2010-01-18 18:46:10 (GMT) |
---|---|---|
committer | Don Pellegrino <don@drexel.edu> | 2010-01-18 18:46:10 (GMT) |
commit | 0871f6cf645c20673e45946c3ba3ddaa2ffb47aa (patch) (side-by-side diff) | |
tree | f9bbbb4d0e555f2562308f4f0ec0f9d8e3ef39fa | |
parent | a7cc532248146968a9296be7db42830e35525afa (diff) | |
download | exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.zip exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.tar.gz exp007-0871f6cf645c20673e45946c3ba3ddaa2ffb47aa.tar.bz2 |
Added error handling for the case where a GI record is not found in the BLAST database. Added writing of the last bin of record updates to the file.
-rw-r--r-- | src/assign_protein_type.c | 73 |
1 file changed, 57 insertions, 16 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 166e787..ec3a959 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c @@ -83,6 +83,7 @@ assign_protein_type (hid_t file_id) * Assign protein types to records for which the field is empty. */ printf ("Records to process: %i\n", (int)nrecords); + bool updates_pending = false; for (int i = 0; i < nrecords; i++) { @@ -92,6 +93,14 @@ assign_protein_type (hid_t file_id) */ Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL); BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); + if (bsp == NULL) + { + error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, + "Unable to find BLAST record for gi|%i. Ensure the BLAST " + "database is up-to-date with the HDF5 record set. See the " + "BLAST formatdb.log file for details.\n", + dst_buf[i].gi); + } SeqAlignPtr seqalign = BioseqBlastEngine (bsp, "blastp", @@ -123,6 +132,7 @@ assign_protein_type (hid_t file_id) SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); strncpy (dst_buf[i].protein_type, &target_id_buf[6], sizeof (dst_buf[i].protein_type)); + updates_pending = true; } /* @@ -140,25 +150,56 @@ assign_protein_type (hid_t file_id) seqalign = SeqAlignSetFree (seqalign); bsp = BioseqFree (bsp); - /* - * Write the data out to the file. - */ - if ( (i % 1000 == 0) && (i > 0) ) - { - status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000, - dst_size, dst_offset, dst_sizes, - &dst_buf[i-1000]); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); + } - status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); + /* + * Write the data out to the file. 
+ */ + if ( (i % 1000 == 0) && (i > 0) && updates_pending) + { + status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000, + dst_size, dst_offset, dst_sizes, + &dst_buf[i-1000]); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + + status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + + updates_pending = false; + + printf ("Processed %i of %i records.\n", i, (int)nrecords); + } + + } - printf ("Processed %i of %i records.\n", i, (int)nrecords); - } - } + /* + * Write out records from the last bin if it was less than 1000 + * records in size. + */ + if (updates_pending) + { + if ((int)nrecords < 1000) + { + status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords, + dst_size, dst_offset, dst_sizes, + dst_buf); + } + else + { + status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000, + dst_size, dst_offset, dst_sizes, + &dst_buf[nrecords-1000]); + } + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + + status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + updates_pending = false; } free (dst_buf); |