#include "assign_protein_type.h" #include "check_ncbi_error.h" #include "check_h5_error.h" #include #include #include #include #include #include /* * BLAST database containing all of the influenza protein sequences. */ #define SEQDB "/home/don/exp004/influenzadb/influenzadb" /* * BLAST reference database of prototypical protein types. */ #define REFDB "/home/don/exp004/influenzadb/proteinnames" #define BUFFER_LEN 50 void assign_protein_type (hid_t file_id) { /* * Iterate through the records for which no protein type has been * assigned. Create a BioSeq Pointer to the data and then use this * as input to the BLAST run against the reference database of * prototypical sequence records. */ /* * Make BLAST messages reportable. */ ErrSetMessageLevel (SEV_WARNING); /* * Open the BLAST sequence database. */ ReadDBFILEPtr seqdb = readdb_new (SEQDB, true); /* * Get default BLAST options. */ BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true); ValNodePtr error_returns = NULL; /* * Read the data from HDF5 file. */ hsize_t nfields; hsize_t nrecords; herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, &nrecords); if (status < 0) check_h5_error (status, __FILE__, __LINE__); /* * todo: Allocate memory of nrecords for dst_buf. * * todo: Refactor code to share structres in read and write HDF5 * calls. */ status = H5TBread_table (file_id, "influenza.faa", dst_size, dst_offset, dst_sizes, dst_buf); if (status < 0) check_h5_error (status, __FILE__, __LINE__); for (int i = 0; i < nrecords; i++) { } /* * Read the sequence from the database by GI. */ Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL); BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); SeqAlignPtr seqalign = BioseqBlastEngine (bsp, "blastp", REFDB, options, NULL, &error_returns, NULL); if (error_returns != NULL) check_ncbi_error (error_returns, __FILE__, __LINE__); if (seqalign != NULL) { Char target_id_buf[BUFFER_LEN + 1]; SeqIdPtr target_id = SeqAlignId (seqalign, 1); SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); printf ("%s\n", target_id_buf); } // Clean up memory for the next ieration. seqalign = SeqAlignSetFree (seqalign); bsp = BioseqFree (bsp); options = BLASTOptionDelete (options); return; }