summaryrefslogtreecommitdiffstats
Side-by-side diff
-rw-r--r--configure.ac20
-rw-r--r--src/Makefile.am6
-rw-r--r--src/aggregator.c14
-rw-r--r--src/assign_protein_type.c72
-rw-r--r--src/assign_protein_type.h6
-rw-r--r--src/check_error.c14
-rw-r--r--src/check_error.h11
-rw-r--r--src/check_h5_error.c12
-rw-r--r--src/check_h5_error.h12
-rw-r--r--src/check_ncbi_error.c13
-rw-r--r--src/check_ncbi_error.h13
-rw-r--r--src/load_influenza_aa_dat.c31
12 files changed, 211 insertions, 13 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 2ba59a7..643ea3f 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -1,8 +1,80 @@
#include "assign_protein_type.h"
+#include "check_ncbi_error.h"
+#include <ncbi.h>
+#include <readdb.h>
+#include <blast.h>
+#include <salpacc.h>
+#include <stdbool.h>
+
+/*
+ * BLAST database containing all of the influenza protein sequences.
+ */
+#define SEQDB "/home/don/exp004/influenzadb/influenzadb"
+
+/*
+ * BLAST reference database of prototypical protein types.
+ */
+#define REFDB "/home/don/exp004/influenzadb/proteinnames"
+
+#define BUFFER_LEN 50
void
assign_protein_type (hid_t file_id)
{
+ /*
+ * Iterate through the records for which no protein type has been
+ * assigned. Create a BioSeq Pointer to the data and then use this
+ * as input to the BLAST run against the reference database of
+ * prototypical sequence records.
+ */
+
+ /*
+ * Make BLAST messages reportable.
+ */
+ ErrSetMessageLevel (SEV_WARNING);
+
+ /*
+ * Open the BLAST sequence database.
+ */
+ ReadDBFILEPtr seqdb = readdb_new (SEQDB, true);
+
+ /*
+ * Get default BLAST options.
+ */
+ BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true);
+ ValNodePtr error_returns = NULL;
+
+ /*
+ * Read the sequence from the database by GI.
+ */
+ Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);
+ BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number);
+
+ SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
+ "blastp",
+ REFDB,
+ options,
+ NULL,
+ &error_returns,
+ NULL);
+
+ if (error_returns != NULL)
+ check_ncbi_error (error_returns, __FILE__, __LINE__);
+
+ if (seqalign != NULL)
+ {
+ Char target_id_buf[BUFFER_LEN + 1];
+ SeqIdPtr target_id = SeqAlignId (seqalign, 1);
+ SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);
+ printf ("%s\n",
+ target_id_buf);
+ }
+
+ // Clean up memory for the next ieration.
+ seqalign = SeqAlignSetFree (seqalign);
+ bsp = BioseqFree (bsp);
+
+ options = BLASTOptionDelete (options);
return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.