summaryrefslogtreecommitdiffstats
authorDon Pellegrino <don@drexel.edu>2010-01-17 23:44:57 (GMT)
committer Don Pellegrino <don@drexel.edu>2010-01-17 23:44:57 (GMT)
commitb2e4ace6b0265f4575733bfaa38519167074c477 (patch) (unidiff)
tree5d7dc32c19e6a33ac45dc1fc9b702919a4c7721b
parent22bc29abeb6c0c3354bbd857cac53af14153bb2f (diff)
downloadexp007-b2e4ace6b0265f4575733bfaa38519167074c477.zip
exp007-b2e4ace6b0265f4575733bfaa38519167074c477.tar.gz
exp007-b2e4ace6b0265f4575733bfaa38519167074c477.tar.bz2
Added error checking routines. Added implementation of
assign_protein_type tested against a single hard-coded sequence input. The next step is to iterate over all records in the HDF5 collection that don't have a type assigned and to assign a type value.
-rw-r--r--configure.ac20
-rw-r--r--src/Makefile.am6
-rw-r--r--src/aggregator.c14
-rw-r--r--src/assign_protein_type.c72
-rw-r--r--src/assign_protein_type.h6
-rw-r--r--src/check_error.c14
-rw-r--r--src/check_error.h11
-rw-r--r--src/check_h5_error.c12
-rw-r--r--src/check_h5_error.h12
-rw-r--r--src/check_ncbi_error.c13
-rw-r--r--src/check_ncbi_error.h13
-rw-r--r--src/load_influenza_aa_dat.c31
12 files changed, 211 insertions, 13 deletions
diff --git a/configure.ac b/configure.ac
index 3104df8..f45958c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -24,4 +24,24 @@ AC_CHECK_HEADERS([mpi.h],[],
24AC_CHECK_HEADERS([hdf5.h],[],24AC_CHECK_HEADERS([hdf5.h],[],
25[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)])25[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)])
2626
27########################
28# MODULE: NCBI Toolkit #
29########################
30
31# Check for the NCBI ToolBox libraries.
32AC_SEARCH_LIBS([log10],[m],[],
33[AC_MSG_ERROR(The C Math Library is needed to build the system.)])
34
35AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[],
36[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
37
38AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[],
39[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
40
41AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[],
42[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
43
44AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[],
45[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
46
27AC_OUTPUT47AC_OUTPUT
diff --git a/src/Makefile.am b/src/Makefile.am
index 8098f33..fcbcdd5 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -3,6 +3,9 @@ bin_PROGRAMS = aggregator
3aggregator_SOURCES = \3aggregator_SOURCES = \
4 aggregator.c \4 aggregator.c \
5 assign_protein_type.c \5 assign_protein_type.c \
6 check_error.c \
7 check_h5_error.c \
8 check_ncbi_error.c \
6 load_influenza_aa_dat.c \9 load_influenza_aa_dat.c \
7 load_influenza_faa.c10 load_influenza_faa.c
811
@@ -10,6 +13,9 @@ aggregator_LDADD = -lhdf5
1013
11noinst_HEADERS = \14noinst_HEADERS = \
12 assign_protein_type.h \15 assign_protein_type.h \
16 check_error.h \
17 check_h5_error.h \
18 check_ncbi_error.h \
13 load_influenza_aa_dat.h \19 load_influenza_aa_dat.h \
14 load_influenza_faa.h20 load_influenza_faa.h
1521
diff --git a/src/aggregator.c b/src/aggregator.c
index 228e5c6..20da6df 100644
--- a/src/aggregator.c
+++ b/src/aggregator.c
@@ -3,6 +3,8 @@
3 * container.3 * container.
4 */4 */
55
6#include "assign_protein_type.h"
7#include "check_h5_error.h"
6#include "load_influenza_aa_dat.h"8#include "load_influenza_aa_dat.h"
7#include "load_influenza_faa.h"9#include "load_influenza_faa.h"
810
@@ -14,22 +16,26 @@ main()
14 /*16 /*
15 * Create the HDF5 file.17 * Create the HDF5 file.
16 */18 */
17 hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);19 // hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
1820
19 /*21 /*
20 * Load the supplementary protein data file.22 * Load the supplementary protein data file.
21 */23 */
22 load_influenza_aa_dat (file_id);24 // load_influenza_aa_dat (file_id);
2325
24 /*26 /*
25 * Load the FASTA protein sequence data file.27 * Load the FASTA protein sequence data file.
26 */28 */
27 load_influenza_faa (file_id);29 // load_influenza_faa (file_id);
2830
29 /*31 /*
30 * Close the HD5 file.32 * Close the HD5 file.
31 */33 */
32 H5Fclose (file_id);34 // herr_t status = H5Fclose (file_id);
35 // if (status < 0)
36 // check_h5_error (status, __FILE__, __LINE__);
37
38 assign_protein_type (0);
3339
34 return 0;40 return 0;
35}41}
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 2ba59a7..643ea3f 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -1,8 +1,80 @@
1#include "assign_protein_type.h"1#include "assign_protein_type.h"
2#include "check_ncbi_error.h"
3#include <ncbi.h>
4#include <readdb.h>
5#include <blast.h>
6#include <salpacc.h>
7#include <stdbool.h>
8
9/*
10 * BLAST database containing all of the influenza protein sequences.
11 */
12#define SEQDB "/home/don/exp004/influenzadb/influenzadb"
13
14/*
15 * BLAST reference database of prototypical protein types.
16 */
17#define REFDB "/home/don/exp004/influenzadb/proteinnames"
18
19#define BUFFER_LEN 50
220
3void21void
4assign_protein_type (hid_t file_id)22assign_protein_type (hid_t file_id)
5{23{
24 /*
25 * Iterate through the records for which no protein type has been
26 * assigned. Create a BioSeq Pointer to the data and then use this
27 * as input to the BLAST run against the reference database of
28 * prototypical sequence records.
29 */
30
31 /*
32 * Make BLAST messages reportable.
33 */
34 ErrSetMessageLevel (SEV_WARNING);
35
36 /*
37 * Open the BLAST sequence database.
38 */
39 ReadDBFILEPtr seqdb = readdb_new (SEQDB, true);
40
41 /*
42 * Get default BLAST options.
43 */
44 BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true);
45 ValNodePtr error_returns = NULL;
46
47 /*
48 * Read the sequence from the database by GI.
49 */
50 Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);
51 BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number);
52
53 SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
54 "blastp",
55 REFDB,
56 options,
57 NULL,
58 &error_returns,
59 NULL);
60
61 if (error_returns != NULL)
62 check_ncbi_error (error_returns, __FILE__, __LINE__);
63
64 if (seqalign != NULL)
65 {
66 Char target_id_buf[BUFFER_LEN + 1];
67 SeqIdPtr target_id = SeqAlignId (seqalign, 1);
68 SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);
69 printf ("%s\n",
70 target_id_buf);
71 }
72
73 // Clean up memory for the next ieration.
74 seqalign = SeqAlignSetFree (seqalign);
75 bsp = BioseqFree (bsp);
76
77 options = BLASTOptionDelete (options);
678
7 return;79 return;
8}80}
diff --git a/src/assign_protein_type.h b/src/assign_protein_type.h
index 312b774..1dfb8e6 100644
--- a/src/assign_protein_type.h
+++ b/src/assign_protein_type.h
@@ -4,7 +4,11 @@
4#include <hdf5.h>4#include <hdf5.h>
55
6/*6/*
7 * Determine the protein type for each protein sequence record.7 * Determine the protein type for each protein sequence record. The
8 * technique used by NCBI is used here. A BLAST database of
9 * prototypical protein sequences serves as the reference. Each input
10 * sequence is BLASTed against this database. The first hit is used
11 * to assign a protein type to sequence.
8 */12 */
9void13void
10assign_protein_type (hid_t file_id);14assign_protein_type (hid_t file_id);
diff --git a/src/check_error.c b/src/check_error.c
new file mode 100644
index 0000000..70c62c4
--- a/dev/null
+++ b/src/check_error.c
@@ -0,0 +1,14 @@
1#include "check_error.h"
2#include <error.h>
3#include <errno.h>
4#include <stdlib.h>
5
6void
7check_error (const char *filename, const unsigned int linenum)
8{
9 if (errno)
10 error_at_line (EXIT_FAILURE, errno, filename, linenum,
11 "An error has been detected within the application.");
12
13 return;
14}
diff --git a/src/check_error.h b/src/check_error.h
new file mode 100644
index 0000000..33acc63
--- a/dev/null
+++ b/src/check_error.h
@@ -0,0 +1,11 @@
1#ifndef CHECK_ERROR_H
2#define CHECK_ERROR_H
3
4/*
5 * Check the error state. Reports and error message and exits if an
6 * error has occured.
7 */
8void
9check_error (const char *filename, unsigned int linenum);
10
11#endif // CHECK_ERROR_H
diff --git a/src/check_h5_error.c b/src/check_h5_error.c
new file mode 100644
index 0000000..30fc87c
--- a/dev/null
+++ b/src/check_h5_error.c
@@ -0,0 +1,12 @@
1#include "check_h5_error.h"
2#include <error.h>
3#include <stdlib.h>
4
5void
6check_h5_error (herr_t status, const char* filename, unsigned int linenum)
7{
8 error_at_line (EXIT_FAILURE, 0, filename, linenum,
9 "An error has been reported by the HDF5 API.");
10
11 return;
12}
diff --git a/src/check_h5_error.h b/src/check_h5_error.h
new file mode 100644
index 0000000..74730cd
--- a/dev/null
+++ b/src/check_h5_error.h
@@ -0,0 +1,12 @@
1#ifndef CHECK_H5_ERROR_H
2#define CHECK_H5_ERROR_H
3
4#include <hdf5.h>
5
6/*
7 * Handle errors from the HDF5 API.
8 */
9void
10check_h5_error (herr_t status, const char* filename, unsigned int linenum);
11
12#endif // CHECK_H5_ERROR_H
diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c
new file mode 100644
index 0000000..3caa7a9
--- a/dev/null
+++ b/src/check_ncbi_error.c
@@ -0,0 +1,13 @@
1#include "check_ncbi_error.h"
2
3void
4check_ncbi_error (ValNodePtr error_returns,
5 const char* filename,
6 unsigned int linenum)
7{
8 error_at_line (EXIT_FAILURE, 0, filename, linenum,
9 "An error has been reported by the NCBI Toolkit API: %s",
10 BlastErrorToString (error_returns));
11
12 return;
13}
diff --git a/src/check_ncbi_error.h b/src/check_ncbi_error.h
new file mode 100644
index 0000000..c27c56d
--- a/dev/null
+++ b/src/check_ncbi_error.h
@@ -0,0 +1,13 @@
1#ifndef CHECK_NCBI_ERROR_H
2#define CHECK_NCBI_ERROR_H
3
4#include <ncbi.h>
5
6/*
7 * Handle errors from the NCBI Toolkit API.
8 */
9void
10check_ncbi_error (ValNodePtr error_returns,
11 const char* filename, unsigned int linenum);
12
13#endif // CHECK_NCBI_ERROR_H
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 493c7db..91ef415 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -6,6 +6,8 @@
6 */6 */
77
8#include "load_influenza_aa_dat.h"8#include "load_influenza_aa_dat.h"
9#include "check_error.h"
10#include "check_h5_error.h"
9#include "hdf5_hl.h"11#include "hdf5_hl.h"
10#include <string.h>12#include <string.h>
11#include <stdlib.h>13#include <stdlib.h>
@@ -140,7 +142,10 @@ load_influenza_aa_dat (hid_t file_id)
140 * Insert the records.142 * Insert the records.
141 */143 */
142 supplementary_data p_data;144 supplementary_data p_data;
143 FILE* dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", "r");145 FILE* dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat",
146 "r");
147 if (dat == NULL)
148 check_error (__FILE__, __LINE__);
144 char *line = NULL;149 char *line = NULL;
145 size_t len = 0;150 size_t len = 0;
146 int current_line = 0;151 int current_line = 0;
@@ -205,13 +210,23 @@ load_influenza_aa_dat (hid_t file_id)
205 strncpy(p_data.full_length_indicator, strsep (&running, "\t"),210 strncpy(p_data.full_length_indicator, strsep (&running, "\t"),
206 sizeof(p_data.full_length_indicator));211 sizeof(p_data.full_length_indicator));
207212
208 if (current_line == 1) 213 if (current_line == 1)
209 H5TBmake_table ("influenza_aa.dat", file_id, TABLE_NAME,NFIELDS,1,214 {
210 dst_size,field_names, dst_offset, field_type,215 herr_t status = H5TBmake_table ("influenza_aa.dat", file_id,
211 chunk_size, fill_data, compress, &p_data);216 TABLE_NAME, NFIELDS, 1,dst_size,
212 else 217 field_names, dst_offset, field_type,
213 H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, dst_offset,218 chunk_size, fill_data, compress,
214 dst_sizes, &p_data);219 &p_data);
220 if (status < 0)
221 check_h5_error (status, __FILE__, __LINE__);
222 }
223 else
224 {
225 herr_t status = H5TBappend_records (file_id, TABLE_NAME, 1, dst_size,
226 dst_offset, dst_sizes, &p_data);
227 if (status < 0)
228 check_h5_error (status, __FILE__, __LINE__);
229 }
215230
216 if (running)231 if (running)
217 free (running);232 free (running);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.