summary | refs | log | tree | commit | diff | stats
author: Don Pellegrino <don@drexel.edu> 2010-01-25 05:17:28 (GMT)
committer: Don Pellegrino <don@drexel.edu> 2010-01-25 05:17:28 (GMT)
commit: 72edf83cbac32ea26f056ae85373ad7179707333 (patch) (unidiff)
tree: d65248cdfedf95ae296f87a1abe16403ddd18bb8
parent: c51d559601371ac79e7157c0b35752b6141cac40 (diff)
download: exp007-72edf83cbac32ea26f056ae85373ad7179707333.zip
exp007-72edf83cbac32ea26f056ae85373ad7179707333.tar.gz
exp007-72edf83cbac32ea26f056ae85373ad7179707333.tar.bz2
Added the initialization of a sparse matrix to be used to track the
pairwise BLAST scores for all influenza sequences. Modified the Autoconf input so that libraries are automatically added to LIBS rather than listed manually in Makefile.am. Added the use of the PETSc API, which provides the sparse matrix implementation.
-rw-r--r-- configure.ac 62
-rw-r--r-- doc/build-cobalt.txt 24
-rw-r--r-- src/Makefile.am 6
-rw-r--r-- src/assign/assign_blast_scores.c 27
-rw-r--r-- src/assign/assign_blast_scores.h 12
-rw-r--r-- src/updator.c 20
6 files changed, 127 insertions, 24 deletions
diff --git a/configure.ac b/configure.ac
index 59c5f36..f1ecdf9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,20 +7,16 @@ AC_CONFIG_FILES([
7 src/Makefile7 src/Makefile
8])8])
99
10################10# Headers Checks
11# MODULE: HDF5 #
12################
1311
14AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],12AC_CHECK_HEADERS([hdf5.h],[],
15[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],13[AC_MSG_ERROR("The HDF5 headers are needed to build the system.")])
16[-lirc -lmpi -lz -lsz])
1714
18AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],15AC_CHECK_HEADERS([petscconf.h],[],
19[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],16[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
20[-lirc -lhdf5 -lmpi -lz -lsz])
2117
22AC_CHECK_HEADERS([hdf5.h],[],18AC_CHECK_HEADERS([petscmat.h],[],
23[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)])19[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
2420
25########################21########################
26# MODULE: NCBI Toolkit #22# MODULE: NCBI Toolkit #
@@ -28,18 +24,52 @@ AC_CHECK_HEADERS([hdf5.h],[],
2824
29# Check for the NCBI ToolBox libraries.25# Check for the NCBI ToolBox libraries.
30AC_SEARCH_LIBS([log10],[m],[],26AC_SEARCH_LIBS([log10],[m],[],
31[AC_MSG_ERROR(The C Math Library is needed to build the system.)])27[AC_MSG_ERROR("The C Math Library is needed to build the system.")])
3228
33AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[],29AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[],
34[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])30[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
3531
36AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[],32AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[],
37[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])33[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
3834
39AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[],35AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[],
40[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])36[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
4137
42AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[],38AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[],
43[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])39[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
40
41################
42# MODULE: HDF5 #
43################
44
45AC_SEARCH_LIBS([__intel_rtc_uninit_use],[irc])
46AC_SEARCH_LIBS([MPI_File_open],[mpi])
47AC_SEARCH_LIBS([compress2],[z])
48AC_SEARCH_LIBS([SZ_BufftoBuffCompress],[sz])
49
50AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],
51[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
52
53AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],
54[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
55
56#################
57# MODULE: PETSc #
58#################
59
60# The AC_SEARCH_LIBS other-libraries list is not particularly useful.
61# Autoconf determines if the other-libraries are necessary however in
62# the cases where they are indeed necessary they are not actually
63# added to the library list.
64
65# These are the dependencies. Random functions are selected from
66# these libraries from the list of otherwise undefined references at
67# link time.
68AC_SEARCH_LIBS([PetscInitialize],[petsc])
69AC_SEARCH_LIBS([VecNorm],[petscvec])
70
71# MatCreateSeqAIJ is actually used in the code for this project.
72AC_SEARCH_LIBS([MatCreateSeqAIJ],[petscmat],[],
73[AC_MSG_ERROR("The Portable Extensible Tookit for Scientific Computation PETSc is needed to build the system.")])
4474
45AC_OUTPUT75AC_OUTPUT
diff --git a/doc/build-cobalt.txt b/doc/build-cobalt.txt
index 91b356b..76612a4 100644
--- a/doc/build-cobalt.txt
+++ b/doc/build-cobalt.txt
@@ -19,6 +19,24 @@ Building on NCSA Cobalt.
19+phdf5-1.8.419+phdf5-1.8.4
2020
21-- Configure21-- Configure
22export CPPFLAGS="-I$HDF5_HOME/include -I$NCBI_DIR/include"22export PETSC_DIR=/u/ac/dpellegr/apps/Installers/petsc-3.0.0-p10
23export LDFLAGS="-L/usr/apps/hdf/szip/lib -L$HDF5_HOME/lib -L$NCBI_DIR/lib -L/usr/local/intel/10.1.017/lib"23
24export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/apps/hdf/szip/lib24export CPPFLAGS="\
25-I$HDF5_HOME/include \
26-I$NCBI_DIR/include \
27-I$PETSC_DIR/include \
28-I$PETSC_DIR/linux-gnu-c-debug/include"
29
30export LDFLAGS="\
31-L$HDF5_HOME/lib \
32-L/usr/apps/hdf/szip/lib \
33-L/usr/local/intel/10.1.017/lib \
34-L$NCBI_DIR/lib \
35-L$PETSC_DIR/linux-gnu-c-debug/lib"
36
37export LD_LIBRARY_PATH=\
38$LD_LIBRARY_PATH:\
39/usr/apps/hdf/szip/lib:\
40$PETSC_DIR/linux-gnu-c-debug/lib
41
42
diff --git a/src/Makefile.am b/src/Makefile.am
index 752c0fe..407d7cd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -10,10 +10,9 @@ aggregator_SOURCES = \
10 model/blast_scores_data_init.c \10 model/blast_scores_data_init.c \
11 model/sequence_data_init.c11 model/sequence_data_init.c
1212
13aggregator_LDADD = -lirc -lmpi -lsz -lz
14
15updator_SOURCES = \13updator_SOURCES = \
16 updator.c \14 updator.c \
15 assign/assign_blast_scores.c \
17 assign/assign_protein_type.c \16 assign/assign_protein_type.c \
18 error/check_error.c \17 error/check_error.c \
19 error/check_h5_error.c \18 error/check_h5_error.c \
@@ -21,9 +20,8 @@ updator_SOURCES = \
21 model/gi_type_data_init.c \20 model/gi_type_data_init.c \
22 model/sequence_data_init.c21 model/sequence_data_init.c
2322
24updator_LDADD = -lirc -lmpi -lsz -lz
25
26noinst_HEADERS = \23noinst_HEADERS = \
24 assign/assign_blast_scores.h \
27 assign/assign_protein_type.h \25 assign/assign_protein_type.h \
28 error/check_error.h \26 error/check_error.h \
29 error/check_h5_error.h \27 error/check_h5_error.h \
diff --git a/src/assign/assign_blast_scores.c b/src/assign/assign_blast_scores.c
new file mode 100644
index 0000000..1cd491a
--- a/dev/null
+++ b/src/assign/assign_blast_scores.c
@@ -0,0 +1,27 @@
1#include "assign_blast_scores.h"
2#include <petscmat.h>
3#include <stdio.h>
4
5void
6assign_blast_scores (hid_t file_id)
7{
8 /*
9 * Create a sparse matrix for the pairwise protein BLAST scores.
10 *
11 * 138,769 proteins x 138,769 proteins = 19,256,835,361 pairs. At 8
12 * bytes (double) per pair this will require 154.1 GB of memory to
13 * hold the full structure. This is available on the compute nodes
14 * of Cobalt however use of a sparse matrix should reduce this size
15 * dramatically.
16 */
17
18 // http://www.netlib.org/blas/blast-forum/blas_sparse_proto.h
19 // blas_sparse_matrix M = BLAS_duscr_begin (138769, 138769);
20
21 Mat M;
22 MatCreateSeqAIJ (PETSC_COMM_SELF, 138769, 138769, 50, NULL, &M);
23
24 MatDestroy (M);
25
26 return;
27}
diff --git a/src/assign/assign_blast_scores.h b/src/assign/assign_blast_scores.h
new file mode 100644
index 0000000..2c52192
--- a/dev/null
+++ b/src/assign/assign_blast_scores.h
@@ -0,0 +1,12 @@
1#ifndef ASSIGN_BLAST_SCORES_H
2#define ASSIGN_BLAST_SCORES_H
3
4#include <hdf5.h>
5
6/*
7 * Pairwise BLAST of each protein in the NCBI Influenza Sequence
8 * Database against the full database.
9 */
10void assign_blast_scores (hid_t file_id);
11
12#endif // ASSIGN_BLAST_SCORES_H
diff --git a/src/updator.c b/src/updator.c
index f93e205..0bade65 100644
--- a/src/updator.c
+++ b/src/updator.c
@@ -3,16 +3,25 @@
3 */3 */
44
5#include "assign/assign_protein_type.h"5#include "assign/assign_protein_type.h"
6#include "assign/assign_blast_scores.h"
6#include "error/check_h5_error.h"7#include "error/check_h5_error.h"
8#include <petsc.h>
7#include <stdio.h>9#include <stdio.h>
8#include <signal.h>10#include <signal.h>
911
10#define FILE "influenza.h5"12#define FILE "influenza.h5"
1113
12int14int
13main ()15main (int argc, char **argv)
14{16{
15 /*17 /*
18 * Initialize the PETSc database and MPI.
19 *
20 * http://www.mcs.anl.gov/petsc/petsc-2/snapshots/petsc-dev/docs/manualpages/Sys/PetscInitialize.html#PetscInitialize
21 */
22 PetscInitialize (&argc, &argv, 0, 0);
23
24 /*
16 * Open the HDF5 file.25 * Open the HDF5 file.
17 */26 */
18 hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT);27 hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT);
@@ -22,7 +31,14 @@ main ()
22 /*31 /*
23 * Assign protein type values to the sequence records.32 * Assign protein type values to the sequence records.
24 */33 */
34 /*
25 assign_protein_type (file_id);35 assign_protein_type (file_id);
36 */
37
38 /*
39 * Assign pairwise BLAST scores.
40 */
41 assign_blast_scores (file_id);
2642
27 /*43 /*
28 * Close the HDF5 file.44 * Close the HDF5 file.
@@ -31,5 +47,7 @@ main ()
31 if (status < 0)47 if (status < 0)
32 check_h5_error (__FILE__, __LINE__);48 check_h5_error (__FILE__, __LINE__);
3349
50 PetscFinalize ();
51
34 return 0;52 return 0;
35}53}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.