summaryrefslogtreecommitdiffstats
authorDon Pellegrino <don@drexel.edu>2010-01-25 05:17:28 (GMT)
committer Don Pellegrino <don@drexel.edu>2010-01-25 05:17:28 (GMT)
commit72edf83cbac32ea26f056ae85373ad7179707333 (patch) (side-by-side diff)
treed65248cdfedf95ae296f87a1abe16403ddd18bb8
parentc51d559601371ac79e7157c0b35752b6141cac40 (diff)
downloadexp007-72edf83cbac32ea26f056ae85373ad7179707333.zip
exp007-72edf83cbac32ea26f056ae85373ad7179707333.tar.gz
exp007-72edf83cbac32ea26f056ae85373ad7179707333.tar.bz2
Added the initialization of a sparse matrix to be used to track the
pairwise BLAST scores for all influenza sequences. Modified the Autoconf input so that libraries are automatically added to LIBS rather than manually in Makefile.am. Added the use of the PETSc API which is used for the sparse matrix implementation.
-rw-r--r--configure.ac62
-rw-r--r--doc/build-cobalt.txt24
-rw-r--r--src/Makefile.am6
-rw-r--r--src/assign/assign_blast_scores.c27
-rw-r--r--src/assign/assign_blast_scores.h12
-rw-r--r--src/updator.c20
6 files changed, 127 insertions, 24 deletions
diff --git a/configure.ac b/configure.ac
index 59c5f36..f1ecdf9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,20 +7,16 @@ AC_CONFIG_FILES([
src/Makefile
])
-################
-# MODULE: HDF5 #
-################
+# Headers Checks
-AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],
-[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],
-[-lirc -lmpi -lz -lsz])
+AC_CHECK_HEADERS([hdf5.h],[],
+[AC_MSG_ERROR("The HDF5 headers are needed to build the system.")])
-AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],
-[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],
-[-lirc -lhdf5 -lmpi -lz -lsz])
+AC_CHECK_HEADERS([petscconf.h],[],
+[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
-AC_CHECK_HEADERS([hdf5.h],[],
-[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)])
+AC_CHECK_HEADERS([petscmat.h],[],
+[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
########################
# MODULE: NCBI Toolkit #
@@ -28,18 +24,52 @@ AC_CHECK_HEADERS([hdf5.h],[],
# Check for the NCBI ToolBox libraries.
AC_SEARCH_LIBS([log10],[m],[],
-[AC_MSG_ERROR(The C Math Library is needed to build the system.)])
+[AC_MSG_ERROR("The C Math Library is needed to build the system.")])
AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
+
+################
+# MODULE: HDF5 #
+################
+
+AC_SEARCH_LIBS([__intel_rtc_uninit_use],[irc])
+AC_SEARCH_LIBS([MPI_File_open],[mpi])
+AC_SEARCH_LIBS([compress2],[z])
+AC_SEARCH_LIBS([SZ_BufftoBuffCompress],[sz])
+
+AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],
+[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
+
+AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],
+[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
+
+#################
+# MODULE: PETSc #
+#################
+
+# The AC_SEARCH_LIBS other-libraries list is not particularly useful.
+# Autoconf determines whether the other-libraries are necessary; however,
+# in the cases where they are indeed necessary, they are not actually
+# added to the library list.
+
+# These are the dependencies. For each library, an arbitrary function
+# was picked from the list of references that are otherwise undefined
+# at link time.
+AC_SEARCH_LIBS([PetscInitialize],[petsc])
+AC_SEARCH_LIBS([VecNorm],[petscvec])
+
+# MatCreateSeqAIJ is actually used in the code for this project.
+AC_SEARCH_LIBS([MatCreateSeqAIJ],[petscmat],[],
+[AC_MSG_ERROR("The Portable Extensible Tookit for Scientific Computation PETSc is needed to build the system.")])
AC_OUTPUT
diff --git a/doc/build-cobalt.txt b/doc/build-cobalt.txt
index 91b356b..76612a4 100644
--- a/doc/build-cobalt.txt
+++ b/doc/build-cobalt.txt
@@ -19,6 +19,24 @@ Building on NCSA Cobalt.
+phdf5-1.8.4
-- Configure
-export CPPFLAGS="-I$HDF5_HOME/include -I$NCBI_DIR/include"
-export LDFLAGS="-L/usr/apps/hdf/szip/lib -L$HDF5_HOME/lib -L$NCBI_DIR/lib -L/usr/local/intel/10.1.017/lib"
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/apps/hdf/szip/lib
+export PETSC_DIR=/u/ac/dpellegr/apps/Installers/petsc-3.0.0-p10
+
+export CPPFLAGS="\
+-I$HDF5_HOME/include \
+-I$NCBI_DIR/include \
+-I$PETSC_DIR/include \
+-I$PETSC_DIR/linux-gnu-c-debug/include"
+
+export LDFLAGS="\
+-L$HDF5_HOME/lib \
+-L/usr/apps/hdf/szip/lib \
+-L/usr/local/intel/10.1.017/lib \
+-L$NCBI_DIR/lib \
+-L$PETSC_DIR/linux-gnu-c-debug/lib"
+
+export LD_LIBRARY_PATH=\
+$LD_LIBRARY_PATH:\
+/usr/apps/hdf/szip/lib:\
+$PETSC_DIR/linux-gnu-c-debug/lib
+
+
diff --git a/src/Makefile.am b/src/Makefile.am
index 752c0fe..407d7cd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -10,10 +10,9 @@ aggregator_SOURCES = \
model/blast_scores_data_init.c \
model/sequence_data_init.c
-aggregator_LDADD = -lirc -lmpi -lsz -lz
-
updator_SOURCES = \
updator.c \
+ assign/assign_blast_scores.c \
assign/assign_protein_type.c \
error/check_error.c \
error/check_h5_error.c \
@@ -21,9 +20,8 @@ updator_SOURCES = \
model/gi_type_data_init.c \
model/sequence_data_init.c
-updator_LDADD = -lirc -lmpi -lsz -lz
-
noinst_HEADERS = \
+ assign/assign_blast_scores.h \
assign/assign_protein_type.h \
error/check_error.h \
error/check_h5_error.h \
diff --git a/src/assign/assign_blast_scores.c b/src/assign/assign_blast_scores.c
new file mode 100644
index 0000000..1cd491a
--- a/dev/null
+++ b/src/assign/assign_blast_scores.c
@@ -0,0 +1,27 @@
+#include "assign_blast_scores.h"
+#include <petscmat.h>
+#include <stdio.h>
+
+void
+assign_blast_scores (hid_t file_id)
+{
+ /*
+ * Create a sparse matrix for the pairwise protein BLAST scores.
+ *
+ * 138,769 proteins x 138,769 proteins = 19,256,835,361 pairs. At 8
+ * bytes (double) per pair this will require 154.1 GB of memory to
+ * hold the full structure. This is available on the compute nodes
+ * of Cobalt however use of a sparse matrix should reduce this size
+ * dramatically.
+ */
+
+ // http://www.netlib.org/blas/blast-forum/blas_sparse_proto.h
+ // blas_sparse_matrix M = BLAS_duscr_begin (138769, 138769);
+
+ Mat M;
+ MatCreateSeqAIJ (PETSC_COMM_SELF, 138769, 138769, 50, NULL, &M);
+
+ MatDestroy (M);
+
+ return;
+}
diff --git a/src/assign/assign_blast_scores.h b/src/assign/assign_blast_scores.h
new file mode 100644
index 0000000..2c52192
--- a/dev/null
+++ b/src/assign/assign_blast_scores.h
@@ -0,0 +1,12 @@
+#ifndef ASSIGN_BLAST_SCORES_H
+#define ASSIGN_BLAST_SCORES_H
+
+#include <hdf5.h>
+
+/*
+ * Pairwise BLAST of each protein in the NCBI Influenza Sequence
+ * Database against the full database.
+ */
+void assign_blast_scores (hid_t file_id);
+
+#endif // ASSIGN_BLAST_SCORES_H
diff --git a/src/updator.c b/src/updator.c
index f93e205..0bade65 100644
--- a/src/updator.c
+++ b/src/updator.c
@@ -3,16 +3,25 @@
*/
#include "assign/assign_protein_type.h"
+#include "assign/assign_blast_scores.h"
#include "error/check_h5_error.h"
+#include <petsc.h>
#include <stdio.h>
#include <signal.h>
#define FILE "influenza.h5"
int
-main ()
+main (int argc, char **argv)
{
/*
+ * Initialize the PETSc database and MPI.
+ *
+ * http://www.mcs.anl.gov/petsc/petsc-2/snapshots/petsc-dev/docs/manualpages/Sys/PetscInitialize.html#PetscInitialize
+ */
+ PetscInitialize (&argc, &argv, 0, 0);
+
+ /*
* Open the HDF5 file.
*/
hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT);
@@ -22,7 +31,14 @@ main ()
/*
* Assign protein type values to the sequence records.
*/
+ /*
assign_protein_type (file_id);
+ */
+
+ /*
+ * Assign pairwise BLAST scores.
+ */
+ assign_blast_scores (file_id);
/*
* Close the HDF5 file.
@@ -31,5 +47,7 @@ main ()
if (status < 0)
check_h5_error (__FILE__, __LINE__);
+ PetscFinalize ();
+
return 0;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.