summaryrefslogtreecommitdiffstats
Side-by-side diff
-rw-r--r--configure.ac62
-rw-r--r--doc/build-cobalt.txt24
-rw-r--r--src/Makefile.am6
-rw-r--r--src/assign/assign_blast_scores.c27
-rw-r--r--src/assign/assign_blast_scores.h12
-rw-r--r--src/updator.c20
6 files changed, 127 insertions, 24 deletions
diff --git a/configure.ac b/configure.ac
index 59c5f36..f1ecdf9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7,20 +7,16 @@ AC_CONFIG_FILES([
src/Makefile
])
-################
-# MODULE: HDF5 #
-################
+# Headers Checks
-AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],
-[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],
-[-lirc -lmpi -lz -lsz])
+AC_CHECK_HEADERS([hdf5.h],[],
+[AC_MSG_ERROR("The HDF5 headers are needed to build the system.")])
-AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],
-[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)],
-[-lirc -lhdf5 -lmpi -lz -lsz])
+AC_CHECK_HEADERS([petscconf.h],[],
+[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
-AC_CHECK_HEADERS([hdf5.h],[],
-[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)])
+AC_CHECK_HEADERS([petscmat.h],[],
+[AC_MSG_ERROR("The PETSc headers are needed to build the system.")])
########################
# MODULE: NCBI Toolkit #
@@ -28,18 +24,52 @@ AC_CHECK_HEADERS([hdf5.h],[],
# Check for the NCBI ToolBox libraries.
AC_SEARCH_LIBS([log10],[m],[],
-[AC_MSG_ERROR(The C Math Library is needed to build the system.)])
+[AC_MSG_ERROR("The C Math Library is needed to build the system.")])
AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[],
-[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)])
+[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")])
+
+################
+# MODULE: HDF5 #
+################
+
+AC_SEARCH_LIBS([__intel_rtc_uninit_use],[irc])
+AC_SEARCH_LIBS([MPI_File_open],[mpi])
+AC_SEARCH_LIBS([compress2],[z])
+AC_SEARCH_LIBS([SZ_BufftoBuffCompress],[sz])
+
+AC_SEARCH_LIBS([H5Fcreate],[hdf5],[],
+[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
+
+AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[],
+[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")])
+
+#################
+# MODULE: PETSc #
+#################
+
+# The AC_SEARCH_LIBS other-libraries list is not particularly useful.
+# Autoconf determines whether the other-libraries are necessary;
+# however, in the cases where they are indeed necessary, they are not
+# actually added to the library list.
+
+# These are the dependencies. An arbitrary function from each
+# library is chosen from the list of references that would otherwise
+# be undefined at link time.
+AC_SEARCH_LIBS([PetscInitialize],[petsc])
+AC_SEARCH_LIBS([VecNorm],[petscvec])
+
+# MatCreateSeqAIJ is actually used in the code for this project.
+AC_SEARCH_LIBS([MatCreateSeqAIJ],[petscmat],[],
+[AC_MSG_ERROR("The Portable Extensible Toolkit for Scientific Computation PETSc is needed to build the system.")])
AC_OUTPUT
diff --git a/doc/build-cobalt.txt b/doc/build-cobalt.txt
index 91b356b..76612a4 100644
--- a/doc/build-cobalt.txt
+++ b/doc/build-cobalt.txt
@@ -19,6 +19,24 @@ Building on NCSA Cobalt.
+phdf5-1.8.4
-- Configure
-export CPPFLAGS="-I$HDF5_HOME/include -I$NCBI_DIR/include"
-export LDFLAGS="-L/usr/apps/hdf/szip/lib -L$HDF5_HOME/lib -L$NCBI_DIR/lib -L/usr/local/intel/10.1.017/lib"
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/apps/hdf/szip/lib
+export PETSC_DIR=/u/ac/dpellegr/apps/Installers/petsc-3.0.0-p10
+
+export CPPFLAGS="\
+-I$HDF5_HOME/include \
+-I$NCBI_DIR/include \
+-I$PETSC_DIR/include \
+-I$PETSC_DIR/linux-gnu-c-debug/include"
+
+export LDFLAGS="\
+-L$HDF5_HOME/lib \
+-L/usr/apps/hdf/szip/lib \
+-L/usr/local/intel/10.1.017/lib \
+-L$NCBI_DIR/lib \
+-L$PETSC_DIR/linux-gnu-c-debug/lib"
+
+export LD_LIBRARY_PATH=\
+$LD_LIBRARY_PATH:\
+/usr/apps/hdf/szip/lib:\
+$PETSC_DIR/linux-gnu-c-debug/lib
+
+
diff --git a/src/Makefile.am b/src/Makefile.am
index 752c0fe..407d7cd 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -10,10 +10,9 @@ aggregator_SOURCES = \
model/blast_scores_data_init.c \
model/sequence_data_init.c
-aggregator_LDADD = -lirc -lmpi -lsz -lz
-
updator_SOURCES = \
updator.c \
+ assign/assign_blast_scores.c \
assign/assign_protein_type.c \
error/check_error.c \
error/check_h5_error.c \
@@ -21,9 +20,8 @@ updator_SOURCES = \
model/gi_type_data_init.c \
model/sequence_data_init.c
-updator_LDADD = -lirc -lmpi -lsz -lz
-
noinst_HEADERS = \
+ assign/assign_blast_scores.h \
assign/assign_protein_type.h \
error/check_error.h \
error/check_h5_error.h \
diff --git a/src/assign/assign_blast_scores.c b/src/assign/assign_blast_scores.c
new file mode 100644
index 0000000..1cd491a
--- /dev/null
+++ b/src/assign/assign_blast_scores.c
@@ -0,0 +1,27 @@
+#include "assign_blast_scores.h"
+#include <petscmat.h>
+#include <stdio.h>
+
+void
+assign_blast_scores (hid_t file_id)
+{
+ /*
+ * Create a sparse matrix for the pairwise protein BLAST scores.
+ *
+ * 138,769 proteins x 138,769 proteins = 19,256,835,361 pairs. At 8
+ * bytes (double) per pair this will require 154.1 GB of memory to
+ * hold the full structure. This is available on the compute nodes
+ * of Cobalt; however, use of a sparse matrix should reduce this
+ * size dramatically.
+ */
+
+ // http://www.netlib.org/blas/blast-forum/blas_sparse_proto.h
+ // blas_sparse_matrix M = BLAS_duscr_begin (138769, 138769);
+
+ Mat M;
+ MatCreateSeqAIJ (PETSC_COMM_SELF, 138769, 138769, 50, NULL, &M);
+
+ MatDestroy (M);
+
+ return;
+}
diff --git a/src/assign/assign_blast_scores.h b/src/assign/assign_blast_scores.h
new file mode 100644
index 0000000..2c52192
--- /dev/null
+++ b/src/assign/assign_blast_scores.h
@@ -0,0 +1,12 @@
+#ifndef ASSIGN_BLAST_SCORES_H
+#define ASSIGN_BLAST_SCORES_H
+
+#include <hdf5.h>
+
+/*
+ * Pairwise BLAST of each protein in the NCBI Influenza Sequence
+ * Database against the full database.
+ */
+void assign_blast_scores (hid_t file_id);
+
+#endif // ASSIGN_BLAST_SCORES_H
diff --git a/src/updator.c b/src/updator.c
index f93e205..0bade65 100644
--- a/src/updator.c
+++ b/src/updator.c
@@ -3,16 +3,25 @@
*/
#include "assign/assign_protein_type.h"
+#include "assign/assign_blast_scores.h"
#include "error/check_h5_error.h"
+#include <petsc.h>
#include <stdio.h>
#include <signal.h>
#define FILE "influenza.h5"
int
-main ()
+main (int argc, char **argv)
{
/*
+ * Initialize the PETSc database and MPI.
+ *
+ * http://www.mcs.anl.gov/petsc/petsc-2/snapshots/petsc-dev/docs/manualpages/Sys/PetscInitialize.html#PetscInitialize
+ */
+ PetscInitialize (&argc, &argv, 0, 0);
+
+ /*
* Open the HDF5 file.
*/
hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT);
@@ -22,7 +31,14 @@ main ()
/*
* Assign protein type values to the sequence records.
*/
+ /*
assign_protein_type (file_id);
+ */
+
+ /*
+ * Assign pairwise BLAST scores.
+ */
+ assign_blast_scores (file_id);
/*
* Close the HDF5 file.
@@ -31,5 +47,7 @@ main ()
if (status < 0)
check_h5_error (__FILE__, __LINE__);
+ PetscFinalize ();
+
return 0;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.