summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-15 22:28:47 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-15 22:28:47 (GMT)
commit 75da0318d72283af810a54546ebc2a802f366e6f (patch) (side-by-side diff)
tree dbbf065580f79841f40b4fb3173f07716d2a8a04
parent f9eeda9ed7a9cf414b42549a2b0771ea850761b0 (diff)
download exp007-75da0318d72283af810a54546ebc2a802f366e6f.zip
exp007-75da0318d72283af810a54546ebc2a802f366e6f.tar.gz
exp007-75da0318d72283af810a54546ebc2a802f366e6f.tar.bz2
Code compiles and creates an HDF5 file containing a table with one
record of influenza_aa.dat populated.
-rw-r--r-- Makefile.am 1
-rw-r--r-- src/Makefile.am 12
-rw-r--r-- src/aggregator.c 30
-rw-r--r-- src/load_influenza_aa_dat.c 152
-rw-r--r-- src/load_influenza_aa_dat.h 13
5 files changed, 208 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..af437a6
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..7cb4282
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,12 @@
+bin_PROGRAMS = aggregator
+
+aggregator_SOURCES = \
+ aggregator.c \
+ load_influenza_aa_dat.c
+
+aggregator_LDADD = -lhdf5
+
+noinst_HEADERS = \
+ load_influenza_aa_dat.h
+
+AM_CFLAGS = -Wall -std=gnu99 -ggdb
diff --git a/src/aggregator.c b/src/aggregator.c
new file mode 100644
index 0000000..ae5aa60
--- /dev/null
+++ b/src/aggregator.c
@@ -0,0 +1,30 @@
+/*
+ * Aggregate the collected influenza data into a single HDF5
+ * container.
+ */
+
+#include <hdf5.h>
+#include "load_influenza_aa_dat.h"
+
+#define FILE "influenza.h5"
+
+int
+main (void)
+{
+  /*
+   * Create the HDF5 file; a negative identifier means creation failed.
+   */
+  hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  if (file_id < 0)
+    return 1;
+
+  /*
+   * Load the supplementary protein data file.
+   */
+  load_influenza_aa_dat (file_id);
+
+  /*
+   * Close the HDF5 file; a failed close becomes a nonzero exit status.
+   */
+  return H5Fclose (file_id) < 0 ? 1 : 0;
+}
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
new file mode 100644
index 0000000..72aacb5
--- /dev/null
+++ b/src/load_influenza_aa_dat.c
@@ -0,0 +1,152 @@
+#include "load_influenza_aa_dat.h"
+#include "hdf5_hl.h"
+
+#define NFIELDS (hsize_t) 11
+//#define NRECORDS (hsize_t) 138052
+#define NRECORDS (hsize_t) 1
+#define TABLE_NAME "influenza_aa.dat"
+
+/*
+ * Build a fixed-length C string datatype of SIZE bytes.  Returns a
+ * negative identifier on failure; a valid identifier must be released
+ * by the caller with H5Tclose.
+ */
+static hid_t
+make_string_type (size_t size)
+{
+  hid_t type_id = H5Tcopy (H5T_C_S1);
+
+  if (type_id >= 0 && H5Tset_size (type_id, size) < 0)
+    {
+      H5Tclose (type_id);
+      type_id = -1;
+    }
+
+  return type_id;
+}
+
+/*
+ * Create the table TABLE_NAME under FILE_ID and store one hard-coded
+ * sample record of influenza_aa.dat in it.  The function returns void,
+ * so a failure cannot be reported to the caller: if a field type
+ * cannot be built the table is not written, and every datatype
+ * created here is released either way.
+ */
+void
+load_influenza_aa_dat (hid_t file_id)
+{
+  /*
+   * Model the data using native types.
+   */
+  typedef struct supplementary_data
+  {
+    char genbank_accession_number[9];
+    char host[15];
+    int genome_segment_number;
+    char subtype[7];
+    char country[25];
+    int year;
+    int sequence_length;
+    char virus_name[196];
+    char age[17];
+    char gender[6];
+    char full_length_indicator[4];
+  } supplementary_data;
+
+  /*
+   * Use an HDF5 Table for storage.
+   * http://www.hdfgroup.org/HDF5/Tutor/h5table.html
+   */
+
+  /*
+   * The record(s) to store.
+   */
+  supplementary_data p_data[NRECORDS] = {
+    {"BAC53999", "Human", 7, "", "Zambia", 1999, 109,
+     "Influenza B virus (B/Lusaka/270/99)", "", "", "yes"}
+  };
+
+  /*
+   * Offsets of the struct members in memory.
+   */
+  size_t dst_offset[NFIELDS] = {
+    HOFFSET (supplementary_data, genbank_accession_number),
+    HOFFSET (supplementary_data, host),
+    HOFFSET (supplementary_data, genome_segment_number),
+    HOFFSET (supplementary_data, subtype),
+    HOFFSET (supplementary_data, country),
+    HOFFSET (supplementary_data, year),
+    HOFFSET (supplementary_data, sequence_length),
+    HOFFSET (supplementary_data, virus_name),
+    HOFFSET (supplementary_data, age),
+    HOFFSET (supplementary_data, gender),
+    HOFFSET (supplementary_data, full_length_indicator)
+  };
+
+  /*
+   * Byte width of each string field, taken from the struct so the
+   * stored type can never disagree with the in-memory layout.  Zero
+   * marks an integer field.
+   */
+  const size_t string_size[NFIELDS] = {
+    sizeof p_data[0].genbank_accession_number,
+    sizeof p_data[0].host,
+    0,
+    sizeof p_data[0].subtype,
+    sizeof p_data[0].country,
+    0,
+    0,
+    sizeof p_data[0].virus_name,
+    sizeof p_data[0].age,
+    sizeof p_data[0].gender,
+    sizeof p_data[0].full_length_indicator
+  };
+
+  /*
+   * Field names, in struct member order.
+   */
+  const char *field_names[NFIELDS] = {
+    "GenBank accession number",
+    "Host",
+    "Genome segment number",
+    "Subtype",
+    "Country",
+    "Year",
+    "Sequence length",
+    "Virus name",
+    "Age",
+    "Gender",
+    "Full-length Indicator"
+  };
+  hsize_t chunk_size = 10;
+  int compress = 0;
+
+  /*
+   * Build the field types, remembering whether all of them are valid.
+   */
+  hid_t field_type[NFIELDS];
+  int types_ok = 1;
+  for (size_t i = 0; i < NFIELDS; i++)
+    {
+      field_type[i] = string_size[i] > 0
+        ? make_string_type (string_size[i]) : H5T_NATIVE_INT;
+      if (field_type[i] < 0)
+        types_ok = 0;
+    }
+
+  /*
+   * The title and the dataset name are both TABLE_NAME.  No fill data.
+   */
+  if (types_ok)
+    (void) H5TBmake_table (TABLE_NAME, file_id, TABLE_NAME, NFIELDS, NRECORDS,
+                           sizeof (supplementary_data), field_names,
+                           dst_offset, field_type, chunk_size, NULL,
+                           compress, p_data);
+
+  /*
+   * Release only the string types created above.
+   */
+  for (size_t i = 0; i < NFIELDS; i++)
+    if (string_size[i] > 0 && field_type[i] >= 0)
+      H5Tclose (field_type[i]);
+}
diff --git a/src/load_influenza_aa_dat.h b/src/load_influenza_aa_dat.h
new file mode 100644
index 0000000..c431e67
--- /dev/null
+++ b/src/load_influenza_aa_dat.h
@@ -0,0 +1,13 @@
+#ifndef LOAD_INFLUENZA_AA_DAT_H
+#define LOAD_INFLUENZA_AA_DAT_H
+
+#include <hdf5.h>
+
+/*
+ * Create the "influenza_aa.dat" table in the HDF5 file FILE_ID and
+ * store supplementary protein data in it (one sample record for now).
+ */
+void
+load_influenza_aa_dat (hid_t file_id);
+
+#endif // LOAD_INFLUENZA_AA_DAT_H

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.