summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-15 22:28:47 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-15 22:28:47 (GMT)
commit 75da0318d72283af810a54546ebc2a802f366e6f (patch) (unidiff)
tree dbbf065580f79841f40b4fb3173f07716d2a8a04
parent f9eeda9ed7a9cf414b42549a2b0771ea850761b0 (diff)
download exp007-75da0318d72283af810a54546ebc2a802f366e6f.zip
exp007-75da0318d72283af810a54546ebc2a802f366e6f.tar.gz
exp007-75da0318d72283af810a54546ebc2a802f366e6f.tar.bz2
Code compiles and creates an HDF5 file containing a table with one
record of influenza_aa.dat populated.
-rw-r--r-- Makefile.am 1
-rw-r--r-- src/Makefile.am 12
-rw-r--r-- src/aggregator.c 30
-rw-r--r-- src/load_influenza_aa_dat.c 152
-rw-r--r-- src/load_influenza_aa_dat.h 13
5 files changed, 208 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..af437a6
--- a/dev/null
+++ b/Makefile.am
@@ -0,0 +1 @@
# Directories that the build recurses into; all sources live in src.
SUBDIRS = src
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..7cb4282
--- a/dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,12 @@
# Build and install a single executable named aggregator.
bin_PROGRAMS = aggregator

aggregator_SOURCES = \
	aggregator.c \
	load_influenza_aa_dat.c

# The table API (H5TBmake_table) lives in the HDF5 high-level library.
# It depends on the core library, so it has to come first on the link line.
aggregator_LDADD = -lhdf5_hl -lhdf5

# Shipped in the distribution but never installed.
noinst_HEADERS = \
	load_influenza_aa_dat.h

AM_CFLAGS = -Wall -std=gnu99 -ggdb
diff --git a/src/aggregator.c b/src/aggregator.c
new file mode 100644
index 0000000..ae5aa60
--- a/dev/null
+++ b/src/aggregator.c
@@ -0,0 +1,30 @@
1/*
2 * Aggregate the collected influenza data into a single HDF5
3 * container.
4 */
5
6#include <hdf5.h>
7#include "load_influenza_aa_dat.h"
8
9#define FILE "influenza.h5"
10
11int
12main()
13{
14 /*
15 * Create the HDF5 file.
16 */
17 hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
18
19 /*
20 * Load the supplementary protein data file.
21 */
22 load_influenza_aa_dat (file_id);
23
24 /*
25 * Close the HD5 file.
26 */
27 herr_t status = H5Fclose (file_id);
28
29 return 0;
30}
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
new file mode 100644
index 0000000..72aacb5
--- a/dev/null
+++ b/src/load_influenza_aa_dat.c
@@ -0,0 +1,152 @@
1#include "load_influenza_aa_dat.h"
2#include "hdf5_hl.h"
3
4#define NFIELDS (hsize_t) 11
5//#define NRECORDS (hsize_t) 138052
6#define NRECORDS (hsize_t) 1
7#define TABLE_NAME "influenza_aa.dat"
8
9void
10load_influenza_aa_dat (hid_t file_id)
11{
12 /*
13 * Model the data using native types.
14 */
15 typedef struct supplementary_data
16 {
17 char genbank_accession_number[9];
18 char host[15];
19 int genome_segment_number;
20 char subtype[7];
21 char country[25];
22 int year;
23 int sequence_length;
24 char virus_name[196];
25 char age[17];
26 char gender[6];
27 char full_length_indicator[4];
28 } supplementary_data;
29
30 /*
31 * Use an HDF5 Table for storage.
32 * http://www.hdfgroup.org/HDF5/Tutor/h5table.html
33 */
34
35 /*
36 * "Calculate the size and the offsets of our struct members in
37 * memory."
38 */
39 size_t dst_size = sizeof (supplementary_data);
40 size_t dst_offset[NFIELDS] = { HOFFSET ( supplementary_data, genbank_accession_number ),
41 HOFFSET ( supplementary_data, host ),
42 HOFFSET ( supplementary_data, genome_segment_number ),
43 HOFFSET ( supplementary_data, subtype ),
44 HOFFSET ( supplementary_data, country ),
45 HOFFSET ( supplementary_data, year ),
46 HOFFSET ( supplementary_data, sequence_length ),
47 HOFFSET ( supplementary_data, virus_name ),
48 HOFFSET ( supplementary_data, age ),
49 HOFFSET ( supplementary_data, gender ),
50 HOFFSET ( supplementary_data, full_length_indicator )};
51
52 /*
53
54 Only needed for reading?
55
56 supplementary_data dst_buf[NRECORDS];
57
58 size_t dst_sizes[NFIELDS] = { sizeof ( dst_buf[0].genbank_accession_number ),
59 sizeof ( dst_buf[0].host ),
60 sizeof ( dst_buf[0].genome_segment_number ),
61 sizeof ( dst_buf[0].subtype ),
62 sizeof ( dst_buf[0].country ),
63 sizeof ( dst_buf[0].year ),
64 sizeof ( dst_buf[0].sequence_length ),
65 sizeof ( dst_buf[0].virus_name ),
66 sizeof ( dst_buf[0].age ),
67 sizeof ( dst_buf[0].gender ),
68 sizeof ( dst_buf[0].full_length_indicator)};
69 */
70
71 /*
72 * "Define field information."
73 */
74 const char *field_names[NFIELDS] =
75 { "GenBank accession number",
76 "Host",
77 "Genome segment number",
78 "Subtype",
79 "Country",
80 "Year",
81 "Sequence length",
82 "Virus name",
83 "Age",
84 "Gender",
85 "Full-length Indicator" };
86 hsize_t chunk_size = 10;
87 int *fill_data = NULL;
88 int compress = 0;
89
90 /*
91 * "Initialize field type."
92 */
93 hid_t field_type[NFIELDS];
94
95 hid_t genbank_accession_number_type = H5Tcopy ( H5T_C_S1 );
96 H5Tset_size ( genbank_accession_number_type, 9 );
97 field_type[0] = genbank_accession_number_type;
98
99 hid_t host_type = H5Tcopy ( H5T_C_S1 );
100 H5Tset_size ( host_type, 15 );
101 field_type[1] = host_type;
102
103 field_type[2] = H5T_NATIVE_INT;
104
105 hid_t subtype_type = H5Tcopy ( H5T_C_S1 );
106 H5Tset_size (subtype_type, 7 );
107 field_type[3] = subtype_type;
108
109 hid_t country_type = H5Tcopy ( H5T_C_S1 );
110 H5Tset_size (country_type, 25 );
111 field_type[4] = country_type;
112
113 field_type[5] = H5T_NATIVE_INT;
114
115 field_type[6] = H5T_NATIVE_INT;
116
117 hid_t virus_name_type = H5Tcopy ( H5T_C_S1 );
118 H5Tset_size (virus_name_type, 196);
119 field_type[7] = virus_name_type;
120
121 hid_t age_type = H5Tcopy (H5T_C_S1);
122 H5Tset_size (age_type, 17);
123 field_type[8] = age_type;
124
125 hid_t gender_type = H5Tcopy (H5T_C_S1);
126 H5Tset_size (gender_type, 6);
127 field_type[9] = gender_type;
128
129 hid_t full_length_indicator_type = H5Tcopy (H5T_C_S1);
130 H5Tset_size (full_length_indicator_type, 4);
131 field_type[10] = full_length_indicator_type;
132
133 supplementary_data p_data[NRECORDS] = {
134 {"BAC53999", "Human", 7, "", "Zambia", 1999, 109, "Influenza B virus (B/Lusaka/270/99)",
135 "", "", "yes"}
136 };
137
138 herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, TABLE_NAME,NFIELDS,NRECORDS,
139 dst_size,field_names, dst_offset, field_type,
140 chunk_size, fill_data, compress, p_data);
141
142 H5Tclose (genbank_accession_number_type);
143 H5Tclose (host_type);
144 H5Tclose (subtype_type);
145 H5Tclose (country_type);
146 H5Tclose (virus_name_type);
147 H5Tclose (age_type);
148 H5Tclose (gender_type);
149 H5Tclose (full_length_indicator_type);
150
151 return;
152}
diff --git a/src/load_influenza_aa_dat.h b/src/load_influenza_aa_dat.h
new file mode 100644
index 0000000..c431e67
--- a/dev/null
+++ b/src/load_influenza_aa_dat.h
@@ -0,0 +1,13 @@
1#ifndef LOAD_INFLUENZA_AA_DAT_H
2#define LOAD_INFLUENZA_AA_DAT_H
3
4#include <hdf5.h>
5
6/*
7 * Create the "influenza_aa.dat" table inside the HDF5 file FILE_ID.
8 * For now it holds one hard-coded sample record; nothing is returned.
9 */
10void
11load_influenza_aa_dat (hid_t file_id);
12
13#endif // LOAD_INFLUENZA_AA_DAT_H

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.