-rw-r--r-- | analysis/year.R | 20 | ||||
-rw-r--r-- | src/Makefile.am | 39 | ||||
-rw-r--r-- | src/aggregator.c | 6 | ||||
-rw-r--r-- | src/assign/assign_protein_type.c (renamed from src/assign_protein_type.c) | 87 | ||||
-rw-r--r-- | src/assign/assign_protein_type.h (renamed from src/assign_protein_type.h) | 0 | ||||
-rw-r--r-- | src/error/check_error.c (renamed from src/check_error.c) | 0 | ||||
-rw-r--r-- | src/error/check_error.h (renamed from src/check_error.h) | 0 | ||||
-rw-r--r-- | src/error/check_h5_error.c (renamed from src/check_h5_error.c) | 0 | ||||
-rw-r--r-- | src/error/check_h5_error.h (renamed from src/check_h5_error.h) | 0 | ||||
-rw-r--r-- | src/error/check_ncbi_error.c (renamed from src/check_ncbi_error.c) | 0 | ||||
-rw-r--r-- | src/error/check_ncbi_error.h (renamed from src/check_ncbi_error.h) | 0 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.c (renamed from src/load_influenza_aa_dat.c) | 4 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.h (renamed from src/load_influenza_aa_dat.h) | 0 | ||||
-rw-r--r-- | src/load/load_influenza_faa.c (renamed from src/load_influenza_faa.c) | 10 | ||||
-rw-r--r-- | src/load/load_influenza_faa.h (renamed from src/load_influenza_faa.h) | 0 | ||||
-rw-r--r-- | src/model/gi_type_data.h | 21 | ||||
-rw-r--r-- | src/model/gi_type_data_init.c | 36 | ||||
-rw-r--r-- | src/model/gi_type_data_init.h | 14 | ||||
-rw-r--r-- | src/model/sequence_data.h (renamed from src/sequence_data.h) | 5 | ||||
-rw-r--r-- | src/model/sequence_data_init.c (renamed from src/sequence_data_init.c) | 6 | ||||
-rw-r--r-- | src/model/sequence_data_init.h (renamed from src/sequence_data_init.h) | 0 | ||||
-rw-r--r-- | src/updator.c | 4 |
22 files changed, 181 insertions, 71 deletions
diff --git a/analysis/year.R b/analysis/year.R index b7da3c8..6d68925 100644 --- a/analysis/year.R +++ b/analysis/year.R | |||
@@ -5,21 +5,23 @@ require(hdf5); | |||
5 | hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE); | 5 | hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE); |
6 | 6 | ||
7 | A <- influenza.aa.dat; | 7 | A <- influenza.aa.dat; |
8 | |||
9 | B <- influenza.faa; | 8 | B <- influenza.faa; |
10 | 9 | ||
10 | # Join the two tables by GB value. | ||
11 | C <- merge (A, B, by.x = "GenBank accession number", by.y = "GB"); | ||
12 | |||
11 | # All records for 1918. Based on code from | 13 | # All records for 1918. Based on code from |
12 | # http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations | 14 | # http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations |
13 | C <- A[A$Year == 1918, ] | 15 | D <- C[C$Year == 1918, ] |
14 | 16 | ||
15 | # Countries represented in the 1918 dataset. | 17 | summary (D); |
16 | C$Country; | ||
17 | 18 | ||
18 | # All record with a year value. | 19 | # Countries represented in the 1918 dataset. |
19 | D <- A[A$Year != 0, ]; | 20 | D$Country; |
20 | 21 | ||
21 | hist(D$Year); | 22 | D[D$"Protein Type" == "HA", ] |
22 | 23 | ||
23 | B[B$GI == 305182, ] | 24 | # All record with a year value. |
25 | E <- A[A$Year != 0, ]; | ||
24 | 26 | ||
25 | B[B$"Protein Type" == "HA", ] | 27 | hist(E$Year); |
diff --git a/src/Makefile.am b/src/Makefile.am index 6dd8e16..06a6f5c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am | |||
@@ -2,28 +2,31 @@ bin_PROGRAMS = aggregator updator | |||
2 | 2 | ||
3 | aggregator_SOURCES = \ | 3 | aggregator_SOURCES = \ |
4 | aggregator.c \ | 4 | aggregator.c \ |
5 | check_error.c \ | 5 | error/check_error.c \ |
6 | check_h5_error.c \ | 6 | error/check_h5_error.c \ |
7 | load_influenza_aa_dat.c \ | 7 | load/load_influenza_aa_dat.c \ |
8 | load_influenza_faa.c \ | 8 | load/load_influenza_faa.c \ |
9 | sequence_data_init.c | 9 | model/sequence_data_init.c |
10 | 10 | ||
11 | updator_SOURCES = \ | 11 | updator_SOURCES = \ |
12 | updator.c \ | 12 | updator.c \ |
13 | check_error.c \ | 13 | assign/assign_protein_type.c \ |
14 | check_h5_error.c \ | 14 | error/check_error.c \ |
15 | check_ncbi_error.c \ | 15 | error/check_h5_error.c \ |
16 | assign_protein_type.c \ | 16 | error/check_ncbi_error.c \ |
17 | sequence_data_init.c | 17 | model/gi_type_data_init.c \ |
18 | model/sequence_data_init.c | ||
18 | 19 | ||
19 | noinst_HEADERS = \ | 20 | noinst_HEADERS = \ |
20 | assign_protein_type.h \ | 21 | assign/assign_protein_type.h \ |
21 | check_error.h \ | 22 | error/check_error.h \ |
22 | check_h5_error.h \ | 23 | error/check_h5_error.h \ |
23 | check_ncbi_error.h \ | 24 | error/check_ncbi_error.h \ |
24 | load_influenza_aa_dat.h \ | 25 | load/load_influenza_aa_dat.h \ |
25 | load_influenza_faa.h \ | 26 | load/load_influenza_faa.h \ |
26 | sequence_data.h \ | 27 | model/gi_type_data.h \ |
27 | sequence_data_init.h | 28 | model/gi_type_data_init.h \ |
29 | model/sequence_data.h \ | ||
30 | model/sequence_data_init.h | ||
28 | 31 | ||
29 | AM_CFLAGS = -Wall -std=gnu99 -ggdb | 32 | AM_CFLAGS = -Wall -std=gnu99 -ggdb |
diff --git a/src/aggregator.c b/src/aggregator.c index 07aba39..c00d912 100644 --- a/src/aggregator.c +++ b/src/aggregator.c | |||
@@ -3,9 +3,9 @@ | |||
3 | * container. | 3 | * container. |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include "check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | #include "load_influenza_aa_dat.h" | 7 | #include "load/load_influenza_aa_dat.h" |
8 | #include "load_influenza_faa.h" | 8 | #include "load/load_influenza_faa.h" |
9 | 9 | ||
10 | #define FILE "influenza.h5" | 10 | #define FILE "influenza.h5" |
11 | 11 | ||
diff --git a/src/assign_protein_type.c b/src/assign/assign_protein_type.c index ec3a959..0a11c23 100644 --- a/src/assign_protein_type.c +++ b/src/assign/assign_protein_type.c | |||
@@ -1,8 +1,10 @@ | |||
1 | #include "assign_protein_type.h" | 1 | #include "assign_protein_type.h" |
2 | #include "check_ncbi_error.h" | 2 | #include "error/check_ncbi_error.h" |
3 | #include "check_h5_error.h" | 3 | #include "error/check_h5_error.h" |
4 | #include "sequence_data.h" | 4 | #include "model/gi_type_data.h" |
5 | #include "sequence_data_init.h" | 5 | #include "model/gi_type_data_init.h" |
6 | #include "model/sequence_data.h" | ||
7 | #include "model/sequence_data_init.h" | ||
6 | #include <ncbi.h> | 8 | #include <ncbi.h> |
7 | #include <readdb.h> | 9 | #include <readdb.h> |
8 | #include <blast.h> | 10 | #include <blast.h> |
@@ -12,6 +14,16 @@ | |||
12 | #include <error.h> | 14 | #include <error.h> |
13 | 15 | ||
14 | /* | 16 | /* |
17 | * todo: Write results to an independent table by GI so that they can | ||
18 | * be reused when the input tables influenza.faa and influenza_aa.dat | ||
19 | * are replaced with new data. Since GI values are unique and | ||
20 | * persistent the assignment of a protein type should stay valid | ||
21 | * unless the list of reference sequences is updated. | ||
22 | * | ||
23 | * todo: Save the species type as a new field { A, B, C }. | ||
24 | */ | ||
25 | |||
26 | /* | ||
15 | * BLAST database containing all of the influenza protein sequences. | 27 | * BLAST database containing all of the influenza protein sequences. |
16 | */ | 28 | */ |
17 | #define SEQDB "/home/don/exp004/influenzadb/influenzadb" | 29 | #define SEQDB "/home/don/exp004/influenzadb/influenzadb" |
@@ -57,34 +69,57 @@ assign_protein_type (hid_t file_id) | |||
57 | ValNodePtr error_returns = NULL; | 69 | ValNodePtr error_returns = NULL; |
58 | 70 | ||
59 | /* | 71 | /* |
60 | * Read the data from HDF5 file. | 72 | * Read the data from HDF5 influenza.faa. |
61 | */ | 73 | */ |
62 | hsize_t nfields; | 74 | hsize_t faa_nfields; |
63 | hsize_t nrecords; | 75 | hsize_t faa_nrecords; |
64 | herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, | 76 | herr_t status = H5TBget_table_info (file_id, "influenza.faa", &faa_nfields, |
65 | &nrecords); | 77 | &faa_nrecords); |
78 | if (status < 0) | ||
79 | check_h5_error (status, __FILE__, __LINE__); | ||
80 | |||
81 | sequence_data* faa_buf = malloc (sizeof(sequence_data) * faa_nrecords); | ||
82 | |||
83 | size_t faa_size; | ||
84 | size_t faa_offset[SEQUENCE_DATA_FIELD_NUM]; | ||
85 | size_t faa_sizes[SEQUENCE_DATA_FIELD_NUM]; | ||
86 | hid_t faa_field_type[SEQUENCE_DATA_FIELD_NUM]; | ||
87 | sequence_data_init (&faa_size, faa_offset, faa_sizes, faa_field_type); | ||
88 | |||
89 | status = H5TBread_table (file_id, "influenza.faa", faa_size, faa_offset, | ||
90 | faa_sizes, faa_buf); | ||
66 | if (status < 0) | 91 | if (status < 0) |
67 | check_h5_error (status, __FILE__, __LINE__); | 92 | check_h5_error (status, __FILE__, __LINE__); |
68 | 93 | ||
69 | sequence_data* dst_buf = malloc (sizeof(sequence_data) * nrecords); | 94 | /* |
95 | * Read the data from HDF5 gi_type_data. | ||
96 | */ | ||
97 | hsize_t gi_nfields; | ||
98 | hsize_t gi_nrecords; | ||
99 | status = H5TBget_table_info (file_id, "gi_type_data", &gi_nfields, | ||
100 | &gi_nrecords); | ||
101 | if (status < 0) | ||
102 | check_h5_error (status, __FILE__, __LINE__); | ||
103 | |||
104 | gi_type_data* gi_buf = malloc (sizeof(gi_type_data) * gi_nrecords); | ||
70 | 105 | ||
71 | size_t dst_size; | 106 | size_t gi_size; |
72 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; | 107 | size_t gi_offset[GI_TYPE_DATA_FIELD_NUM]; |
73 | size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM]; | 108 | size_t gi_sizes[GI_TYPE_DATA_FIELD_NUM]; |
74 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; | 109 | hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; |
75 | sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type); | 110 | gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); |
76 | 111 | ||
77 | status = H5TBread_table (file_id, "influenza.faa", dst_size, dst_offset, | 112 | status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, |
78 | dst_sizes, dst_buf); | 113 | gi_sizes, gi_buf); |
79 | if (status < 0) | 114 | if (status < 0) |
80 | check_h5_error (status, __FILE__, __LINE__); | 115 | check_h5_error (status, __FILE__, __LINE__); |
81 | 116 | ||
82 | /* | 117 | /* |
83 | * Assign protein types to records for which the field is empty. | 118 | * Assign protein types to records for which the field is empty. |
84 | */ | 119 | */ |
85 | printf ("Records to process: %i\n", (int)nrecords); | 120 | printf ("Records to process: %i\n", (int)faa_nrecords); |
86 | bool updates_pending = false; | 121 | bool updates_pending = false; |
87 | for (int i = 0; i < nrecords; i++) | 122 | for (int i = 0; i < faa_nrecords; i++) |
88 | { | 123 | { |
89 | 124 | ||
90 | if (dst_buf[i].protein_type[0] == '\0') { | 125 | if (dst_buf[i].protein_type[0] == '\0') { |
@@ -130,8 +165,15 @@ assign_protein_type (hid_t file_id) | |||
130 | Char target_id_buf[BUFFER_LEN + 1]; | 165 | Char target_id_buf[BUFFER_LEN + 1]; |
131 | SeqIdPtr target_id = SeqAlignId (seqalign, 1); | 166 | SeqIdPtr target_id = SeqAlignId (seqalign, 1); |
132 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); | 167 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); |
133 | strncpy (dst_buf[i].protein_type, &target_id_buf[6], | 168 | |
134 | sizeof (dst_buf[i].protein_type)); | 169 | // Species Type |
170 | dst_buf[i].type[0] = &target_id_buf[4]; | ||
171 | dst_buf[i].type[1] = '\0'; | ||
172 | |||
173 | // Protein Type | ||
174 | strncpy (dst_buf[i].protein, &target_id_buf[6], | ||
175 | sizeof (dst_buf[i].protein)); | ||
176 | |||
135 | updates_pending = true; | 177 | updates_pending = true; |
136 | } | 178 | } |
137 | 179 | ||
@@ -202,7 +244,8 @@ assign_protein_type (hid_t file_id) | |||
202 | updates_pending = false; | 244 | updates_pending = false; |
203 | } | 245 | } |
204 | 246 | ||
205 | free (dst_buf); | 247 | free (faa_buf); |
248 | free (gi_buf); | ||
206 | 249 | ||
207 | options = BLASTOptionDelete (options); | 250 | options = BLASTOptionDelete (options); |
208 | 251 | ||
diff --git a/src/assign_protein_type.h b/src/assign/assign_protein_type.h index 221154f..221154f 100644 --- a/src/assign_protein_type.h +++ b/src/assign/assign_protein_type.h | |||
diff --git a/src/check_error.c b/src/error/check_error.c index 4630b50..4630b50 100644 --- a/src/check_error.c +++ b/src/error/check_error.c | |||
diff --git a/src/check_error.h b/src/error/check_error.h index 2250c59..2250c59 100644 --- a/src/check_error.h +++ b/src/error/check_error.h | |||
diff --git a/src/check_h5_error.c b/src/error/check_h5_error.c index d90b21f..d90b21f 100644 --- a/src/check_h5_error.c +++ b/src/error/check_h5_error.c | |||
diff --git a/src/check_h5_error.h b/src/error/check_h5_error.h index e460e97..e460e97 100644 --- a/src/check_h5_error.h +++ b/src/error/check_h5_error.h | |||
diff --git a/src/check_ncbi_error.c b/src/error/check_ncbi_error.c index 6071d1a..6071d1a 100644 --- a/src/check_ncbi_error.c +++ b/src/error/check_ncbi_error.c | |||
diff --git a/src/check_ncbi_error.h b/src/error/check_ncbi_error.h index 45ac0ca..45ac0ca 100644 --- a/src/check_ncbi_error.h +++ b/src/error/check_ncbi_error.h | |||
diff --git a/src/load_influenza_aa_dat.c b/src/load/load_influenza_aa_dat.c index aed33e8..8bf47aa 100644 --- a/src/load_influenza_aa_dat.c +++ b/src/load/load_influenza_aa_dat.c | |||
@@ -6,8 +6,8 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "load_influenza_aa_dat.h" | 8 | #include "load_influenza_aa_dat.h" |
9 | #include "check_error.h" | 9 | #include "error/check_error.h" |
10 | #include "check_h5_error.h" | 10 | #include "error/check_h5_error.h" |
11 | #include <hdf5_hl.h> | 11 | #include <hdf5_hl.h> |
12 | #include <string.h> | 12 | #include <string.h> |
13 | #include <stdlib.h> | 13 | #include <stdlib.h> |
diff --git a/src/load_influenza_aa_dat.h b/src/load/load_influenza_aa_dat.h index f6c60be..f6c60be 100644 --- a/src/load_influenza_aa_dat.h +++ b/src/load/load_influenza_aa_dat.h | |||
diff --git a/src/load_influenza_faa.c b/src/load/load_influenza_faa.c index fd35254..a217989 100644 --- a/src/load_influenza_faa.c +++ b/src/load/load_influenza_faa.c | |||
@@ -1,8 +1,8 @@ | |||
1 | #include "check_error.h" | 1 | #include "error/check_error.h" |
2 | #include "check_h5_error.h" | 2 | #include "error/check_h5_error.h" |
3 | #include "load_influenza_faa.h" | 3 | #include "load_influenza_faa.h" |
4 | #include "sequence_data.h" | 4 | #include "model/sequence_data.h" |
5 | #include "sequence_data_init.h" | 5 | #include "model/sequence_data_init.h" |
6 | #include <hdf5_hl.h> | 6 | #include <hdf5_hl.h> |
7 | #include <string.h> | 7 | #include <string.h> |
8 | #include <stdlib.h> | 8 | #include <stdlib.h> |
@@ -57,8 +57,6 @@ load_influenza_faa (hid_t file_id) | |||
57 | strncpy (p_data.description, strsep (&running, "|"), | 57 | strncpy (p_data.description, strsep (&running, "|"), |
58 | sizeof (p_data.description)); | 58 | sizeof (p_data.description)); |
59 | 59 | ||
60 | strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); | ||
61 | |||
62 | const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = | 60 | const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = |
63 | SEQUENCE_DATA_FIELD_NAMES; | 61 | SEQUENCE_DATA_FIELD_NAMES; |
64 | 62 | ||
diff --git a/src/load_influenza_faa.h b/src/load/load_influenza_faa.h index 569c411..569c411 100644 --- a/src/load_influenza_faa.h +++ b/src/load/load_influenza_faa.h | |||
diff --git a/src/model/gi_type_data.h b/src/model/gi_type_data.h new file mode 100644 index 0000000..bf149c3 --- a/dev/null +++ b/src/model/gi_type_data.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef GI_TYPE_DATA_H | ||
2 | #define GI_TYPE_DATA_H | ||
3 | |||
4 | #define GI_TYPE_DATA_FIELD_NUM 3 | ||
5 | |||
6 | /* | ||
7 | * Field names selected to match the names of fields on the "Influenza | ||
8 | * Virus Resource - Influenza Virus Sequence Database" search form | ||
9 | * online at | ||
10 | * http://www.ncbi.nlm.nih.gov/genomes/FLU/Database/select.cgi?go=database. | ||
11 | */ | ||
12 | #define GI_TYPE_DATA_FIELD_NAMES { "GI", "Type", "Protein" } | ||
13 | |||
14 | typedef struct | ||
15 | { | ||
16 | int gi; | ||
17 | char type[2]; | ||
18 | char protein[7]; | ||
19 | } gi_type_data; | ||
20 | |||
21 | #endif // GI_TYPE_DATA_H | ||
diff --git a/src/model/gi_type_data_init.c b/src/model/gi_type_data_init.c new file mode 100644 index 0000000..4a161c7 --- a/dev/null +++ b/src/model/gi_type_data_init.c | |||
@@ -0,0 +1,36 @@ | |||
1 | #include "gi_type_data_init.h" | ||
2 | #include "gi_type_data.h" | ||
3 | |||
4 | /* | ||
5 | * There should be some way to automate the generation of these values | ||
6 | * or at least this source file based on just the definition of the | ||
7 | * struct. Perhaps an HDF5 precompiler could do such a thing. | ||
8 | */ | ||
9 | void | ||
10 | gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | ||
11 | hid_t *field_type) | ||
12 | { | ||
13 | *dst_size = sizeof (gi_type_data); | ||
14 | |||
15 | dst_offset[0] = HOFFSET (gi_type_data, gi); | ||
16 | dst_offset[1] = HOFFSET (gi_type_data, type); | ||
17 | dst_offset[2] = HOFFSET (gi_type_data, protein); | ||
18 | |||
19 | gi_type_data dst_buf[1]; | ||
20 | |||
21 | dst_sizes[0] = sizeof (dst_buf[0].gi); | ||
22 | dst_sizes[1] = sizeof (dst_buf[0].type); | ||
23 | dst_sizes[2] = sizeof (dst_buf[0].protein); | ||
24 | |||
25 | field_type[0] = H5T_NATIVE_INT; | ||
26 | |||
27 | hid_t type_h5t = H5Tcopy (H5T_C_S1); | ||
28 | H5Tset_size (type_h5t, 2); | ||
29 | field_type[1] = type_h5t; | ||
30 | |||
31 | hid_t protein_h5t = H5Tcopy (H5T_C_S1); | ||
32 | H5Tset_size (protein_h5t, 7); | ||
33 | field_type[2] = protein_h5t; | ||
34 | |||
35 | return; | ||
36 | } | ||
diff --git a/src/model/gi_type_data_init.h b/src/model/gi_type_data_init.h new file mode 100644 index 0000000..5c45cba --- a/dev/null +++ b/src/model/gi_type_data_init.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef GI_TYPE_DATA_INIT_H | ||
2 | #define GI_TYPE_DATA_INIT_H | ||
3 | |||
4 | #include <hdf5.h> | ||
5 | |||
6 | /* | ||
7 | * Initialize the structures describing the struct. These descriptive | ||
8 | * structures are used by the HDF5 API. | ||
9 | */ | ||
10 | void | ||
11 | gi_type_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | ||
12 | hid_t *field_type); | ||
13 | |||
14 | #endif // GI_TYPE_DATA_INIT_H | ||
diff --git a/src/sequence_data.h b/src/model/sequence_data.h index fb4ff78..9db91ac 100644 --- a/src/sequence_data.h +++ b/src/model/sequence_data.h | |||
@@ -1,16 +1,15 @@ | |||
1 | #ifndef SEQUENCE_DATA_H | 1 | #ifndef SEQUENCE_DATA_H |
2 | #define SEQUENCE_DATA_H | 2 | #define SEQUENCE_DATA_H |
3 | 3 | ||
4 | #define SEQUENCE_DATA_FIELD_NUM 4 | 4 | #define SEQUENCE_DATA_FIELD_NUM 3 |
5 | 5 | ||
6 | #define SEQUENCE_DATA_FIELD_NAMES { "GI", "GB", "Description", "Protein Type" } | 6 | #define SEQUENCE_DATA_FIELD_NAMES { "GI", "GB", "Description" } |
7 | 7 | ||
8 | typedef struct | 8 | typedef struct |
9 | { | 9 | { |
10 | int gi; | 10 | int gi; |
11 | char gb[9]; | 11 | char gb[9]; |
12 | char description[196]; | 12 | char description[196]; |
13 | char protein_type[7]; | ||
14 | } sequence_data; | 13 | } sequence_data; |
15 | 14 | ||
16 | #endif // SEQUENCE_DATA_H | 15 | #endif // SEQUENCE_DATA_H |
diff --git a/src/sequence_data_init.c b/src/model/sequence_data_init.c index 09ba189..f6b3b1f 100644 --- a/src/sequence_data_init.c +++ b/src/model/sequence_data_init.c | |||
@@ -10,14 +10,12 @@ sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | |||
10 | dst_offset[0] = HOFFSET (sequence_data, gi); | 10 | dst_offset[0] = HOFFSET (sequence_data, gi); |
11 | dst_offset[1] = HOFFSET (sequence_data, gb); | 11 | dst_offset[1] = HOFFSET (sequence_data, gb); |
12 | dst_offset[2] = HOFFSET (sequence_data, description); | 12 | dst_offset[2] = HOFFSET (sequence_data, description); |
13 | dst_offset[3] = HOFFSET (sequence_data, protein_type); | ||
14 | 13 | ||
15 | sequence_data dst_buf[1]; | 14 | sequence_data dst_buf[1]; |
16 | 15 | ||
17 | dst_sizes[0] = sizeof (dst_buf[0].gi); | 16 | dst_sizes[0] = sizeof (dst_buf[0].gi); |
18 | dst_sizes[1] = sizeof (dst_buf[0].gb); | 17 | dst_sizes[1] = sizeof (dst_buf[0].gb); |
19 | dst_sizes[2] = sizeof (dst_buf[0].description); | 18 | dst_sizes[2] = sizeof (dst_buf[0].description); |
20 | dst_sizes[3] = sizeof (dst_buf[0].protein_type); | ||
21 | 19 | ||
22 | field_type[0] = H5T_NATIVE_INT; | 20 | field_type[0] = H5T_NATIVE_INT; |
23 | 21 | ||
@@ -28,10 +26,6 @@ sequence_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | |||
28 | hid_t description_type = H5Tcopy (H5T_C_S1); | 26 | hid_t description_type = H5Tcopy (H5T_C_S1); |
29 | H5Tset_size (description_type, 196); | 27 | H5Tset_size (description_type, 196); |
30 | field_type[2] = description_type; | 28 | field_type[2] = description_type; |
31 | |||
32 | hid_t protein_type_type = H5Tcopy (H5T_C_S1); | ||
33 | H5Tset_size (protein_type_type, 7); | ||
34 | field_type[3] = protein_type_type; | ||
35 | 29 | ||
36 | return; | 30 | return; |
37 | } | 31 | } |
diff --git a/src/sequence_data_init.h b/src/model/sequence_data_init.h index c87e7e6..c87e7e6 100644 --- a/src/sequence_data_init.h +++ b/src/model/sequence_data_init.h | |||
diff --git a/src/updator.c b/src/updator.c index 8d556da..591d2f6 100644 --- a/src/updator.c +++ b/src/updator.c | |||
@@ -2,8 +2,8 @@ | |||
2 | * Update derived fields. | 2 | * Update derived fields. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include "assign_protein_type.h" | 5 | #include "assign/assign_protein_type.h" |
6 | #include "check_h5_error.h" | 6 | #include "error/check_h5_error.h" |
7 | 7 | ||
8 | #define FILE "influenza.h5" | 8 | #define FILE "influenza.h5" |
9 | 9 | ||