-rw-r--r-- | src/assign_protein_type.c | 29 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 2 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 42 |
3 files changed, 59 insertions, 14 deletions
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 61bb99d..749b7ad 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c | |||
@@ -5,6 +5,8 @@ | |||
5 | #include <string.h> | 5 | #include <string.h> |
6 | #include <stdlib.h> | 6 | #include <stdlib.h> |
7 | 7 | ||
8 | #define SEQUENCE_DATA_FIELD_NUM 4 | ||
9 | |||
8 | void | 10 | void |
9 | load_influenza_faa (hid_t file_id) | 11 | load_influenza_faa (hid_t file_id) |
10 | { | 12 | { |
@@ -13,24 +15,27 @@ load_influenza_faa (hid_t file_id) | |||
13 | int gi; | 15 | int gi; |
14 | char gb[9]; | 16 | char gb[9]; |
15 | char description[196]; | 17 | char description[196]; |
18 | char protein_type[7]; | ||
16 | } sequence_data; | 19 | } sequence_data; |
17 | 20 | ||
18 | size_t dst_size = sizeof (sequence_data); | 21 | size_t dst_size = sizeof (sequence_data); |
19 | size_t dst_offset[3] = | 22 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] = |
20 | { HOFFSET (sequence_data, gi), | 23 | { HOFFSET (sequence_data, gi), |
21 | HOFFSET (sequence_data, gb), | 24 | HOFFSET (sequence_data, gb), |
22 | HOFFSET (sequence_data, description) | 25 | HOFFSET (sequence_data, description), |
26 | HOFFSET (sequence_data, protein_type) | ||
23 | }; | 27 | }; |
24 | 28 | ||
25 | sequence_data dst_buf[1]; | 29 | sequence_data dst_buf[1]; |
26 | 30 | ||
27 | size_t dst_sizes[3] = { | 31 | size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = { |
28 | sizeof (dst_buf[0].gi), | 32 | sizeof (dst_buf[0].gi), |
29 | sizeof (dst_buf[0].gb), | 33 | sizeof (dst_buf[0].gb), |
30 | sizeof (dst_buf[0].description) | 34 | sizeof (dst_buf[0].description), |
35 | sizeof (dst_buf[0].protein_type) | ||
31 | }; | 36 | }; |
32 | 37 | ||
33 | hid_t field_type[3]; | 38 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; |
34 | 39 | ||
35 | field_type[0] = H5T_NATIVE_INT; | 40 | field_type[0] = H5T_NATIVE_INT; |
36 | 41 | ||
@@ -42,9 +47,15 @@ load_influenza_faa (hid_t file_id) | |||
42 | H5Tset_size (description_type, 196); | 47 | H5Tset_size (description_type, 196); |
43 | field_type[2] = description_type; | 48 | field_type[2] = description_type; |
44 | 49 | ||
45 | const char *field_names[3] = { "GI", | 50 | hid_t protein_type_type = H5Tcopy (H5T_C_S1); |
46 | "GB", | 51 | H5Tset_size (protein_type_type, 7); |
47 | "Description" }; | 52 | field_type[3] = protein_type_type; |
53 | |||
54 | const char *field_names[SEQUENCE_DATA_FIELD_NUM] = | ||
55 | { "GI", | ||
56 | "GB", | ||
57 | "Description", | ||
58 | "Protein Type" }; | ||
48 | 59 | ||
49 | hsize_t chunk_size = 10; | 60 | hsize_t chunk_size = 10; |
50 | int *fill_data = NULL; | 61 | int *fill_data = NULL; |
@@ -86,13 +97,17 @@ load_influenza_faa (hid_t file_id) | |||
86 | strncpy (p_data.description, strsep (&running, "|"), | 97 | strncpy (p_data.description, strsep (&running, "|"), |
87 | sizeof (p_data.description)); | 98 | sizeof (p_data.description)); |
88 | 99 | ||
100 | strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); | ||
101 | |||
89 | if (current_line == 1) | 102 | if (current_line == 1) |
90 | { | 103 | { |
91 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | 104 | herr_t status = H5TBmake_table ("influenza.faa", file_id, |
92 | "influenza.faa", 3, 1, dst_size, | 105 | "influenza.faa", |
93 | field_names, dst_offset, | 106 | SEQUENCE_DATA_FIELD_NUM, 1, |
94 | field_type, chunk_size, | 107 | dst_size, field_names, |
95 | fill_data, compress, &p_data); | 108 | dst_offset, field_type, |
109 | chunk_size, fill_data, compress, | ||
110 | &p_data); | ||
96 | if (status < 0) | 111 | if (status < 0) |
97 | check_h5_error (status, __FILE__, __LINE__); | 112 | check_h5_error (status, __FILE__, __LINE__); |
98 | } | 113 | } |
@@ -119,6 +134,7 @@ load_influenza_faa (hid_t file_id) | |||
119 | 134 | ||
120 | H5Tclose (gb_type); | 135 | H5Tclose (gb_type); |
121 | H5Tclose (description_type); | 136 | H5Tclose (description_type); |
137 | H5Tclose (protein_type_type); | ||
122 | 138 | ||
123 | return; | 139 | return; |
124 | } | 140 | } |