author | Don Pellegrino <don@drexel.edu> | 2010-01-24 04:18:36 (GMT) |
---|---|---|
committer | Don Pellegrino <don@drexel.edu> | 2010-01-24 04:18:36 (GMT) |
commit | 6f7b615d1e0a2aac33712792d6146b42f2623e8f (patch) (unidiff) | |
tree | fc261f842fcf6cc17f3b0a85ab563d9d2382e95f | |
parent | f34c2e06e985c72b8b431e98b4aa865c8d74b7ba (diff) | |
download | exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.zip exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.gz exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.bz2 |
Modified to load the blast scores from a text file into the HDF5 file.
-rw-r--r-- | src/load/load_blast_scores.c | 53 |
1 file changed, 45 insertions, 8 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c index 42e6bd9..e6a6fd3 100644 --- a/src/load/load_blast_scores.c +++ b/src/load/load_blast_scores.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #define _GNU_SOURCE | ||
1 | #include "error/check_error.h" | 2 | #include "error/check_error.h" |
2 | #include "error/check_h5_error.h" | 3 | #include "error/check_h5_error.h" |
3 | #include "model/blast_scores_data.h" | 4 | #include "model/blast_scores_data.h" |
@@ -6,7 +7,19 @@ | |||
6 | #include <hdf5_hl.h> | 7 | #include <hdf5_hl.h> |
7 | #include <string.h> | 8 | #include <string.h> |
8 | #include <stdlib.h> | 9 | #include <stdlib.h> |
9 | 10 | #include <stdio.h> | |
11 | |||
12 | /* | ||
13 | * A simple sanity check can be performed by comparing the HDF5 | ||
14 | * content with the content of the input text file. Data block output | ||
15 | * from the first command should be equal to the first 5 lines of the | ||
16 | * input file while data block output from the second command should | ||
17 | * be equal to the last 5 lines of the input file. | ||
18 | * | ||
19 | * h5dump --dataset=blast --start "0" --count "5" influenza.h5 | ||
20 | * | ||
21 | * h5dump --dataset=blast --start "5749892" --count "5" influenza.h5 | ||
22 | */ | ||
10 | void | 23 | void |
11 | load_blast_scores (hid_t file_id, const char *file_name) | 24 | load_blast_scores (hid_t file_id, const char *file_name) |
12 | { | 25 | { |
@@ -21,14 +34,15 @@ load_blast_scores (hid_t file_id, const char *file_name) | |||
21 | int *fill_data = NULL; | 34 | int *fill_data = NULL; |
22 | int compress = 0; | 35 | int compress = 0; |
23 | 36 | ||
24 | blast_scores_data p_data[1000]; | 37 | blast_scores_data p_data[10000]; |
25 | FILE *dat = fopen (file_name, "r"); | 38 | FILE *dat = fopen (file_name, "r"); |
26 | if (dat == NULL) | 39 | if (dat == NULL) |
27 | check_error (__FILE__, __LINE__); | 40 | check_error (__FILE__, __LINE__); |
28 | char *line = NULL; | 41 | char *line = NULL; |
29 | size_t len = 0; | 42 | size_t len = 0; |
30 | int current_line = 0; | ||
31 | int i = -1; | 43 | int i = -1; |
44 | int written = 0; | ||
45 | int current_line = 0; | ||
32 | 46 | ||
33 | while (getline (&line, &len, dat) != -1) | 47 | while (getline (&line, &len, dat) != -1) |
34 | { | 48 | { |
@@ -65,8 +79,10 @@ load_blast_scores (hid_t file_id, const char *file_name) | |||
65 | token = strsep (&running, ","); | 79 | token = strsep (&running, ","); |
66 | p_data[i].evalue = strtod (token, NULL); | 80 | p_data[i].evalue = strtod (token, NULL); |
67 | 81 | ||
68 | if (current_line == 1) | 82 | if (written == 0) |
69 | { | 83 | { |
84 | |||
85 | printf ("Creating blast table.\n"); | ||
70 | 86 | ||
71 | const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] = | 87 | const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] = |
72 | BLAST_SCORES_DATA_FIELD_NAMES; | 88 | BLAST_SCORES_DATA_FIELD_NAMES; |
@@ -84,13 +100,17 @@ load_blast_scores (hid_t file_id, const char *file_name) | |||
84 | if (status < 0) | 100 | if (status < 0) |
85 | check_h5_error (__FILE__, __LINE__); | 101 | check_h5_error (__FILE__, __LINE__); |
86 | 102 | ||
103 | written = 1; | ||
104 | i = -1; | ||
105 | current_line = 0; | ||
106 | |||
87 | } | 107 | } |
88 | 108 | ||
89 | if ((i % 1000 == 0) && (i > 0)) | 109 | if ((current_line % 10000 == 0) && (current_line > 0)) |
90 | { | 110 | { |
91 | 111 | ||
92 | herr_t status = | 112 | herr_t status = |
93 | H5TBappend_records (file_id, "blast", 1000, | 113 | H5TBappend_records (file_id, "blast", 10000, |
94 | dst_size, dst_offset, dst_sizes, | 114 | dst_size, dst_offset, dst_sizes, |
95 | &p_data[0]); | 115 | &p_data[0]); |
96 | if(status < 0) | 116 | if(status < 0) |
@@ -100,9 +120,11 @@ load_blast_scores (hid_t file_id, const char *file_name) | |||
100 | if (status < 0) | 120 | if (status < 0) |
101 | check_h5_error (__FILE__, __LINE__); | 121 | check_h5_error (__FILE__, __LINE__); |
102 | 122 | ||
103 | printf ("Processed %i of records.\n", current_line); | 123 | printf ("Processed %i records.\n", current_line); |
104 | 124 | ||
125 | written += 10000; | ||
105 | i = -1; | 126 | i = -1; |
127 | |||
106 | } | 128 | } |
107 | 129 | ||
108 | if (running) | 130 | if (running) |
@@ -110,6 +132,21 @@ load_blast_scores (hid_t file_id, const char *file_name) | |||
110 | 132 | ||
111 | } // End for each line of the input file. | 133 | } // End for each line of the input file. |
112 | 134 | ||
135 | if (i >= 0) | ||
136 | { | ||
137 | herr_t status = | ||
138 | H5TBappend_records (file_id, "blast", i+1, | ||
139 | dst_size, dst_offset, dst_sizes, &p_data[0]); | ||
140 | if (status < 0) | ||
141 | check_h5_error (__FILE__, __LINE__); | ||
142 | |||
143 | printf ("Wrote %i records.\n", written + i + 1); | ||
144 | } | ||
145 | |||
146 | herr_t status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
147 | if (status < 0) | ||
148 | check_h5_error (__FILE__, __LINE__); | ||
149 | |||
113 | if (line) | 150 | if (line) |
114 | free (line); | 151 | free (line); |
115 | 152 | ||