-rw-r--r-- | src/assign_protein_type.c | 73 |
1 files changed, 57 insertions, 16 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 166e787..ec3a959 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c | |||
@@ -83,6 +83,7 @@ assign_protein_type (hid_t file_id) | |||
83 | * Assign protein types to records for which the field is empty. | 83 | * Assign protein types to records for which the field is empty. |
84 | */ | 84 | */ |
85 | printf ("Records to process: %i\n", (int)nrecords); | 85 | printf ("Records to process: %i\n", (int)nrecords); |
86 | bool updates_pending = false; | ||
86 | for (int i = 0; i < nrecords; i++) | 87 | for (int i = 0; i < nrecords; i++) |
87 | { | 88 | { |
88 | 89 | ||
@@ -92,6 +93,14 @@ assign_protein_type (hid_t file_id) | |||
92 | */ | 93 | */ |
93 | Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL); | 94 | Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL); |
94 | BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); | 95 | BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); |
96 | if (bsp == NULL) | ||
97 | { | ||
98 | error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, | ||
99 | "Unable to find BLAST record for gi|%i. Ensure the BLAST " | ||
100 | "database is up-to-date with the HDF5 record set. See the " | ||
101 | "BLAST formatdb.log file for details.\n", | ||
102 | dst_buf[i].gi); | ||
103 | } | ||
95 | 104 | ||
96 | SeqAlignPtr seqalign = BioseqBlastEngine (bsp, | 105 | SeqAlignPtr seqalign = BioseqBlastEngine (bsp, |
97 | "blastp", | 106 | "blastp", |
@@ -123,6 +132,7 @@ assign_protein_type (hid_t file_id) | |||
123 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); | 132 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); |
124 | strncpy (dst_buf[i].protein_type, &target_id_buf[6], | 133 | strncpy (dst_buf[i].protein_type, &target_id_buf[6], |
125 | sizeof (dst_buf[i].protein_type)); | 134 | sizeof (dst_buf[i].protein_type)); |
135 | updates_pending = true; | ||
126 | } | 136 | } |
127 | 137 | ||
128 | /* | 138 | /* |
@@ -140,25 +150,56 @@ assign_protein_type (hid_t file_id) | |||
140 | seqalign = SeqAlignSetFree (seqalign); | 150 | seqalign = SeqAlignSetFree (seqalign); |
141 | bsp = BioseqFree (bsp); | 151 | bsp = BioseqFree (bsp); |
142 | 152 | ||
143 | /* | 153 | } |
144 | * Write the data out to the file. | ||
145 | */ | ||
146 | if ( (i % 1000 == 0) && (i > 0) ) | ||
147 | { | ||
148 | status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000, | ||
149 | dst_size, dst_offset, dst_sizes, | ||
150 | &dst_buf[i-1000]); | ||
151 | if (status < 0) | ||
152 | check_h5_error (status, __FILE__, __LINE__); | ||
153 | 154 | ||
154 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | 155 | /* |
155 | if (status < 0) | 156 | * Write the data out to the file. |
156 | check_h5_error (status, __FILE__, __LINE__); | 157 | */ |
158 | if ( (i % 1000 == 0) && (i > 0) && updates_pending) | ||
159 | { | ||
160 | status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000, | ||
161 | dst_size, dst_offset, dst_sizes, | ||
162 | &dst_buf[i-1000]); | ||
163 | if (status < 0) | ||
164 | check_h5_error (status, __FILE__, __LINE__); | ||
165 | |||
166 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
167 | if (status < 0) | ||
168 | check_h5_error (status, __FILE__, __LINE__); | ||
169 | |||
170 | updates_pending = false; | ||
171 | |||
172 | printf ("Processed %i of %i records.\n", i, (int)nrecords); | ||
173 | } | ||
174 | |||
175 | } | ||
157 | 176 | ||
158 | printf ("Processed %i of %i records.\n", i, (int)nrecords); | 177 | /* |
159 | } | 178 | * Write out records from the last bin if it was less than 1000 |
160 | } | 179 | * records in size. |
180 | */ | ||
181 | if (updates_pending) | ||
182 | { | ||
183 | if ((int)nrecords < 1000) | ||
184 | { | ||
185 | status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords, | ||
186 | dst_size, dst_offset, dst_sizes, | ||
187 | dst_buf); | ||
188 | } | ||
189 | else | ||
190 | { | ||
191 | status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000, | ||
192 | dst_size, dst_offset, dst_sizes, | ||
193 | &dst_buf[nrecords-1000]); | ||
194 | } | ||
195 | if (status < 0) | ||
196 | check_h5_error (status, __FILE__, __LINE__); | ||
197 | |||
198 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
199 | if (status < 0) | ||
200 | check_h5_error (status, __FILE__, __LINE__); | ||
161 | 201 | ||
202 | updates_pending = false; | ||
162 | } | 203 | } |
163 | 204 | ||
164 | free (dst_buf); | 205 | free (dst_buf); |