summary | refs | log | tree | commit | diff | stats
Unidiff
-rw-r--r-- src/assign_protein_type.c 73
1 file changed, 57 insertions, 16 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 166e787..ec3a959 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -83,6 +83,7 @@ assign_protein_type (hid_t file_id)
83 * Assign protein types to records for which the field is empty.83 * Assign protein types to records for which the field is empty.
84 */84 */
85 printf ("Records to process: %i\n", (int)nrecords);85 printf ("Records to process: %i\n", (int)nrecords);
86 bool updates_pending = false;
86 for (int i = 0; i < nrecords; i++)87 for (int i = 0; i < nrecords; i++)
87 {88 {
8889
@@ -92,6 +93,14 @@ assign_protein_type (hid_t file_id)
92 */93 */
93 Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL);94 Int4 sequence_number = readdb_gi2seq (seqdb, dst_buf[i].gi, NULL);
94 BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number);95 BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number);
96 if (bsp == NULL)
97 {
98 error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__,
99 "Unable to find BLAST record for gi|%i. Ensure the BLAST "
100 "database is up-to-date with the HDF5 record set. See the "
101 "BLAST formatdb.log file for details.\n",
102 dst_buf[i].gi);
103 }
95 104
96 SeqAlignPtr seqalign = BioseqBlastEngine (bsp,105 SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
97 "blastp",106 "blastp",
@@ -123,6 +132,7 @@ assign_protein_type (hid_t file_id)
123 SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);132 SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);
124 strncpy (dst_buf[i].protein_type, &target_id_buf[6], 133 strncpy (dst_buf[i].protein_type, &target_id_buf[6],
125 sizeof (dst_buf[i].protein_type));134 sizeof (dst_buf[i].protein_type));
135 updates_pending = true;
126 }136 }
127137
128 /*138 /*
@@ -140,25 +150,56 @@ assign_protein_type (hid_t file_id)
140 seqalign = SeqAlignSetFree (seqalign);150 seqalign = SeqAlignSetFree (seqalign);
141 bsp = BioseqFree (bsp);151 bsp = BioseqFree (bsp);
142152
143 /*153 }
144 * Write the data out to the file.
145 */
146 if ( (i % 1000 == 0) && (i > 0) )
147 {
148 status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000,
149 dst_size, dst_offset, dst_sizes,
150 &dst_buf[i-1000]);
151 if (status < 0)
152 check_h5_error (status, __FILE__, __LINE__);
153154
154 status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);155 /*
155 if (status < 0)156 * Write the data out to the file.
156 check_h5_error (status, __FILE__, __LINE__);157 */
158 if ( (i % 1000 == 0) && (i > 0) && updates_pending)
159 {
160 status = H5TBwrite_records (file_id, "influenza.faa", i - 1000, 1000,
161 dst_size, dst_offset, dst_sizes,
162 &dst_buf[i-1000]);
163 if (status < 0)
164 check_h5_error (status, __FILE__, __LINE__);
165
166 status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
167 if (status < 0)
168 check_h5_error (status, __FILE__, __LINE__);
169
170 updates_pending = false;
171
172 printf ("Processed %i of %i records.\n", i, (int)nrecords);
173 }
174
175 }
157176
158 printf ("Processed %i of %i records.\n", i, (int)nrecords);177 /*
159 }178 * Write out records from the last bin if it was less than 1000
160 }179 * records in size.
180 */
181 if (updates_pending)
182 {
183 if ((int)nrecords < 1000)
184 {
185 status = H5TBwrite_records (file_id, "influenza.faa", 0, nrecords,
186 dst_size, dst_offset, dst_sizes,
187 dst_buf);
188 }
189 else
190 {
191 status = H5TBwrite_records (file_id, "influenza.faa", nrecords - 1000, 1000,
192 dst_size, dst_offset, dst_sizes,
193 &dst_buf[nrecords-1000]);
194 }
195 if (status < 0)
196 check_h5_error (status, __FILE__, __LINE__);
197
198 status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
199 if (status < 0)
200 check_h5_error (status, __FILE__, __LINE__);
161 201
202 updates_pending = false;
162 }203 }
163 204
164 free (dst_buf);205 free (dst_buf);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.