summary refs log tree commit diff stats
author Don Pellegrino <don@drexel.edu> 2010-01-24 04:18:36 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-24 04:18:36 (GMT)
commit 6f7b615d1e0a2aac33712792d6146b42f2623e8f (patch) (side-by-side diff)
tree fc261f842fcf6cc17f3b0a85ab563d9d2382e95f
parent f34c2e06e985c72b8b431e98b4aa865c8d74b7ba (diff)
download exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.zip
exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.gz
exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.bz2
Modified to load the blast scores from a text file into the HDF5 file.
-rw-r--r-- src/load/load_blast_scores.c 53
1 file changed, 45 insertions, 8 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c
index 42e6bd9..e6a6fd3 100644
--- a/src/load/load_blast_scores.c
+++ b/src/load/load_blast_scores.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
#include "error/check_error.h"
#include "error/check_h5_error.h"
#include "model/blast_scores_data.h"
@@ -6,7 +7,19 @@
#include <hdf5_hl.h>
#include <string.h>
#include <stdlib.h>
-
+#include <stdio.h>
+
+/*
+ * A simple sanity check can be performed by comparing the HDF5
+ * content with the content of the input text file. Data block output
+ * from the first command should be equal to the first 5 lines of the
+ * input file while data block output from the second command should
+ * be equal to the last 5 lines of the input file.
+ *
+ * h5dump --dataset=blast --start "0" --count "5" influenza.h5
+ *
+ * h5dump --dataset=blast --start "5749892" --count "5" influenza.h5
+ */
void
load_blast_scores (hid_t file_id, const char *file_name)
{
@@ -21,14 +34,15 @@ load_blast_scores (hid_t file_id, const char *file_name)
int *fill_data = NULL;
int compress = 0;
- blast_scores_data p_data[1000];
+ blast_scores_data p_data[10000];
FILE *dat = fopen (file_name, "r");
if (dat == NULL)
check_error (__FILE__, __LINE__);
char *line = NULL;
size_t len = 0;
- int current_line = 0;
int i = -1;
+ int written = 0;
+ int current_line = 0;
while (getline (&line, &len, dat) != -1)
{
@@ -65,8 +79,10 @@ load_blast_scores (hid_t file_id, const char *file_name)
token = strsep (&running, ",");
p_data[i].evalue = strtod (token, NULL);
- if (current_line == 1)
+ if (written == 0)
{
+
+ printf ("Creating blast table.\n");
const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] =
BLAST_SCORES_DATA_FIELD_NAMES;
@@ -84,13 +100,17 @@ load_blast_scores (hid_t file_id, const char *file_name)
if (status < 0)
check_h5_error (__FILE__, __LINE__);
+ written = 1;
+ i = -1;
+ current_line = 0;
+
}
- if ((i % 1000 == 0) && (i > 0))
+ if ((current_line % 10000 == 0) && (current_line > 0))
{
-
+
herr_t status =
- H5TBappend_records (file_id, "blast", 1000,
+ H5TBappend_records (file_id, "blast", 10000,
dst_size, dst_offset, dst_sizes,
&p_data[0]);
if(status < 0)
@@ -100,9 +120,11 @@ load_blast_scores (hid_t file_id, const char *file_name)
if (status < 0)
check_h5_error (__FILE__, __LINE__);
- printf ("Processed %i of records.\n", current_line);
+ printf ("Processed %i records.\n", current_line);
+ written += 10000;
i = -1;
+
}
if (running)
@@ -110,6 +132,21 @@ load_blast_scores (hid_t file_id, const char *file_name)
} // End for each line of the input file.
+ if (i >= 0)
+ {
+ herr_t status =
+ H5TBappend_records (file_id, "blast", i+1,
+ dst_size, dst_offset, dst_sizes, &p_data[0]);
+ if (status < 0)
+ check_h5_error (__FILE__, __LINE__);
+
+ printf ("Wrote %i records.\n", written + i + 1);
+ }
+
+ herr_t status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
+ if (status < 0)
+ check_h5_error (__FILE__, __LINE__);
+
if (line)
free (line);

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.