summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-24 04:18:36 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-24 04:18:36 (GMT)
commit 6f7b615d1e0a2aac33712792d6146b42f2623e8f (patch) (unidiff)
tree fc261f842fcf6cc17f3b0a85ab563d9d2382e95f
parent f34c2e06e985c72b8b431e98b4aa865c8d74b7ba (diff)
download exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.zip
exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.gz
exp007-6f7b615d1e0a2aac33712792d6146b42f2623e8f.tar.bz2
Modified to load the blast scores from a text file into the HDF5 file.
-rw-r--r-- src/load/load_blast_scores.c 53
1 file changed, 45 insertions, 8 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c
index 42e6bd9..e6a6fd3 100644
--- a/src/load/load_blast_scores.c
+++ b/src/load/load_blast_scores.c
@@ -1,3 +1,4 @@
1#define _GNU_SOURCE
1#include "error/check_error.h"2#include "error/check_error.h"
2#include "error/check_h5_error.h"3#include "error/check_h5_error.h"
3#include "model/blast_scores_data.h"4#include "model/blast_scores_data.h"
@@ -6,7 +7,19 @@
6#include <hdf5_hl.h>7#include <hdf5_hl.h>
7#include <string.h>8#include <string.h>
8#include <stdlib.h>9#include <stdlib.h>
910#include <stdio.h>
11
12/*
13 * A simple sanity check can be performed by comparing the HDF5
14 * content with the content of the input text file. Data block output
15 * from the first command should be equal to the first 5 lines of the
16 * input file while data block output from the second command should
17 * be equal to the last 5 lines of the input file.
18 *
19 * h5dump --dataset=blast --start "0" --count "5" influenza.h5
20 *
21 * h5dump --dataset=blast --start "5749892" --count "5" influenza.h5
22 */
10void23void
11load_blast_scores (hid_t file_id, const char *file_name)24load_blast_scores (hid_t file_id, const char *file_name)
12{25{
@@ -21,14 +34,15 @@ load_blast_scores (hid_t file_id, const char *file_name)
21 int *fill_data = NULL;34 int *fill_data = NULL;
22 int compress = 0;35 int compress = 0;
2336
24 blast_scores_data p_data[1000];37 blast_scores_data p_data[10000];
25 FILE *dat = fopen (file_name, "r");38 FILE *dat = fopen (file_name, "r");
26 if (dat == NULL)39 if (dat == NULL)
27 check_error (__FILE__, __LINE__);40 check_error (__FILE__, __LINE__);
28 char *line = NULL;41 char *line = NULL;
29 size_t len = 0;42 size_t len = 0;
30 int current_line = 0;
31 int i = -1;43 int i = -1;
44 int written = 0;
45 int current_line = 0;
3246
33 while (getline (&line, &len, dat) != -1)47 while (getline (&line, &len, dat) != -1)
34 {48 {
@@ -65,8 +79,10 @@ load_blast_scores (hid_t file_id, const char *file_name)
65 token = strsep (&running, ",");79 token = strsep (&running, ",");
66 p_data[i].evalue = strtod (token, NULL);80 p_data[i].evalue = strtod (token, NULL);
6781
68 if (current_line == 1)82 if (written == 0)
69 {83 {
84
85 printf ("Creating blast table.\n");
70 86
71 const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] =87 const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] =
72 BLAST_SCORES_DATA_FIELD_NAMES;88 BLAST_SCORES_DATA_FIELD_NAMES;
@@ -84,13 +100,17 @@ load_blast_scores (hid_t file_id, const char *file_name)
84 if (status < 0)100 if (status < 0)
85 check_h5_error (__FILE__, __LINE__);101 check_h5_error (__FILE__, __LINE__);
86102
103 written = 1;
104 i = -1;
105 current_line = 0;
106
87 }107 }
88108
89 if ((i % 1000 == 0) && (i > 0))109 if ((current_line % 10000 == 0) && (current_line > 0))
90 {110 {
91111
92 herr_t status =112 herr_t status =
93 H5TBappend_records (file_id, "blast", 1000,113 H5TBappend_records (file_id, "blast", 10000,
94 dst_size, dst_offset, dst_sizes,114 dst_size, dst_offset, dst_sizes,
95 &p_data[0]);115 &p_data[0]);
96 if(status < 0)116 if(status < 0)
@@ -100,9 +120,11 @@ load_blast_scores (hid_t file_id, const char *file_name)
100 if (status < 0)120 if (status < 0)
101 check_h5_error (__FILE__, __LINE__);121 check_h5_error (__FILE__, __LINE__);
102122
103 printf ("Processed %i of records.\n", current_line);123 printf ("Processed %i records.\n", current_line);
104124
125 written += 10000;
105 i = -1;126 i = -1;
127
106 }128 }
107129
108 if (running)130 if (running)
@@ -110,6 +132,21 @@ load_blast_scores (hid_t file_id, const char *file_name)
110132
111 } // End for each line of the input file.133 } // End for each line of the input file.
112134
135 if (i >= 0)
136 {
137 herr_t status =
138 H5TBappend_records (file_id, "blast", i+1,
139 dst_size, dst_offset, dst_sizes, &p_data[0]);
140 if (status < 0)
141 check_h5_error (__FILE__, __LINE__);
142
143 printf ("Wrote %i records.\n", written + i + 1);
144 }
145
146 herr_t status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
147 if (status < 0)
148 check_h5_error (__FILE__, __LINE__);
149
113 if (line)150 if (line)
114 free (line);151 free (line);
115152

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.