summary refs log tree commit diff stats
author Don Pellegrino <don@drexel.edu> 2010-01-23 23:00:22 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-23 23:00:22 (GMT)
commit d2dda50ed620d93cb1c3c9705b3379c3507b8d9c (patch) (unidiff)
tree 40df511a81bd346cbba2f7c1d753e36249e81f73
parent 6bfe5755d6a9b3d88032287e97681c65b7f32f0f (diff)
download exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.zip
exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.tar.gz
exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.tar.bz2
Routines to load the results of a BLAST run into the HDF5 file.
-rw-r--r-- src/load/load_blast_scores.c 119
-rw-r--r-- src/load/load_blast_scores.h 11
-rw-r--r-- src/model/blast_scores_data.h 21
-rw-r--r-- src/model/blast_scores_data_init.c 43
-rw-r--r-- src/model/blast_scores_data_init.h 14
5 files changed, 208 insertions, 0 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c
new file mode 100644
index 0000000..42e6bd9
--- a/dev/null
+++ b/src/load/load_blast_scores.c
@@ -0,0 +1,119 @@
1#include "error/check_error.h"
2#include "error/check_h5_error.h"
3#include "model/blast_scores_data.h"
4#include "model/blast_scores_data_init.h"
5#include "load_blast_scores.h"
6#include <hdf5_hl.h>
7#include <string.h>
8#include <stdlib.h>
9
10void
11load_blast_scores (hid_t file_id, const char *file_name)
12{
13 size_t dst_size;
14 size_t dst_offset[BLAST_SCORES_DATA_FIELD_NUM];
15 size_t dst_sizes[BLAST_SCORES_DATA_FIELD_NUM];
16 hid_t field_type[BLAST_SCORES_DATA_FIELD_NUM];
17
18 blast_scores_data_init (&dst_size, dst_offset, dst_sizes, field_type);
19
20 hsize_t chunk_size = 10;
21 int *fill_data = NULL;
22 int compress = 0;
23
24 blast_scores_data p_data[1000];
25 FILE *dat = fopen (file_name, "r");
26 if (dat == NULL)
27 check_error (__FILE__, __LINE__);
28 char *line = NULL;
29 size_t len = 0;
30 int current_line = 0;
31 int i = -1;
32
33 while (getline (&line, &len, dat) != -1)
34 {
35 current_line++;
36 i++;
37
38 char *running = strdup (line);
39 char *token = NULL;
40
41 token = strsep (&running, ",");
42 p_data[i].source_gi = atoi (&token[4]);
43
44 token = strsep (&running, ",");
45 p_data[i].source_start = atoi (token);
46
47 token = strsep (&running, ",");
48 p_data[i].source_end = atoi (token);
49
50 token = strsep (&running, ",");
51 p_data[i].target_gi = atoi (&token[4]);
52
53 token = strsep (&running, ",");
54 p_data[i].target_start = atoi (token);
55
56 token = strsep (&running, ",");
57 p_data[i].target_end = atoi (token);
58
59 token = strsep (&running, ",");
60 p_data[i].score = atoi (token);
61
62 token = strsep (&running, ",");
63 p_data[i].bit_score = strtod (token, NULL);
64
65 token = strsep (&running, ",");
66 p_data[i].evalue = strtod (token, NULL);
67
68 if (current_line == 1)
69 {
70
71 const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] =
72 BLAST_SCORES_DATA_FIELD_NAMES;
73
74 herr_t status = H5TBmake_table ("blast", file_id,
75 "blast",
76 BLAST_SCORES_DATA_FIELD_NUM, 1,
77 dst_size,
78 blast_scores_data_field_names,
79 dst_offset, field_type,
80 chunk_size, fill_data,
81 compress,
82 &p_data);
83
84 if (status < 0)
85 check_h5_error (__FILE__, __LINE__);
86
87 }
88
89 if ((i % 1000 == 0) && (i > 0))
90 {
91
92 herr_t status =
93 H5TBappend_records (file_id, "blast", 1000,
94 dst_size, dst_offset, dst_sizes,
95 &p_data[0]);
96 if(status < 0)
97 check_h5_error (__FILE__, __LINE__);
98
99 status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
100 if (status < 0)
101 check_h5_error (__FILE__, __LINE__);
102
103 printf ("Processed %i of records.\n", current_line);
104
105 i = -1;
106 }
107
108 if (running)
109 free (running);
110
111 } // End for each line of the input file.
112
113 if (line)
114 free (line);
115
116 fclose (dat);
117
118 return;
119}
diff --git a/src/load/load_blast_scores.h b/src/load/load_blast_scores.h
new file mode 100644
index 0000000..e41968d
--- a/dev/null
+++ b/src/load/load_blast_scores.h
@@ -0,0 +1,11 @@
1#ifndef LOAD_BLAST_SCORES_H
2#define LOAD_BLAST_SCORES_H
3
4#include <hdf5.h>
5
6/*
7 * Load the results of a BLAST run into the HDF5 container.
8 */
9void load_blast_scores (hid_t file_id, const char *file_name);
10
11#endif // LOAD_BLAST_SCORES_H
diff --git a/src/model/blast_scores_data.h b/src/model/blast_scores_data.h
new file mode 100644
index 0000000..8c4aaef
--- a/dev/null
+++ b/src/model/blast_scores_data.h
@@ -0,0 +1,21 @@
1#ifndef BLAST_SCORES_DATA_H
2#define BLAST_SCORES_DATA_H
3
/* Number of members in blast_scores_data (one table column each). */
#define BLAST_SCORES_DATA_FIELD_NUM 9

/*
 * Column names, listed in the same order as the members of
 * blast_scores_data.
 */
#define BLAST_SCORES_DATA_FIELD_NAMES \
  { "Source GI", "Source Start", "Source End", \
    "Target GI", "Target Start", "Target End", \
    "Score", "Bit Score", "Evalue" }

/* One record of BLAST output: seven integer members, then two doubles. */
typedef struct
{
  int source_gi;      /* column "Source GI" */
  int source_start;   /* column "Source Start" */
  int source_end;     /* column "Source End" */
  int target_gi;      /* column "Target GI" */
  int target_start;   /* column "Target Start" */
  int target_end;     /* column "Target End" */
  int score;          /* column "Score" */
  double bit_score;   /* column "Bit Score" */
  double evalue;      /* column "Evalue" */
} blast_scores_data;
20
21#endif // BLAST_SCORES_DATA_H
diff --git a/src/model/blast_scores_data_init.c b/src/model/blast_scores_data_init.c
new file mode 100644
index 0000000..14fa6f0
--- a/dev/null
+++ b/src/model/blast_scores_data_init.c
@@ -0,0 +1,43 @@
1#include "blast_scores_data_init.h"
2#include "blast_scores_data.h"
3
4void
5blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
6 hid_t *field_type)
7{
8 *dst_size = sizeof (blast_scores_data);
9
10 dst_offset[0] = HOFFSET (blast_scores_data, source_gi);
11 dst_offset[1] = HOFFSET (blast_scores_data, source_start);
12 dst_offset[2] = HOFFSET (blast_scores_data, source_end);
13 dst_offset[3] = HOFFSET (blast_scores_data, target_gi);
14 dst_offset[4] = HOFFSET (blast_scores_data, target_start);
15 dst_offset[5] = HOFFSET (blast_scores_data, target_end);
16 dst_offset[6] = HOFFSET (blast_scores_data, score);
17 dst_offset[7] = HOFFSET (blast_scores_data, bit_score);
18 dst_offset[8] = HOFFSET (blast_scores_data, evalue);
19
20 blast_scores_data dst_buf[1];
21
22 dst_sizes[0] = sizeof (dst_buf[0].source_gi);
23 dst_sizes[1] = sizeof (dst_buf[0].source_start);
24 dst_sizes[2] = sizeof (dst_buf[0].source_end);
25 dst_sizes[3] = sizeof (dst_buf[0].target_gi);
26 dst_sizes[4] = sizeof (dst_buf[0].target_start);
27 dst_sizes[5] = sizeof (dst_buf[0].target_end);
28 dst_sizes[6] = sizeof (dst_buf[0].score);
29 dst_sizes[7] = sizeof (dst_buf[0].bit_score);
30 dst_sizes[8] = sizeof (dst_buf[0].evalue);
31
32 field_type[0] = H5T_NATIVE_INT;
33 field_type[1] = H5T_NATIVE_INT;
34 field_type[2] = H5T_NATIVE_INT;
35 field_type[3] = H5T_NATIVE_INT;
36 field_type[4] = H5T_NATIVE_INT;
37 field_type[5] = H5T_NATIVE_INT;
38 field_type[6] = H5T_NATIVE_INT;
39 field_type[7] = H5T_NATIVE_DOUBLE;
40 field_type[8] = H5T_NATIVE_DOUBLE;
41
42 return;
43}
diff --git a/src/model/blast_scores_data_init.h b/src/model/blast_scores_data_init.h
new file mode 100644
index 0000000..cae6edd
--- a/dev/null
+++ b/src/model/blast_scores_data_init.h
@@ -0,0 +1,14 @@
1#ifndef BLAST_SCORES_DATA_INIT_H
2#define BLAST_SCORES_DATA_INIT_H
3
4#include <hdf5.h>
5
6/*
7 * Fill in the size, offset, and type descriptors for the members of
8 * blast_scores_data. These descriptors are used by the HDF5 API.
9 */
10void
11blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
12 hid_t *field_type);
13
14#endif // BLAST_SCORES_DATA_INIT_H

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.