summary refs log tree commit diff stats
author Don Pellegrino <don@drexel.edu> 2010-01-18 02:32:30 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-18 02:32:30 (GMT)
commit c9b5307e5d4acd555aaf8629915020e4a70bb652 (patch) (unidiff)
tree 232b0355399bf7adb4ff7a512b4d8e5ecb2996e5
parent 9627d495455f38eb564fe8b9355a1e90b8902295 (diff)
download exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.zip
exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.tar.gz
exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.tar.bz2
Added loading of the influenza.faa file.
-rw-r--r-- src/aggregator.c 14
-rw-r--r-- src/check_ncbi_error.c 3
-rw-r--r-- src/load_influenza_aa_dat.c 2
-rw-r--r-- src/load_influenza_faa.c 115
4 files changed, 126 insertions, 8 deletions
diff --git a/src/aggregator.c b/src/aggregator.c
index 5fb9d4a..36ea18c 100644
--- a/src/aggregator.c
+++ b/src/aggregator.c
@@ -16,26 +16,26 @@ main ()
16 /*16 /*
17 * Create the HDF5 file.17 * Create the HDF5 file.
18 */18 */
19 // hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);19 hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
2020
21 /*21 /*
22 * Load the supplementary protein data file.22 * Load the supplementary protein data file.
23 */23 */
24 // load_influenza_aa_dat (file_id);24 load_influenza_aa_dat (file_id);
2525
26 /*26 /*
27 * Load the FASTA protein sequence data file.27 * Load the FASTA protein sequence data file.
28 */28 */
29 // load_influenza_faa (file_id);29 load_influenza_faa (file_id);
3030
31 /*31 /*
32 * Close the HD5 file.32 * Close the HD5 file.
33 */33 */
34 // herr_t status = H5Fclose (file_id);34 herr_t status = H5Fclose (file_id);
35 // if (status < 0)35 if (status < 0)
36 // check_h5_error (status, __FILE__, __LINE__);36 check_h5_error (status, __FILE__, __LINE__);
3737
38 assign_protein_type (0);38 // assign_protein_type (0);
3939
40 return 0;40 return 0;
41}41}
diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c
index 8e1c3b2..6071d1a 100644
--- a/src/check_ncbi_error.c
+++ b/src/check_ncbi_error.c
@@ -1,4 +1,7 @@
1#include "check_ncbi_error.h"1#include "check_ncbi_error.h"
2#include <error.h>
3#include <stdlib.h>
4#include <blast.h>
25
3void6void
4check_ncbi_error (ValNodePtr error_returns,7check_ncbi_error (ValNodePtr error_returns,
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 9ee3c46..f0d9ee5 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -158,7 +158,7 @@ load_influenza_aa_dat (hid_t file_id)
158158
159 current_line++;159 current_line++;
160 char *running = strdup (line);160 char *running = strdup (line);
161 char *token;161 char *token = NULL;
162162
163 /*163 /*
164 * Parse the line, handling the case of empty fields represented164 * Parse the line, handling the case of empty fields represented
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 8fd0cd7..61bb99d 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -1,9 +1,124 @@
1#include "load_influenza_faa.h"1#include "load_influenza_faa.h"
2#include "check_error.h"
3#include "check_h5_error.h"
4#include "hdf5_hl.h"
5#include <string.h>
6#include <stdlib.h>
27
3void8void
4load_influenza_faa (hid_t file_id)9load_influenza_faa (hid_t file_id)
5{10{
11 typedef struct
12 {
13 int gi;
14 char gb[9];
15 char description[196];
16 } sequence_data;
617
18 size_t dst_size = sizeof (sequence_data);
19 size_t dst_offset[3] =
20 { HOFFSET (sequence_data, gi),
21 HOFFSET (sequence_data, gb),
22 HOFFSET (sequence_data, description)
23 };
24
25 sequence_data dst_buf[1];
26
27 size_t dst_sizes[3] = {
28 sizeof (dst_buf[0].gi),
29 sizeof (dst_buf[0].gb),
30 sizeof (dst_buf[0].description)
31 };
32
33 hid_t field_type[3];
34
35 field_type[0] = H5T_NATIVE_INT;
36
37 hid_t gb_type = H5Tcopy (H5T_C_S1);
38 H5Tset_size (gb_type, 9);
39 field_type[1] = gb_type;
40
41 hid_t description_type = H5Tcopy (H5T_C_S1);
42 H5Tset_size (description_type, 196);
43 field_type[2] = description_type;
44
45 const char *field_names[3] = { "GI",
46 "GB",
47 "Description" };
48
49 hsize_t chunk_size = 10;
50 int *fill_data = NULL;
51 int compress = 0;
52
53 sequence_data p_data;
54 FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa",
55 "r");
56 if (dat == NULL)
57 check_error (__FILE__, __LINE__);
58 char *line = NULL;
59 size_t len = 0;
60 int current_line = 0;
61
62 while (getline (&line, &len, dat) != -1)
63 {
64 current_line++;
65
66 // Header line.
67 if (line[0] == '>')
68 {
69 char *running = strdup (line);
70 char *token = NULL;
71
72 // Eat the ">gi".
73 strsep (&running, "|");
74
75 // GI value.
76 token = strsep (&running, "|");
77 p_data.gi = atoi (token);
78
79 // Eat the "gb"
80 strsep (&running, "|");
81
82 // GB value.
83 strncpy (p_data.gb, strsep(&running, "|"), sizeof (p_data.gb));
84
85 // Description value.
86 strncpy (p_data.description, strsep (&running, "|"),
87 sizeof (p_data.description));
88
89 if (current_line == 1)
90 {
91 herr_t status = H5TBmake_table ("influenza.faa", file_id,
92 "influenza.faa", 3, 1, dst_size,
93 field_names, dst_offset,
94 field_type, chunk_size,
95 fill_data, compress, &p_data);
96 if (status < 0)
97 check_h5_error (status, __FILE__, __LINE__);
98 }
99 else
100 {
101 herr_t status =
102 H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
103 dst_offset, dst_sizes, &p_data);
104 if (status < 0)
105 check_h5_error (status, __FILE__, __LINE__);
106 }
107
108 if (running)
109 free (running);
110
111 }
112
113 }
114
115 if (line)
116 free (line);
117
118 fclose (dat);
119
120 H5Tclose (gb_type);
121 H5Tclose (description_type);
7122
8 return;123 return;
9}124}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.