summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu>2010-01-18 04:23:25 (GMT)
committer Don Pellegrino <don@drexel.edu>2010-01-18 04:23:25 (GMT)
commit 0d0e0886d17612fb7ebdb9110679d5b7bd5087be (patch) (unidiff)
tree 5e7843ce019a3c3f057e5672127542794a27d645
parent 6848b5e1aad3265278c728f8ae0849de31de4472 (diff)
download exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.zip
exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.tar.gz
exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.tar.bz2
Beginning of implementation to iterate through an existing HDF5 file and add calculated protein type field values based on BLAST queries. This code currently does not compile.
-rw-r--r-- src/assign_protein_type.c 29
-rw-r--r-- src/load_influenza_aa_dat.c 2
-rw-r--r-- src/load_influenza_faa.c 42
3 files changed, 59 insertions, 14 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 54db84e..1b58f54 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -1,10 +1,12 @@
1#include "assign_protein_type.h"1#include "assign_protein_type.h"
2#include "check_ncbi_error.h"2#include "check_ncbi_error.h"
3#include "check_h5_error.h"
3#include <ncbi.h>4#include <ncbi.h>
4#include <readdb.h>5#include <readdb.h>
5#include <blast.h>6#include <blast.h>
6#include <salpacc.h>7#include <salpacc.h>
7#include <stdbool.h>8#include <stdbool.h>
9#include <hdf5_hl.h>
810
9/*11/*
10 * BLAST database containing all of the influenza protein sequences.12 * BLAST database containing all of the influenza protein sequences.
@@ -45,6 +47,33 @@ assign_protein_type (hid_t file_id)
45 ValNodePtr error_returns = NULL;47 ValNodePtr error_returns = NULL;
4648
47 /*49 /*
50 * Read the data from HDF5 file.
51 */
52 hsize_t nfields;
53 hsize_t nrecords;
54 herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields,
55 &nrecords);
56 if (status < 0)
57 check_h5_error (status, __FILE__, __LINE__);
58
59 /*
60 * todo: Allocate memory of nrecords for dst_buf.
61 *
62 * todo: Refactor code to share structres in read and write HDF5
63 * calls.
64 */
65
66 status = H5TBread_table (file_id, "influenza.faa", dst_size, dst_offset,
67 dst_sizes, dst_buf);
68 if (status < 0)
69 check_h5_error (status, __FILE__, __LINE__);
70
71 for (int i = 0; i < nrecords; i++)
72 {
73
74 }
75
76 /*
48 * Read the sequence from the database by GI.77 * Read the sequence from the database by GI.
49 */78 */
50 Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);79 Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index f0d9ee5..aed33e8 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -8,7 +8,7 @@
8#include "load_influenza_aa_dat.h"8#include "load_influenza_aa_dat.h"
9#include "check_error.h"9#include "check_error.h"
10#include "check_h5_error.h"10#include "check_h5_error.h"
11#include "hdf5_hl.h"11#include <hdf5_hl.h>
12#include <string.h>12#include <string.h>
13#include <stdlib.h>13#include <stdlib.h>
1414
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 61bb99d..749b7ad 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -5,6 +5,8 @@
5#include <string.h>5#include <string.h>
6#include <stdlib.h>6#include <stdlib.h>
77
8#define SEQUENCE_DATA_FIELD_NUM 4
9
8void10void
9load_influenza_faa (hid_t file_id)11load_influenza_faa (hid_t file_id)
10{12{
@@ -13,24 +15,27 @@ load_influenza_faa (hid_t file_id)
13 int gi;15 int gi;
14 char gb[9];16 char gb[9];
15 char description[196];17 char description[196];
18 char protein_type[7];
16 } sequence_data;19 } sequence_data;
1720
18 size_t dst_size = sizeof (sequence_data);21 size_t dst_size = sizeof (sequence_data);
19 size_t dst_offset[3] =22 size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] =
20 { HOFFSET (sequence_data, gi),23 { HOFFSET (sequence_data, gi),
21 HOFFSET (sequence_data, gb),24 HOFFSET (sequence_data, gb),
22 HOFFSET (sequence_data, description)25 HOFFSET (sequence_data, description),
26 HOFFSET (sequence_data, protein_type)
23 };27 };
2428
25 sequence_data dst_buf[1];29 sequence_data dst_buf[1];
2630
27 size_t dst_sizes[3] = {31 size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = {
28 sizeof (dst_buf[0].gi),32 sizeof (dst_buf[0].gi),
29 sizeof (dst_buf[0].gb),33 sizeof (dst_buf[0].gb),
30 sizeof (dst_buf[0].description)34 sizeof (dst_buf[0].description),
35 sizeof (dst_buf[0].protein_type)
31 };36 };
3237
33 hid_t field_type[3];38 hid_t field_type[SEQUENCE_DATA_FIELD_NUM];
3439
35 field_type[0] = H5T_NATIVE_INT;40 field_type[0] = H5T_NATIVE_INT;
3641
@@ -42,9 +47,15 @@ load_influenza_faa (hid_t file_id)
42 H5Tset_size (description_type, 196);47 H5Tset_size (description_type, 196);
43 field_type[2] = description_type;48 field_type[2] = description_type;
4449
45 const char *field_names[3] = { "GI",50 hid_t protein_type_type = H5Tcopy (H5T_C_S1);
46 "GB",51 H5Tset_size (protein_type_type, 7);
47 "Description" };52 field_type[3] = protein_type_type;
53
54 const char *field_names[SEQUENCE_DATA_FIELD_NUM] =
55 { "GI",
56 "GB",
57 "Description",
58 "Protein Type" };
4859
49 hsize_t chunk_size = 10;60 hsize_t chunk_size = 10;
50 int *fill_data = NULL;61 int *fill_data = NULL;
@@ -86,13 +97,17 @@ load_influenza_faa (hid_t file_id)
86 strncpy (p_data.description, strsep (&running, "|"),97 strncpy (p_data.description, strsep (&running, "|"),
87 sizeof (p_data.description));98 sizeof (p_data.description));
8899
100 strncpy (p_data.protein_type, "", sizeof (p_data.protein_type));
101
89 if (current_line == 1)102 if (current_line == 1)
90 {103 {
91 herr_t status = H5TBmake_table ("influenza.faa", file_id,104 herr_t status = H5TBmake_table ("influenza.faa", file_id,
92 "influenza.faa", 3, 1, dst_size,105 "influenza.faa",
93 field_names, dst_offset,106 SEQUENCE_DATA_FIELD_NUM, 1,
94 field_type, chunk_size,107 dst_size, field_names,
95 fill_data, compress, &p_data);108 dst_offset, field_type,
109 chunk_size, fill_data, compress,
110 &p_data);
96 if (status < 0)111 if (status < 0)
97 check_h5_error (status, __FILE__, __LINE__);112 check_h5_error (status, __FILE__, __LINE__);
98 }113 }
@@ -119,6 +134,7 @@ load_influenza_faa (hid_t file_id)
119134
120 H5Tclose (gb_type);135 H5Tclose (gb_type);
121 H5Tclose (description_type);136 H5Tclose (description_type);
137 H5Tclose (protein_type_type);
122138
123 return;139 return;
124}140}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.