summary refs log tree commit diff stats
author Don Pellegrino <don@drexel.edu> 2010-01-18 04:23:25 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-18 04:23:25 (GMT)
commit 0d0e0886d17612fb7ebdb9110679d5b7bd5087be (patch) (side-by-side diff)
tree 5e7843ce019a3c3f057e5672127542794a27d645
parent 6848b5e1aad3265278c728f8ae0849de31de4472 (diff)
download exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.zip
exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.tar.gz
exp007-0d0e0886d17612fb7ebdb9110679d5b7bd5087be.tar.bz2
Beginning of an implementation that iterates through an existing HDF5 file and calculates protein type field values based on BLAST queries. This code currently does not compile.
-rw-r--r-- src/assign_protein_type.c 29
-rw-r--r-- src/load_influenza_aa_dat.c 2
-rw-r--r-- src/load_influenza_faa.c 42
3 files changed, 59 insertions, 14 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c
index 54db84e..1b58f54 100644
--- a/src/assign_protein_type.c
+++ b/src/assign_protein_type.c
@@ -1,10 +1,12 @@
#include "assign_protein_type.h"
#include "check_ncbi_error.h"
+#include "check_h5_error.h"
#include <ncbi.h>
#include <readdb.h>
#include <blast.h>
#include <salpacc.h>
#include <stdbool.h>
+#include <hdf5_hl.h>
/*
* BLAST database containing all of the influenza protein sequences.
@@ -45,6 +47,33 @@ assign_protein_type (hid_t file_id)
ValNodePtr error_returns = NULL;
/*
+ * Read the data from HDF5 file.
+ */
+ hsize_t nfields;
+ hsize_t nrecords;
+ herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields,
+ &nrecords);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+
+ /*
+ * todo: Allocate memory of nrecords for dst_buf.
+ *
+ * todo: Refactor code to share structures in read and write HDF5
+ * calls.
+ */
+
+ status = H5TBread_table (file_id, "influenza.faa", dst_size, dst_offset,
+ dst_sizes, dst_buf);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
+
+ for (int i = 0; i < nrecords; i++)
+ {
+
+ }
+
+ /*
* Read the sequence from the database by GI.
*/
Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index f0d9ee5..aed33e8 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -8,7 +8,7 @@
#include "load_influenza_aa_dat.h"
#include "check_error.h"
#include "check_h5_error.h"
-#include "hdf5_hl.h"
+#include <hdf5_hl.h>
#include <string.h>
#include <stdlib.h>
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 61bb99d..749b7ad 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -5,6 +5,8 @@
#include <string.h>
#include <stdlib.h>
+#define SEQUENCE_DATA_FIELD_NUM 4
+
void
load_influenza_faa (hid_t file_id)
{
@@ -13,24 +15,27 @@ load_influenza_faa (hid_t file_id)
int gi;
char gb[9];
char description[196];
+ char protein_type[7];
} sequence_data;
-
+
size_t dst_size = sizeof (sequence_data);
- size_t dst_offset[3] =
+ size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] =
{ HOFFSET (sequence_data, gi),
HOFFSET (sequence_data, gb),
- HOFFSET (sequence_data, description)
+ HOFFSET (sequence_data, description),
+ HOFFSET (sequence_data, protein_type)
};
sequence_data dst_buf[1];
- size_t dst_sizes[3] = {
+ size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = {
sizeof (dst_buf[0].gi),
sizeof (dst_buf[0].gb),
- sizeof (dst_buf[0].description)
+ sizeof (dst_buf[0].description),
+ sizeof (dst_buf[0].protein_type)
};
- hid_t field_type[3];
+ hid_t field_type[SEQUENCE_DATA_FIELD_NUM];
field_type[0] = H5T_NATIVE_INT;
@@ -42,9 +47,15 @@ load_influenza_faa (hid_t file_id)
H5Tset_size (description_type, 196);
field_type[2] = description_type;
- const char *field_names[3] = { "GI",
- "GB",
- "Description" };
+ hid_t protein_type_type = H5Tcopy (H5T_C_S1);
+ H5Tset_size (protein_type_type, 7);
+ field_type[3] = protein_type_type;
+
+ const char *field_names[SEQUENCE_DATA_FIELD_NUM] =
+ { "GI",
+ "GB",
+ "Description",
+ "Protein Type" };
hsize_t chunk_size = 10;
int *fill_data = NULL;
@@ -86,13 +97,17 @@ load_influenza_faa (hid_t file_id)
strncpy (p_data.description, strsep (&running, "|"),
sizeof (p_data.description));
+ strncpy (p_data.protein_type, "", sizeof (p_data.protein_type));
+
if (current_line == 1)
{
herr_t status = H5TBmake_table ("influenza.faa", file_id,
- "influenza.faa", 3, 1, dst_size,
- field_names, dst_offset,
- field_type, chunk_size,
- fill_data, compress, &p_data);
+ "influenza.faa",
+ SEQUENCE_DATA_FIELD_NUM, 1,
+ dst_size, field_names,
+ dst_offset, field_type,
+ chunk_size, fill_data, compress,
+ &p_data);
if (status < 0)
check_h5_error (status, __FILE__, __LINE__);
}
@@ -119,6 +134,7 @@ load_influenza_faa (hid_t file_id)
H5Tclose (gb_type);
H5Tclose (description_type);
+ H5Tclose (protein_type_type);
return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.