From 0d0e0886d17612fb7ebdb9110679d5b7bd5087be Mon Sep 17 00:00:00 2001 From: Don Pellegrino Date: Mon, 18 Jan 2010 04:23:25 +0000 Subject: Beginning of implementation to iterate through an existing HDF5 file and add calculated protein type field values based on BLAST queries. This code currently does not compile. --- diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 54db84e..1b58f54 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c @@ -1,10 +1,12 @@ #include "assign_protein_type.h" #include "check_ncbi_error.h" +#include "check_h5_error.h" #include #include #include #include #include +#include /* * BLAST database containing all of the influenza protein sequences. @@ -45,6 +47,33 @@ assign_protein_type (hid_t file_id) ValNodePtr error_returns = NULL; /* + * Read the data from HDF5 file. + */ + hsize_t nfields; + hsize_t nrecords; + herr_t status = H5TBget_table_info (file_id, "influenza.faa", &nfields, + &nrecords); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + + /* + * todo: Allocate memory of nrecords for dst_buf. + * + * todo: Refactor code to share structures in read and write HDF5 + * calls. + */ + + status = H5TBread_table (file_id, "influenza.faa", dst_size, dst_offset, + dst_sizes, dst_buf); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + + for (int i = 0; i < nrecords; i++) + { + + } + + /* * Read the sequence from the database by GI. 
*/ Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL); diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c index f0d9ee5..aed33e8 100644 --- a/src/load_influenza_aa_dat.c +++ b/src/load_influenza_aa_dat.c @@ -8,7 +8,7 @@ #include "load_influenza_aa_dat.h" #include "check_error.h" #include "check_h5_error.h" -#include "hdf5_hl.h" +#include #include #include diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 61bb99d..749b7ad 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c @@ -5,6 +5,8 @@ #include #include +#define SEQUENCE_DATA_FIELD_NUM 4 + void load_influenza_faa (hid_t file_id) { @@ -13,24 +15,27 @@ load_influenza_faa (hid_t file_id) int gi; char gb[9]; char description[196]; + char protein_type[7]; } sequence_data; - + size_t dst_size = sizeof (sequence_data); - size_t dst_offset[3] = + size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] = { HOFFSET (sequence_data, gi), HOFFSET (sequence_data, gb), - HOFFSET (sequence_data, description) + HOFFSET (sequence_data, description), + HOFFSET (sequence_data, protein_type) }; sequence_data dst_buf[1]; - size_t dst_sizes[3] = { + size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = { sizeof (dst_buf[0].gi), sizeof (dst_buf[0].gb), - sizeof (dst_buf[0].description) + sizeof (dst_buf[0].description), + sizeof (dst_buf[0].protein_type) }; - hid_t field_type[3]; + hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; field_type[0] = H5T_NATIVE_INT; @@ -42,9 +47,15 @@ load_influenza_faa (hid_t file_id) H5Tset_size (description_type, 196); field_type[2] = description_type; - const char *field_names[3] = { "GI", - "GB", - "Description" }; + hid_t protein_type_type = H5Tcopy (H5T_C_S1); + H5Tset_size (protein_type_type, 7); + field_type[3] = protein_type_type; + + const char *field_names[SEQUENCE_DATA_FIELD_NUM] = + { "GI", + "GB", + "Description", + "Protein Type" }; hsize_t chunk_size = 10; int *fill_data = NULL; @@ -86,13 +97,17 @@ load_influenza_faa (hid_t file_id) strncpy 
(p_data.description, strsep (&running, "|"), sizeof (p_data.description)); + strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); + if (current_line == 1) { herr_t status = H5TBmake_table ("influenza.faa", file_id, - "influenza.faa", 3, 1, dst_size, - field_names, dst_offset, - field_type, chunk_size, - fill_data, compress, &p_data); + "influenza.faa", + SEQUENCE_DATA_FIELD_NUM, 1, + dst_size, field_names, + dst_offset, field_type, + chunk_size, fill_data, compress, + &p_data); if (status < 0) check_h5_error (status, __FILE__, __LINE__); } @@ -119,6 +134,7 @@ load_influenza_faa (hid_t file_id) H5Tclose (gb_type); H5Tclose (description_type); + H5Tclose (protein_type_type); return; } -- cgit v0.8.3.1-22-g547a