summary refs log tree commit diff stats
author Don Pellegrino <don@drexel.edu> 2010-01-18 02:32:30 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-18 02:32:30 (GMT)
commit c9b5307e5d4acd555aaf8629915020e4a70bb652 (patch) (side-by-side diff)
tree 232b0355399bf7adb4ff7a512b4d8e5ecb2996e5
parent 9627d495455f38eb564fe8b9355a1e90b8902295 (diff)
download exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.zip
exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.tar.gz
exp007-c9b5307e5d4acd555aaf8629915020e4a70bb652.tar.bz2
Added loading of the influenza.faa file.
-rw-r--r-- src/aggregator.c 14
-rw-r--r-- src/check_ncbi_error.c 3
-rw-r--r-- src/load_influenza_aa_dat.c 2
-rw-r--r-- src/load_influenza_faa.c 115
4 files changed, 126 insertions, 8 deletions
diff --git a/src/aggregator.c b/src/aggregator.c
index 5fb9d4a..36ea18c 100644
--- a/src/aggregator.c
+++ b/src/aggregator.c
@@ -16,26 +16,26 @@ main ()
/*
* Create the HDF5 file.
*/
- // hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+ hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
/*
* Load the supplementary protein data file.
*/
- // load_influenza_aa_dat (file_id);
+ load_influenza_aa_dat (file_id);
/*
* Load the FASTA protein sequence data file.
*/
- // load_influenza_faa (file_id);
+ load_influenza_faa (file_id);
/*
* Close the HDF5 file.
*/
- // herr_t status = H5Fclose (file_id);
- // if (status < 0)
- // check_h5_error (status, __FILE__, __LINE__);
+ herr_t status = H5Fclose (file_id);
+ if (status < 0)
+ check_h5_error (status, __FILE__, __LINE__);
- assign_protein_type (0);
+ // assign_protein_type (0);
return 0;
}
diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c
index 8e1c3b2..6071d1a 100644
--- a/src/check_ncbi_error.c
+++ b/src/check_ncbi_error.c
@@ -1,4 +1,7 @@
#include "check_ncbi_error.h"
+#include <error.h>
+#include <stdlib.h>
+#include <blast.h>
void
check_ncbi_error (ValNodePtr error_returns,
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 9ee3c46..f0d9ee5 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -158,7 +158,7 @@ load_influenza_aa_dat (hid_t file_id)
current_line++;
char *running = strdup (line);
- char *token;
+ char *token = NULL;
/*
* Parse the line, handling the case of empty fields represented
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 8fd0cd7..61bb99d 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -1,9 +1,124 @@
#include "load_influenza_faa.h"
+#include "check_error.h"
+#include "check_h5_error.h"
+#include "hdf5_hl.h"
+#include <string.h>
+#include <stdlib.h>
+/*
+ * Load the header lines of the influenza.faa FASTA file into an HDF5
+ * table named "influenza.faa" under file_id.
+ *
+ * Every line starting with '>' is split on '|': the second field is
+ * stored as the integer GI, the fourth as GB and the fifth as
+ * Description.  Lines that do not start with '>' are not stored.
+ * The table is created from the first header that parses; every later
+ * header is appended to it.  Headers with missing fields or a
+ * non-numeric GI are reported on stderr and skipped.
+ *
+ * file_id: HDF5 location that receives the table.
+ */
void
load_influenza_faa (hid_t file_id)
{
+  typedef struct
+  {
+    int gi;
+    char gb[9];
+    char description[196];
+  } sequence_data;
+
+  // Record buffer handed to the H5TB calls for every header.
+  sequence_data p_data;
+
+  size_t dst_size = sizeof (sequence_data);
+  size_t dst_offset[3] =
+    { HOFFSET (sequence_data, gi),
+      HOFFSET (sequence_data, gb),
+      HOFFSET (sequence_data, description)
+    };
+  size_t dst_sizes[3] = {
+    sizeof (p_data.gi),
+    sizeof (p_data.gb),
+    sizeof (p_data.description)
+  };
+
+  // Fixed-length string types sized from the struct fields so the
+  // HDF5 layout cannot drift from the C layout.
+  hid_t gb_type = H5Tcopy (H5T_C_S1);
+  H5Tset_size (gb_type, sizeof (p_data.gb));
+
+  hid_t description_type = H5Tcopy (H5T_C_S1);
+  H5Tset_size (description_type, sizeof (p_data.description));
+
+  hid_t field_type[3];
+  field_type[0] = H5T_NATIVE_INT;
+  field_type[1] = gb_type;
+  field_type[2] = description_type;
+
+  const char *field_names[3] = { "GI",
+                                 "GB",
+                                 "Description" };
+
+  hsize_t chunk_size = 10;
+  int *fill_data = NULL;
+  int compress = 0;
+
+  FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa",
+                     "r");
+  if (dat == NULL)
+    {
+      check_error (__FILE__, __LINE__);
+      // check_error is defined elsewhere; in case it returns, stop
+      // here instead of handing a NULL stream to getline.
+      H5Tclose (gb_type);
+      H5Tclose (description_type);
+      return;
+    }
+
+  char *line = NULL;
+  size_t len = 0;
+  int current_line = 0;
+  int table_created = 0;
+
+  while (getline (&line, &len, dat) != -1)
+    {
+      current_line++;
+
+      // Only header lines are loaded.
+      if (line[0] != '>')
+        continue;
+
+      // strsep advances (and finally NULLs) its cursor, so the strdup
+      // result is kept in `header` to be released afterwards.
+      char *header = strdup (line);
+      if (header == NULL)
+        {
+          check_error (__FILE__, __LINE__);
+          break;
+        }
+      char *running = header;
+
+      // Eat the ">gi".
+      strsep (&running, "|");
+
+      // GI value.
+      char *gi_text = strsep (&running, "|");
+
+      // Eat the "gb".
+      strsep (&running, "|");
+
+      // GB value.
+      char *gb_text = strsep (&running, "|");
+
+      // Description value; getline keeps the line terminator, so it
+      // remains part of this field when it is the last one.
+      char *description_text = strsep (&running, "|");
+
+      char *gi_end = gi_text;
+      long gi_value = 0;
+      if (gi_text != NULL)
+        gi_value = strtol (gi_text, &gi_end, 10);
+
+      // strsep returns NULL once the line runs out of fields; a GI
+      // with no digits or one that does not fit in int is rejected
+      // as well.
+      if (gi_text == NULL || gb_text == NULL || description_text == NULL
+          || gi_end == gi_text || (long) (int) gi_value != gi_value)
+        {
+          fprintf (stderr,
+                   "%s: skipping malformed header on line %d of influenza.faa\n",
+                   __FILE__, current_line);
+          free (header);
+          continue;
+        }
+
+      // Zero the whole record first: copying at most size - 1 bytes
+      // then guarantees termination, and no stale or padding bytes
+      // reach the file.
+      memset (&p_data, 0, sizeof (p_data));
+      p_data.gi = (int) gi_value;
+      strncpy (p_data.gb, gb_text, sizeof (p_data.gb) - 1);
+      strncpy (p_data.description, description_text,
+               sizeof (p_data.description) - 1);
+
+      if (!table_created)
+        {
+          herr_t status = H5TBmake_table ("influenza.faa", file_id,
+                                          "influenza.faa", 3, 1, dst_size,
+                                          field_names, dst_offset,
+                                          field_type, chunk_size,
+                                          fill_data, compress, &p_data);
+          if (status < 0)
+            check_h5_error (status, __FILE__, __LINE__);
+          else
+            table_created = 1;
+        }
+      else
+        {
+          herr_t status =
+            H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
+                                dst_offset, dst_sizes, &p_data);
+          if (status < 0)
+            check_h5_error (status, __FILE__, __LINE__);
+        }
+
+      free (header);
+    }
+
+  free (line);
+
+  fclose (dat);
+
+  H5Tclose (gb_type);
+  H5Tclose (description_type);
+  return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.