From c9b5307e5d4acd555aaf8629915020e4a70bb652 Mon Sep 17 00:00:00 2001
From: Don Pellegrino
Date: Mon, 18 Jan 2010 02:32:30 +0000
Subject: Added loading of the influenza.faa file.

---
diff --git a/src/aggregator.c b/src/aggregator.c
index 5fb9d4a..36ea18c 100644
--- a/src/aggregator.c
+++ b/src/aggregator.c
@@ -16,26 +16,26 @@ main ()
   /*
    * Create the HDF5 file.
    */
-  // hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+  hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
 
   /*
    * Load the supplementary protein data file.
    */
-  // load_influenza_aa_dat (file_id);
+  load_influenza_aa_dat (file_id);
 
   /*
    * Load the FASTA protein sequence data file.
    */
-  // load_influenza_faa (file_id);
+  load_influenza_faa (file_id);
 
   /*
    * Close the HD5 file.
    */
-  // herr_t status = H5Fclose (file_id);
-  // if (status < 0)
-  //   check_h5_error (status, __FILE__, __LINE__);
+  herr_t status = H5Fclose (file_id);
+  if (status < 0)
+    check_h5_error (status, __FILE__, __LINE__);
 
-  assign_protein_type (0);
+  // assign_protein_type (0);
 
   return 0;
 }
diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c
index 8e1c3b2..6071d1a 100644
--- a/src/check_ncbi_error.c
+++ b/src/check_ncbi_error.c
@@ -1,4 +1,7 @@
 #include "check_ncbi_error.h"
+#include
+#include
+#include
 
 void
 check_ncbi_error (ValNodePtr error_returns,
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c
index 9ee3c46..f0d9ee5 100644
--- a/src/load_influenza_aa_dat.c
+++ b/src/load_influenza_aa_dat.c
@@ -158,7 +158,7 @@ load_influenza_aa_dat (hid_t file_id)
       current_line++;
 
       char *running = strdup (line);
-      char *token;
+      char *token = NULL;
 
       /*
        * Parse the line, handling the case of empty fields represented
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 8fd0cd7..61bb99d 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -1,9 +1,189 @@
 #include "load_influenza_faa.h"
+#include "check_error.h"
+#include "check_h5_error.h"
+#include "hdf5_hl.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
+/*
+ * One row of the "influenza.faa" table, parsed from a FASTA header
+ * line of the form ">gi|GI|gb|GB|Description".
+ */
+typedef struct
+{
+  int gi;
+  char gb[9];
+  char description[196];
+} sequence_data;
+
+/*
+ * Copy src into the fixed-size field dst, truncating if necessary.
+ * strncpy zero-fills the unused tail, so no stale bytes from the
+ * previous record reach the file, and the final byte is forced to
+ * NUL so the field is always a terminated string.
+ */
+static void
+copy_field (char *dst, size_t dst_size, const char *src)
+{
+  strncpy (dst, src, dst_size - 1);
+  dst[dst_size - 1] = '\0';
+}
+
+/*
+ * Parse one FASTA header line into record.  The line is split in
+ * place on '|', so header is modified.  Returns 0 on success, or -1
+ * when a field is missing or the GI is not a decimal number that
+ * fits in an int; record is left untouched in that case.
+ */
+static int
+parse_faa_header (char *header, sequence_data *record)
+{
+  char *cursor = header;
+
+  /* Eat the ">gi". */
+  strsep (&cursor, "|");
+
+  /* GI value. */
+  char *gi_text = strsep (&cursor, "|");
+
+  /* Eat the "gb". */
+  strsep (&cursor, "|");
+
+  /* GB value. */
+  char *gb_text = strsep (&cursor, "|");
+
+  /* Description value. */
+  char *description_text = strsep (&cursor, "|");
+
+  /* strsep yields NULL once the line has run out of fields. */
+  if (gi_text == NULL || gb_text == NULL || description_text == NULL)
+    return -1;
+
+  char *end = NULL;
+  errno = 0;
+  long gi = strtol (gi_text, &end, 10);
+  if (end == gi_text || *end != '\0' || errno == ERANGE
+      || gi < INT_MIN || gi > INT_MAX)
+    return -1;
+
+  record->gi = (int) gi;
+  copy_field (record->gb, sizeof (record->gb), gb_text);
+  copy_field (record->description, sizeof (record->description),
+              description_text);
+  return 0;
+}
 
+/*
+ * Load the header records of the influenza.faa FASTA file into an
+ * HDF5 table named "influenza.faa" under file_id.  Every line that
+ * starts with '>' contributes one row (GI, GB, Description);
+ * malformed headers are reported on stderr and skipped.
+ */
 void
 load_influenza_faa (hid_t file_id)
 {
+  size_t dst_size = sizeof (sequence_data);
+  size_t dst_offset[3] = {
+    HOFFSET (sequence_data, gi),
+    HOFFSET (sequence_data, gb),
+    HOFFSET (sequence_data, description)
+  };
+
+  sequence_data p_data;
+
+  size_t dst_sizes[3] = {
+    sizeof (p_data.gi),
+    sizeof (p_data.gb),
+    sizeof (p_data.description)
+  };
+
+  const char *field_names[3] = { "GI", "GB", "Description" };
+
+  hsize_t chunk_size = 10;
+  int *fill_data = NULL;
+  int compress = 0;
+
+  FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa",
+                     "r");
+  if (dat == NULL)
+    {
+      /*
+       * Report the failure and stop here: getline must never be
+       * handed a NULL stream, even if check_error returns.
+       */
+      check_error (__FILE__, __LINE__);
+      return;
+    }
+
+  hid_t field_type[3];
+
+  field_type[0] = H5T_NATIVE_INT;
+
+  /* Fixed-length string types sized from the struct fields. */
+  hid_t gb_type = H5Tcopy (H5T_C_S1);
+  H5Tset_size (gb_type, sizeof (p_data.gb));
+  field_type[1] = gb_type;
+
+  hid_t description_type = H5Tcopy (H5T_C_S1);
+  H5Tset_size (description_type, sizeof (p_data.description));
+  field_type[2] = description_type;
+
+  /* Set once H5TBmake_table has succeeded; later rows are appended. */
+  int table_created = 0;
+
+  char *line = NULL;
+  size_t len = 0;
+  int current_line = 0;
+
+  while (getline (&line, &len, dat) != -1)
+    {
+      current_line++;
+
+      /* Only header lines carry table data. */
+      if (line[0] != '>')
+        continue;
+
+      /* line is getline's own buffer, so it can be split in place. */
+      if (parse_faa_header (line, &p_data) != 0)
+        {
+          fprintf (stderr, "influenza.faa:%d: malformed header skipped\n",
+                   current_line);
+          continue;
+        }
+
+      if (!table_created)
+        {
+          /* The first good record creates the table and is its row 0. */
+          herr_t status = H5TBmake_table ("influenza.faa", file_id,
+                                          "influenza.faa", 3, 1, dst_size,
+                                          field_names, dst_offset,
+                                          field_type, chunk_size,
+                                          fill_data, compress, &p_data);
+          if (status < 0)
+            check_h5_error (status, __FILE__, __LINE__);
+          else
+            table_created = 1;
+        }
+      else
+        {
+          herr_t status =
+            H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
+                                dst_offset, dst_sizes, &p_data);
+          if (status < 0)
+            check_h5_error (status, __FILE__, __LINE__);
+        }
+    }
+
+  /* free (NULL) is a no-op, so no guard is needed. */
+  free (line);
+
+  fclose (dat);
+
+  H5Tclose (gb_type);
+  H5Tclose (description_type);
 
   return;
 }
-- 
cgit v0.8.3.1-22-g547a