-rw-r--r-- | src/aggregator.c | 14 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 3 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 2 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 115 |
4 files changed, 126 insertions, 8 deletions
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 8fd0cd7..61bb99d 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c @@ -1,9 +1,124 @@ #include "load_influenza_faa.h" +#include "check_error.h" +#include "check_h5_error.h" +#include "hdf5_hl.h" +#include <string.h> +#include <stdlib.h> void load_influenza_faa (hid_t file_id) { + typedef struct + { + int gi; + char gb[9]; + char description[196]; + } sequence_data; + size_t dst_size = sizeof (sequence_data); + size_t dst_offset[3] = + { HOFFSET (sequence_data, gi), + HOFFSET (sequence_data, gb), + HOFFSET (sequence_data, description) + }; + + sequence_data dst_buf[1]; + + size_t dst_sizes[3] = { + sizeof (dst_buf[0].gi), + sizeof (dst_buf[0].gb), + sizeof (dst_buf[0].description) + }; + + hid_t field_type[3]; + + field_type[0] = H5T_NATIVE_INT; + + hid_t gb_type = H5Tcopy (H5T_C_S1); + H5Tset_size (gb_type, 9); + field_type[1] = gb_type; + + hid_t description_type = H5Tcopy (H5T_C_S1); + H5Tset_size (description_type, 196); + field_type[2] = description_type; + + const char *field_names[3] = { "GI", + "GB", + "Description" }; + + hsize_t chunk_size = 10; + int *fill_data = NULL; + int compress = 0; + + sequence_data p_data; + FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa", + "r"); + if (dat == NULL) + check_error (__FILE__, __LINE__); + char *line = NULL; + size_t len = 0; + int current_line = 0; + + while (getline (&line, &len, dat) != -1) + { + current_line++; + + // Header line. + if (line[0] == '>') + { + char *running = strdup (line); + char *token = NULL; + + // Eat the ">gi". + strsep (&running, "|"); + + // GI value. + token = strsep (&running, "|"); + p_data.gi = atoi (token); + + // Eat the "gb" + strsep (&running, "|"); + + // GB value. + strncpy (p_data.gb, strsep(&running, "|"), sizeof (p_data.gb)); + + // Description value. + strncpy (p_data.description, strsep (&running, "|"), + sizeof (p_data.description)); + + if (current_line == 1) + { + herr_t status = H5TBmake_table ("influenza.faa", file_id, + "influenza.faa", 3, 1, dst_size, + field_names, dst_offset, + field_type, chunk_size, + fill_data, compress, &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + else + { + herr_t status = + H5TBappend_records (file_id, "influenza.faa", 1, dst_size, + dst_offset, dst_sizes, &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + + if (running) + free (running); + + } + + } + + if (line) + free (line); + + fclose (dat); + + H5Tclose (gb_type); + H5Tclose (description_type); return; } |