From d2dda50ed620d93cb1c3c9705b3379c3507b8d9c Mon Sep 17 00:00:00 2001
From: Don Pellegrino
Date: Sat, 23 Jan 2010 23:00:22 +0000
Subject: Routines to load the results of a BLAST run into the HDF5 file.

---
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c
new file mode 100644
index 0000000..42e6bd9
--- /dev/null
+++ b/src/load/load_blast_scores.c
@@ -0,0 +1,170 @@
+#include "error/check_error.h"
+#include "error/check_h5_error.h"
+#include "model/blast_scores_data.h"
+#include "model/blast_scores_data_init.h"
+#include "load_blast_scores.h"
+#include <hdf5_hl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Title and dataset name of the table holding the BLAST records. */
+#define BLAST_TABLE_NAME "blast"
+
+/* Number of records buffered in memory between writes. */
+#define BLAST_BATCH_SIZE 1000
+
+/* Chunk size handed to H5TBmake_table. */
+#define BLAST_CHUNK_SIZE 10
+
+/*
+ * Number of leading characters skipped in the two GI columns before
+ * the number is read.  NOTE(review): presumably a fixed prefix such
+ * as a quoted "gi|"; confirm against the input format.
+ */
+#define BLAST_GI_PREFIX_LEN 4
+
+/* Layout of blast_scores_data as filled in by blast_scores_data_init. */
+typedef struct
+{
+  size_t record_size;
+  size_t offsets[BLAST_SCORES_DATA_FIELD_NUM];
+  size_t sizes[BLAST_SCORES_DATA_FIELD_NUM];
+  hid_t types[BLAST_SCORES_DATA_FIELD_NUM];
+} blast_table_layout;
+
+/*
+ * Split one comma-separated line into RECORD.  LINE is modified in
+ * place.  Returns 0 on success, or -1 without touching RECORD when the
+ * line has fewer than BLAST_SCORES_DATA_FIELD_NUM columns or a GI
+ * column shorter than BLAST_GI_PREFIX_LEN.
+ */
+static int
+parse_blast_line (char *line, blast_scores_data *record)
+{
+  char *column[BLAST_SCORES_DATA_FIELD_NUM];
+  char *cursor = line;
+
+  for (int c = 0; c < BLAST_SCORES_DATA_FIELD_NUM; c++)
+    {
+      /* strsep returns NULL once the line has run out of columns. */
+      column[c] = strsep (&cursor, ",");
+      if (column[c] == NULL)
+        return -1;
+    }
+
+  if (strlen (column[0]) < BLAST_GI_PREFIX_LEN
+      || strlen (column[3]) < BLAST_GI_PREFIX_LEN)
+    return -1;
+
+  record->source_gi = atoi (column[0] + BLAST_GI_PREFIX_LEN);
+  record->source_start = atoi (column[1]);
+  record->source_end = atoi (column[2]);
+  record->target_gi = atoi (column[3] + BLAST_GI_PREFIX_LEN);
+  record->target_start = atoi (column[4]);
+  record->target_end = atoi (column[5]);
+  record->score = atoi (column[6]);
+  record->bit_score = strtod (column[7], NULL);
+  record->evalue = strtod (column[8], NULL);
+
+  return 0;
+}
+
+/*
+ * Write the first COUNT entries of RECORDS to the table and flush the
+ * file.  The table is created from the first batch written; every
+ * later batch is appended.  Does nothing when COUNT is zero.
+ */
+static void
+write_blast_records (hid_t file_id, const blast_table_layout *layout,
+                     const blast_scores_data *records, size_t count,
+                     int *table_exists)
+{
+  herr_t status;
+
+  if (count == 0)
+    return;
+
+  if (*table_exists)
+    status = H5TBappend_records (file_id, BLAST_TABLE_NAME, (hsize_t) count,
+                                 layout->record_size, layout->offsets,
+                                 layout->sizes, records);
+  else
+    {
+      const char *field_names[BLAST_SCORES_DATA_FIELD_NUM] =
+        BLAST_SCORES_DATA_FIELD_NAMES;
+
+      status = H5TBmake_table (BLAST_TABLE_NAME, file_id, BLAST_TABLE_NAME,
+                               BLAST_SCORES_DATA_FIELD_NUM, (hsize_t) count,
+                               layout->record_size, field_names,
+                               layout->offsets, layout->types,
+                               BLAST_CHUNK_SIZE, NULL, 0, records);
+      if (status >= 0)
+        *table_exists = 1;
+    }
+
+  if (status < 0)
+    check_h5_error (__FILE__, __LINE__);
+
+  status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
+  if (status < 0)
+    check_h5_error (__FILE__, __LINE__);
+}
+
+/*
+ * Read the comma-separated BLAST results in FILE_NAME and store them in
+ * the "blast" table of FILE_ID.  Records are written in batches of
+ * BLAST_BATCH_SIZE; malformed lines are reported on stderr and skipped.
+ * No table is created when the file yields no records.
+ */
+void
+load_blast_scores (hid_t file_id, const char *file_name)
+{
+  blast_table_layout layout;
+  blast_scores_data_init (&layout.record_size, layout.offsets, layout.sizes,
+                          layout.types);
+
+  FILE *dat = fopen (file_name, "r");
+  if (dat == NULL)
+    {
+      check_error (__FILE__, __LINE__);
+      /* Never read from a NULL stream should check_error return. */
+      return;
+    }
+
+  blast_scores_data records[BLAST_BATCH_SIZE];
+  size_t buffered = 0;
+  int table_exists = 0;
+  int current_line = 0;
+  char *line = NULL;
+  size_t len = 0;
+
+  while (getline (&line, &len, dat) != -1)
+    {
+      current_line++;
+
+      /* The line buffer is split in place; it is not needed afterwards. */
+      if (parse_blast_line (line, &records[buffered]) != 0)
+        {
+          fprintf (stderr, "%s:%i: skipping malformed BLAST record.\n",
+                   file_name, current_line);
+          continue;
+        }
+      buffered++;
+
+      if (buffered == BLAST_BATCH_SIZE)
+        {
+          write_blast_records (file_id, &layout, records, buffered,
+                               &table_exists);
+          buffered = 0;
+
+          printf ("Processed %i of records.\n", current_line);
+        }
+    }                           // End for each line of the input file.
+
+  /* Write whatever is left over from the last, partial batch. */
+  write_blast_records (file_id, &layout, records, buffered, &table_exists);
+
+  free (line);
+  fclose (dat);
+}
diff --git a/src/load/load_blast_scores.h b/src/load/load_blast_scores.h
new file mode 100644
index 0000000..e41968d
--- /dev/null
+++ b/src/load/load_blast_scores.h
@@ -0,0 +1,11 @@
+#ifndef LOAD_BLAST_SCORES_H
+#define LOAD_BLAST_SCORES_H
+
+#include <hdf5.h>
+
+/*
+ * Load the results of a BLAST run into the HDF5 container.
+ */
+void load_blast_scores (hid_t file_id, const char *file_name);
+
+#endif // LOAD_BLAST_SCORES_H
diff --git a/src/model/blast_scores_data.h b/src/model/blast_scores_data.h
new file mode 100644
index 0000000..8c4aaef
--- /dev/null
+++ b/src/model/blast_scores_data.h
@@ -0,0 +1,27 @@
+#ifndef BLAST_SCORES_DATA_H
+#define BLAST_SCORES_DATA_H
+
+/* Number of fields in a blast_scores_data record. */
+#define BLAST_SCORES_DATA_FIELD_NUM 9
+
+/* Field names, in the same order as the members of blast_scores_data. */
+#define BLAST_SCORES_DATA_FIELD_NAMES { "Source GI", "Source Start", "Source End", "Target GI", "Target Start", "Target End", "Score", "Bit Score", "Evalue" }
+
+/*
+ * One record of BLAST scores.  blast_scores_data_init describes this
+ * layout to HDF5, so keep the two in step when changing fields.
+ */
+typedef struct
+{
+  int source_gi;
+  int source_start;
+  int source_end;
+  int target_gi;
+  int target_start;
+  int target_end;
+  int score;
+  double bit_score;
+  double evalue;
+} blast_scores_data;
+
+#endif // BLAST_SCORES_DATA_H
diff --git a/src/model/blast_scores_data_init.c b/src/model/blast_scores_data_init.c
new file mode 100644
index 0000000..14fa6f0
--- /dev/null
+++ b/src/model/blast_scores_data_init.c
@@ -0,0 +1,49 @@
+#include "blast_scores_data_init.h"
+#include "blast_scores_data.h"
+
+/*
+ * Describe the memory layout of blast_scores_data: the record size in
+ * *dst_size and, per field, its offset, size and HDF5 type.  The three
+ * arrays must each hold BLAST_SCORES_DATA_FIELD_NUM elements.
+ */
+void
+blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
+                        hid_t *field_type)
+{
+  *dst_size = sizeof (blast_scores_data);
+
+  dst_offset[0] = HOFFSET (blast_scores_data, source_gi);
+  dst_offset[1] = HOFFSET (blast_scores_data, source_start);
+  dst_offset[2] = HOFFSET (blast_scores_data, source_end);
+  dst_offset[3] = HOFFSET (blast_scores_data, target_gi);
+  dst_offset[4] = HOFFSET (blast_scores_data, target_start);
+  dst_offset[5] = HOFFSET (blast_scores_data, target_end);
+  dst_offset[6] = HOFFSET (blast_scores_data, score);
+  dst_offset[7] = HOFFSET (blast_scores_data, bit_score);
+  dst_offset[8] = HOFFSET (blast_scores_data, evalue);
+
+  /* Never read; it only gives sizeof a member expression to measure. */
+  blast_scores_data dst_buf[1];
+
+  dst_sizes[0] = sizeof (dst_buf[0].source_gi);
+  dst_sizes[1] = sizeof (dst_buf[0].source_start);
+  dst_sizes[2] = sizeof (dst_buf[0].source_end);
+  dst_sizes[3] = sizeof (dst_buf[0].target_gi);
+  dst_sizes[4] = sizeof (dst_buf[0].target_start);
+  dst_sizes[5] = sizeof (dst_buf[0].target_end);
+  dst_sizes[6] = sizeof (dst_buf[0].score);
+  dst_sizes[7] = sizeof (dst_buf[0].bit_score);
+  dst_sizes[8] = sizeof (dst_buf[0].evalue);
+
+  field_type[0] = H5T_NATIVE_INT;
+  field_type[1] = H5T_NATIVE_INT;
+  field_type[2] = H5T_NATIVE_INT;
+  field_type[3] = H5T_NATIVE_INT;
+  field_type[4] = H5T_NATIVE_INT;
+  field_type[5] = H5T_NATIVE_INT;
+  field_type[6] = H5T_NATIVE_INT;
+  field_type[7] = H5T_NATIVE_DOUBLE;
+  field_type[8] = H5T_NATIVE_DOUBLE;
+
+  return;
+}
diff --git a/src/model/blast_scores_data_init.h b/src/model/blast_scores_data_init.h
new file mode 100644
index 0000000..cae6edd
--- /dev/null
+++ b/src/model/blast_scores_data_init.h
@@ -0,0 +1,15 @@
+#ifndef BLAST_SCORES_DATA_INIT_H
+#define BLAST_SCORES_DATA_INIT_H
+
+#include <hdf5.h>
+
+/*
+ * Fill in the descriptors of the blast_scores_data struct that the
+ * HDF5 API needs: the record size in *dst_size and, for each of the
+ * BLAST_SCORES_DATA_FIELD_NUM fields, its offset, size and HDF5 type.
+ */
+void
+blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes,
+                        hid_t *field_type);
+
+#endif // BLAST_SCORES_DATA_INIT_H
-- 
cgit v0.8.3.1-22-g547a