#define _GNU_SOURCE #include "error/check_error.h" #include "error/check_h5_error.h" #include "model/blast_scores_data.h" #include "model/blast_scores_data_init.h" #include "load_blast_scores.h" #include #include #include #include /* * A simple sanity check can be performed by comparing the HDF5 * content with the content of the input text file. Data block output * from the first command should be equal to the first 5 lines of the * input file while data block output from the second command should * be equal to the last 5 lines of the input file. * * h5dump --dataset=blast --start "0" --count "5" influenza.h5 * * h5dump --dataset=blast --start "5749892" --count "5" influenza.h5 */ void load_blast_scores (hid_t file_id, const char *file_name) { size_t dst_size; size_t dst_offset[BLAST_SCORES_DATA_FIELD_NUM]; size_t dst_sizes[BLAST_SCORES_DATA_FIELD_NUM]; hid_t field_type[BLAST_SCORES_DATA_FIELD_NUM]; blast_scores_data_init (&dst_size, dst_offset, dst_sizes, field_type); hsize_t chunk_size = 10; int *fill_data = NULL; int compress = 0; blast_scores_data p_data[10000]; FILE *dat = fopen (file_name, "r"); if (dat == NULL) check_error (__FILE__, __LINE__); char *line = NULL; size_t len = 0; int i = -1; int written = 0; int current_line = 0; while (getline (&line, &len, dat) != -1) { current_line++; i++; char *running = strdup (line); char *token = NULL; token = strsep (&running, ","); p_data[i].source_gi = atoi (&token[4]); token = strsep (&running, ","); p_data[i].source_start = atoi (token); token = strsep (&running, ","); p_data[i].source_end = atoi (token); token = strsep (&running, ","); p_data[i].target_gi = atoi (&token[4]); token = strsep (&running, ","); p_data[i].target_start = atoi (token); token = strsep (&running, ","); p_data[i].target_end = atoi (token); token = strsep (&running, ","); p_data[i].score = atoi (token); token = strsep (&running, ","); p_data[i].bit_score = strtod (token, NULL); token = strsep (&running, ","); p_data[i].evalue = strtod (token, NULL); if (written == 0) { printf ("Creating blast table.\n"); const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] = BLAST_SCORES_DATA_FIELD_NAMES; herr_t status = H5TBmake_table ("blast", file_id, "blast", BLAST_SCORES_DATA_FIELD_NUM, 1, dst_size, blast_scores_data_field_names, dst_offset, field_type, chunk_size, fill_data, compress, &p_data); if (status < 0) check_h5_error (__FILE__, __LINE__); written = 1; i = -1; current_line = 0; } if ((current_line % 10000 == 0) && (current_line > 0)) { herr_t status = H5TBappend_records (file_id, "blast", 10000, dst_size, dst_offset, dst_sizes, &p_data[0]); if(status < 0) check_h5_error (__FILE__, __LINE__); status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); if (status < 0) check_h5_error (__FILE__, __LINE__); printf ("Processed %i records.\n", current_line); written += 10000; i = -1; } if (running) free (running); } // End for each line of the input file. if (i >= 0) { herr_t status = H5TBappend_records (file_id, "blast", i+1, dst_size, dst_offset, dst_sizes, &p_data[0]); if (status < 0) check_h5_error (__FILE__, __LINE__); printf ("Wrote %i records.\n", written + i + 1); } herr_t status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); if (status < 0) check_h5_error (__FILE__, __LINE__); if (line) free (line); fclose (dat); return; }