#include "load_influenza_faa.h"
#include "check_error.h"
#include "check_h5_error.h"
#include "hdf5_hl.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Create a fixed-size C string datatype of SIZE bytes (terminator included).
   Returns a negative id if the type could not be created; the failure is
   reported through check_h5_error.  The caller closes the returned type.  */
static hid_t
faa_make_string_type (size_t size)
{
  hid_t type = H5Tcopy (H5T_C_S1);
  if (type < 0)
    {
      check_h5_error ((herr_t) -1, __FILE__, __LINE__);
      return type;
    }

  herr_t status = H5Tset_size (type, size);
  if (status < 0)
    check_h5_error (status, __FILE__, __LINE__);

  return type;
}

/* Copy SRC into the fixed-width field DST of DST_SIZE bytes.  The field is
   zero-filled first and at most DST_SIZE - 1 bytes are copied, so the result
   is always NUL-terminated and never carries bytes from a previous record.  */
static void
faa_copy_field (char *dst, size_t dst_size, const char *src)
{
  memset (dst, 0, dst_size);
  if (src != NULL && dst_size > 0)
    strncpy (dst, src, dst_size - 1);
}

/* Load the header lines of the influenza FASTA protein file into an HDF5
   table named "influenza.faa" inside FILE_ID.

   Every line starting with '>' is split on '|' and expected to look like
     >gi|<GI>|gb|<GB>|<description>
   The GI number, the GB accession and the description become one record
   with the columns "GI", "GB" and "Description".  All other lines are
   ignored.  The table is created from the first well-formed header and
   every later header is appended to it.

   The description is stored as read, up to the next '|' or the end of the
   line, so it keeps the trailing newline delivered by getline when it fits.

   Errors are reported through check_error / check_h5_error.  Malformed
   header lines are skipped with a warning on stderr.  */
void
load_influenza_faa (hid_t file_id)
{
  enum { GB_FIELD_SIZE = 9, DESCRIPTION_FIELD_SIZE = 196, FIELD_COUNT = 3 };

  typedef struct
  {
    int gi;
    char gb[GB_FIELD_SIZE];
    char description[DESCRIPTION_FIELD_SIZE];
  } sequence_data;

  static const char input_path[] =
    "/home/don/exp004/genomes/INFLUENZA/influenza.faa";
  static const char table_name[] = "influenza.faa";

  sequence_data p_data;

  size_t dst_size = sizeof (sequence_data);
  size_t dst_offset[FIELD_COUNT] = {
    HOFFSET (sequence_data, gi),
    HOFFSET (sequence_data, gb),
    HOFFSET (sequence_data, description)
  };
  size_t dst_sizes[FIELD_COUNT] = {
    sizeof (p_data.gi),
    sizeof (p_data.gb),
    sizeof (p_data.description)
  };
  const char *field_names[FIELD_COUNT] = { "GI", "GB", "Description" };
  hid_t field_type[FIELD_COUNT];

  hsize_t chunk_size = 10;
  int *fill_data = NULL;
  int compress = 0;

  FILE *dat = NULL;
  char *line = NULL;
  size_t len = 0;
  int current_line = 0;
  int table_created = 0;

  hid_t gb_type = faa_make_string_type (sizeof (p_data.gb));
  hid_t description_type = faa_make_string_type (sizeof (p_data.description));
  if (gb_type < 0 || description_type < 0)
    goto cleanup;

  field_type[0] = H5T_NATIVE_INT;
  field_type[1] = gb_type;
  field_type[2] = description_type;

  dat = fopen (input_path, "r");
  if (dat == NULL)
    {
      check_error (__FILE__, __LINE__);
      goto cleanup;
    }

  while (getline (&line, &len, dat) != -1)
    {
      current_line++;

      /* Only header lines carry record data.  */
      if (line[0] != '>')
        continue;

      /* strsep modifies its input and advances the cursor, so keep the
         pointer returned by strdup separately for the later free.  */
      char *header = strdup (line);
      if (header == NULL)
        {
          check_error (__FILE__, __LINE__);
          break;
        }
      char *cursor = header;

      strsep (&cursor, "|");                    /* Eat the ">gi".  */
      const char *gi = strsep (&cursor, "|");   /* GI value.  */
      strsep (&cursor, "|");                    /* Eat the "gb".  */
      const char *gb = strsep (&cursor, "|");   /* GB value.  */
      const char *description = strsep (&cursor, "|");

      if (gi == NULL || gb == NULL || description == NULL)
        {
          fprintf (stderr, "%s: skipping malformed header at line %d\n",
                   input_path, current_line);
          free (header);
          continue;
        }

      /* Zero the whole record so padding bytes written to the file are
         deterministic.  */
      memset (&p_data, 0, sizeof (p_data));
      p_data.gi = (int) strtol (gi, NULL, 10);
      faa_copy_field (p_data.gb, sizeof (p_data.gb), gb);
      faa_copy_field (p_data.description, sizeof (p_data.description),
                      description);
      free (header);

      if (!table_created)
        {
          /* The first record is written as part of table creation.  */
          herr_t status = H5TBmake_table ("influenza.faa", file_id,
                                          table_name, FIELD_COUNT, 1,
                                          dst_size, field_names, dst_offset,
                                          field_type, chunk_size, fill_data,
                                          compress, &p_data);
          if (status < 0)
            check_h5_error (status, __FILE__, __LINE__);
          else
            table_created = 1;
        }
      else
        {
          herr_t status = H5TBappend_records (file_id, table_name, 1,
                                              dst_size, dst_offset,
                                              dst_sizes, &p_data);
          if (status < 0)
            check_h5_error (status, __FILE__, __LINE__);
        }
    }

cleanup:
  free (line);
  if (dat != NULL)
    fclose (dat);
  if (gb_type >= 0)
    H5Tclose (gb_type);
  if (description_type >= 0)
    H5Tclose (description_type);
  return;
}