#include "load_influenza_faa.h"
#include "check_error.h"
#include "check_h5_error.h"
#include "hdf5_hl.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SEQUENCE_DATA_FIELD_NUM 4

/* Input file read by load_influenza_faa (). */
#define INFLUENZA_FAA_PATH "/home/don/exp004/genomes/INFLUENZA/influenza.faa"

/* Used as both the table title and the dataset name. */
#define INFLUENZA_FAA_TABLE "influenza.faa"

/*
 * Create a fixed-size C string datatype of SIZE bytes.
 *
 * Failures of H5Tcopy / H5Tset_size are reported through check_h5_error.
 * The (possibly negative) identifier is returned either way; the caller
 * releases it with H5Tclose.
 */
static hid_t
make_string_type (size_t size)
{
  hid_t type = H5Tcopy (H5T_C_S1);
  if (type < 0)
    {
      check_h5_error ((herr_t) -1, __FILE__, __LINE__);
      return type;
    }

  herr_t status = H5Tset_size (type, size);
  if (status < 0)
    check_h5_error (status, __FILE__, __LINE__);

  return type;
}

/*
 * Copy SRC into the DST_SIZE-byte buffer DST.
 *
 * Unlike strncpy (dst, src, dst_size), the result is always NUL-terminated:
 * input longer than DST_SIZE - 1 bytes is truncated.  Unused bytes are
 * zeroed so the record written to the file contains no stale data.
 */
static void
copy_field (char *dst, size_t dst_size, const char *src)
{
  size_t n = strlen (src);

  if (n >= dst_size)
    n = dst_size - 1;

  memset (dst, 0, dst_size);
  memcpy (dst, src, n);
}

/*
 * Parse TEXT as a base-10 integer that fits in an int.
 *
 * Returns 0 and stores the value in *GI on success.  Returns -1 and leaves
 * *GI untouched when TEXT is empty, has trailing characters or is out of
 * range.
 */
static int
parse_gi (const char *text, int *gi)
{
  char *end = NULL;

  errno = 0;
  long value = strtol (text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE
      || value < INT_MIN || value > INT_MAX)
    return -1;

  *gi = (int) value;
  return 0;
}

/*
 * Load the header lines of INFLUENZA_FAA_PATH into an HDF5 table.
 *
 * Every line that starts with '>' is split on '|'.  The second field is
 * stored as the integer "GI", the fourth as "GB" and the fifth as
 * "Description"; "Protein Type" is always written empty.  The first usable
 * header creates the table INFLUENZA_FAA_TABLE under FILE_ID with
 * H5TBmake_table; later headers are appended with H5TBappend_records.
 *
 * Lines that do not start with '>' are ignored.  Header lines with fewer
 * than five fields or a non-numeric GI are skipped with a message on stderr.
 * Failure to open the input is reported through check_error, and HDF5
 * failures through check_h5_error.
 */
void
load_influenza_faa (hid_t file_id)
{
  typedef struct
  {
    int gi;
    char gb[9];
    char description[196];
    char protein_type[7];
  } sequence_data;

  sequence_data p_data;

  const size_t dst_size = sizeof (sequence_data);
  const size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] = {
    HOFFSET (sequence_data, gi),
    HOFFSET (sequence_data, gb),
    HOFFSET (sequence_data, description),
    HOFFSET (sequence_data, protein_type)
  };
  const size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = {
    sizeof p_data.gi,
    sizeof p_data.gb,
    sizeof p_data.description,
    sizeof p_data.protein_type
  };
  const char *field_names[SEQUENCE_DATA_FIELD_NUM] = {
    "GI", "GB", "Description", "Protein Type"
  };

  /* String datatype sizes come from the struct so they cannot drift apart. */
  hid_t gb_type = make_string_type (sizeof p_data.gb);
  hid_t description_type = make_string_type (sizeof p_data.description);
  hid_t protein_type_type = make_string_type (sizeof p_data.protein_type);

  hid_t field_type[SEQUENCE_DATA_FIELD_NUM];
  field_type[0] = H5T_NATIVE_INT;
  field_type[1] = gb_type;
  field_type[2] = description_type;
  field_type[3] = protein_type_type;

  const hsize_t chunk_size = 10;
  const int compress = 0;

  char *line = NULL;
  size_t len = 0;
  int current_line = 0;
  int table_created = 0;

  FILE *dat = fopen (INFLUENZA_FAA_PATH, "r");
  if (dat == NULL)
    {
      check_error (__FILE__, __LINE__);
      /* In case check_error returns: never read from a NULL stream. */
      goto close_types;
    }

  while (getline (&line, &len, dat) != -1)
    {
      current_line++;

      /* Only header lines carry record data. */
      if (line[0] != '>')
        continue;

      /* Drop the line terminator so it is not stored in the description. */
      line[strcspn (line, "\r\n")] = '\0';

      /*
       * Tokenise the getline buffer in place.  strsep advances `cursor`
       * (eventually to NULL), so `line` stays the pointer that owns the
       * allocation.
       */
      char *cursor = line;
      strsep (&cursor, "|");                    /* Eat the ">gi". */
      char *gi_text = strsep (&cursor, "|");    /* GI value. */
      strsep (&cursor, "|");                    /* Eat the "gb". */
      char *gb_text = strsep (&cursor, "|");    /* GB value. */
      char *description = strsep (&cursor, "|");        /* Description. */

      /* Start from an all-zero record; protein_type stays empty. */
      memset (&p_data, 0, sizeof p_data);

      if (gi_text == NULL || gb_text == NULL || description == NULL
          || parse_gi (gi_text, &p_data.gi) != 0)
        {
          fprintf (stderr, "%s:%d: skipping malformed header line\n",
                   INFLUENZA_FAA_PATH, current_line);
          continue;
        }

      copy_field (p_data.gb, sizeof p_data.gb, gb_text);
      copy_field (p_data.description, sizeof p_data.description, description);

      if (!table_created)
        {
          /* The first record creates the table; NULL means no fill data. */
          herr_t status = H5TBmake_table (INFLUENZA_FAA_TABLE, file_id,
                                          INFLUENZA_FAA_TABLE,
                                          SEQUENCE_DATA_FIELD_NUM, 1,
                                          dst_size, field_names, dst_offset,
                                          field_type, chunk_size, NULL,
                                          compress, &p_data);
          if (status < 0)
            check_h5_error (status, __FILE__, __LINE__);
          else
            table_created = 1;
        }
      else
        {
          herr_t status = H5TBappend_records (file_id, INFLUENZA_FAA_TABLE, 1,
                                              dst_size, dst_offset, dst_sizes,
                                              &p_data);
          if (status < 0)
            check_h5_error (status, __FILE__, __LINE__);
        }
    }

  free (line);
  fclose (dat);

close_types:
  H5Tclose (gb_type);
  H5Tclose (description_type);
  H5Tclose (protein_type_type);
}