-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | src/Makefile.am | 10 | ||||
-rw-r--r-- | src/assign_protein_type.c | 135 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 67 | ||||
-rw-r--r-- | src/sequence_data.h | 16 | ||||
-rw-r--r-- | src/sequence_data_init.c | 37 | ||||
-rw-r--r-- | src/sequence_data_init.h | 14 |
7 files changed, 188 insertions, 96 deletions
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 749b7ad..fd35254 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c @@ -1,62 +1,22 @@ -#include "load_influenza_faa.h" #include "check_error.h" #include "check_h5_error.h" -#include "hdf5_hl.h" +#include "load_influenza_faa.h" +#include "sequence_data.h" +#include "sequence_data_init.h" +#include <hdf5_hl.h> #include <string.h> #include <stdlib.h> -#define SEQUENCE_DATA_FIELD_NUM 4 - void load_influenza_faa (hid_t file_id) { - typedef struct - { - int gi; - char gb[9]; - char description[196]; - char protein_type[7]; - } sequence_data; - - size_t dst_size = sizeof (sequence_data); - size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] = - { HOFFSET (sequence_data, gi), - HOFFSET (sequence_data, gb), - HOFFSET (sequence_data, description), - HOFFSET (sequence_data, protein_type) - }; - - sequence_data dst_buf[1]; - - size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = { - sizeof (dst_buf[0].gi), - sizeof (dst_buf[0].gb), - sizeof (dst_buf[0].description), - sizeof (dst_buf[0].protein_type) - }; - + size_t dst_size; + size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; + size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM]; hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; - field_type[0] = H5T_NATIVE_INT; - - hid_t gb_type = H5Tcopy (H5T_C_S1); - H5Tset_size (gb_type, 9); - field_type[1] = gb_type; - - hid_t description_type = H5Tcopy (H5T_C_S1); - H5Tset_size (description_type, 196); - field_type[2] = description_type; - - hid_t protein_type_type = H5Tcopy (H5T_C_S1); - H5Tset_size (protein_type_type, 7); - field_type[3] = protein_type_type; - - const char *field_names[SEQUENCE_DATA_FIELD_NUM] = - { "GI", - "GB", - "Description", - "Protein Type" }; - + sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type); + hsize_t chunk_size = 10; int *fill_data = NULL; int compress = 0; @@ -99,12 +59,15 @@ load_influenza_faa (hid_t file_id) strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); + const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = + SEQUENCE_DATA_FIELD_NAMES; + if (current_line == 1) { herr_t status = H5TBmake_table ("influenza.faa", file_id, "influenza.faa", SEQUENCE_DATA_FIELD_NUM, 1, - dst_size, field_names, + dst_size, sequence_data_field_names, dst_offset, field_type, chunk_size, fill_data, compress, &p_data); @@ -132,9 +95,5 @@ load_influenza_faa (hid_t file_id) fclose (dat); - H5Tclose (gb_type); - H5Tclose (description_type); - H5Tclose (protein_type_type); - return; } |