1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
#include "load_influenza_faa.h"
#include "check_error.h"
#include "check_h5_error.h"
#include "hdf5_hl.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Copy SRC into the fixed-size field DST of DST_SIZE bytes (DST_SIZE > 0),
   truncating if necessary.  Unlike strncpy, the result is always
   NUL-terminated; the rest of the field is zero-filled so no stale bytes
   end up in the record.  */
static void
copy_fixed_field (char *dst, size_t dst_size, const char *src)
{
  size_t n = strlen (src);

  if (n >= dst_size)
    n = dst_size - 1;
  memcpy (dst, src, n);
  memset (dst + n, 0, dst_size - n);
}

/* Split a header line of the form ">gi|<GI>|gb|<GB>|<description>" in
   place (the '|' separators in LINE are overwritten with NULs).

   On success, store the numeric GI in *GI, point *GB and *DESCRIPTION at
   the matching tokens inside LINE, and return 0.  Return -1 when a field
   is missing or the GI is not a number that fits in an int.  */
static int
split_faa_header (char *line, int *gi, char **gb, char **description)
{
  char *cursor = line;
  char *gi_text;
  char *end = NULL;
  long value;

  /* strsep returns NULL once the input is exhausted, so a short header
     simply yields NULL tokens that are rejected below.  */
  strsep (&cursor, "|");	/* Eat the ">gi".  */
  gi_text = strsep (&cursor, "|");
  strsep (&cursor, "|");	/* Eat the "gb".  */
  *gb = strsep (&cursor, "|");
  *description = strsep (&cursor, "|");
  if (gi_text == NULL || *gb == NULL || *description == NULL)
    return -1;

  errno = 0;
  value = strtol (gi_text, &end, 10);
  if (end == gi_text || errno == ERANGE || value < INT_MIN
      || value > INT_MAX)
    return -1;

  *gi = (int) value;
  return 0;
}

/* Load the header lines of the influenza FASTA file into an HDF5 table
   named "influenza.faa" under FILE_ID.

   Every line starting with '>' is parsed into a (GI, GB, Description)
   record; all other lines are ignored.  The first parsed record creates
   the table, later ones are appended to it.  Malformed header lines are
   reported on stderr and skipped.  HDF5 failures are passed to
   check_h5_error and a failed fopen to check_error (both defined
   elsewhere).

   NOTE(review): getline keeps the trailing newline and the description is
   the last token on the line, so the stored description normally ends
   with '\n'.  This is left unchanged here; confirm whether consumers of
   the table rely on it.  */
void
load_influenza_faa (hid_t file_id)
{
  typedef struct
  {
    int gi;
    char gb[9];
    char description[196];
  } sequence_data;

  sequence_data p_data;
  size_t dst_size = sizeof (sequence_data);
  size_t dst_offset[3] = {
    HOFFSET (sequence_data, gi),
    HOFFSET (sequence_data, gb),
    HOFFSET (sequence_data, description)
  };
  size_t dst_sizes[3] = {
    sizeof (p_data.gi),
    sizeof (p_data.gb),
    sizeof (p_data.description)
  };
  const char *field_names[3] = { "GI",
    "GB",
    "Description"
  };
  hsize_t chunk_size = 10;
  int *fill_data = NULL;
  int compress = 0;
  hid_t field_type[3];
  char *line = NULL;
  size_t len = 0;
  int current_line = 0;
  int table_created = 0;
  FILE *dat;

  /* Fixed-length string types sized from the struct fields so the HDF5
     layout cannot drift from the in-memory record.  */
  hid_t gb_type = H5Tcopy (H5T_C_S1);
  H5Tset_size (gb_type, sizeof (p_data.gb));
  hid_t description_type = H5Tcopy (H5T_C_S1);
  H5Tset_size (description_type, sizeof (p_data.description));
  field_type[0] = H5T_NATIVE_INT;
  field_type[1] = gb_type;
  field_type[2] = description_type;

  memset (&p_data, 0, sizeof (p_data));

  dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa", "r");
  if (dat == NULL)
    {
      check_error (__FILE__, __LINE__);
      /* Whether check_error returns is not visible from here; never fall
         through to getline with a NULL stream.  */
      goto close_types;
    }

  while (getline (&line, &len, dat) != -1)
    {
      char *gb = NULL;
      char *description = NULL;
      herr_t status;

      current_line++;

      /* Only header lines carry record data.  */
      if (line[0] != '>')
	continue;

      /* Parse in place: the getline buffer is not needed afterwards, so
         no per-line copy has to be allocated and freed.  */
      if (split_faa_header (line, &p_data.gi, &gb, &description) != 0)
	{
	  fprintf (stderr, "%s:%d: skipping malformed header at line %d\n",
		   __FILE__, __LINE__, current_line);
	  continue;
	}
      copy_fixed_field (p_data.gb, sizeof (p_data.gb), gb);
      copy_fixed_field (p_data.description, sizeof (p_data.description),
			description);

      if (!table_created)
	{
	  /* The first valid record creates the table.  */
	  status = H5TBmake_table ("influenza.faa", file_id,
				   "influenza.faa", 3, 1, dst_size,
				   field_names, dst_offset,
				   field_type, chunk_size,
				   fill_data, compress, &p_data);
	  if (status < 0)
	    check_h5_error (status, __FILE__, __LINE__);
	  else
	    table_created = 1;
	}
      else
	{
	  status = H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
				       dst_offset, dst_sizes, &p_data);
	  if (status < 0)
	    check_h5_error (status, __FILE__, __LINE__);
	}
    }

  free (line);
  fclose (dat);

close_types:
  H5Tclose (gb_type);
  H5Tclose (description_type);
}
|