summaryrefslogtreecommitdiffstats
path: root/src/load/load_influenza_faa.c (plain)
blob: 04bf05bcac28d3c62f90f7c2f4cd9a07efb3d16f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "error/check_error.h"
#include "error/check_h5_error.h"
#include "load_influenza_faa.h"
#include "model/sequence_data.h"
#include "model/sequence_data_init.h"
#include <hdf5_hl.h>
#include <string.h>
#include <stdlib.h>

/*
 * Copy the NUL-terminated string SRC into the fixed-size buffer DST of
 * DST_SIZE bytes.  The copy is truncated if necessary, the remainder of
 * DST is zero-padded, and DST is always NUL-terminated.  A NULL SRC
 * (a field missing from the header line) yields an empty string.
 */
static void
copy_header_field (char *dst, size_t dst_size, const char *src)
{
  if (dst_size == 0)
    return;

  if (src == NULL)
    src = "";

  /* strncpy alone does not terminate when SRC fills the buffer. */
  strncpy (dst, src, dst_size - 1);
  dst[dst_size - 1] = '\0';
}

/*
 * Load the header lines of the FASTA file FILE_NAME into the
 * "influenza.faa" table of the HDF5 file identified by FILE_ID.
 *
 * Only lines starting with '>' are processed; all other lines are
 * ignored.  A header is split on '|' and is expected to look like
 *
 *   >gi|<gi value>|gb|<gb value>|<description>
 *
 * Missing trailing fields are stored as 0 / empty strings instead of
 * being dereferenced.  The description keeps whatever follows the
 * fourth '|' up to the next '|' or the end of the line, including the
 * line terminator read by getline.
 *
 * When the header is the first line of the file the table is either
 * purged (if it already exists) or created, and the record is written
 * as its first row.  Every later header is appended.
 *
 * Errors are reported through check_error / check_h5_error.
 */
void
load_influenza_faa (hid_t file_id, const char* file_name)
{
  size_t dst_size;
  size_t dst_offset[SEQUENCE_DATA_FIELD_NUM];
  size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM];
  hid_t field_type[SEQUENCE_DATA_FIELD_NUM];

  sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type);

  const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] =
    SEQUENCE_DATA_FIELD_NAMES;

  hsize_t chunk_size = 10;
  int *fill_data = NULL;
  int compress = 0;

  /* Zero the record so bytes not set by the parser are never written
     to the table as indeterminate values. */
  sequence_data p_data;
  memset (&p_data, 0, sizeof (p_data));

  FILE *dat = fopen (file_name, "r");
  if (dat == NULL)
    {
      check_error (__FILE__, __LINE__);
      /* Nothing can be read; never hand a NULL stream to getline. */
      return;
    }

  char *line = NULL;
  size_t len = 0;
  int current_line = 0;

  while (getline (&line, &len, dat) != -1)
    {
      current_line++;

      // Only header lines carry record data.
      if (line[0] != '>')
        continue;

      /*
       * strsep advances its cursor, so keep the pointer returned by
       * strdup separately; it is the only value that may be freed.
       */
      char *header = strdup (line);
      if (header == NULL)
        {
          check_error (__FILE__, __LINE__);
          break;
        }
      char *running = header;
      char *token = NULL;

      // Eat the ">gi".
      strsep (&running, "|");

      // GI value.
      token = strsep (&running, "|");
      p_data.gi = (token != NULL) ? atoi (token) : 0;

      // Eat the "gb"
      strsep (&running, "|");

      // GB value.
      copy_header_field (p_data.gb, sizeof (p_data.gb),
                         strsep (&running, "|"));

      // Description value.
      copy_header_field (p_data.description, sizeof (p_data.description),
                         strsep (&running, "|"));

      /*
       * NOTE(review): the purge/create step is keyed on the header
       * being line 1 of the input; a file whose first line is not a
       * header goes straight to the append branch.
       */
      if (current_line == 1)
        {
          /*
           * Dataset already exists.  Purge it.
           */
          if (H5LTfind_dataset (file_id, "influenza.faa") == 1)
            {
              hsize_t nfields = 0;
              hsize_t nrecords = 0;
              herr_t status = H5TBget_table_info (file_id, "influenza.faa",
                                                  &nfields, &nrecords);
              if (status < 0)
                check_h5_error (status, __FILE__, __LINE__);

              status = H5TBdelete_record (file_id, "influenza.faa", 0,
                                          nrecords);
              if (status < 0)
                check_h5_error (status, __FILE__, __LINE__);

              status =
                H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
                                    dst_offset, dst_sizes, &p_data);
              if (status < 0)
                check_h5_error (status, __FILE__, __LINE__);
            }

          /*
           * Dataset does not exist.  Create it.
           */
          else
            {
              herr_t status = H5TBmake_table ("influenza.faa", file_id,
                                              "influenza.faa",
                                              SEQUENCE_DATA_FIELD_NUM, 1,
                                              dst_size,
                                              sequence_data_field_names,
                                              dst_offset, field_type,
                                              chunk_size, fill_data, compress,
                                              &p_data);
              if (status < 0)
                check_h5_error (status, __FILE__, __LINE__);
            }
        }

      else
        {
          herr_t status =
            H5TBappend_records (file_id, "influenza.faa", 1, dst_size,
                                dst_offset, dst_sizes, &p_data);
          if (status < 0)
            check_h5_error (status, __FILE__, __LINE__);
        }

      free (header);
    }

  /* free (NULL) is a no-op, so no guard is needed. */
  free (line);

  fclose (dat);

  return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.