summaryrefslogtreecommitdiffstats
path: root/src/load/load_blast_scores.c (plain)
blob: 42e6bd93d8f2960d38a6e2baeddc5d268dcdfaf1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include "error/check_error.h"
#include "error/check_h5_error.h"
#include "model/blast_scores_data.h"
#include "model/blast_scores_data_init.h"
#include "load_blast_scores.h"
#include <hdf5_hl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void
load_blast_scores (hid_t file_id, const char *file_name)
{
  size_t dst_size;
  size_t dst_offset[BLAST_SCORES_DATA_FIELD_NUM];
  size_t dst_sizes[BLAST_SCORES_DATA_FIELD_NUM];
  hid_t field_type[BLAST_SCORES_DATA_FIELD_NUM];

  blast_scores_data_init (&dst_size, dst_offset, dst_sizes, field_type);

  hsize_t chunk_size = 10;
  int *fill_data = NULL;
  int compress = 0;

  blast_scores_data p_data[1000];
  FILE *dat = fopen (file_name, "r");
  if (dat == NULL)
    check_error (__FILE__, __LINE__);
  char *line = NULL;
  size_t len = 0;
  int current_line = 0;
  int i = -1;

  while (getline (&line, &len, dat) != -1)
    {
      current_line++;
      i++;

      char *running = strdup (line);
      char *token = NULL;

      token = strsep (&running, ",");
      p_data[i].source_gi = atoi (&token[4]);

      token = strsep (&running, ",");
      p_data[i].source_start = atoi (token);

      token = strsep (&running, ",");
      p_data[i].source_end = atoi (token);

      token = strsep (&running, ",");
      p_data[i].target_gi = atoi (&token[4]);

      token = strsep (&running, ",");
      p_data[i].target_start = atoi (token);

      token = strsep (&running, ",");
      p_data[i].target_end = atoi (token);

      token = strsep (&running, ",");
      p_data[i].score = atoi (token);

      token = strsep (&running, ",");
      p_data[i].bit_score = strtod (token, NULL);

      token = strsep (&running, ",");
      p_data[i].evalue = strtod (token, NULL);

      if (current_line == 1)
	{

	  const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] =
	    BLAST_SCORES_DATA_FIELD_NAMES;

	  herr_t status = H5TBmake_table ("blast", file_id,
					  "blast",
					  BLAST_SCORES_DATA_FIELD_NUM, 1,
					  dst_size,
					  blast_scores_data_field_names,
					  dst_offset, field_type,
					  chunk_size, fill_data,
					  compress,
					  &p_data);

	  if (status < 0)
	    check_h5_error (__FILE__, __LINE__);

	}

      if ((i % 1000 == 0) && (i > 0))
	{

	  herr_t status =
	    H5TBappend_records (file_id, "blast", 1000,
				dst_size, dst_offset, dst_sizes,
				&p_data[0]);
	  if(status < 0)
	    check_h5_error (__FILE__, __LINE__);

	  status = H5Fflush (file_id, H5F_SCOPE_GLOBAL);
	  if (status < 0)
	    check_h5_error (__FILE__, __LINE__);

	  printf ("Processed %i of records.\n", current_line);

	  i = -1;
	}

      if (running)
	free (running);

    } // End for each line of the input file.

  if (line)
    free (line);

  fclose (dat);

  return;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.