summaryrefslogtreecommitdiffstats
path: root/src/load/load_features.c (plain)
blob: b18031a45ceb66e1d3333a01ed4cb63da63b9c01
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include "load_features.h"
#include <libxml/parser.h>
#include <stdbool.h>
#include <asn.h>
#include <objgbseq.h>

/*
 * An NCBI GBSeq structure to hold the data for the current record.
 * Allocated with GBSeqNew() at the start of load_features() and released
 * with GBSeqFree() before that function returns.
 */
GBSeqPtr g;

/*
 * Parser state shared by the SAX callbacks: set to true by
 * lf_startElement() and to false by lf_endElement().  lf_characters()
 * ignores character data while it is false.
 */
bool in_element;

/*
 * SAX startDocument callback: writes a trace line to stdout.
 * The parser context argument is not used.
 */
static void
lf_startDocument (void *ctx ATTRIBUTE_UNUSED)
{
  fputs ("SAX.startDocument()\n", stdout);
}

/*
 * SAX endDocument callback: writes a trace line to stdout.
 * The parser context argument is not used.
 */
static void
lf_endDocument (void *ctx ATTRIBUTE_UNUSED)
{
  fputs ("SAX.endDocument()\n", stdout);
}

/*
 * SAX getEntity callback: traces the requested entity name to stdout.
 *
 * Always returns NULL; this handler never supplies an entity of its own.
 */
static xmlEntityPtr
lf_getEntity (void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{
  fprintf (stdout, "SAX.getEntity(%s)\n", name);

  return NULL;
}

/*
 * SAX startElement callback: traces the element name and its attributes
 * to stdout and marks that the parser is now inside an element.
 *
 * name - the element name.
 * atts - NULL, or a NULL-terminated array laid out as alternating
 *        attribute name / attribute value entries.
 */
static void
lf_startElement(void *ctx ATTRIBUTE_UNUSED,
                const xmlChar *name, const xmlChar **atts)
{
  fprintf(stdout, "SAX.startElement(%s", (const char *) name);
  if (atts != NULL) {
    int i;

    /* Walk the array one name/value pair at a time. */
    for (i = 0; atts[i] != NULL; i += 2) {
      fprintf(stdout, ", %s='", (const char *) atts[i]);

      /*
       * A name without a value means the terminator has been reached.
       * Stop here rather than stepping over the NULL and reading past
       * the end of the array.
       */
      if (atts[i + 1] == NULL)
        break;

      fprintf(stdout, "%s'", (const char *) atts[i + 1]);
    }
  }
  fprintf(stdout, ")\n");

  /* Character data is only reported by lf_characters() while this is set. */
  in_element = true;
}

/*
 * SAX endElement callback: clears the in_element flag and traces the
 * closing element name to stdout.
 */
static void
lf_endElement(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
{
  in_element = false;
  fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
}

/*
 * SAX characters callback: traces a short preview of the character data
 * together with its full length.
 *
 * ch  - the character data; it is read by index up to len and is not
 *       treated as a NUL-terminated string.
 * len - number of bytes available in ch.
 *
 * Nothing is printed unless in_element is set.  At most the first 30
 * bytes are copied into the preview.
 */
static void
lf_characters(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
{
  char preview[40];
  int copied = 0;

  if (in_element == false)
    return;

  while (copied < len && copied < 30)
    {
      preview[copied] = ch[copied];
      copied++;
    }
  preview[copied] = 0;

  printf("SAX.characters(%s, %d)\n", preview, len);
}

/*
 * Parse the XML file named by file_name with the libxml2 SAX push parser.
 * Each SAX event is traced to stdout by the lf_* callbacks in this file.
 *
 * file_id   - HDF5 file identifier.  Inserting the parsed XML into HDF5
 *             is the intended second step but is not implemented here,
 *             so the argument is currently unused.
 * file_name - path of the XML file to read.
 *
 * A failure to open the file or to create the parser context is reported
 * through xmlGenericError; the function returns nothing either way.
 *
 * For an example of parsing XML with libxml2 and SAX see:
 * [http://git.gnome.org/browse/libxml2/tree/testSAX.c].
 */
void
load_features (hid_t file_id, const char* file_name)
{
  /*
   * Only the callbacks this loader implements are named; every other
   * member of the handler is zero-initialized.
   */
  static xmlSAXHandler sax_handler = {
    .getEntity = lf_getEntity,
    .startDocument = lf_startDocument,
    .endDocument = lf_endDocument,
    .startElement = lf_startElement,
    .endElement = lf_endElement,
    .characters = lf_characters,
    .initialized = 1
  };

  /*
   * The first read is handed to xmlCreatePushParserCtxt as the initial
   * chunk; the rest of the file is then fed in small pieces.
   */
  enum { FIRST_CHUNK = 4, NEXT_CHUNK = 3 };

  char chars[10];
  size_t got;
  FILE *f;

  (void) file_id;

  g = GBSeqNew ();

  LIBXML_TEST_VERSION;

  in_element = false;

  /* Binary mode: the parser must see the file's bytes untranslated. */
  f = fopen (file_name, "rb");
  if (f == NULL)
    {
      xmlGenericError (xmlGenericErrorContext,
                       "Cannot read file.");
      goto out;
    }

  got = fread (chars, 1, FIRST_CHUNK, f);
  if (got > 0)
    {
      xmlParserCtxtPtr ctxt =
        xmlCreatePushParserCtxt (&sax_handler, NULL,
                                 chars, (int) got, file_name);

      if (ctxt == NULL)
        {
          /* Never feed chunks to a context that could not be created. */
          xmlGenericError (xmlGenericErrorContext,
                           "Cannot create XML parser context.\n");
        }
      else
        {
          while ((got = fread (chars, 1, NEXT_CHUNK, f)) > 0)
            {
              xmlParseChunk (ctxt, chars, (int) got, 0);
            }
          /* A zero-length chunk with terminate=1 ends the document. */
          xmlParseChunk (ctxt, chars, 0, 1);
          xmlFreeParserCtxt (ctxt);
        }
    }
  fclose (f);

out:
  GBSeqFree (g);
  /* Do not leave the global pointing at the freed record. */
  g = NULL;
}

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.