-rw-r--r-- | src/load/load_asn.c | 173 | ||||
-rw-r--r-- | src/load/load_asn.h | 24 | ||||
-rw-r--r-- | src/load/load_features.c | 167 | ||||
-rw-r--r-- | src/load/load_features.h | 12 |
4 files changed, 376 insertions, 0 deletions
diff --git a/src/load/load_asn.c b/src/load/load_asn.c new file mode 100644 index 0000000..fc27d84 --- a/dev/null +++ b/src/load/load_asn.c @@ -0,0 +1,173 @@ +#define _GNU_SOURCE +#include "load_asn.h" +#include <string.h> +#include <asn.h> +#include <objgbseq.h> +#include <objsset.h> +#include <sqnutils.h> + +void +print_asn (ObjectIdPtr oid, SeqIdPtr id, ValNodePtr descr, SeqAnnotPtr annot) +{ + /* + * Print the record identifiers. + */ + printf (" IDENTIFIERS\n"); + printf (" -----------\n"); + while (oid != NULL) + { + printf("%i, %s\n", oid->id, oid->str); + } + while (id != NULL) + { + // printf ("ID: %i\n", id->choice); + + char idval[256]; + SeqIdPrint (id, idval, PRINTID_FASTA_SHORT); + printf (" %s\n", idval); + + // if (id->choice == SEQID_GI) + // printf ("GI: %i\n", id->data.intvalue); + + id = id->next; + } + + /* + * Print descriptions. + * [http://www.ncbi.nlm.nih.gov/IEB/ToolBox/SDKDOCS/BIOSEQ.HTML#_Seq-descr:_Describing_the] + */ + printf ("\n DESCRIPTIONS\n"); + printf (" ------------\n"); + while (descr != NULL) + { + switch (descr->choice) + { + case Seq_descr_title: + printf (" TITLE: %s\n", (char*)descr->data.ptrvalue); + break; + case Seq_descr_genbank: + printf (" GENBANK\n"); + break; + case Seq_descr_pub: + printf (" PUB\n"); + break; + case Seq_descr_create_date: + printf (" CREATE DATE\n"); + break; + case Seq_descr_update_date: + printf (" UPDATE DATE\n"); + break; + case Seq_descr_source: + printf (" BIOSOURCE\n"); + break; + case Seq_descr_molinfo: + printf (" MOLINFO\n"); + break; + default: + printf (" DESCRIPTION CHOICE=%i\n", descr->choice); + break; + } + + descr = descr->next; + } + + /* + * Print annotations. + */ + printf ("\n ANNOTATIONS\n"); + printf (" -----------\n"); + while (annot != NULL) + { + printf (" ANNOTATION: %s, ", annot->name); + if (annot->desc != NULL) { + switch (annot->desc->choice) + { + case Annot_descr_name: + printf (" NAME: %s\n", (char*)annot->desc->data.ptrvalue); + break; + default: + printf (" CHOICE=%i\n", annot->desc->choice); + break; + } + } + else + printf (" NONE\n"); + + annot = annot->next; + } + +} + +/* + * Based on example at + * [http://www.ncbi.nlm.nih.gov/IEB/ToolBox/SDKDOCS/SEQUTIL.HTML]. + */ +void +load_asn (hid_t file_id, const char* file_name) +{ + char* asn_file = strdup(file_name); + AsnIoPtr aip = AsnIoOpen (asn_file, "r"); + SeqEntryPtr sep = SeqEntryAsnRead (aip, NULL); + BioseqSetPtr bsetp = 0; + ValNodePtr descr = 0; + SeqAnnotPtr annot = 0; + SeqIdPtr id = 0; + ObjectIdPtr oid = 0; + + /* + * Data file statistics. + */ + printf ("NODES: %i\tBIOSEQS: %i\n", ValNodeLen (sep), BioseqCount (sep)); + printf ("\n"); + + /* + * This loop needs to be corrected to handle nesting of sets. + */ + + while (sep != NULL) + { + bsetp = (BioseqSetPtr) sep->data.ptrvalue; + if (bsetp != NULL) + { + oid = bsetp->id; + id = NULL; + descr = bsetp->descr; + annot = bsetp->annot; + } + + printf ("BIOSEQSET\n"); + printf ("\n"); + print_asn (oid, id, descr, annot); + printf ("\n"); + + /* + * Process Bioseqs in the set. + */ + SeqEntryPtr sep2 = bsetp->seq_set; + while (sep2 != NULL) + { + BioseqPtr bsp = sep2->data.ptrvalue; + if (bsp != NULL) + { + oid = NULL; + id = bsp->id; + descr = bsp->descr; + annot = bsp->annot; + + printf ("BIOSEQ\n"); + printf ("\n"); + print_asn (oid, id, descr, annot); + printf ("\n"); + } + + sep2 = sep2->next; + } + + sep = sep->next; + } + + AsnIoClose (aip); + free (asn_file); + + return; +} |