Updated the merge command to reflect the new table structure in the HDF5 file.

author: Don Pellegrino <don@drexel.edu> 2010-01-19 20:43:55 (GMT)
committer: Don Pellegrino <don@drexel.edu> 2010-01-19 20:43:55 (GMT)
commit: aae357374282d545f0a036c1c80b007247ff6067 (patch) (side-by-side diff)
tree: 458204c6968acf89a55fa080be0f535cef3fc9c6
parent: 9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8 (diff)
download: exp007-aae357374282d545f0a036c1c80b007247ff6067.zip
exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.gz
exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.bz2
1 file changed, 20 insertions, 9 deletions
diff --git a/analysis/year.R b/analysis/year.R
index 6d68925..37310d5 100644
--- a/analysis/year.R
+++ b/analysis/year.R
@@ -4,22 +4,33 @@ require(hdf5);
 
 hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE);
 
-A <- influenza.aa.dat;
-B <- influenza.faa;
-
-# Join the two tables by GB value.
-C <- merge (A, B, by.x = "GenBank accession number", by.y = "GB");
+A <- merge (influenza.aa.dat, influenza.faa,
+            by.x = "GenBank accession number",
+            by.y = "GB");
+
+B <- merge (A, gi.type.data,
+            by.x = "GI",
+            by.y = "GI");
+
+# Compare the local copy with a query performed on the NCBI database.
+# A quick check of the number of records returned and the first and
+# last set of GB values in sorted order should not show any
+# inconsistencies.
+T <- B[B$Year == 1978 & B$Type == "A" & B$Protein == "HA", ];
+nrow (T);
+U <- T$"GenBank accession number";
+sort (U);
 
 # All records for 1918.  Based on code from
 # http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations
-D <- C[C$Year == 1918, ]
+C <- B[B$Year == 1918, ]
 
-summary (D);
+summary (C);
 
 # Countries represented in the 1918 dataset.
-D$Country;
+C$Country;
 
-D[D$"Protein Type" == "HA", ]
+C[C$Protein == "HA", ]
 
 # All records with a year value.
 E <- A[A$Year != 0, ];
author	Don Pellegrino <don@drexel.edu>	2010-01-19 20:43:55 (GMT)
committer	Don Pellegrino <don@drexel.edu>	2010-01-19 20:43:55 (GMT)
commit	aae357374282d545f0a036c1c80b007247ff6067 (patch) (side-by-side diff)
tree	458204c6968acf89a55fa080be0f535cef3fc9c6
parent	9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8 (diff)
download	exp007-aae357374282d545f0a036c1c80b007247ff6067.zip exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.gz exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.bz2