summary | refs | log | tree | commit | diff | stats
author Don Pellegrino <don@drexel.edu> 2010-01-19 20:43:55 (GMT)
committer Don Pellegrino <don@drexel.edu> 2010-01-19 20:43:55 (GMT)
commit aae357374282d545f0a036c1c80b007247ff6067 (patch) (unidiff)
tree 458204c6968acf89a55fa080be0f535cef3fc9c6
parent 9642c682be8bb2f1dd0eb616488ccaf2c7bb1ad8 (diff)
download exp007-aae357374282d545f0a036c1c80b007247ff6067.zip
exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.gz
exp007-aae357374282d545f0a036c1c80b007247ff6067.tar.bz2
Updated the merge command to reflect the new table structure in the HDF5 file.
-rw-r--r-- analysis/year.R 29
1 files changed, 20 insertions, 9 deletions
diff --git a/analysis/year.R b/analysis/year.R
index 6d68925..37310d5 100644
--- a/analysis/year.R
+++ b/analysis/year.R
@@ -4,22 +4,33 @@ require(hdf5);
44
5hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE);5hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE);
66
7A <- influenza.aa.dat;7A <- merge (influenza.aa.dat, influenza.faa,
8B <- influenza.faa;8 by.x = "GenBank accession number",
99 by.y = "GB");
10# Join the two tables by GB value.10
11C <- merge (A, B, by.x = "GenBank accession number", by.y = "GB");11B <- merge (A, gi.type.data,
12 by.x = "GI",
13 by.y = "GI");
14
15# Compare the local copy with a query performed on the NCBI database.
16# A quick check of the number of records returned and the first and
17# last set of GB values in sorted order should not show any
18# inconsistencies.
19T <- B[B$Year == 1978 & B$Type == "A" & B$Protein == "HA", ];
20nrow (T);
21U <- T$"GenBank accession number";
22sort (U);
1223
13# All records for 1918. Based on code from24# All records for 1918. Based on code from
14# http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations25# http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations
15D <- C[C$Year == 1918, ]26C <- B[B$Year == 1918, ]
1627
17summary (D);28summary (C);
1829
19# Countries represented in the 1918 dataset.30# Countries represented in the 1918 dataset.
20D$Country;31C$Country;
2132
22D[D$"Protein Type" == "HA", ]33C[C$Protein == "HA", ]
2334
24# All record with a year value.35# All record with a year value.
25E <- A[A$Year != 0, ];36E <- A[A$Year != 0, ];

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.