# Explore the qualities of the year feature.
#
# Steps: load the influenza HDF5 file, join the two tables on their GenBank
# accession number, inspect the 1918 records, and plot a histogram of all
# non-zero years.
#
# The top-level expressions below (summary, D$Country, the HA subset) rely on
# R's auto-printing, so they only show output when run interactively or via
# Rscript, not under a plain source().

# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(hdf5)

# NOTE(review): influenza.aa.dat and influenza.faa are presumably created in
# the workspace by this call -- confirm against the contents of the .h5 file.
hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE)

A <- influenza.aa.dat
B <- influenza.faa

# Join the two tables by GB value.
C <- merge(A, B, by.x = "GenBank accession number", by.y = "GB")

# All records for 1918. Based on code from
# http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations
# which() drops NA comparisons; a bare logical index would instead return an
# all-NA row for every record whose Year is NA.
D <- C[which(C$Year == 1918), ]
summary(D)

# Countries represented in the 1918 dataset.
D$Country

# 1918 records whose protein type is HA (NA protein types are excluded).
D[which(D[["Protein Type"]] == "HA"), ]

# All records with a year value.
# NOTE(review): Year == 0 appears to mark "no year recorded" -- confirm.
E <- A[which(A$Year != 0), ]
hist(E$Year)