summaryrefslogtreecommitdiffstats
Unidiff
-rw-r--r--test/entropy/Align2Ref.m24
-rw-r--r--test/entropy/CalculateEntropy.m15
-rw-r--r--test/entropy/CalculateProteinEntropy.m25
-rw-r--r--test/entropy/FastNWalign2.c94
-rw-r--r--test/entropy/GenomeAlignments.m31
-rw-r--r--test/entropy/GenomePairwiseDist.m98
-rw-r--r--test/entropy/RefineAlignments.m276
-rw-r--r--test/entropy/don_anal.m40
-rw-r--r--test/entropy/nwalign_mod.m637
9 files changed, 1240 insertions, 0 deletions
diff --git a/test/entropy/GenomePairwiseDist.m b/test/entropy/GenomePairwiseDist.m
new file mode 100644
index 0000000..544db0a
--- a/dev/null
+++ b/test/entropy/GenomePairwiseDist.m
@@ -0,0 +1,98 @@
1function DIST_OUT = GenomePairwiseDist(IN_PROTS, ALIGN_NAME)
2% GenomePairwiseDist
3% This will align all of the protein segments in the genome structure
4% and return a cell-array. Each cell is the alignment of one
5% protein. If the protein is missing then the distance is NaN.
6%
7
8if nargin < 1
9 ALIGN_NAME = 'saved_alignments';
10end
11
12try
13 display('Loading Previous Alignments')
14 d = load(ALIGN_NAME);
15 saved_dists = d.DIST_CELL;
16 saved_headers = d.header_names;
17catch %#ok<CTCH>
18 display('No previous alignments found')
19 saved_headers = {};
20 saved_dists = zeros(numel(IN_PROTS));
21end
22
23NUM_LETTERS = 4000000;
24
25
26[header_names{1:length(IN_PROTS)}] = deal(IN_PROTS(:).Header);
27
28display('Starting Alignments')
29
30dist_mat = NaN(size(IN_PROTS,1));
31
32[tf loc]=ismember(header_names, saved_headers);
33
34dist_mat(tf,tf) = saved_dists(loc(tf), loc(tf));
35[I J] = find(triu(isnan(dist_mat)));
36inds = sub2ind(size(dist_mat), I, J);
37
38sizes = arrayfun(@(x)(length(x.Sequence)), IN_PROTS);
39
40%find all of the ones that are not worth aligning
41bad_mask = sizes(I) <= 1 | sizes(J) <= 1;
42dist_mat(inds(bad_mask)) = Inf;
43I(bad_mask) = [];
44J(bad_mask) = [];
45
46p_sizes = sizes(I)+sizes(J);
47counter = 1;
48display('Aligning...')
49tic
50while counter < length(I)
51 cum_sizes = cumsum(p_sizes(counter+1:end));
52 top = counter+find(cum_sizes<NUM_LETTERS,1, 'last');
53
54 I_spots = I(counter:top);
55 J_spots = J(counter:top);
56 need_align = cell(length(I_spots),2);
57 [need_align{:,1}] = deal(IN_PROTS(I_spots).Sequence);
58 [need_align{:,2}] = deal(IN_PROTS(J_spots).Sequence);
59 dist = zeros(size(I_spots));
60
61 %parfor k = 1:size(need_align,1)
62 for k = 1:size(need_align,1)
63 %display([num2str(k) ' ' num2str(toc)])
64 try
65 dist(k)=nwalign_mod(need_align{k,:});
66 catch %#ok<CTCH>
67 dist(k) = Inf;
68 end
69 end
70 time = toc;
71 inds = sub2ind(size(dist_mat), I_spots,J_spots);
72 dist_mat(inds) = dist;
73 counter = top+1;
74 display([num2str(counter/length(I)) ' in ' num2str(time) ' seconds'])
75end
76
77DIST_CELL = dist_mat;
78save(ALIGN_NAME, 'DIST_CELL', 'header_names')
79DIST_OUT = dfun(DIST_CELL);
80
81
82
83
84
85function out_d = dfun(MAT)
86
87score12 = triu(MAT,1);
88self_d = diag(MAT);
89d = (1-bsxfun(@rdivide, score12, self_d(:))).*(1-bsxfun(@rdivide, score12, self_d(:)'));
90
91out_d = triu(d,1) + triu(d,1)';
92
93
94
95
96
97
98

Valid XHTML 1.0 Strict

Copyright © 2009 Don Pellegrino All Rights Reserved.