2424package org .biojava .nbio .alignment ;
2525
2626import org .biojava .nbio .alignment .template .GapPenalty ;
27+ import org .biojava .nbio .alignment .template .PairwiseSequenceAligner ;
28+ import org .biojava .nbio .alignment .template .SequencePair ;
2729import org .biojava .nbio .alignment .template .SubstitutionMatrix ;
2830import org .biojava .nbio .core .exceptions .CompoundNotFoundException ;
2931import org .biojava .nbio .core .sequence .DNASequence ;
3537import org .junit .Before ;
3638import org .junit .Test ;
3739
38- import static org .junit .Assert .*;
40+ import static org .junit .Assert .assertEquals ;
41+ import static org .junit .Assert .assertFalse ;
42+ import static org .junit .Assert .assertTrue ;
3943
4044public class NeedlemanWunschTest {
4145
@@ -56,6 +60,30 @@ public void setup() throws CompoundNotFoundException {
5660 self = new NeedlemanWunsch <ProteinSequence , AminoAcidCompound >(query , query , gaps , blosum62 );
5761 }
5862
63+ @ Test
64+ public void testComplex () throws Exception {
65+
66+ short match = 2 , gop = -5 , gep = -3 ; // 2, 5, and 3 are coprime; -2 is the mismatch score
67+ SimpleSubstitutionMatrix <NucleotideCompound > mx = new SimpleSubstitutionMatrix <NucleotideCompound >(new DNACompoundSet (), match , (short )-match );
68+
69+ DNASequence a = new DNASequence ("CGTAT ATATCGCGCGCGCGATATATATATCT TCTCTAAAAAAA" .replaceAll (" " , "" ));
70+ DNASequence b = new DNASequence ("GGTATATATATCGCGCGCACGAT TATATATCTCTCTCTAAAAAAA" .replaceAll (" " , "" ));
71+ // --CGTATATATCGCGCGCGCGATATATATATCT-TCTCTAAAAAAA
72+ // GGTATATATATCGCGCGCACGAT-TATATATCTCTCTCTAAAAAAA
73+ // mismatches: ^ ^
74+ // The two alignments should have the same score. The bottom one is the one the aligner found.
75+
76+ PairwiseSequenceAligner <DNASequence , NucleotideCompound > aligner = Alignments .getPairwiseAligner (a , b , Alignments .PairwiseSequenceAlignerType .GLOBAL , new SimpleGapPenalty (gop , gep ), mx );
77+ SequencePair <DNASequence , NucleotideCompound > pair = aligner .getPair ();
78+ System .out .println (pair ); // prints the alignment above
79+
80+ int nMatches = "--CGTATATATCGCGCGCGCGATATATATATCT-TCTCTAAAAAAA" .length () - 2 - 4 ;
81+ double expectedScore = nMatches * match
82+ - 2 * match // there are two mismatches
83+ + 3 * gop + 4 * gep ; // there are 3 gap opens and either 1 or 4 extensions, depending on the def
84+ assertEquals (expectedScore , aligner .getScore (), 0.00000001 );
85+ }
86+
5987 @ Test
6088 public void testNeedlemanWunsch () {
6189 NeedlemanWunsch <ProteinSequence , AminoAcidCompound > nw =
@@ -124,16 +152,16 @@ public void testGetScoreMatrixAsString() {
124152 "R -12 -12 -24 -23%n" +
125153 "N -13 -13 -14 -24%n" +
126154 "D -14 -14 -15 -14%n" +
127- "%nInsertion%n" +
155+ "%nInsertion%n" +
128156 " R D G%n" +
129157 " -10 -11 -12 -13%n" +
130158 "A -\u221E -\u221E -12 -13%n" +
131159 "R -\u221E -\u221E -17 -14%n" +
132160 "N -\u221E -\u221E -23 -16%n" +
133161 "D -\u221E -\u221E -26 -17%n" ),
134- alignment .getScoreMatrixAsString ());
162+ alignment .getScoreMatrixAsString ());
135163 assertEquals (String .format (
136- "Substitution%n" +
164+ "Substitution%n" +
137165 " A R N D%n" +
138166 " 0 -\u221E -\u221E -\u221E -\u221E %n" +
139167 "A -\u221E 4 -12 -14 -15%n" +
@@ -144,7 +172,7 @@ public void testGetScoreMatrixAsString() {
144172 " A R N D%n" +
145173 " -10 -\u221E -\u221E -\u221E -\u221E %n" +
146174 "A -11 -\u221E -\u221E -\u221E -\u221E %n" +
147- "R -12 -7 -23 -25 -26%n" +
175+ "R -12 -7 -23 -25 -26%n" +
148176 "N -13 -8 -2 -18 -21%n" +
149177 "D -14 -9 -3 4 -12%n" +
150178 "%nInsertion%n" +
0 commit comments