1 package com.larvalabs.starfish.examples.sequence;
2
3 import com.larvalabs.starfish.algorithm.*;
4 import com.larvalabs.starfish.algorithm.parameter.*;
5 import com.larvalabs.starfish.util.Uuid;
6 import jaligner.Alignment;
7 import jaligner.Matrix;
8 import jaligner.Sequence;
9 import jaligner.SmithWatermanGotoh;
10import jaligner.formats.Format;
11import jaligner.formats.Pair;
12import jaligner.util.MatrixLoader;
13import org.apache.commons.logging.Log;
14import org.apache.commons.logging.LogFactory;
15
16import java.io.*;
17
18
21public class JAlignAlgorithm extends Algorithm {
22
23
26 static Log log = LogFactory.getLog(JAlignAlgorithm.class);
27
28 private String keySequence;
30 private File dbSequenceFile;
31 private long dbSequenceLength;
32 private int segmentSize;
33 private int segmentOverlap;
34 private String scoringMatrix;
35 private float openGapPenalty;
36 private float extendGapPenalty;
37 private int desiredSegmentation;
38
39 public static final String PARAM_KEY_SEQUENCE = "key.sequence";
41 public static final String DESC_KEY_SEQUENCE = "Search Sequence";
42 public static final String PARAM_DB_SEQUENCE_FILENAME = "db.sequence.filename";
43 public static final String DESC_DB_SEQUENCE_FILENAME = "DB Sequence File";
44 public static final String PARAM_SCORING_MATRIX = "scoring.matrix";
45 public static final String DESC_SCORING_MATRIX = "Scoring Matrix";
46 public static final String PARAM_OPEN_GAP_PENALTY = "open.gap.penalty";
47 public static final String DESC_OPEN_GAP_PENALTY = "Open Gap Penalty";
48 public static final String PARAM_EXTEND_GAP_PENALTY = "extend.gap.penalty";
49 public static final String DESC_EXTEND_GAP_PENALTY = "Extend Gap Penalty";
50 public static final String PARAM_DESIRED_SEGMENTATION = "desired.segmentation";
51 public static final String DESC_DESIRED_SEGMENTATION = "Desired Segmentation";
52
53
56 public static final String[] SCORING_MATRICES =
57 {
58 "EDNAFULL",
59 "BLOSUM30",
60 "BLOSUM40",
61 "BLOSUM50",
62 "BLOSUM60",
63 "BLOSUM70",
64 "BLOSUM80",
65 "BLOSUM90"
66 };
67
68
71 public static final int DEFAULT_SEGMENTATION = 100;
72
73
77 public String getName() {
78 return "Smith-Waterman-Gotoh Sequence Alignment";
79 }
80
81
85 public void setParameters(ParameterSet parameterSet) {
86 StringType keyParam = new StringType(PARAM_KEY_SEQUENCE, DESC_KEY_SEQUENCE, true, 0, "[atgcATGC]*", "Key sequence must be a nucleic sequence.", null);
88 keyParam.setExpectingLargeInput(true);
89 parameterSet.addParameter(keyParam);
90 FileType fileParam = new FileType(PARAM_DB_SEQUENCE_FILENAME, DESC_DB_SEQUENCE_FILENAME, true, null, null);
92 parameterSet.addParameter(fileParam);
93 SelectorType scoreParam = new SelectorType(PARAM_SCORING_MATRIX, DESC_SCORING_MATRIX, true, SCORING_MATRICES, SCORING_MATRICES[0]);
95 parameterSet.addParameter(scoreParam);
96 FloatType openGapParam = new FloatType(PARAM_OPEN_GAP_PENALTY, DESC_OPEN_GAP_PENALTY, true, new Float(10f));
98 parameterSet.addParameter(openGapParam);
99 FloatType extendGapParam = new FloatType(PARAM_EXTEND_GAP_PENALTY, DESC_EXTEND_GAP_PENALTY, true, new Float(0.5f));
01 parameterSet.addParameter(extendGapParam);
02 IntegerType segmentationParam = new IntegerType(PARAM_DESIRED_SEGMENTATION, DESC_DESIRED_SEGMENTATION, false, null);
05 parameterSet.addParameter(segmentationParam);
06 }
07
08
13 public void initialize(Parameters parameters) {
14 keySequence = (String) parameters.getParameter(PARAM_KEY_SEQUENCE);
15 segmentOverlap = keySequence.length();
16 dbSequenceFile = (File) parameters.getParameter(PARAM_DB_SEQUENCE_FILENAME);
17 scoringMatrix = (String) parameters.getParameter(PARAM_SCORING_MATRIX);
18 openGapPenalty = ((Float) parameters.getParameter(PARAM_OPEN_GAP_PENALTY)).floatValue();
19 extendGapPenalty = ((Float) parameters.getParameter(PARAM_EXTEND_GAP_PENALTY)).floatValue();
20 Integer segmentation = (Integer)parameters.getParameter(PARAM_DESIRED_SEGMENTATION);
21 if (segmentation != null) {
22 desiredSegmentation = segmentation.intValue();
23 } else {
24 desiredSegmentation = DEFAULT_SEGMENTATION;
25 }
26 }
27
28
33 public int totalNumberOfSegments() {
34 dbSequenceLength = dbSequenceFile.length();
35 segmentSize = (int)Math.max(dbSequenceLength/desiredSegmentation, 10 * segmentOverlap);
36 log.debug("Segment size: " + segmentSize);
37 double k = 1 + ((double) dbSequenceLength - segmentSize) / (segmentSize - segmentOverlap);
38 return (int) Math.ceil(k);
39 }
40
41
45 public void generateSegmentInputData(SegmentInputData segmentInputData) {
46 try {
47 int k = totalNumberOfSegments();
48 BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(dbSequenceFile)));
49 long charsRemaining = dbSequenceLength;
50 char[] lastSegment = null;
51 long segmentStartIndex = 0;
52 for (int i = 0; i < k; i++) {
53 int currentSegmentSize = segmentSize;
54 if (segmentSize > charsRemaining) {
55 currentSegmentSize = (int) charsRemaining;
56 }
57 char[] segment = new char[currentSegmentSize];
58 int j = 0;
59 if (lastSegment != null) {
60 int lastIndex = lastSegment.length - segmentOverlap;
61 while (j < segmentOverlap) {
62 segment[j] = lastSegment[lastIndex];
63 j++;
64 lastIndex++;
65 }
66 }
67 int charsToRead = currentSegmentSize - j;
68 in.read(segment, j, charsToRead);
69 String segmentString = new String(segment);
70 segmentInputData.setSegmentInputData(i, new SequenceSegment(segmentString, segmentStartIndex - segmentOverlap));
71 charsRemaining -= charsToRead;
72 segmentStartIndex += charsToRead;
73 lastSegment = segment;
74 }
75 in.close();
76 } catch (Exception e) {
77 log.error("Could not read db sequence!", e);
78 }
79 }
80
81
88 public Serializable processSegment(int segmentNum, Object segmentData) {
89 try {
90 SequenceSegment segment = (SequenceSegment) segmentData;
91 String dbSequence = segment.getSequence();
92 Sequence s1 = new Sequence(keySequence, "Key", "Key Sequence", Sequence.NUCLEIC);
93 Sequence s2 = new Sequence(dbSequence, "DB", "Database Sequence", Sequence.NUCLEIC);
94 Matrix matrix = MatrixLoader.load(scoringMatrix);
95 Alignment alignment = SmithWatermanGotoh.align(s1, s2, matrix, openGapPenalty, extendGapPenalty);
96 alignment.setStart2((int)(alignment.getStart2() + segment.getStartIndex()));
97 AlignmentResult result = new AlignmentResult(alignment);
98 return result;
99 } catch (Exception e) {
00 log.error("Error processing alignment!", e);
01 return null;
02 }
03 }
04
05
11 public ProblemResult processResults(Uuid[] results, Resources resources) {
12 AlignmentResult best = null;
13 for (int i = 0; i < results.length; i++) {
14 Uuid uuid = (Uuid) results[i];
15 AlignmentResult result = (AlignmentResult) resources.getResource(uuid);
16 if ((best == null) || (best.getScore() < result.getScore())) {
17 best = result;
18 }
19 }
20 Alignment alignment = best.getAlignment();
21 log.debug("Start1: " + alignment.getStart1());
22 log.debug("Start2: " + alignment.getStart2());
23 Format format = new Pair();
24 log.debug("Score: " + alignment.getScore());
25 String output = "Score: " + alignment.getScore() + "\nAlignment:\n" + format.format(alignment);
26 log.debug(output);
27 return ProblemResult.createStringProblemResult(output);
28 }
29}
30