001/*
002 * Copyright (c) 2004-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: welker $'
006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 
007 * '$Revision: 24234 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.sdm.spa.actors.piw.viz;
031
032import java.util.Collections;
033import java.util.Comparator;
034import java.util.Vector;
035
036/**
037 * @author xiaowen
038 */
039public class SequenceCollection {
040
041        private Sequence[] _sequences;
042        private int _lengthMinimumConsensus;
043
044        public SequenceCollection(Sequence[] sequences) {
045                this._sequences = sequences;
046                this._lengthMinimumConsensus = 1;
047        }
048
049        public Sequence getSequence(int index) {
050                return _sequences[index];
051        }
052
053        public int size() {
054                return _sequences.length;
055        }
056
057        public void setOffset(int index, int offset) {
058                Sequence seq = _sequences[index];
059                _sequences[index] = new Sequence(seq.accessionNumberOriginal,
060                                seq.geneID, seq.alignedSequence, seq.arrTFBSs, offset);
061        }
062
063        public int getMaximumSequenceLength() {
064                int intLength = 0;
065                for (int i = 0; i < _sequences.length; i++) {
066                        Sequence seq = _sequences[i];
067                        int intCurrentLength = seq.alignedSequence.length() + seq.offset;
068                        if (intLength < intCurrentLength) {
069                                intLength = intCurrentLength;
070                        }
071                }
072
073                return intLength;
074        }
075
076        public void setMinimumConsensusLength(int length) {
077                this._lengthMinimumConsensus = length;
078        }
079
080        public int getMinimumConsensusLength() {
081                return _lengthMinimumConsensus;
082        }
083
084        /** Uses the sequences to construct the consensus sequence. */
085        public String getConsensus() {
086
087                // Get the minimum length of all the sequences.
088                // The length of them should all be the same,
089                // but this will allow differences.
090                int intMinLength = -1;
091                for (int i = 0; i < _sequences.length; i++) {
092                        Sequence seq = _sequences[i];
093                        int intCurrentLength = seq.alignedSequence.length() + seq.offset;
094                        if (-1 == intMinLength || intMinLength > intCurrentLength) {
095                                intMinLength = intCurrentLength;
096                        }
097                }
098
099                // String buffer for the consensus sequence.
100                StringBuffer strConsensus = new StringBuffer();
101
102                // Loop over the length of the sequences.
103                for (int index = 0; index < intMinLength; index++) {
104                        Vector vecBases = new Vector();
105                        final int bin[] = new int[] { 0, 0, 0, 0, 0 };
106                        char trans[] = new char[] { 'A', 'G', 'C', 'T', '-' };
107                        char type[] = new char[] { 'R', 'R', 'Y', 'Y', '-' };
108                        int total = _sequences.length;
109                        char ch;
110
111                        for (int i = 0; i < _sequences.length; i++) {
112                                Sequence seq = _sequences[i];
113
114                                char c = '-';
115                                if (index >= seq.offset) {
116                                        c = seq.alignedSequence.charAt(index - seq.offset);
117                                }
118
119                                Character chr = new Character(Character.toUpperCase(c));
120
121                                int id = chr.equals(new Character('A')) ? 0 : chr
122                                                .equals(new Character('G')) ? 1 : chr
123                                                .equals(new Character('C')) ? 2 : chr
124                                                .equals(new Character('T')) ? 3 : 4;
125
126                                bin[id]++;
127                        }
128
129                        // Sort the indices by the number each contains.
130                        Vector vecIndices = new Vector();
131                        for (int i = 0; i < 5; i++) {
132                                vecIndices.add(new Integer(i));
133                        }
134
135                        Collections.sort(vecIndices, new Comparator() {
136                                public int compare(Object o1, Object o2) {
137                                        Integer i1 = (Integer) o1;
138                                        Integer i2 = (Integer) o2;
139                                        return bin[i2.intValue()] - bin[i1.intValue()];
140                                }
141                        });
142
143                        // Convert vector to array.
144                        int ind[] = new int[5];
145                        for (int i = 0; i < 5; i++) {
146                                ind[i] = ((Integer) vecIndices.elementAt(i)).intValue();
147                        }
148
149                        // Determine the letter to reach the consensus sequence.
150                        if (bin[ind[0]] * 4 >= total * 3) {
151                                // 75% or more are all one letter.
152                                ch = trans[ind[0]];
153                        } else if (type[ind[0]] == type[ind[1]]
154                                        && (bin[ind[0]] + bin[ind[1]]) * 4 >= total * 3) {
155                                // 75% or more are the same type.
156                                ch = type[ind[0]];
157                        } else {
158                                ch = '-';
159                        }
160
161                        strConsensus.append(ch);
162                }
163
164                return strConsensus.toString();
165        }
166}