001/* 002 * Copyright (c) 2004-2010 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: welker $' 006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 007 * '$Revision: 24234 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.sdm.spa.actors.piw.viz; 031 032import java.util.Collections; 033import java.util.Comparator; 034import java.util.Vector; 035 036/** 037 * @author xiaowen 038 */ 039public class SequenceCollection { 040 041 private Sequence[] _sequences; 042 private int _lengthMinimumConsensus; 043 044 public SequenceCollection(Sequence[] sequences) { 045 this._sequences = sequences; 046 this._lengthMinimumConsensus = 1; 047 } 048 049 public Sequence getSequence(int index) { 050 return _sequences[index]; 051 } 052 053 public int size() { 054 return _sequences.length; 055 } 056 057 public void setOffset(int index, int offset) { 058 Sequence seq = _sequences[index]; 059 _sequences[index] = new Sequence(seq.accessionNumberOriginal, 060 seq.geneID, seq.alignedSequence, seq.arrTFBSs, offset); 061 } 062 063 public int getMaximumSequenceLength() { 064 int intLength = 0; 065 for (int i = 0; i < _sequences.length; i++) { 066 Sequence seq = _sequences[i]; 067 int intCurrentLength = seq.alignedSequence.length() + seq.offset; 068 if (intLength < intCurrentLength) { 069 intLength = intCurrentLength; 070 } 071 } 072 073 return intLength; 074 } 075 076 public void setMinimumConsensusLength(int length) { 077 this._lengthMinimumConsensus = length; 078 } 079 080 public int getMinimumConsensusLength() { 081 return _lengthMinimumConsensus; 082 } 083 084 /** Uses the sequences to construct the consensus sequence. */ 085 public String getConsensus() { 086 087 // Get the minimum length of all the sequences. 088 // The length of them should all be the same, 089 // but this will allow differences. 090 int intMinLength = -1; 091 for (int i = 0; i < _sequences.length; i++) { 092 Sequence seq = _sequences[i]; 093 int intCurrentLength = seq.alignedSequence.length() + seq.offset; 094 if (-1 == intMinLength || intMinLength > intCurrentLength) { 095 intMinLength = intCurrentLength; 096 } 097 } 098 099 // String buffer for the consensus sequence. 100 StringBuffer strConsensus = new StringBuffer(); 101 102 // Loop over the length of the sequences. 103 for (int index = 0; index < intMinLength; index++) { 104 Vector vecBases = new Vector(); 105 final int bin[] = new int[] { 0, 0, 0, 0, 0 }; 106 char trans[] = new char[] { 'A', 'G', 'C', 'T', '-' }; 107 char type[] = new char[] { 'R', 'R', 'Y', 'Y', '-' }; 108 int total = _sequences.length; 109 char ch; 110 111 for (int i = 0; i < _sequences.length; i++) { 112 Sequence seq = _sequences[i]; 113 114 char c = '-'; 115 if (index >= seq.offset) { 116 c = seq.alignedSequence.charAt(index - seq.offset); 117 } 118 119 Character chr = new Character(Character.toUpperCase(c)); 120 121 int id = chr.equals(new Character('A')) ? 0 : chr 122 .equals(new Character('G')) ? 1 : chr 123 .equals(new Character('C')) ? 2 : chr 124 .equals(new Character('T')) ? 3 : 4; 125 126 bin[id]++; 127 } 128 129 // Sort the indices by the number each contains. 130 Vector vecIndices = new Vector(); 131 for (int i = 0; i < 5; i++) { 132 vecIndices.add(new Integer(i)); 133 } 134 135 Collections.sort(vecIndices, new Comparator() { 136 public int compare(Object o1, Object o2) { 137 Integer i1 = (Integer) o1; 138 Integer i2 = (Integer) o2; 139 return bin[i2.intValue()] - bin[i1.intValue()]; 140 } 141 }); 142 143 // Convert vector to array. 144 int ind[] = new int[5]; 145 for (int i = 0; i < 5; i++) { 146 ind[i] = ((Integer) vecIndices.elementAt(i)).intValue(); 147 } 148 149 // Determine the letter to reach the consensus sequence. 150 if (bin[ind[0]] * 4 >= total * 3) { 151 // 75% or more are all one letter. 152 ch = trans[ind[0]]; 153 } else if (type[ind[0]] == type[ind[1]] 154 && (bin[ind[0]] + bin[ind[1]]) * 4 >= total * 3) { 155 // 75% or more are the same type. 156 ch = type[ind[0]]; 157 } else { 158 ch = '-'; 159 } 160 161 strConsensus.append(ch); 162 } 163 164 return strConsensus.toString(); 165 } 166}