001/* 002 * Copyright (c) 2004-2010 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: welker $' 006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 007 * '$Revision: 24234 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.sdm.spa.actors.piw.viz; 031 032import java.util.Arrays; 033 034public class Sequence { 035 036 /** 037 * Construct an instance with offset 0. 038 * 039 * @see Sequence#Sequence(String, String, String, 040 * TranscriptionFactorBindingSite[], int) 041 */ 042 public Sequence(String accessionNumberOriginal, String geneID, 043 String alignedSequence, TranscriptionFactorBindingSite[] arrTFBSs) { 044 045 this(accessionNumberOriginal, geneID, alignedSequence, arrTFBSs, 0); 046 } 047 048 /** 049 * Construct an instance. 050 * 051 * @param accessionNumberOriginal 052 * Accession number of the original sequence we submitted to 053 * BLAST. 054 * @param geneID 055 * Gene ID of this sequence. 056 * @param alignedSequence 057 * Sequence returned by ClustalW with gaps. 058 * @param arrTFBSs 059 * Transcription factor binding sites relative to this sequence 060 * with no gaps. 061 * @param offset 062 * How much this sequence is shifted. 063 */ 064 public Sequence(String accessionNumberOriginal, String geneID, 065 String alignedSequence, TranscriptionFactorBindingSite[] arrTFBSs, 066 int offset) { 067 068 // Remove useless trailing hyphens. 069 alignedSequence = alignedSequence.replaceFirst("-*$", ""); 070 071 this.accessionNumberOriginal = accessionNumberOriginal; 072 this.geneID = geneID; 073 this.alignedSequence = alignedSequence.replaceFirst("-*", ""); 074 this.arrTFBSs = arrTFBSs; 075 this.offset = alignedSequence.length() - this.alignedSequence.length() 076 + offset; 077 } 078 079 /** 080 * @param index 081 * Index on a sequence without gaps. 082 * @return Corresponding index on this sequence. 083 */ 084 public int getActualIndex(int index) { 085 086 int countLetters = 0; 087 088 int i = 0; 089 for (; i < this.alignedSequence.length(); i++) { 090 if ('-' != this.alignedSequence.charAt(i)) { 091 countLetters++; 092 if (index + 1 == countLetters) { 093 break; 094 } 095 } 096 } 097 098 return i + offset; 099 } 100 101 /** 102 * Find the subsequence, filling in hyphens for gaps. 103 * 104 * @param start 105 * Initial index. 106 * @param length 107 * Length of sequence to return. 108 * @return subsequence. 109 */ 110 public String subsequence(int start, int length) { 111 System.out.println(length); 112 int overlap1 = start > offset ? start : offset; 113 int overlap2 = this.alignedSequence.length() + offset > start + length ? start 114 + length 115 : this.alignedSequence.length() + offset; 116 117 String str = ""; 118 if (overlap2 <= overlap1) { 119 for (int i = 0; i < length; i++) { 120 str += "-"; 121 } 122 } else { 123 for (int i = start; i < overlap1; i++) { 124 str += "-"; 125 } 126 str += this.alignedSequence.substring(overlap1 - offset, overlap2 127 - offset); 128 for (int i = overlap2; i < start + length; i++) { 129 str += "-"; 130 } 131 } 132 System.out.println(str); 133 return str; 134 } 135 136 /** 137 * Returns the number of times the given tfbs appears in this sequence. 138 */ 139 public int getTotalNumParticularTFBS(String tfbs) { 140 int count = 0; 141 142 for (int i = 0; i < arrTFBSs.length; i++) { 143 if (arrTFBSs[i].name.equals(tfbs)) { 144 count++; 145 } 146 } 147 return count; 148 } 149 150 /** 151 * @return Length of sequence with gaps removed. 152 */ 153 public int lengthNoGaps() { 154 return this.alignedSequence.replaceAll("-", "").length(); 155 } 156 157 public String toString() { 158 return super.toString(); 159 } 160 161 public boolean equals(Object ob) { 162 if (!(ob instanceof Sequence)) { 163 return false; 164 } 165 166 Sequence seq = (Sequence) ob; 167 168 if (!this.accessionNumberOriginal.equals(seq.accessionNumberOriginal)) { 169 return false; 170 } else if (!this.geneID.equals(seq.geneID)) { 171 return false; 172 } else if (!this.alignedSequence.equals(seq.alignedSequence)) { 173 return false; 174 } else if (!Arrays.equals(this.arrTFBSs, seq.arrTFBSs)) { 175 return false; 176 } 177 178 return true; 179 } 180 181 public final String accessionNumberOriginal; 182 public final String geneID; 183 public final String alignedSequence; 184 public final TranscriptionFactorBindingSite[] arrTFBSs; 185 public final int offset; 186}