001/*
002 * Copyright (c) 2004-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: welker $'
006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 
007 * '$Revision: 24234 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.sdm.spa.actors.piw.viz;
031
032import java.util.Arrays;
033
034public class Sequence {
035
036        /**
037         * Construct an instance with offset 0.
038         * 
039         * @see Sequence#Sequence(String, String, String,
040         *      TranscriptionFactorBindingSite[], int)
041         */
042        public Sequence(String accessionNumberOriginal, String geneID,
043                        String alignedSequence, TranscriptionFactorBindingSite[] arrTFBSs) {
044
045                this(accessionNumberOriginal, geneID, alignedSequence, arrTFBSs, 0);
046        }
047
048        /**
049         * Construct an instance.
050         * 
051         * @param accessionNumberOriginal
052         *            Accession number of the original sequence we submitted to
053         *            BLAST.
054         * @param geneID
055         *            Gene ID of this sequence.
056         * @param alignedSequence
057         *            Sequence returned by ClustalW with gaps.
058         * @param arrTFBSs
059         *            Transcription factor binding sites relative to this sequence
060         *            with no gaps.
061         * @param offset
062         *            How much this sequence is shifted.
063         */
064        public Sequence(String accessionNumberOriginal, String geneID,
065                        String alignedSequence, TranscriptionFactorBindingSite[] arrTFBSs,
066                        int offset) {
067
068                // Remove useless trailing hyphens.
069                alignedSequence = alignedSequence.replaceFirst("-*$", "");
070
071                this.accessionNumberOriginal = accessionNumberOriginal;
072                this.geneID = geneID;
073                this.alignedSequence = alignedSequence.replaceFirst("-*", "");
074                this.arrTFBSs = arrTFBSs;
075                this.offset = alignedSequence.length() - this.alignedSequence.length()
076                                + offset;
077        }
078
079        /**
080         * @param index
081         *            Index on a sequence without gaps.
082         * @return Corresponding index on this sequence.
083         */
084        public int getActualIndex(int index) {
085
086                int countLetters = 0;
087
088                int i = 0;
089                for (; i < this.alignedSequence.length(); i++) {
090                        if ('-' != this.alignedSequence.charAt(i)) {
091                                countLetters++;
092                                if (index + 1 == countLetters) {
093                                        break;
094                                }
095                        }
096                }
097
098                return i + offset;
099        }
100
101        /**
102         * Find the subsequence, filling in hyphens for gaps.
103         * 
104         * @param start
105         *            Initial index.
106         * @param length
107         *            Length of sequence to return.
108         * @return subsequence.
109         */
110        public String subsequence(int start, int length) {
111                System.out.println(length);
112                int overlap1 = start > offset ? start : offset;
113                int overlap2 = this.alignedSequence.length() + offset > start + length ? start
114                                + length
115                                : this.alignedSequence.length() + offset;
116
117                String str = "";
118                if (overlap2 <= overlap1) {
119                        for (int i = 0; i < length; i++) {
120                                str += "-";
121                        }
122                } else {
123                        for (int i = start; i < overlap1; i++) {
124                                str += "-";
125                        }
126                        str += this.alignedSequence.substring(overlap1 - offset, overlap2
127                                        - offset);
128                        for (int i = overlap2; i < start + length; i++) {
129                                str += "-";
130                        }
131                }
132                System.out.println(str);
133                return str;
134        }
135
136        /**
137         * Returns the number of times the given tfbs appears in this sequence.
138         */
139        public int getTotalNumParticularTFBS(String tfbs) {
140                int count = 0;
141
142                for (int i = 0; i < arrTFBSs.length; i++) {
143                        if (arrTFBSs[i].name.equals(tfbs)) {
144                                count++;
145                        }
146                }
147                return count;
148        }
149
150        /**
151         * @return Length of sequence with gaps removed.
152         */
153        public int lengthNoGaps() {
154                return this.alignedSequence.replaceAll("-", "").length();
155        }
156
157        public String toString() {
158                return super.toString();
159        }
160
161        public boolean equals(Object ob) {
162                if (!(ob instanceof Sequence)) {
163                        return false;
164                }
165
166                Sequence seq = (Sequence) ob;
167
168                if (!this.accessionNumberOriginal.equals(seq.accessionNumberOriginal)) {
169                        return false;
170                } else if (!this.geneID.equals(seq.geneID)) {
171                        return false;
172                } else if (!this.alignedSequence.equals(seq.alignedSequence)) {
173                        return false;
174                } else if (!Arrays.equals(this.arrTFBSs, seq.arrTFBSs)) {
175                        return false;
176                }
177
178                return true;
179        }
180
181        public final String accessionNumberOriginal;
182        public final String geneID;
183        public final String alignedSequence;
184        public final TranscriptionFactorBindingSite[] arrTFBSs;
185        public final int offset;
186}