001/* An actor that converts fastq format file to fasta format.
002
003 Copyright (c) 2003-2010 The Regents of the University of California.
004 All rights reserved.
005 Permission is hereby granted, without written agreement and without
006 license or royalty fees, to use, copy, modify, and distribute this
007 software and its documentation for any purpose, provided that the above
008 copyright notice and the following two paragraphs appear in all copies
009 of this software.
010
011 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
012 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
013 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
014 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
015 SUCH DAMAGE.
016
017 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
018 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
019 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
020 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
021 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
022 ENHANCEMENTS, OR MODIFICATIONS.
023
024 PT_COPYRIGHT_VERSION_2
025 COPYRIGHTENDKEY
026 */
027
028package org.camera.service;
029
030import static org.biojavax.bio.seq.RichSequence.Tools.createRichSequence;
031
032import java.io.File;
033import java.io.FileInputStream;
034import java.io.FileNotFoundException;
035import java.io.FileOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.util.zip.GZIPInputStream;
039
040import org.apache.commons.logging.Log;
041import org.apache.commons.logging.LogFactory;
042import org.biojava.bio.BioException;
043import org.biojava.bio.program.fastq.Fastq;
044import org.biojava.bio.program.fastq.FastqReader;
045import org.biojava.bio.program.fastq.IlluminaFastqReader;
046import org.biojava.bio.program.fastq.SangerFastqReader;
047import org.biojava.bio.program.fastq.SolexaFastqReader;
048import org.biojava.bio.seq.DNATools;
049import org.biojavax.SimpleNamespace;
050import org.biojavax.bio.seq.RichSequence;
051
052import ptolemy.actor.TypedAtomicActor;
053import ptolemy.actor.TypedIOPort;
054import ptolemy.actor.parameters.PortParameter;
055import ptolemy.data.BooleanToken;
056import ptolemy.data.StringToken;
057import ptolemy.data.expr.FileParameter;
058import ptolemy.data.expr.StringParameter;
059import ptolemy.data.type.BaseType;
060import ptolemy.kernel.CompositeEntity;
061import ptolemy.kernel.util.IllegalActionException;
062import ptolemy.kernel.util.NameDuplicationException;
063
064
065/**
066 * This actor converts fastq file to fasta format. It can take three
067 * different types of fastq files (Sanger, Solexa, and Illumina). However,
068 * user must defined the correct type of file else by default the 'Sanger'
069 * is selected from dropdown list.
070 * Actor can take regular fastq as well as gzip file for input,
071 * but the gzip file must have .gz extension. User should define the output
072 * file path else the output will be written to the user home directory with
073 * the same file prefix as input. User can define the name space if there
074 * is need to make modification to the sequence description else leave it
075 * empty.
076 * 
077 * Actor has four input ports and two output ports. The message output port
078 * provides the general information about the input/output file. The status
079 * output port outputs true/false if the file processed correctly then 'true'
080 * will be emitted from the status port.
081 * 
082 * Caution: Since biojava keeps the input file in memory, it is likely that
083 * this tool might break with very large files. It will help to gzip the fastq
084 * file to begin with.
085 * 
086 * @author madhu
087 * @version $Id: Fastq2Fasta.java 31113 2012-11-26 22:19:36Z crawl $
088 *
089 */
090public class Fastq2Fasta extends TypedAtomicActor {
091        
092        private static final long serialVersionUID = 1L;
093        private static final int DEFAULT_BUFFER_SIZE = 5096;
094        private static Log log = LogFactory.getLog(Fastq2Fasta.class);
095        
096        /* drop down value*/    
097        private static final String SANGER = "Sanger";
098        private static final String SOLEXA = "Solexa";
099        private static final String ILLUMINA = "Illumina";
100        
101        /** 
102         * Full path to the input file.
103     */
104        public TypedIOPort inputFilePath;
105        
106        /** 
107         * Full path to output file. If this field is left empty 
108         * then the output will be written to the home directory.
109     */
110        public TypedIOPort outputFilePath;
111        
112        /** Name space is a string that is added to the sequence description.*/
113        public PortParameter nameSpace;
114        
115        /** Input sequence data type */
116        public TypedIOPort inputPortType;
117        
118        /** Output status, it is boolean. */
119        public TypedIOPort outputPortStatus;
120        
121        /** Output message */
122        public TypedIOPort outputPortMessage;
123        
124    public Fastq2Fasta(CompositeEntity container,  String name) 
125        throws NameDuplicationException, IllegalActionException  {
126        
127        super(container, name);
128
129        inputFilePath = new TypedIOPort(this, "inputFilePath", true, false);
130        outputFilePath = new TypedIOPort(this, "outputFilePath", true, false);
131        inputPortType = new TypedIOPort(this, "inputPortType", true, false);            
132        outputPortMessage = new TypedIOPort(this, "outputPortMessage", false, true);
133        outputPortStatus = new TypedIOPort(this, "outputPortStatus", false, true);
134        
135        inputFilePath.setTypeEquals(BaseType.STRING);
136        outputFilePath.setTypeEquals(BaseType.STRING);
137        inputPortType.setTypeEquals(BaseType.STRING);
138        outputPortStatus.setTypeEquals(BaseType.BOOLEAN);
139        outputPortMessage.setTypeEquals(BaseType.STRING);
140        
141        inputFileParameter = new FileParameter(this, "inputFile");
142        outputFileParameter = new FileParameter(this, "outputFile");
143        
144        dropDownValue = new StringParameter(this, "dropDownValue");
145        dropDownValue.addChoice(ILLUMINA); 
146        dropDownValue.addChoice(SANGER);
147        dropDownValue.addChoice(SOLEXA); 
148        
149        nameSpace = new PortParameter(this, "nameSpace");
150        nameSpace.setExpression("");
151        nameSpace.setStringMode(true);
152        
153        _attachText("_iconDescription", "<svg>\n"
154            + "<rect x=\"-25\" y=\"-20\" " + "width=\"50\" height=\"40\" "
155            + "style=\"fill:white\"/>\n"
156            + "<polygon points=\"-15,-10 -12,-10 -8,-14 -1,-14 3,-10"
157            + " 15,-10 15,10, -15,10\" " + "style=\"fill:red\"/>\n"
158            + "</svg>\n");
159    }
160
161    /** Drop down values for choices to be selected. 
162         *  If user does not make a selection then 
163         *  default value of 'DNA' is used.
164         */
165    public StringParameter dropDownValue;
166   
167
168        /** The file's full path.
169     *  @see FileParameter
170     */
171    public FileParameter inputFileParameter;
172    
173    /** The output file's full path.
174     *  @see FileParameter
175     */
176    public FileParameter outputFileParameter;
177    
178        /**
179         * 
180         */
181    public void fire() throws IllegalActionException {
182        super.fire();
183        
184        boolean monitor = false;
185         
186        String inFilePath = null;
187        String outFilePath = null;
188        String type = null;
189        String nmSpace = nameSpace.getExpression().trim();
190        StringBuilder message = new StringBuilder();
191        
192        if (inputFilePath.isOutsideConnected()) {
193            if (inputFilePath.hasToken(0)) {
194                String name = ((StringToken) inputFilePath.get(0))
195                        .stringValue();
196
197                // Using setExpression() rather than setToken() allows
198                // the string to refer to variables defined in the
199                // scope of this actor.
200                inputFileParameter.setExpression(name);
201                
202            }
203        }
204        inFilePath = inputFileParameter.getExpression();
205        
206        if (outputFilePath.isOutsideConnected()) {
207            if (outputFilePath.hasToken(0)) {
208                String name = ((StringToken) outputFilePath.get(0))
209                        .stringValue();
210
211                // Using setExpression() rather than setToken() allows
212                // the string to refer to variables defined in the
213                // scope of this actor.
214                outputFileParameter.setExpression(name);
215                
216            }
217        }
218        outFilePath = outputFileParameter.getExpression();
219        
220        if (inputPortType.isOutsideConnected()) {
221                if (inputPortType.hasToken(0)) {
222                type = ((StringToken) inputPortType.get(0)).stringValue();
223                if(type.equalsIgnoreCase(SANGER)){
224                        dropDownValue.setExpression(SANGER);
225                }else if(type.equalsIgnoreCase(ILLUMINA)){
226                        dropDownValue.setExpression(ILLUMINA);
227                }else if(type.equalsIgnoreCase(SOLEXA)){
228                        dropDownValue.setExpression(SOLEXA);
229                }
230                }
231        }
232        
233        if(ServiceUtils.checkEmptyString(dropDownValue.getExpression())){
234                dropDownValue.setExpression(SANGER);
235                message.append("NO SELECTION MADE FOR TYPE: DEFAULT VALUE IS SET TO SANGER");
236        }else{
237                message.append("SELECTION MADE BY USER FOR TYPE: ").append(dropDownValue.getExpression());
238        }
239        message.append(ServiceUtils.LINESEP);       
240        
241        type = dropDownValue.getExpression();
242        InputStream insrdr = null;
243        try{
244        
245                if(inFilePath.endsWith(".gz")){
246                        insrdr = new GZIPInputStream(new FileInputStream(inFilePath),DEFAULT_BUFFER_SIZE);
247                }else{
248                        insrdr = new FileInputStream(inFilePath);
249                }
250                message.append("Input file: " + inFilePath + ServiceUtils.LINESEP);
251       
252                FastqReader qReader = null;
253            
254                if(type.equals("Illumuna")){
255                        qReader = new IlluminaFastqReader();
256                }else if(type.equals("Solexa")){
257                        qReader = new SolexaFastqReader();
258                }else{
259                        qReader = new SangerFastqReader();
260                }
261            
262                if(ServiceUtils.checkEmptyString(outFilePath)){
263                        String homeDir = System.getProperty("user.home");
264                        String fName = inFilePath.replaceAll("^.*" + File.separator, "").split("[.]")[0];
265                        outFilePath = homeDir + File.separator + fName + ".fasta";
266                }
267                message.append("Output file: " + outFilePath + ServiceUtils.LINESEP);
268                
269                FileOutputStream outputFasta = new FileOutputStream(outFilePath);
270
271                int numOfSequences = 0;
272                for (Fastq fastq : qReader.read(insrdr)) {
273                        numOfSequences++;
274                        String sequence = fastq.getSequence();
275                        SimpleNamespace ns = new SimpleNamespace(nmSpace);
276                        RichSequence richSequence = createRichSequence(ns, fastq.getDescription(), sequence, DNATools.getDNA());
277                        RichSequence.IOTools.writeFasta(outputFasta, richSequence, ns);
278                }
279                message.append("Total # of sequences in a file: " + numOfSequences + ServiceUtils.LINESEP);
280                monitor = true;
281        }catch(FileNotFoundException fnfe){
282                fnfe.printStackTrace();
283                log.debug("Input file does not exist.\n");
284                //message.append("Input file does not exist.\n");
285                        throw new IllegalArgumentException("Input file does not exist!");
286                }catch(IOException ioe){
287                        ioe.printStackTrace();
288                        throw new IllegalArgumentException("IOException thrown!");
289                }catch(BioException bioe){
290                        bioe.printStackTrace();
291                        throw new IllegalArgumentException("BioException thrown!");
292                }
293            System.out.println("DONE");
294            
295            outputPortMessage.send(0, new StringToken(String.valueOf(message.toString())));
296        outputPortStatus.send(0, new BooleanToken(monitor));
297        
298        log.debug("MESSAGE: " + message.toString());
299        message = null;
300    }   
301
302}