001/*
002 * Copyright (c) 2009-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: welker $'
006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 
007 * '$Revision: 24234 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.camera.service;
031
032import java.io.BufferedReader;
033import java.io.FileReader;
034import java.math.BigDecimal;
035import java.util.ArrayList;
036import java.util.HashMap;
037import java.util.List;
038import java.util.Map;
039
040import ptolemy.actor.TypedAtomicActor;
041import ptolemy.actor.TypedIOPort;
042import ptolemy.data.ObjectToken;
043import ptolemy.data.StringToken;
044import ptolemy.data.Token;
045import ptolemy.data.type.BaseType;
046import ptolemy.kernel.CompositeEntity;
047import ptolemy.kernel.util.IllegalActionException;
048import ptolemy.kernel.util.NameDuplicationException;
049
050//////////////////////////////////////////////////////////////////////////
051////AverageGenomeSizeParserV2
052/**
053 * Calculates the Average Genome Size for a meta genome sample.
054 * The dependencies for the calculation are e value cutoff
055 * and the sample database of phages.
056 * 
057 * @author Madhu, SDSC  madhu@sdsc.edu
058 * @version $Id
059 */
060public class AverageGenomeSizeParserV2 extends TypedAtomicActor {
061        private static final long serialVersionUID = 1L;
062        private static final double LOWEST_EVALUE = 1.0E-99;
063        
064        /** Input port for the File map */
065        public TypedIOPort inFileMap;
066        
067        /** Outputs the average genome size */
068        public TypedIOPort outAverageGenomeSize;
069        
070        /** Input port with ECutOff limit */
071        public TypedIOPort      inputECutOffPort;
072        
073        /** Input port with Blast Results */
074        public TypedIOPort inBlastResultsPort;
075        
076        /** Input port for Database */
077        public TypedIOPort inDatabasePort;
078        
079        public AverageGenomeSizeParserV2(CompositeEntity container,  String name)
080        throws NameDuplicationException, IllegalActionException  {
081
082                super(container, name);
083                inFileMap = new TypedIOPort(this, "inFileMap", true, false);
084                
085                outAverageGenomeSize = new TypedIOPort(this, "outAverageGenomeSize", false, true);
086                outAverageGenomeSize.setTypeEquals(BaseType.STRING);
087                inputECutOffPort =  new TypedIOPort(this, "inputECutOffPort", true, false);
088                inBlastResultsPort =  new TypedIOPort(this, "inBlastResultsPort", true, false);
089                inDatabasePort =  new TypedIOPort(this, "inDatabasePort", true, false);
090        }
091        
092        /* (non-Javadoc)
093         * @see ptolemy.actor.AtomicActor#fire()
094         */
095        @Override
096        public void fire() throws IllegalActionException {          
097                super.fire();
098                Double genomeSize = 0.0;
099                Map<String, String> seqFileMap = null;
100//              Map<String,List<NameValuePair>> blastWtsMap = null;
101                Map<String,List<NameValuePair>> IDeValuesMap = null;
102                Map<String, String> databaseMap = null; 
103                
104                String  inputP  = null;
105                String eCutOff = null;
106                double eCutOffValue = 1.0E-5;
107                String dbLocation = null;
108        
109                if(inputECutOffPort.getWidth() > 0){
110                Token tkE = inputECutOffPort.get(0);
111                if(tkE != null){
112                        eCutOff=((StringToken)tkE).stringValue().trim();
113                }               
114                eCutOffValue = Double.parseDouble(eCutOff.toUpperCase());
115                System.out.println("CutOffEValue: " + eCutOffValue);
116            }
117                
118                if(inDatabasePort.getWidth() > 0){
119                Token dBase = inDatabasePort.get(0);
120                if(dBase != null){
121                        dbLocation=((StringToken)dBase).stringValue().trim();
122                }               
123                System.out.println("DatabaseLocation: " + dbLocation);
124            }
125                
126                databaseMap = getDatabaseMap(dbLocation);
127                
128                if(inBlastResultsPort.getWidth() > 0){
129                Token tkP = inBlastResultsPort.get(0);
130                if(tkP != null){
131                        inputP=((StringToken)tkP).stringValue().trim();
132                }
133                
134                IDeValuesMap = getGenomeIDeValuesMap(inputP, eCutOffValue);
135                for(String key : IDeValuesMap.keySet()){
136                        double ewtTotal = 0.0;
137                        List<NameValuePair> pairList = IDeValuesMap.get(key);
138                        for(NameValuePair pair: pairList){
139                                double wt =  (1.0/Double.parseDouble(pair.getValue()));
140                                System.out.println("WT for each one is: " + wt);
141                                ewtTotal = ewtTotal + wt;
142                        }
143                                        
144                        for(NameValuePair pair: pairList){
145                                double wt =  (1.0/Double.parseDouble(pair.getValue()));
146                                double abundance = wt / ewtTotal;
147                                System.out.println("Abundance in ComputeEvalWeights: " + abundance);
148                                pair.setValue(String.valueOf(abundance));
149                        }
150                    }
151            }
152                
153                
154                Map<String, Double> compoMap = new HashMap<String, Double>();
155                
156                Token tkFMap = inFileMap.get(0);
157        
158        if(tkFMap != null){
159                seqFileMap= (Map<String, String>)((ObjectToken)tkFMap).getValue();
160        }
161        
162        double grandTotal = 0.0;
163        
164        for(String key : IDeValuesMap.keySet()){
165                List<NameValuePair> pairList = IDeValuesMap.get(key);
166                int querySize = 0;
167                String seqSize = null;
168                if((seqSize = seqFileMap.get(key)) != null){
169                        querySize = Integer.parseInt(seqSize);
170                }
171                
172                for(NameValuePair pair: pairList){
173                        
174                        double abundance = Double.parseDouble(pair.getValue());
175                        int targetSize = 0;
176                        String genSize = null;
177                        if((genSize = databaseMap.get(pair.getName())) != null){
178                                targetSize = Integer.parseInt(genSize);
179                        }
180                        if(querySize > 0 && targetSize > 0){
181                                abundance = abundance /(querySize * targetSize);
182                                System.out.println("Abundance after division: " + abundance);
183                                grandTotal = grandTotal + abundance;
184                                if(compoMap.containsKey(pair.getName())){
185                                        double tmp = compoMap.get(pair.getName());
186                                        compoMap.put(pair.getName(), abundance + tmp);
187                                }else{
188                                        compoMap.put(pair.getName(), abundance);
189                                }
190                        }
191                        
192                }
193        }
194        
195        System.out.println("Grand Total is: " + grandTotal);
196        System.out.println("Element is CompoMap: " + compoMap.size());
197        double size = 0.0;
198        for(String key: compoMap.keySet()){
199                double tmp = compoMap.get(key);
200                tmp = tmp / grandTotal;
201                System.out.println("TMP Values: " + tmp);
202                
203                int val = Integer.parseInt(databaseMap.get(key));
204                size = size + (tmp * val);
205                
206                compoMap.put(key, tmp );                
207                
208        }
209        genomeSize = size;
210        
211        int decimalPlace = 1;
212        BigDecimal bd = new BigDecimal( Double.toString(genomeSize) );
213        bd = bd.setScale( decimalPlace, BigDecimal.ROUND_HALF_UP );
214        System.out.println( "Up to first decimal size is: " + bd.doubleValue());
215
216        System.out.println("Size is: " + size);
217        System.out.println("Element in CompoMap: " + compoMap.size());
218 //     outAverageGenomeSize.send(0, new StringToken("GenomeSize: " + genomeSize + " bp"));
219 //     outAverageGenomeSize.send(0, new StringToken(String.valueOf(genomeSize)));
220        outAverageGenomeSize.send(0, new StringToken(String.valueOf(bd.doubleValue())));
221        }
222        
223        /**
224         * @param databaseLocation is the location of the database file.
225         * 
226         * @return Map<String, String> for the Genomes database
227         *  key in the map consists of genome symbol and value consists of genome length.
228         */
229        private Map<String, String> getDatabaseMap(String databaseLocation){
230                
231                Map<String,String> map = new HashMap<String, String>();
232//              String genomeDBName = "C:"+ServiceUtils.FILESEP+"Camera"+ServiceUtils.FILESEP+"all_viruses_nt.fa";
233                        
234                String valueRead = null;
235                String paramName = null;
236                String paramValue = null;
237                String paramNamePlaceHolder = null;
238                StringBuilder genome = null;
239                
240                try{
241                        BufferedReader br = new BufferedReader(new FileReader(databaseLocation));
242                        while((valueRead = br.readLine())!= null){
243                                if(!ServiceUtils.checkEmptyString(valueRead)){
244                                        String valueReadTrim = valueRead.trim();
245                                        if(valueReadTrim.startsWith(">")){
246                                                int start = valueReadTrim.indexOf(">");
247                                                int end = valueReadTrim.indexOf(ServiceUtils.ANEMPTYSPACE);
248                                                
249                                                if(end < 0){
250                                                        end = valueReadTrim.length();
251                                                }
252                                                
253//                                              System.out.print("Start: "+ start + "  End: " + end);
254                                                paramName = valueReadTrim.substring(start + 1, end);
255//                                              System.out.println(ServiceUtils.TAB + paramName);       
256                                                if(genome != null){
257                                                        
258                                                        paramValue = String.valueOf(genome.toString().length());
259                                                        map.put(paramNamePlaceHolder, paramValue);
260                                                        
261                                                        if(genome.toString().length() > 200000){
262                                                                System.out.println("More than 200000 bases " + paramNamePlaceHolder + ServiceUtils.TAB + paramValue);
263                                                        }
264                                                }
265                                                genome = new StringBuilder();
266                                        }else{
267                                                paramNamePlaceHolder = paramName;
268                                                genome.append(valueReadTrim);
269                                        
270                                        }
271                                }
272                        }
273                        
274                        paramValue = String.valueOf(genome.toString().length());
275                        map.put(paramNamePlaceHolder, paramValue);
276//                      System.out.println(paramNamePlaceHolder + ServiceUtils.TAB + paramValue);
277//                      System.out.println("# of genomes in file: " + map.size());
278                        
279                }catch(Exception e){
280                        e.printStackTrace();
281                }
282                return map;
283        }
284        
285        /**
286         * It reads the blast results and e value cut off provided
287         * by user.
288         * @param blastResults
289         * @param cutOff E-value cut off
290         * @return Map consisting of sample ID as key and value is
291         * a List consisting of genome name and e value. We have a list 
292         * because a particular sequence from sample can get multiple hits
293         * against a genome or genomes.
294         */
295        private Map<String,List<NameValuePair>> getGenomeIDeValuesMap(String blastResults, double cutOff){
296                String[] blastOutputArray = blastResults.split(ServiceUtils.LINESEP);
297                Map<String,List<NameValuePair>> map = new HashMap<String, List<NameValuePair>>();
298                
299        for(String lineRead: blastOutputArray){ 
300                if(!(ServiceUtils.checkEmptyString(lineRead))){
301                        
302                        String[] strArray = lineRead.trim().split(ServiceUtils.TAB);
303                        System.out.println(strArray[0] + ServiceUtils.TAB + strArray[1] + ServiceUtils.TAB + strArray[10]);
304                                
305                        if(strArray.length >= 10 && Double.parseDouble(strArray[10].toUpperCase()) <= cutOff ){
306                                Double testingEValue = Double.parseDouble(strArray[10].toUpperCase());
307                                if (testingEValue < LOWEST_EVALUE){
308                                        testingEValue = LOWEST_EVALUE;
309                                }
310                                if(map.containsKey(strArray[0])){
311                                        List<NameValuePair> p = map.get(strArray[0]);
312                                        p.add(getNMVLPair(strArray[1], String.valueOf(testingEValue)));
313                                }else{
314                                        List<NameValuePair> list = new ArrayList<NameValuePair>();
315                                        list.add(getNMVLPair(strArray[1], String.valueOf(testingEValue)));
316                                        map.put(strArray[0], list);
317                                }
318                        }
319                }
320        }
321        
322        
323        for(String test :map.keySet()){
324                System.out.println("Key: " + test);
325                for(NameValuePair nmvlPair: map.get(test)){
326                        System.out.println("Name " + nmvlPair.getName() + " Value: " + nmvlPair.getValue());
327                }
328        }
329        
330        return map;
331        
332        }
333        
334        /**
335         * 
336         * @param name
337         * @param value
338         * @return NameValuePair object
339         */
340        NameValuePair getNMVLPair(String name, String value){
341                return new NameValuePair(name, value);
342        }
343
344
345}