001/* 002 * Copyright (c) 2009-2010 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: welker $' 006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 007 * '$Revision: 24234 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.camera.service; 031 032import java.io.BufferedReader; 033import java.io.FileReader; 034import java.math.BigDecimal; 035import java.util.ArrayList; 036import java.util.HashMap; 037import java.util.List; 038import java.util.Map; 039 040import ptolemy.actor.TypedAtomicActor; 041import ptolemy.actor.TypedIOPort; 042import ptolemy.data.ObjectToken; 043import ptolemy.data.StringToken; 044import ptolemy.data.Token; 045import ptolemy.data.type.BaseType; 046import ptolemy.kernel.CompositeEntity; 047import ptolemy.kernel.util.IllegalActionException; 048import ptolemy.kernel.util.NameDuplicationException; 049 050////////////////////////////////////////////////////////////////////////// 051////AverageGenomeSizeParserV2 052/** 053 * Calculates the Average Genome Size for a meta genome sample. 054 * The dependencies for the calculation are e value cutoff 055 * and the sample database of phages. 056 * 057 * @author Madhu, SDSC madhu@sdsc.edu 058 * @version $Id 059 */ 060public class AverageGenomeSizeParserV2 extends TypedAtomicActor { 061 private static final long serialVersionUID = 1L; 062 private static final double LOWEST_EVALUE = 1.0E-99; 063 064 /** Input port for the File map */ 065 public TypedIOPort inFileMap; 066 067 /** Outputs the average genome size */ 068 public TypedIOPort outAverageGenomeSize; 069 070 /** Input port with ECutOff limit */ 071 public TypedIOPort inputECutOffPort; 072 073 /** Input port with Blast Results */ 074 public TypedIOPort inBlastResultsPort; 075 076 /** Input port for Database */ 077 public TypedIOPort inDatabasePort; 078 079 public AverageGenomeSizeParserV2(CompositeEntity container, String name) 080 throws NameDuplicationException, IllegalActionException { 081 082 super(container, name); 083 inFileMap = new TypedIOPort(this, "inFileMap", true, false); 084 085 outAverageGenomeSize = new TypedIOPort(this, "outAverageGenomeSize", false, true); 086 outAverageGenomeSize.setTypeEquals(BaseType.STRING); 087 inputECutOffPort = new TypedIOPort(this, "inputECutOffPort", true, false); 088 inBlastResultsPort = new TypedIOPort(this, "inBlastResultsPort", true, false); 089 inDatabasePort = new TypedIOPort(this, "inDatabasePort", true, false); 090 } 091 092 /* (non-Javadoc) 093 * @see ptolemy.actor.AtomicActor#fire() 094 */ 095 @Override 096 public void fire() throws IllegalActionException { 097 super.fire(); 098 Double genomeSize = 0.0; 099 Map<String, String> seqFileMap = null; 100// Map<String,List<NameValuePair>> blastWtsMap = null; 101 Map<String,List<NameValuePair>> IDeValuesMap = null; 102 Map<String, String> databaseMap = null; 103 104 String inputP = null; 105 String eCutOff = null; 106 double eCutOffValue = 1.0E-5; 107 String dbLocation = null; 108 109 if(inputECutOffPort.getWidth() > 0){ 110 Token tkE = inputECutOffPort.get(0); 111 if(tkE != null){ 112 eCutOff=((StringToken)tkE).stringValue().trim(); 113 } 114 eCutOffValue = Double.parseDouble(eCutOff.toUpperCase()); 115 System.out.println("CutOffEValue: " + eCutOffValue); 116 } 117 118 if(inDatabasePort.getWidth() > 0){ 119 Token dBase = inDatabasePort.get(0); 120 if(dBase != null){ 121 dbLocation=((StringToken)dBase).stringValue().trim(); 122 } 123 System.out.println("DatabaseLocation: " + dbLocation); 124 } 125 126 databaseMap = getDatabaseMap(dbLocation); 127 128 if(inBlastResultsPort.getWidth() > 0){ 129 Token tkP = inBlastResultsPort.get(0); 130 if(tkP != null){ 131 inputP=((StringToken)tkP).stringValue().trim(); 132 } 133 134 IDeValuesMap = getGenomeIDeValuesMap(inputP, eCutOffValue); 135 for(String key : IDeValuesMap.keySet()){ 136 double ewtTotal = 0.0; 137 List<NameValuePair> pairList = IDeValuesMap.get(key); 138 for(NameValuePair pair: pairList){ 139 double wt = (1.0/Double.parseDouble(pair.getValue())); 140 System.out.println("WT for each one is: " + wt); 141 ewtTotal = ewtTotal + wt; 142 } 143 144 for(NameValuePair pair: pairList){ 145 double wt = (1.0/Double.parseDouble(pair.getValue())); 146 double abundance = wt / ewtTotal; 147 System.out.println("Abundance in ComputeEvalWeights: " + abundance); 148 pair.setValue(String.valueOf(abundance)); 149 } 150 } 151 } 152 153 154 Map<String, Double> compoMap = new HashMap<String, Double>(); 155 156 Token tkFMap = inFileMap.get(0); 157 158 if(tkFMap != null){ 159 seqFileMap= (Map<String, String>)((ObjectToken)tkFMap).getValue(); 160 } 161 162 double grandTotal = 0.0; 163 164 for(String key : IDeValuesMap.keySet()){ 165 List<NameValuePair> pairList = IDeValuesMap.get(key); 166 int querySize = 0; 167 String seqSize = null; 168 if((seqSize = seqFileMap.get(key)) != null){ 169 querySize = Integer.parseInt(seqSize); 170 } 171 172 for(NameValuePair pair: pairList){ 173 174 double abundance = Double.parseDouble(pair.getValue()); 175 int targetSize = 0; 176 String genSize = null; 177 if((genSize = databaseMap.get(pair.getName())) != null){ 178 targetSize = Integer.parseInt(genSize); 179 } 180 if(querySize > 0 && targetSize > 0){ 181 abundance = abundance /(querySize * targetSize); 182 System.out.println("Abundance after division: " + abundance); 183 grandTotal = grandTotal + abundance; 184 if(compoMap.containsKey(pair.getName())){ 185 double tmp = compoMap.get(pair.getName()); 186 compoMap.put(pair.getName(), abundance + tmp); 187 }else{ 188 compoMap.put(pair.getName(), abundance); 189 } 190 } 191 192 } 193 } 194 195 System.out.println("Grand Total is: " + grandTotal); 196 System.out.println("Element is CompoMap: " + compoMap.size()); 197 double size = 0.0; 198 for(String key: compoMap.keySet()){ 199 double tmp = compoMap.get(key); 200 tmp = tmp / grandTotal; 201 System.out.println("TMP Values: " + tmp); 202 203 int val = Integer.parseInt(databaseMap.get(key)); 204 size = size + (tmp * val); 205 206 compoMap.put(key, tmp ); 207 208 } 209 genomeSize = size; 210 211 int decimalPlace = 1; 212 BigDecimal bd = new BigDecimal( Double.toString(genomeSize) ); 213 bd = bd.setScale( decimalPlace, BigDecimal.ROUND_HALF_UP ); 214 System.out.println( "Up to first decimal size is: " + bd.doubleValue()); 215 216 System.out.println("Size is: " + size); 217 System.out.println("Element in CompoMap: " + compoMap.size()); 218 // outAverageGenomeSize.send(0, new StringToken("GenomeSize: " + genomeSize + " bp")); 219 // outAverageGenomeSize.send(0, new StringToken(String.valueOf(genomeSize))); 220 outAverageGenomeSize.send(0, new StringToken(String.valueOf(bd.doubleValue()))); 221 } 222 223 /** 224 * @param databaseLocation is the location of the database file. 225 * 226 * @return Map<String, String> for the Genomes database 227 * key in the map consists of genome symbol and value consists of genome length. 228 */ 229 private Map<String, String> getDatabaseMap(String databaseLocation){ 230 231 Map<String,String> map = new HashMap<String, String>(); 232// String genomeDBName = "C:"+ServiceUtils.FILESEP+"Camera"+ServiceUtils.FILESEP+"all_viruses_nt.fa"; 233 234 String valueRead = null; 235 String paramName = null; 236 String paramValue = null; 237 String paramNamePlaceHolder = null; 238 StringBuilder genome = null; 239 240 try{ 241 BufferedReader br = new BufferedReader(new FileReader(databaseLocation)); 242 while((valueRead = br.readLine())!= null){ 243 if(!ServiceUtils.checkEmptyString(valueRead)){ 244 String valueReadTrim = valueRead.trim(); 245 if(valueReadTrim.startsWith(">")){ 246 int start = valueReadTrim.indexOf(">"); 247 int end = valueReadTrim.indexOf(ServiceUtils.ANEMPTYSPACE); 248 249 if(end < 0){ 250 end = valueReadTrim.length(); 251 } 252 253// System.out.print("Start: "+ start + " End: " + end); 254 paramName = valueReadTrim.substring(start + 1, end); 255// System.out.println(ServiceUtils.TAB + paramName); 256 if(genome != null){ 257 258 paramValue = String.valueOf(genome.toString().length()); 259 map.put(paramNamePlaceHolder, paramValue); 260 261 if(genome.toString().length() > 200000){ 262 System.out.println("More than 200000 bases " + paramNamePlaceHolder + ServiceUtils.TAB + paramValue); 263 } 264 } 265 genome = new StringBuilder(); 266 }else{ 267 paramNamePlaceHolder = paramName; 268 genome.append(valueReadTrim); 269 270 } 271 } 272 } 273 274 paramValue = String.valueOf(genome.toString().length()); 275 map.put(paramNamePlaceHolder, paramValue); 276// System.out.println(paramNamePlaceHolder + ServiceUtils.TAB + paramValue); 277// System.out.println("# of genomes in file: " + map.size()); 278 279 }catch(Exception e){ 280 e.printStackTrace(); 281 } 282 return map; 283 } 284 285 /** 286 * It reads the blast results and e value cut off provided 287 * by user. 288 * @param blastResults 289 * @param cutOff E-value cut off 290 * @return Map consisting of sample ID as key and value is 291 * a List consisting of genome name and e value. We have a list 292 * because a particular sequence from sample can get multiple hits 293 * against a genome or genomes. 294 */ 295 private Map<String,List<NameValuePair>> getGenomeIDeValuesMap(String blastResults, double cutOff){ 296 String[] blastOutputArray = blastResults.split(ServiceUtils.LINESEP); 297 Map<String,List<NameValuePair>> map = new HashMap<String, List<NameValuePair>>(); 298 299 for(String lineRead: blastOutputArray){ 300 if(!(ServiceUtils.checkEmptyString(lineRead))){ 301 302 String[] strArray = lineRead.trim().split(ServiceUtils.TAB); 303 System.out.println(strArray[0] + ServiceUtils.TAB + strArray[1] + ServiceUtils.TAB + strArray[10]); 304 305 if(strArray.length >= 10 && Double.parseDouble(strArray[10].toUpperCase()) <= cutOff ){ 306 Double testingEValue = Double.parseDouble(strArray[10].toUpperCase()); 307 if (testingEValue < LOWEST_EVALUE){ 308 testingEValue = LOWEST_EVALUE; 309 } 310 if(map.containsKey(strArray[0])){ 311 List<NameValuePair> p = map.get(strArray[0]); 312 p.add(getNMVLPair(strArray[1], String.valueOf(testingEValue))); 313 }else{ 314 List<NameValuePair> list = new ArrayList<NameValuePair>(); 315 list.add(getNMVLPair(strArray[1], String.valueOf(testingEValue))); 316 map.put(strArray[0], list); 317 } 318 } 319 } 320 } 321 322 323 for(String test :map.keySet()){ 324 System.out.println("Key: " + test); 325 for(NameValuePair nmvlPair: map.get(test)){ 326 System.out.println("Name " + nmvlPair.getName() + " Value: " + nmvlPair.getValue()); 327 } 328 } 329 330 return map; 331 332 } 333 334 /** 335 * 336 * @param name 337 * @param value 338 * @return NameValuePair object 339 */ 340 NameValuePair getNMVLPair(String name, String value){ 341 return new NameValuePair(name, value); 342 } 343 344 345}