001/** 002 * '$Author: barseghian $' 003 * '$Date: 2013-01-29 22:40:34 +0000 (Tue, 29 Jan 2013) $' 004 * '$Revision: 31387 $' 005 * 006 * For Details: 007 * http://www.kepler-project.org 008 * 009 * Copyright (c) 2009-2010 The Regents of the 010 * University of California. All rights reserved. Permission is hereby granted, 011 * without written agreement and without license or royalty fees, to use, copy, 012 * modify, and distribute this software and its documentation for any purpose, 013 * provided that the above copyright notice and the following two paragraphs 014 * appear in all copies of this software. IN NO EVENT SHALL THE UNIVERSITY OF 015 * CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, 016 * OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS 017 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE 018 * POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY 019 * DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 020 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 021 * SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 022 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 023 * ENHANCEMENTS, OR MODIFICATIONS. 024 */ 025 026package org.kepler.util; 027 028import java.io.File; 029import java.io.IOException; 030import java.io.InputStream; 031import java.util.ArrayList; 032import java.util.HashMap; 033import java.util.Iterator; 034import java.util.LinkedHashMap; 035import java.util.List; 036import java.util.Map; 037 038import org.apache.commons.logging.Log; 039import org.apache.commons.logging.LogFactory; 040import org.ecoinformatics.ecogrid.queryservice.resultset.ResultsetType; 041import org.ecoinformatics.ecogrid.queryservice.resultset.ResultsetTypeRecord; 042import org.ecoinformatics.ecogrid.queryservice.resultset.ResultsetTypeRecordReturnField; 043import org.ecoinformatics.ecogrid.queryservice.resultset.ResultsetTypeResultsetMetadataRecordStructureReturnField; 044import org.kepler.authentication.AuthenticationException; 045import org.kepler.configuration.ConfigurationProperty; 046import org.kepler.kar.karxml.KarXml.KarEntry; 047import org.kepler.objectmanager.lsid.KeplerLSID; 048import org.kepler.objectmanager.repository.EcogridRepository; 049import org.kepler.objectmanager.repository.EcogridRepositoryResults; 050import org.kepler.objectmanager.repository.KARDownloader; 051import org.kepler.objectmanager.repository.RepositoryException; 052 053import ptolemy.kernel.util.NamedObj; 054import ptolemy.moml.MoMLParser; 055 056public class ProvenanceStore { 057 058 private static final Log log = LogFactory.getLog(ProvenanceStore.class 059 .getName()); 060 061 //call the defaultSettings provenance store "Local" 062 private static String LOCAL_PROVENANCE_NAME = "Local"; 063 private String _type = null; 064 public static final String ECOGRID_REPOSITORY = "EcogridRepository"; 065 // TODO better name: 066 public static final String PROVENANCE_REPOSITORY = "ProvenanceRepository"; 067 068 private ConfigurationProperty _provenanceConfigProp = null; 069 070 // for EcogridRepository provenance store 071 private EcogridRepository _ecogridRepository = null; 072 private RemoteStoreRunsTracker _remoteStoreRunsTracker = null; 073 074 075 public ProvenanceStore(ConfigurationProperty configProp) { 076 _provenanceConfigProp = configProp; 077 _type = PROVENANCE_REPOSITORY; 078 log.debug("ProvenanceStore constructor. type:" + _type + " for " 079 + "" + _provenanceConfigProp.getName()); 080 } 081 082 public ProvenanceStore(ConfigurationProperty configProp, 083 EcogridRepository ecoRepo) { 084 _provenanceConfigProp = configProp; 085 _type = ECOGRID_REPOSITORY; 086 _ecogridRepository = ecoRepo; 087 _remoteStoreRunsTracker = new RemoteStoreRunsTracker(ecoRepo.getName()); 088 089 log.debug("ProvenanceStore constructor. type:" + _type + " for " 090 + "" + _provenanceConfigProp.getName()); 091 } 092 093 public String getType() { 094 return _type; 095 } 096 097 public boolean isRemoteKarStore(){ 098 return _type.equals(ECOGRID_REPOSITORY); 099 } 100 101 public String getName() { 102 if (_type.equals(PROVENANCE_REPOSITORY)) { 103 if (_provenanceConfigProp.getName().equals("defaultSettings")){ 104 return LOCAL_PROVENANCE_NAME; 105 } 106 return _provenanceConfigProp.getName(); 107 } else { 108 return _ecogridRepository.getName(); 109 } 110 } 111 112 public ConfigurationProperty getProvenanceConfigurationProperty() { 113 return _provenanceConfigProp; 114 } 115 116 public synchronized RemoteStoreRunsTracker getRemoteStoreRunsTracker() { 117 return _remoteStoreRunsTracker; 118 } 119 120 // null for ProvenanceRepository stores 121 public EcogridRepository getEcogridRepository() { 122 return _ecogridRepository; 123 } 124 125 /** 126 * Download all the KARs that contain a WorkflowRun 127 * @deprecated works fine, but probably better to download 128 * KARs using the info found in the tracker file 129 * queries. 130 * @throws AuthenticationException 131 */ 132 public void downloadAllRunKARs(boolean authenticate) throws AuthenticationException { 133 134 KARDownloader karDownloader = KARDownloader.getInstance(); 135 EcogridRepository ecoRepo = getEcogridRepository(); 136 137 try { 138 139 ResultsetType rst = ecoRepo 140 .arbitrarySearch(WorkflowRunUtil.buildRunKARQueryDoc(), authenticate); 141 142 if (rst == null || rst.getRecord() == null){ 143 return; 144 } 145 146 HashMap<String, String> metadataMap = new HashMap<String, String>(); 147 ResultsetTypeRecord[] records = rst.getRecord(); 148 ResultsetTypeResultsetMetadataRecordStructureReturnField[] rtrmrsrf = 149 rst.getResultsetMetadata().getRecordStructure().getReturnField(); 150 151 for (int i=0; i<rtrmrsrf.length; i++){ 152 metadataMap.put(rtrmrsrf[i].getId(), rtrmrsrf[i].getName()); 153 } 154 log.debug("there are " + records.length + " records"); 155 156 for (int i=0; i<records.length; i++){ 157 ResultsetTypeRecord record = records[i]; 158 ResultsetTypeRecordReturnField[] fields = record.getReturnField(); 159 String karLSID = null; 160 String karFileSize = null; 161 String karFileName = null; 162 if (fields != null) { 163 for (int j = 0; j < fields.length; j++) { 164 String fieldId = fields[j].getId(); 165 String value = fields[j].get_value(); 166 167 if (metadataMap.get(fieldId).equals(WorkflowRunUtil.KARLSID)){ 168 karLSID = value; 169 } 170 else if (metadataMap.get(fieldId).equals(WorkflowRunUtil.KARFILESIZE)){ 171 karFileSize = value; 172 } 173 else if (metadataMap.get(fieldId).equals(WorkflowRunUtil.KARFILENAME)){ 174 karFileName = value; 175 } 176 if (karLSID != null && karFileSize != null && karFileName != null 177 && (j == fields.length-1)){ 178 Long kSize = new Long(karFileSize); 179 180 karDownloader.setKarName(karFileName); 181 // download! 182 File downloadedKar = karDownloader.download( 183 karLSID, kSize, _ecogridRepository.getName(), authenticate); 184 log.debug("downloaded "+downloadedKar.toString()); 185 186 karLSID = null; 187 karFileSize = null; 188 karFileName = null; 189 } 190 } 191 } 192 } 193 } catch (RepositoryException e) { 194 // TODO Auto-generated catch block 195 e.printStackTrace(); 196 } 197 } 198 199 /** 200 * Download all components of WorkflowRuns (and KAR attributes necessary to download the KAR) 201 * found within KARs on the Ecogrid into WorkflowRunBuilders, from which you may attempt to 202 * construct WorkflowRuns. 203 * 204 * @param authenticate 205 * @return list of WorkflowRunBuilders, or null. 206 * @throws AuthenticationException 207 */ 208 public List<WorkflowRunBuilder> downloadAllWorkflowRuns(boolean authenticate) throws AuthenticationException { 209 210 ArrayList<WorkflowRunBuilder> workflowRunBuilders = new ArrayList<WorkflowRunBuilder>(); 211 EcogridRepository ecoRepo = getEcogridRepository(); 212 ResultsetType resultsetType = null; 213 214 try { 215 216 resultsetType = ecoRepo.arbitrarySearch(WorkflowRunUtil 217 .buildWorkflowRunPiecesQueryDoc(), authenticate); 218 if (resultsetType == null || resultsetType.getRecord() == null){ 219 log.debug("resultSetType or resultSetType.getRecord() is null"); 220 } 221 ResultsetTypeRecord[] resultsetTypeRecords = resultsetType.getRecord(); 222 ResultsetTypeResultsetMetadataRecordStructureReturnField[] rtrmrsrf = resultsetType 223 .getResultsetMetadata().getRecordStructure().getReturnField(); 224 225 HashMap<String, String> metadataMap = new HashMap<String, String>(); 226 for (int i = 0; i < rtrmrsrf.length; i++) { 227 String id = rtrmrsrf[i].getId(); 228 String name = rtrmrsrf[i].getName(); 229 log.debug("metadataMap.put(id:" + id + ", name:" + name + ")"); 230 metadataMap.put(id, name); 231 } 232 233 log.debug("there are " + resultsetTypeRecords.length + " resultsetTypeRecords"); 234 235 //FIXME remove need for this 236 int errorIdHack = 0; 237 238 for (int i = 0; i < resultsetTypeRecords.length; i++) { 239 WorkflowRunBuilder workflowRunBuilder = new WorkflowRunBuilder(); 240 241 ResultsetTypeRecord resultsetTypeRecord = resultsetTypeRecords[i]; 242 ResultsetTypeRecordReturnField[] resultsetTypeRecordReturnFields = resultsetTypeRecord 243 .getReturnField(); 244 245 if (resultsetTypeRecordReturnFields != null) { 246 247 for (int j = 0; j < resultsetTypeRecordReturnFields.length; j++) { 248 String resultsetTypeRecordReturnFieldId = resultsetTypeRecordReturnFields[j].getId(); 249 String resultsetTypeRecordReturnFieldValue = resultsetTypeRecordReturnFields[j].get_value(); 250 log.debug("resultsetTypeRecordReturnFieldId:" + resultsetTypeRecordReturnFieldId); 251 log.debug("resultsetTypeRecordReturnFieldValue:" + resultsetTypeRecordReturnFieldValue); 252 String returnFieldStatement = metadataMap.get(resultsetTypeRecordReturnFieldId); 253 254 if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_ANNOTATION)){ 255 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.ANNOTATION.toString(), resultsetTypeRecordReturnFieldValue, false); 256 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_DERIVEDFROM)){ 257 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.REFERRALLIST.toString(), resultsetTypeRecordReturnFieldValue, false); 258 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_DURATION)){ 259 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.DURATION.toString(), resultsetTypeRecordReturnFieldValue, false); 260 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_ENTITYID)){ 261 //this seems unneeded, something takes care of it down the line, presumably using execLSID 262 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_ERRORMESSAGES)){ 263 // we can ignore this parent property and just add children, 264 // the parent property will get added for us. 265 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_EXECID)){ 266 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.EXECID.toString(), resultsetTypeRecordReturnFieldValue, false); 267 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_EXECLSID)){ 268 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.EXECLSID.toString(), resultsetTypeRecordReturnFieldValue, false); 269 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_HOSTID)){ 270 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.HOSTID.toString(), resultsetTypeRecordReturnFieldValue, false); 271 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_MODULEDEPENDENCIES)){ 272 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.MODULEDEPENDENCIES.toString(), resultsetTypeRecordReturnFieldValue, false); 273 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_TAG)){ 274 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.SEMANTICTYPE.toString(), resultsetTypeRecordReturnFieldValue, false); 275 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_STARTTIME)){ 276 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.STARTTIME.toString(), resultsetTypeRecordReturnFieldValue, false); 277 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_SUBPROPERTY_ERROR)){ 278 // 279 //FIXME hardcoded error id. Even with the real error ids, they'll collide. Probably shouldn't even be kept in workflowrun xml. 280 // 281 errorIdHack++; 282 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.ERROR.toString() + errorIdHack, resultsetTypeRecordReturnFieldValue, true); 283 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_SUBPROPERTY_WORKFLOW_TAG)){ 284 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.SEMANTICTYPE.toString(), resultsetTypeRecordReturnFieldValue, true); 285 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_TYPE)){ 286 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.TYPE.toString(), resultsetTypeRecordReturnFieldValue, false); 287 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_USER)){ 288 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.USER.toString(), resultsetTypeRecordReturnFieldValue, false); 289 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_WORKFLOWLSID)){ 290 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.WORKFLOWLSID.toString(), resultsetTypeRecordReturnFieldValue, false); 291 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_WORKFLOWNAME)){ 292 workflowRunBuilder.addRunProperty(WorkflowRun.RunAttribute.WORKFLOWNAME.toString(), resultsetTypeRecordReturnFieldValue, false); 293 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARENTRY_WORKFLOWRUN_WORKFLOWTAGS)){ 294 // we can ignore this parent property and just add children, 295 // the parent property will get added for us. 296 } 297 else if (returnFieldStatement.equals(WorkflowRunUtil.KARFILENAME)){ 298 workflowRunBuilder.setKARFileName(resultsetTypeRecordReturnFieldValue); 299 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARLSID)){ 300 workflowRunBuilder.setKARLSID(resultsetTypeRecordReturnFieldValue); 301 }else if (returnFieldStatement.equals(WorkflowRunUtil.KARFILESIZE)){ 302 workflowRunBuilder.setKARFileSize(resultsetTypeRecordReturnFieldValue); 303 } 304 305 } 306 } 307 308 workflowRunBuilders.add(workflowRunBuilder); 309 } 310 311 312 } 313 catch(Exception e){ 314 315 } 316 return workflowRunBuilders; 317 } 318 319 /** 320 * Download all WorkflowRuns from this ProvenanceStore. 321 * ProvenanceStore must have an _ecogridRepository set. 322 * @deprecated use downloadAllWorkflowRuns instead. This method is less fragile 323 * but requires downloading all karXmls, which is extremely inefficient. 324 * @return 325 * @throws AuthenticationException 326 */ 327 public Map<KeplerLSID, WorkflowRun> downloadAllWorkflowRunsParsingKarXmls(boolean authenticate) throws AuthenticationException { 328 Map<KeplerLSID, WorkflowRun> workflowRuns = new LinkedHashMap<KeplerLSID, WorkflowRun>(); 329 330 EcogridRepository ecoRepo = getEcogridRepository(); 331 332 if (ecoRepo == null){ 333 log.error("Can't run an ecogrid query for a non-ecogrid ProvenanceStore"); 334 return workflowRuns; 335 } 336 337 Iterator<EcogridRepositoryResults> ecoReopResultItr = null; 338 try { 339 ecoReopResultItr = ecoRepo 340 .advancedSearch(WorkflowRunUtil.buildWorkflowRunQueryDocO(), authenticate); 341 } catch (RepositoryException e) { 342 // TODO Auto-generated catch block 343 e.printStackTrace(); 344 } 345 if (ecoReopResultItr != null) { 346 while (ecoReopResultItr.hasNext()) { 347 EcogridRepositoryResults ecoRepoResults = ecoReopResultItr 348 .next(); 349 KarEntry karEntry = ecoRepoResults.getKarEntry(); 350 if (karEntry.getType().equals(WorkflowRun.class.getName())) { 351 // System.out.println("ProvenanceStore getAllWorkflowRuns looking thru WORKFLOWRUNTYPE results:"+ 352 // ecoRepoResults.getName()); 353 InputStream runStream = karEntry.asInputStream(); 354 // usually uses WorkflowRunEntryHandler version of 355 // convertStreamToString: 356 String runString; 357 try { 358 runString = FileUtil.convertStreamToString(runStream); 359 MoMLParser parser = new MoMLParser(); 360 NamedObj no = parser.parse(runString); 361 if (no instanceof WorkflowRun) { 362 WorkflowRun run = (WorkflowRun) no; 363 // System.out.println("ProvenanceStore getAllWorkflowRuns successfully created a " 364 // +"WorkflowRun:" + run.getName() +" with execLSID:" + run.getExecLSID()); 365 if (workflowRuns.containsKey(run.getExecLSID())) { 366 log.warn("Got a duplicate run:" 367 + run.getExecLSID()); 368 } 369 workflowRuns.put(run.getExecLSID(), run); 370 } 371 } catch (IOException e) { 372 // TODO Auto-generated catch block 373 e.printStackTrace(); 374 } catch (Exception e) { 375 // TODO Auto-generated catch block 376 e.printStackTrace(); 377 } 378 } 379 } 380 } 381 382 return workflowRuns; 383 } 384 385 386 387 388 389 390}