001/* 002 * Copyright (c) 2003-2010 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: crawl $' 006 * '$Date: 2012-07-25 20:29:43 +0000 (Wed, 25 Jul 2012) $' 007 * '$Revision: 30284 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.kepler.moml; 031 032import java.io.ByteArrayOutputStream; 033import java.io.File; 034import java.io.FileInputStream; 035import java.io.IOException; 036import java.io.InputStream; 037import java.io.StringReader; 038 039import javax.xml.parsers.DocumentBuilder; 040import javax.xml.parsers.DocumentBuilderFactory; 041 042import org.apache.commons.logging.Log; 043import org.apache.commons.logging.LogFactory; 044import org.kepler.objectmanager.lsid.KeplerLSID; 045import org.w3c.dom.Document; 046import org.w3c.dom.Element; 047import org.w3c.dom.NamedNodeMap; 048import org.w3c.dom.Node; 049import org.w3c.dom.NodeList; 050import org.xml.sax.EntityResolver; 051import org.xml.sax.InputSource; 052import org.xml.sax.SAXException; 053 054import ptolemy.moml.MoMLParser; 055 056/** 057 * This class will contain methods for parsing out the Kepler Metadata from a 058 * MOML file. 059 * 060 * @author Aaron Schultz 061 */ 062public class KeplerMetadataExtractor { 063 064 private static final Log log = LogFactory 065 .getLog(KeplerMetadataExtractor.class.getName()); 066 private static final boolean isDebugging = log.isDebugEnabled(); 067 //private static final boolean isTracing = log.isTraceEnabled(); 068 069 public static KeplerActorMetadata extractActorMetadata(File momlFile) 070 throws Exception { 071 return extractActorMetadata(momlFile, true); 072 } 073 074 public static KeplerActorMetadata extractActorMetadata(File momlFile, boolean printError) 075 throws Exception { 076 FileInputStream fis = new FileInputStream(momlFile); 077 return extractActorMetadata(fis, printError); 078 } 079 080 /** Get the metadata from an input stream. 081 * @param actorStream the input stream 082 */ 083 public static KeplerActorMetadata extractActorMetadata( 084 InputStream actorStream) throws Exception { 085 return extractActorMetadata(actorStream, true); 086 } 087 088 /** Get the metadata from an input stream optionally printing output if a parsing error occurs. 089 * @param actorStream the input stream 090 * @param printError if true, print a stack trace and error message if a parsing error occurs. 091 */ 092 public static KeplerActorMetadata extractActorMetadata( 093 InputStream actorStream, boolean printError) throws Exception { 094 //if (isTracing) 095 //log.trace("ActorCacheObject(" + actorStream.getClass().getName() 096 //+ ")"); 097 098 KeplerActorMetadata kam = new KeplerActorMetadata(); 099 100 ByteArrayOutputStream byteout; 101 try { 102 // Copy 1024 bytes from actorStream to byteout 103 byteout = new ByteArrayOutputStream(); 104 byte[] b = new byte[1024]; 105 int numread = actorStream.read(b, 0, 1024); 106 while (numread != -1) { 107 byteout.write(b, 0, numread); 108 numread = actorStream.read(b, 0, 1024); 109 } 110 kam.setActorString(byteout.toString()); 111 112 // need to get actor name and id from the string 113 // thus build a DOM representation 114 String nameStr = null; 115 try { 116 //if (isTracing) log.trace(kam.getActorString()); 117 StringReader strR = new StringReader(kam.getActorString()); 118 119 InputSource xmlIn = new InputSource(strR); 120 DocumentBuilderFactory factory = DocumentBuilderFactory 121 .newInstance(); 122 DocumentBuilder builder = factory.newDocumentBuilder(); 123 builder.setEntityResolver(new EntityResolver() { 124 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 125 if (MOML_PUBLIC_ID_1.equals(publicId)) { 126 return new InputSource(MoMLParser.class.getResourceAsStream("MoML_1.dtd")); 127 } 128 else { 129 return null; 130 } 131 } 132 }); 133 134 // TODO 135 // this causes http://bugzilla.ecoinformatics.org/show_bug.cgi?id=4671 136 // when File => Save Archive w/ wf w/ actor w/ < in the name: 137 Document doc = builder.parse(xmlIn); 138 //if (isTracing) log.trace(doc.toString()); 139 140 Element rootNode = doc.getDocumentElement(); 141 kam.setRootName(rootNode.getNodeName()); 142 143 // Get the value of the name attribute of the root node 144 NamedNodeMap nnm = rootNode.getAttributes(); 145 Node namenode = nnm.getNamedItem("name"); 146 nameStr = namenode.getNodeValue(); 147 kam.setName(nameStr); 148 149 boolean emptyKeplerDocumentation = true; 150 boolean foundKeplerDocumentation = false; 151 boolean foundUserLevelDocumentation = false; 152 boolean foundAuthor = false; 153 154 // Cycle through the children of the root node 155 NodeList probNodes = rootNode.getChildNodes(); 156 for (int i = 0; i < probNodes.getLength(); i++) { 157 Node child = probNodes.item(i); 158 159 if (child.hasAttributes()) { 160 161 NamedNodeMap childAttrs = child.getAttributes(); 162 Node idNode = childAttrs.getNamedItem("name"); 163 if (idNode != null) { 164 165 // the entityId 166 String nameval = idNode.getNodeValue(); 167 if (nameval.equals(NamedObjId.NAME)) { 168 Node idNode1 = childAttrs.getNamedItem("value"); 169 String idval = idNode1.getNodeValue(); 170 kam.setLsid(new KeplerLSID(idval)); 171 } 172 173 // the class name 174 if (nameval.equals("class")) { 175 Node idNode3 = childAttrs.getNamedItem("value"); 176 String classname = idNode3.getNodeValue(); 177 kam.setClassName(classname); 178 } 179 180 // the semantic types 181 if (nameval.startsWith("semanticType")) { 182 Node idNode2 = childAttrs.getNamedItem("value"); 183 String semtype = idNode2.getNodeValue(); 184 kam.addSemanticType(semtype); 185 } 186 187 // userLevelDocumentation must be contained in KeplerDocumentation 188 if (nameval.equals("userLevelDocumentation")) { 189 log.warn(nameStr + " userLevelDocumentation property should be contained in a KeplerDocumentation property."); 190 } else if(nameval.equals("KeplerDocumentation")) { 191 192 foundKeplerDocumentation = true; 193 194 final NodeList keplerDocNodeList = child.getChildNodes(); 195 if(keplerDocNodeList.getLength() > 0) { 196 197 emptyKeplerDocumentation = false; 198 199 for (int j = 0; j < keplerDocNodeList.getLength(); j++) { 200 final Node docChildNode = keplerDocNodeList.item(j); 201 final NamedNodeMap docChildNamedNodeMap = docChildNode.getAttributes(); 202 203 if(docChildNamedNodeMap != null) { 204 205 final Node docChildChildName = docChildNamedNodeMap.getNamedItem("name"); 206 207 if(docChildChildName != null) { 208 209 final String docChildChildNameValue = docChildChildName.getNodeValue(); 210 211 if(docChildChildNameValue.equals("userLevelDocumentation")) { 212 213 foundUserLevelDocumentation = true; 214 final String text = docChildNode.getTextContent(); 215 if(text == null || text.trim().isEmpty()) { 216 log.debug(nameStr + " has empty userLevelDocumentation."); 217 } 218 219 } else if(docChildChildNameValue.equals("author")) { 220 foundAuthor = true; 221 222 final String text = docChildNode.getTextContent(); 223 if(text == null || text.trim().isEmpty()) { 224 log.debug(nameStr + " has empty author documentation."); 225 } 226 227 } 228 } 229 } 230 } 231 } 232 } 233 234 if (nameval.startsWith(COPY_ATTRIBUTE_PREFIX)) { 235 String value = childAttrs.getNamedItem("value").getNodeValue(); 236 kam.addAttribute(nameval, value); 237 } 238 } 239 } 240 } 241 242 // check documentation 243 if(!foundKeplerDocumentation) { 244 log.debug(nameStr + " is missing KeplerDocumentation."); 245 } else if(emptyKeplerDocumentation) { 246 log.debug(nameStr + " KeplerDocumentation is empty."); 247 } else if(!foundUserLevelDocumentation && !foundAuthor) { 248 log.debug(nameStr + " is missing userLevelDocumentation and author documentation."); 249 } 250 else if(!foundUserLevelDocumentation) { 251 log.debug(nameStr + " is missing userLevelDocumentation."); 252 } 253 else if(!foundAuthor) { 254 log.debug(nameStr + " is missing author documentation."); 255 } 256 257 258 } catch (Exception e) { 259 if(printError) { 260 e.printStackTrace(); 261 System.out.println("Error parsing Actor KAR DOM \"" 262 + ((nameStr == null) ? byteout.toString().substring(0, 263 300) 264 + "..." : nameStr) + "\": " + e.getMessage()); 265 } 266 kam = null; 267 } 268 finally { 269 actorStream.close(); 270 byteout.close(); 271 } 272 } catch (Exception e) { 273 kam = null; 274 throw new Exception("Error extracting Actor Metadata: " 275 + e.getMessage()); 276 } 277 278 return kam; 279 } 280 281 public static final String COPY_ATTRIBUTE_PREFIX = "_wrapper"; 282 public static final String MOML_PUBLIC_ID_1 = "-//UC Berkeley//DTD MoML 1//EN"; 283}