001/*
002 * Copyright (c) 2003-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: crawl $'
006 * '$Date: 2012-07-25 20:29:43 +0000 (Wed, 25 Jul 2012) $' 
007 * '$Revision: 30284 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.kepler.moml;
031
032import java.io.ByteArrayOutputStream;
033import java.io.File;
034import java.io.FileInputStream;
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.StringReader;
038
039import javax.xml.parsers.DocumentBuilder;
040import javax.xml.parsers.DocumentBuilderFactory;
041
042import org.apache.commons.logging.Log;
043import org.apache.commons.logging.LogFactory;
044import org.kepler.objectmanager.lsid.KeplerLSID;
045import org.w3c.dom.Document;
046import org.w3c.dom.Element;
047import org.w3c.dom.NamedNodeMap;
048import org.w3c.dom.Node;
049import org.w3c.dom.NodeList;
050import org.xml.sax.EntityResolver;
051import org.xml.sax.InputSource;
052import org.xml.sax.SAXException;
053
054import ptolemy.moml.MoMLParser;
055
056/**
057 * This class will contain methods for parsing out the Kepler Metadata from a
058 * MOML file.
059 * 
060 * @author Aaron Schultz
061 */
062public class KeplerMetadataExtractor {
063
064        private static final Log log = LogFactory
065                        .getLog(KeplerMetadataExtractor.class.getName());
066        private static final boolean isDebugging = log.isDebugEnabled();
067        //private static final boolean isTracing = log.isTraceEnabled();
068
069        public static KeplerActorMetadata extractActorMetadata(File momlFile)
070                throws Exception {
071            return extractActorMetadata(momlFile, true);
072        }
073           
074        public static KeplerActorMetadata extractActorMetadata(File momlFile, boolean printError)
075                        throws Exception {
076                FileInputStream fis = new FileInputStream(momlFile);
077                return extractActorMetadata(fis, printError);
078        }
079
080    /** Get the metadata from an input stream. 
081     *  @param actorStream the input stream
082     */
083    public static KeplerActorMetadata extractActorMetadata(
084            InputStream actorStream) throws Exception {
085        return extractActorMetadata(actorStream, true);
086    }
087        
088        /** Get the metadata from an input stream optionally printing output if a parsing error occurs.
089         *  @param actorStream the input stream
090         *  @param printError if true, print a stack trace and error message if a parsing error occurs.
091         */
092        public static KeplerActorMetadata extractActorMetadata(
093                        InputStream actorStream, boolean printError) throws Exception {
094                //if (isTracing)
095                        //log.trace("ActorCacheObject(" + actorStream.getClass().getName()
096                                        //+ ")");
097
098                KeplerActorMetadata kam = new KeplerActorMetadata();
099
100                ByteArrayOutputStream byteout;
101                try {
102                        // Copy 1024 bytes from actorStream to byteout
103                        byteout = new ByteArrayOutputStream();
104                        byte[] b = new byte[1024];
105                        int numread = actorStream.read(b, 0, 1024);
106                        while (numread != -1) {
107                                byteout.write(b, 0, numread);
108                                numread = actorStream.read(b, 0, 1024);
109                        }
110                        kam.setActorString(byteout.toString());
111
112                        // need to get actor name and id from the string
113                        // thus build a DOM representation
114                        String nameStr = null;
115                        try {
116                                //if (isTracing) log.trace(kam.getActorString());
117                                StringReader strR = new StringReader(kam.getActorString());
118                                
119                                InputSource xmlIn = new InputSource(strR);
120                                DocumentBuilderFactory factory = DocumentBuilderFactory
121                                                .newInstance();
122                                DocumentBuilder builder = factory.newDocumentBuilder();
123                                builder.setEntityResolver(new EntityResolver() {
124                                        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
125                                                if (MOML_PUBLIC_ID_1.equals(publicId)) {
126                                                        return new InputSource(MoMLParser.class.getResourceAsStream("MoML_1.dtd"));
127                                                }
128                                                else {
129                                                        return null;
130                                                }
131                                        }
132                                });
133                                
134                                // TODO 
135                                // this causes http://bugzilla.ecoinformatics.org/show_bug.cgi?id=4671
136                                // when File => Save Archive w/ wf w/ actor w/ < in the name:
137                                Document doc = builder.parse(xmlIn);
138                                //if (isTracing) log.trace(doc.toString());
139
140                                Element rootNode = doc.getDocumentElement();
141                                kam.setRootName(rootNode.getNodeName());
142
143                                // Get the value of the name attribute of the root node
144                                NamedNodeMap nnm = rootNode.getAttributes();
145                                Node namenode = nnm.getNamedItem("name");
146                                nameStr = namenode.getNodeValue();
147                                kam.setName(nameStr);
148
149                                boolean emptyKeplerDocumentation = true;
150                                boolean foundKeplerDocumentation = false;
151                boolean foundUserLevelDocumentation = false;
152                boolean foundAuthor = false;
153
154                                // Cycle through the children of the root node
155                                NodeList probNodes = rootNode.getChildNodes();
156                                for (int i = 0; i < probNodes.getLength(); i++) {
157                                        Node child = probNodes.item(i);
158
159                                        if (child.hasAttributes()) {
160
161                                                NamedNodeMap childAttrs = child.getAttributes();
162                                                Node idNode = childAttrs.getNamedItem("name");
163                                                if (idNode != null) {
164
165                                                        // the entityId
166                                                        String nameval = idNode.getNodeValue();
167                                                        if (nameval.equals(NamedObjId.NAME)) {
168                                                                Node idNode1 = childAttrs.getNamedItem("value");
169                                                                String idval = idNode1.getNodeValue();
170                                                                kam.setLsid(new KeplerLSID(idval));
171                                                        }
172
173                                                        // the class name
174                                                        if (nameval.equals("class")) {
175                                                                Node idNode3 = childAttrs.getNamedItem("value");
176                                                                String classname = idNode3.getNodeValue();
177                                                                kam.setClassName(classname);
178                                                        }
179
180                                                        // the semantic types
181                                                        if (nameval.startsWith("semanticType")) {
182                                                                Node idNode2 = childAttrs.getNamedItem("value");
183                                                                String semtype = idNode2.getNodeValue();
184                                                                kam.addSemanticType(semtype);
185                                                        }
186                                                        
187                                                        // userLevelDocumentation must be contained in KeplerDocumentation 
188                                                        if (nameval.equals("userLevelDocumentation")) {
189                                                            log.warn(nameStr + " userLevelDocumentation property should be contained in a KeplerDocumentation property.");
190                                                        } else if(nameval.equals("KeplerDocumentation")) {
191                                                            
192                                                            foundKeplerDocumentation = true;
193                                                            
194                                                            final NodeList keplerDocNodeList = child.getChildNodes();
195                                                            if(keplerDocNodeList.getLength() > 0) {
196
197                                                                emptyKeplerDocumentation = false;
198
199                                                                for (int j = 0; j < keplerDocNodeList.getLength(); j++) {
200                                                            final Node docChildNode = keplerDocNodeList.item(j);
201                                                            final NamedNodeMap docChildNamedNodeMap = docChildNode.getAttributes();
202                                                            
203                                                            if(docChildNamedNodeMap != null) {
204                                                                
205                                                                final Node docChildChildName = docChildNamedNodeMap.getNamedItem("name");
206                                                                
207                                                                if(docChildChildName != null) {
208                                                                
209                                                                    final String docChildChildNameValue = docChildChildName.getNodeValue();
210                                                                    
211                                                                    if(docChildChildNameValue.equals("userLevelDocumentation")) {
212                                                                        
213                                                                        foundUserLevelDocumentation = true;
214                                                                        final String text = docChildNode.getTextContent();
215                                                                        if(text == null || text.trim().isEmpty()) {
216                                                                            log.debug(nameStr + " has empty userLevelDocumentation.");
217                                                                        }
218                                                                        
219                                                                    } else if(docChildChildNameValue.equals("author")) {
220                                                                        foundAuthor = true;
221
222                                                                        final String text = docChildNode.getTextContent();
223                                                    if(text == null || text.trim().isEmpty()) {
224                                                        log.debug(nameStr + " has empty author documentation.");
225                                                    }
226                                                                        
227                                                                    }
228                                                                }
229                                                            }                                                       
230                                                                }   
231                                                            }
232                                                        }
233                                                                                                                
234                                                        if (nameval.startsWith(COPY_ATTRIBUTE_PREFIX)) {
235                                                                String value = childAttrs.getNamedItem("value").getNodeValue();
236                                                                kam.addAttribute(nameval, value);
237                                                        }
238                                                }
239                                        }
240                                }
241                                
242                                // check documentation
243                                if(!foundKeplerDocumentation) {
244                                    log.debug(nameStr + " is missing KeplerDocumentation.");
245                                } else if(emptyKeplerDocumentation) {
246                                    log.debug(nameStr + " KeplerDocumentation is empty.");
247                                } else if(!foundUserLevelDocumentation && !foundAuthor) {
248                                    log.debug(nameStr + " is missing userLevelDocumentation and author documentation.");
249                                }
250                                else if(!foundUserLevelDocumentation) {
251                                    log.debug(nameStr + " is missing userLevelDocumentation.");
252                                }
253                                else if(!foundAuthor) {
254                                    log.debug(nameStr + " is missing author documentation.");
255                                }
256                                
257                                
258                        } catch (Exception e) {
259                            if(printError) {
260                                e.printStackTrace();
261                                System.out.println("Error parsing Actor KAR DOM \""
262                                                + ((nameStr == null) ? byteout.toString().substring(0,
263                                                                300)
264                                                                + "..." : nameStr) + "\": " + e.getMessage());
265                            }
266                                kam = null;
267                        }
268                        finally {
269                            actorStream.close();
270                            byteout.close();
271                        }
272                } catch (Exception e) {
273                        kam = null;
274                        throw new Exception("Error extracting Actor Metadata: "
275                                        + e.getMessage());
276                }
277
278                return kam;
279        }
280        
281        public static final String COPY_ATTRIBUTE_PREFIX = "_wrapper";  
282        public static final String MOML_PUBLIC_ID_1 = "-//UC Berkeley//DTD MoML 1//EN"; 
283}