001/* An actor that produces an array that lists the contents of a directory.
002
003   @Copyright (c) 2003-2017 The Regents of the University of California.
004   All rights reserved.
005
006   Permission is hereby granted, without written agreement and without
007   license or royalty fees, to use, copy, modify, and distribute this
008   software and its documentation for any purpose, provided that the
009   above copyright notice and the following two paragraphs appear in all
010   copies of this software.
011
012   IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
013   FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
014   ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
015   THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
016   SUCH DAMAGE.
017
018   THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
019   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
021   PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
022   CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
023   ENHANCEMENTS, OR MODIFICATIONS.
024
025   PT_COPYRIGHT_VERSION 2
026   COPYRIGHTENDKEY
027 */
028package ptolemy.actor.lib.io;
029
030import java.io.BufferedReader;
031import java.io.File;
032import java.io.FilenameFilter;
033import java.io.IOException;
034import java.io.InputStreamReader;
035import java.net.URL;
036import java.net.URLConnection;
037import java.net.URLDecoder;
038import java.util.ArrayList;
039import java.util.Enumeration;
040import java.util.Iterator;
041import java.util.LinkedList;
042import java.util.List;
043import java.util.StringTokenizer;
044import java.util.jar.JarEntry;
045import java.util.jar.JarFile;
046
047import ptolemy.actor.lib.SequenceSource;
048import ptolemy.actor.parameters.FilePortParameter;
049import ptolemy.data.ArrayToken;
050import ptolemy.data.BooleanToken;
051import ptolemy.data.StringToken;
052import ptolemy.data.expr.Parameter;
053import ptolemy.data.expr.StringParameter;
054import ptolemy.data.type.ArrayType;
055import ptolemy.data.type.BaseType;
056import ptolemy.kernel.CompositeEntity;
057import ptolemy.kernel.util.Attribute;
058import ptolemy.kernel.util.IllegalActionException;
059import ptolemy.kernel.util.NameDuplicationException;
060import ptolemy.util.FileUtilities;
061import ptolemy.util.RecursiveFileFilter;
062
063///////////////////////////////////////////////////////////////////
064//// DirectoryListing
065
066/**
067 Given a URL or directory name, produce an array of file names in that
068 directory that match an (optional) pattern.
069
070 <p>The file names include the complete path, unless <i>relative</i>
071 is set to true, in which case, the names are relative to the
072 directory. The pattern is a regular expression. For a reference on
073 regular expression syntax see: <a href="http://download.oracle.com/javase/tutorial/essential/regex/#in_browser">http://download.oracle.com/javase/tutorial/essential/regex/</a></p>
074
075 <p>If <i>directoryOrURL</i> is a local directory (not a URL), then you can
076 optionally list only contained files or directories.
077 If <i>listOnlyDirectories</i> is true, then only directories will be
078 listed on the output.  If <i>listOnlyFiles</i> is true, then only
079 files will be listed on the output. If both are true, then an exception
080 is thrown.</p>
081
082 <p>If <i>directoryOrURL</i> is a URL, then this actor assumes that the
083 server will list the contents of the referenced directory in an
084 HTML file where each file listed will have the following form:</p>
085
086 <pre>
087 &lt;a href="filename"&gt;filename&lt;/a&gt;
088 </pre>
089
090 <p>If the filename is longer than 20 characters, then only the first
091 20 characters of the two appearances of the filename are compared,
092 since some servers truncate the file names.</p>
093
 <p>The <i>allowEmptyDirectory</i> parameter controls whether reading an
 empty directory will throw an exception.</p>
096
097 <p>Note that DirectoryListing returns the contents of the directory
098 in a different order depending on whether one is using the Sun JVM
099 or the IBM JVM.  Thus, you may want to connect the output to an
100 ArraySort actor.</p>
101
102 @author  Christopher Hylands, Edward A. Lee, Contributor: Jianwu Wang
103 @version $Id$
104 @since Ptolemy II 4.0
105 @Pt.ProposedRating Yellow (eal)
106 @Pt.AcceptedRating Red (liuj)
107 */
108public class DirectoryListing extends SequenceSource implements FilenameFilter {
109    /** Construct an actor with the given container and name.
110     *  @param container The container.
111     *  @param name The name of this actor.
112     *  @exception IllegalActionException If the actor cannot be contained
113     *   by the proposed container.
114     *  @exception NameDuplicationException If the container already has an
115     *   actor with this name.
116     */
117    public DirectoryListing(CompositeEntity container, String name)
118            throws IllegalActionException, NameDuplicationException {
119        super(container, name);
120
121        // Tell the file browser to allow only selection of directories.
122        directoryOrURL = new FilePortParameter(this, "directoryOrURL");
123        new Parameter(directoryOrURL, "allowFiles", BooleanToken.FALSE);
124        new Parameter(directoryOrURL, "allowDirectories", BooleanToken.TRUE);
125
126        output.setTypeEquals(new ArrayType(BaseType.STRING));
127
128        pattern = new StringParameter(this, "pattern");
129        pattern.setExpression("");
130
131        listOnlyDirectories = new Parameter(this, "listOnlyDirectories");
132        listOnlyDirectories.setTypeEquals(BaseType.BOOLEAN);
133        listOnlyDirectories.setExpression("false");
134
135        listOnlyFiles = new Parameter(this, "listOnlyFiles");
136        listOnlyFiles.setTypeEquals(BaseType.BOOLEAN);
137        listOnlyFiles.setExpression("false");
138
139        allowEmptyDirectory = new Parameter(this, "allowEmptyDirectory");
140        allowEmptyDirectory.setTypeEquals(BaseType.BOOLEAN);
141        allowEmptyDirectory.setExpression("false");
142
143        // Show the firingCountLimit parameter last.
144        firingCountLimit.moveToLast();
145
146        recursive = new Parameter(this, "recursive");
147        recursive.setTypeEquals(BaseType.BOOLEAN);
148        recursive.setExpression("false");
149
150        relative = new Parameter(this, "relative");
151        relative.setTypeEquals(BaseType.BOOLEAN);
152        relative.setExpression("false");
153    }
154
155    ///////////////////////////////////////////////////////////////////
156    ////                     ports and parameters                  ////
157
158    /** The directory name or URL from which to read.  This is a string with
159     *  any form accepted by {@link ptolemy.actor.parameters.FilePortParameter}.
160     *  By default, this is empty. An empty string is equivalent to specifying
161     *  "$CWD", the current working directory.
162     */
163    public FilePortParameter directoryOrURL;
164
165    /** If true, and <i>directoryOrURL</i> refers to a local directory (not a URL),
166     *  then only directories will be listed on the output. If <i>directoryOrURL</i>
167     *  is a URL, then this parameter is ignored (there appears to be no reliable
168     *  way to tell whether the URL refers to a directory or file).
169     *  This is a boolean that defaults to false.
170     */
171    public Parameter listOnlyDirectories;
172
173    /** If true, and <i>directoryOrURL</i> refers to a local directory (not a URL),
174     *  then only files will be listed on the output. If <i>directoryOrURL</i>
175     *  is a URL, then this parameter is ignored (there appears to be no reliable
176     *  way to tell whether the URL refers to a directory or file).
177     *  This is a boolean that defaults to false.
178     */
179    public Parameter listOnlyFiles;
180
181    /** If true, and <i>directoryOrURL</i> refers to a local directory
182     *  (not a URL), that is empty, then the output will be empty
183     *  string array and no exception is reported. If
184     *  <i>directoryOrURL</i> is a URL, then this parameter is ignored
185     *  (there appears to be no reliable way to tell whether the URL
186     *  refers to a directory or file).  This is a boolean that
187     *  defaults to false.
188     */
189    public Parameter allowEmptyDirectory;
190
191    /** If non-empty, then only output file and directory names that
192     *  match the specified (regular expression) pattern.
193     *  The default value of this parameter is the empty String "",
194     *  which indicates that everything matches.
195     */
196    public StringParameter pattern;
197
198    /** Whether files in the subdirectories should be searched as well.
199     *  The recursive parameter only works if <i>directoryOrURL</i>
200     *  refers to a local directory (not a URL).
201     *  This is a boolean that defaults to false.
202     */
203    public Parameter recursive;
204
205    /** If true, then produce an array with file names relative to the
206     *  specified directory. This is a boolean that defaults to false,
207     *  which causes the absolute (complete) path to be produced.
208     */
209    public Parameter relative;
210
211    ///////////////////////////////////////////////////////////////////
212    ////                         public methods                    ////
213
214    /** Return true if the specified name matches the specified pattern,
215     *  or if no pattern has been specified.
216     *  @param directory The directory in which the file was found
217     *   (ignored, but required by the FilenameFilter interface).
218     *  @param name The name of the file or directory.
219     *  @return True if the specified name matches.
220     */
221    @Override
222    public boolean accept(File directory, String name) {
223        // The accept() method is here primarily for backward
224        // compatibility as the DirectoryList class implements
225        // FileFilter.  It could be thet Kepler is using this.
226        if (_recursiveFileFilter == null) {
227            _recursiveFileFilter = new RecursiveFileFilter(_recursive,
228                    true /*includeFiles*/, true /*includeDirectories*/,
229                    _listOnlyFiles, _listOnlyDirectories, _pattern,
230                    false /*escape*/);
231        }
232        return _recursiveFileFilter.accept(directory, name);
233    }
234
235    /** Override the base class to locally cache parameter values.
236     *  @param attribute The attribute that has changed.
237     *  @exception IllegalActionException If the specified attribute
238     *   is <i>URL</i> and the file cannot be opened.
239     */
240    @Override
241    public void attributeChanged(Attribute attribute)
242            throws IllegalActionException {
243        // Parameter values are cached to avoid code duplication.
244        if (attribute == pattern) {
245            _pattern = pattern.stringValue();
246        } else if (attribute == listOnlyDirectories) {
247            _listOnlyDirectories = ((BooleanToken) listOnlyDirectories
248                    .getToken()).booleanValue();
249        } else if (attribute == listOnlyFiles) {
250            _listOnlyFiles = ((BooleanToken) listOnlyFiles.getToken())
251                    .booleanValue();
252        } else if (attribute == recursive) {
253            _recursive = ((BooleanToken) recursive.getToken()).booleanValue();
254        } else {
255            super.attributeChanged(attribute);
256        }
257    }
258
259    /** Output an array containing file and/or directory names.
260     *  @exception IllegalActionException If there's no director or
261     *   if the directory or URL is invalid.
262     */
263    @Override
264    public void fire() throws IllegalActionException {
265        super.fire();
266
267        directoryOrURL.update();
268
269        URL sourceURL = directoryOrURL.asURL();
270
271        if (sourceURL == null) {
272            // Nothing was specified. Default to the current working directory.
273            directoryOrURL.setExpression("$CWD");
274            sourceURL = directoryOrURL.asURL();
275            if (sourceURL == null) {
276                throw new IllegalActionException(this,
277                        "Cannot determine current working directory.");
278            }
279        }
280
281        boolean emptyDirectoryAllow = ((BooleanToken) allowEmptyDirectory
282                .getToken()).booleanValue();
283
284        if (sourceURL.getProtocol().equals("file")) {
285            File sourceFile = directoryOrURL.asFile();
286
287            if (sourceFile.isDirectory()) {
288                if (_debugging) {
289                    _debug("Reading directory.");
290                }
291
292                File[] listedFiles = RecursiveFileFilter.listFiles(sourceFile,
293                        _recursive, true /*includeFiles*/,
294                        true /*includeDirectories*/, _listOnlyFiles,
295                        _listOnlyDirectories, _pattern, false /*escape*/);
296
297                ArrayList<StringToken> result = new ArrayList<StringToken>();
298                for (File file : listedFiles) {
299
300                    String path = file.getAbsolutePath();
301                    if (((BooleanToken) relative.getToken()).booleanValue()) {
302                        // Strip off the directory name and trailing slash.
303                        int slash = 1;
304                        String directoryName = sourceFile.getAbsolutePath();
305                        if (directoryName.endsWith("/")) {
306                            slash = 0;
307                        }
308                        path = path.substring(directoryName.length() + slash);
309                    }
310
311                    if (_debugging) {
312                        _debug("Path: " + path);
313                    }
314
315                    result.add(new StringToken(path));
316                }
317
318                if (!emptyDirectoryAllow) {
319                    if (result.size() == 0) {
320                        throw new IllegalActionException(this,
321                                "No files or directories that match the pattern.");
322                    }
323                }
324
325                StringToken[] resultArray = new StringToken[result.size()];
326
327                for (int i = 0; i < resultArray.length; i++) {
328                    resultArray[i] = result.get(i);
329                }
330
331                output.broadcast(new ArrayToken(BaseType.STRING, resultArray));
332            } else if (sourceFile.isFile()) {
333                StringToken[] result = new StringToken[1];
334                result[0] = new StringToken(sourceFile.toString());
335
336                if (_debugging) {
337                    _debug("Listing just the specified file: "
338                            + result[0].stringValue());
339                }
340
341                output.broadcast(new ArrayToken(BaseType.STRING, result));
342            } else {
343                throw new IllegalActionException("'" + directoryOrURL
344                        + "' is neither a file " + "nor a directory.");
345            }
346        } else {
347            try {
348                _readURL(sourceURL);
349            } catch (IOException ex) {
350                throw new IllegalActionException(this, ex,
351                        "Error reading the URL \'" + directoryOrURL + "\'.");
352            }
353        }
354    }
355
356    ///////////////////////////////////////////////////////////////////
357    ////                         private methods                   ////
358
359    /** Read the URL and produce output.
360     *  @param sourceURL The source URL.
361     */
362    private void _readURL(URL sourceURL)
363            throws IOException, IllegalActionException {
364        // Handle urls here.
365        if (_debugging) {
366            _debug("Reading URL: " + sourceURL);
367        }
368
369        // Follow redirects such as http -> https.
370        sourceURL = FileUtilities.followRedirects(sourceURL);
371
372        List<StringToken> resultsList = new LinkedList<StringToken>();
373
374        URLConnection urlConnection = sourceURL.openConnection();
375        String contentType = urlConnection.getContentType();
376
377        if (!contentType.startsWith("text/html")
378                && !contentType.startsWith("text/plain")) {
379            if (!sourceURL.toString().startsWith("jar:")) {
380                throw new IllegalActionException(this,
381                        "Could not parse '" + directoryOrURL.stringValue()
382                                + "'; as URL '" + sourceURL
383                                + "', it is not \"text/html\", "
384                                + "or \"text/plain\", it is: "
385                                + urlConnection.getContentType());
386            } else {
387                // Reading from a directory from a jar file.
388
389                if (_recursiveFileFilter == null) {
390                    _recursiveFileFilter = new RecursiveFileFilter(_recursive,
391                            true /*includeFiles*/, true /*includeDirectories*/,
392                            _listOnlyFiles, _listOnlyDirectories, _pattern,
393                            false /*escape*/);
394                }
395
396                // Every thing after the !/
397                String path = sourceURL.getPath()
398                        .substring(sourceURL.getPath().indexOf("!/") + 2);
399
400                // Get rid of the jar:file and read up to the !
401                String jarPath = sourceURL.getPath().substring(5,
402                        sourceURL.getPath().indexOf("!"));
403                JarFile jar = null;
404                try {
405                    jar = new JarFile(URLDecoder.decode(jarPath, "UTF-8"));
406                    Enumeration<JarEntry> entries = jar.entries();
407                    while (entries.hasMoreElements()) {
408                        String name = entries.nextElement().getName();
409                        if (name.startsWith(path)) {
410                            String entry = name.substring(path.length());
411                            int slashIndex = entry.indexOf("/");
412                            if (slashIndex >= 0) {
413                                entry = entry.substring(0, slashIndex);
414                            }
415
416                            // FIXME: recursion on URLs not yet supported.
417                            if (_recursiveFileFilter.accept(null, entry)) {
418                                StringToken results = new StringToken(entry);
419                                // Add the results here if it is not present.
420                                // We want to preserve the order, otherwise we could
421                                // use a Set.
422                                if (!resultsList.contains(results)) {
423                                    resultsList.add(results);
424                                }
425                            }
426                        }
427                    }
428                } finally {
429                    if (jar != null) {
430                        jar.close();
431                    }
432                }
433            }
434        } else {
435
436            BufferedReader in = null;
437            try {
438                in = new BufferedReader(
439                        new InputStreamReader(urlConnection.getInputStream()));
440
441                if (!contentType.startsWith("text/plain")
442                        && !urlConnection.getURL().toString().endsWith("/")) {
443                    // text/plain urls need not end with /, but
444                    // text/html urls _must_ end with / since the web server
445                    // will rewrite them for us.
446                    throw new IllegalActionException(this,
447                            "Could not parse '" + directoryOrURL.stringValue()
448                                    + "'; it needs to end with '/'");
449                }
450
451                // Parse the contents in a haphazard fashion.
452                // The idea is that we look for the <BODY> line and
453                // then look for lines that contain HREF
454                // If we find a line like HREF="foo">foo, then we report
455                // foo as being a file.
456                // A more robust way would be to use a spider, see
457                // http://www.acme.com/java/software/WebList.html
458                String line;
459                String target = null;
460                boolean sawBody = false;
461                boolean sawHREF = false;
462
463                while ((line = in.readLine()) != null) {
464                    line = line.trim();
465
466                    if (_debugging) {
467                        _debug(line);
468                    }
469
470                    if (line.startsWith("<BODY") || line.startsWith("<body")) {
471                        if (_debugging) {
472                            _debug("Saw Body tag");
473                        }
474                        sawBody = true;
475                    } else {
476                        if (sawBody) {
477                            StringTokenizer tokenizer = new StringTokenizer(
478                                    line, "<\" >=");
479
480                            while (tokenizer.hasMoreTokens()) {
481                                String token = tokenizer.nextToken();
482
483                                if (token.compareToIgnoreCase("HREF") == 0) {
484                                    if (_debugging) {
485                                        _debug("Saw HREF");
486                                    }
487                                    sawHREF = true;
488                                    target = null;
489                                } else {
490                                    if (sawHREF) {
491                                        if (target == null) {
492                                            // FIXME: Here, we should check that target
493                                            // is a relative pathname.
494                                            target = token;
495                                        } else {
496                                            // Check to see whether the first 20
497                                            // characters of the token are
498                                            // the same as the last token.
499                                            String reference = target;
500
501                                            if (reference.length() > 20) {
502                                                reference = target.substring(0,
503                                                        20);
504                                            }
505
506                                            if (!token.startsWith(reference)) {
507                                                if (_debugging) {
508                                                    _debug("token \"" + token
509                                                            + "\" does not start with href.");
510                                                }
511                                                sawHREF = false;
512                                            } else {
513                                                // Having the first argument be null causes
514                                                // accept to determine if target is a file or
515                                                // directory by checking for a trailing /.
516                                                if (accept(null, target)) {
517                                                    if (_debugging) {
518                                                        _debug("target \""
519                                                                + token
520                                                                + "\" was accepted.");
521                                                    }
522
523                                                    // Make sure directoryOrURL ends with a slash.
524                                                    String base = directoryOrURL
525                                                            .stringValue();
526
527                                                    if (!base.endsWith("/")) {
528                                                        base = base + "/";
529                                                    }
530
531                                                    // FIXME: Is there any way to tell whether
532                                                    // the result is a directory or file?
533                                                    resultsList.add(
534                                                            new StringToken(base
535                                                                    + target));
536                                                } else {
537                                                    if (_debugging) {
538                                                        _debug("target \""
539                                                                + token
540                                                                + "\" was not accepted.");
541                                                    }
542                                                }
543
544                                                sawHREF = false;
545                                            }
546                                        }
547                                    }
548                                }
549                            }
550                        }
551                    }
552                }
553            } finally {
554                if (in != null) {
555                    in.close();
556                }
557            }
558        }
559
560        if (_debugging) {
561            _debug("----- end of listing.");
562            _debug("----- extracted results:");
563
564            Iterator results = resultsList.iterator();
565
566            while (results.hasNext()) {
567                _debug(((StringToken) results.next()).stringValue());
568            }
569        }
570
571        StringToken[] results = new StringToken[resultsList.size()];
572        output.broadcast(
573                new ArrayToken(BaseType.STRING, resultsList.toArray(results)));
574    }
575
576    ///////////////////////////////////////////////////////////////////
577    ////                         private members                   ////
578
579    /** Cached value of listOnlyDirectories parameter. */
580    private boolean _listOnlyDirectories;
581
582    /** Cached value of listOnlyFiles parameter. */
583    private boolean _listOnlyFiles;
584
585    /** Cached value of patternparameter. */
586    private String _pattern;
587
588    /** Cached value of recursive parameter. */
589    private boolean _recursive;
590
591    /** RecursiveFileFilter used by the accept() method of this class. */
592    private RecursiveFileFilter _recursiveFileFilter;
593}