001/* An actor that reads a URL naming a directory and outputs each
002 element of the directory one at a time.
003
004 @Copyright (c) 1998-2017 The Regents of the University of California.
005 All rights reserved.
006
007 Permission is hereby granted, without written agreement and without
008 license or royalty fees, to use, copy, modify, and distribute this
009 software and its documentation for any purpose, provided that the
010 above copyright notice and the following two paragraphs appear in all
011 copies of this software.
012
013 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
014 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
015 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
016 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
017 SUCH DAMAGE.
018
019 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
020 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
021 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
022 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
023 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
024 ENHANCEMENTS, OR MODIFICATIONS.
025
026 PT_COPYRIGHT_VERSION 2
027 COPYRIGHTENDKEY
028 */
029package ptolemy.actor.lib;
030
031import java.io.BufferedReader;
032import java.io.File;
033import java.io.IOException;
034import java.io.InputStreamReader;
035import java.net.MalformedURLException;
036import java.net.URL;
037import java.net.URLConnection;
038import java.util.LinkedList;
039import java.util.List;
040import java.util.StringTokenizer;
041
042import ptolemy.data.BooleanToken;
043import ptolemy.data.StringToken;
044import ptolemy.data.expr.Parameter;
045import ptolemy.data.type.BaseType;
046import ptolemy.kernel.CompositeEntity;
047import ptolemy.kernel.util.Attribute;
048import ptolemy.kernel.util.IllegalActionException;
049import ptolemy.kernel.util.KernelException;
050import ptolemy.kernel.util.NameDuplicationException;
051import ptolemy.util.FileUtilities;
052
053///////////////////////////////////////////////////////////////////
054//// URLDirectoryReader
055
056/**
057 This actor reads a URL and if the URL names a directory, it outputs
058 the name of each file or subdirectory contained in the directory.
059 If the URL names a file, then it outputs the name of that file.
060
061 <p>If the <i>repeat</i> flag is true, then the sequence of file
062 names is repeated indefinitely.
063 If the <i>refresh</i> flag is true, and the <i>repeat</i> flag is
064 true, then the directory is re-read before repeating the sequence of
065 files and subdirectories.
066
067 <p>If the <i>endsWith</i> String parameter is non-null and non-empty,
068 then only file names or subdirectories that end with the value
069 of the <i>endsWith</i> parameter are output.
070
071 <p>One alternative implementation would be that if the URL named a file,
072 then the actor would output the names of the files and subdirectories
073 in the directory that contains the file.
074 <br>Another alternative implementation would output the names of the
075 files and subdirectories in an array.
076 <br>An extension would be to include a filter parameter that could be
077 a regular expression that would allow us to filter the file names.
078 <br> Should this actor extend URLReader or SequenceActor?
079
080 @author  Christopher Hylands
081 @version $Id$
082 @since Ptolemy II 2.0
083 @Pt.ProposedRating Red (liuj)
084 @Pt.AcceptedRating Red (liuj)
085 @deprecated Use DirectoryListing instead.
086 */
087@Deprecated
088public class URLDirectoryReader extends URLReader {
089    /** Construct an actor with the given container and name.
090     *  @param container The container.
091     *  @param name The name of this actor.
092     *  @exception IllegalActionException If the actor cannot be contained
093     *   by the proposed container.
094     *  @exception NameDuplicationException If the container already has an
095     *   actor with this name.
096     */
097    public URLDirectoryReader(CompositeEntity container, String name)
098            throws IllegalActionException, NameDuplicationException {
099        super(container, name);
100
101        // Set the type of the output port.
102        output.setTypeEquals(BaseType.STRING);
103
104        // Set the endsWith String.
105        endsWith = new Parameter(this, "endsWith", new StringToken(""));
106        endsWith.setTypeEquals(BaseType.STRING);
107        attributeChanged(endsWith);
108
109        // Set the repeat Flag.
110        repeat = new Parameter(this, "repeat", new BooleanToken(false));
111        repeat.setTypeEquals(BaseType.BOOLEAN);
112        attributeChanged(repeat);
113    }
114
115    ///////////////////////////////////////////////////////////////////
116    ////                     ports and parameters                  ////
117
118    /** If non-null and non-empty, then only output file names and sub
119     *  directories that end with this String value.
120     *        The default value of this parameter is the empty String "".
121     */
122    public Parameter endsWith;
123
124    /** Repeat after outputting all elements of the directory.
125     *        The default value of this parameter is a false BooleanToken.
126     */
127    public Parameter repeat;
128
129    ///////////////////////////////////////////////////////////////////
130    ////                         public methods                    ////
131
132    /** If the specified attribute is <i>URL</i>, then close
133     *  the current file (if there is one) and open the new one.
134     *  @param attribute The attribute that has changed.
135     *  @exception IllegalActionException If the specified attribute
136     *   is <i>URL</i> and the file cannot be opened.
137     */
138    @Override
139    public void attributeChanged(Attribute attribute)
140            throws IllegalActionException {
141        if (attribute == repeat) {
142            _repeatFlag = ((BooleanToken) repeat.getToken()).booleanValue();
143        } else if (attribute == endsWith) {
144            StringToken endsWithToken = (StringToken) endsWith.getToken();
145
146            if (endsWithToken == null) {
147                _endsWithValue = null;
148            } else {
149                _endsWithValue = endsWithToken.stringValue();
150            }
151        }
152
153        super.attributeChanged(attribute);
154    }
155
156    /** Output the data read in the prefire.
157     *  @exception IllegalActionException If there's no director.
158     */
159    @Override
160    public void fire() throws IllegalActionException {
161        super.fire();
162        output.broadcast(new StringToken(_data[_iterationCount]));
163    }
164
165    /** Open the file at the URL, and set the width of the output.
166     *  @exception IllegalActionException Not thrown in this base class
167     */
168    @Override
169    public void initialize() throws IllegalActionException {
170        super.initialize();
171        _iterationCount = 0;
172    }
173
174    /** Update the iteration counter until it exceeds the number of
175     *  elements in the directory.  If the <i>repeat</i> parameter
176     *  is true, then repeat the same sequence of directory elements
177     *  again.  If the <i>repeat</i> and <i>refresh</i> parameters
178     *  are both true, then reread the directory before repeating
179     *  the sequence of directory elements
180     *
181     *  @exception IllegalActionException If the sourceURL is not valid.
182     */
183    @Override
184    public boolean postfire() throws IllegalActionException {
185        _iterationCount++;
186
187        if (_iterationCount >= _data.length) {
188            if (!_repeatFlag) {
189                return false;
190            } else {
191                _iterationCount = 0;
192
193                if (_refreshFlag) {
194                    _data = _list(_source, _endsWithValue);
195                }
196            }
197        }
198
199        return super.postfire();
200    }
201
202    /** Read one row from the input and prepare for output them.
203     *  @exception IllegalActionException If the <i>sourceURL</i> is invalid.
204     */
205    @Override
206    public boolean prefire() throws IllegalActionException {
207        try {
208            _data = _list(_source, _endsWithValue);
209            return super.prefire();
210        } catch (Exception ex) {
211            throw new IllegalActionException(this, ex, "prefire() failed");
212        }
213    }
214
215    ///////////////////////////////////////////////////////////////////
216    ////                         private methods                   ////
217
218    /** If the URL names a directory return an array containing
219     *  the names of the files and subdirectories contained in the
220     *  directory.  If the URL names a file, then return an array
221     *  of size 1 containing the name of the file.  If the URL
222     *  names neither a file or directory, return null.
223     *
224     *  @param source The filename or URL to open
225     *  @param endsWith If non-null, then only files or subdirectories
226     *  that end with this string are reported.
227     *  @return An array of Strings where each element of the array
228     *  names a file or subdirectory.
229     *  @exception IllegalActionException If the source is a malformed
230     *  URL
231     */
232    private String[] _list(String source, String endsWith)
233            throws IllegalActionException {
234        if (source.startsWith("file:")) {
235            return _listFile(source, endsWith);
236        } else {
237            try {
238                return _listFileOrURL(source, endsWith);
239            } catch (Exception ex) {
240                throw new IllegalActionException("Could not open '" + source
241                        + ": " + KernelException.stackTraceToString(ex));
242            }
243        }
244    }
245
246    /** Return files and directories contained in the source url.
247     *  @param source The source URL to query for files and subdirectories.
248     *  The source url must be a String using the "file:" protocol.
249     *  @param endsWith If non-null and of length greater than 0,
250     *  then only files or subdirectories that end with this string
251     *  are reported.
252     *  @return An array containing the files and subdirectories in
253     *  the source URL.
254     *  @exception IllegalActionException If the source does not have
255     *  the file: protocol, or if the source is neither a file
256     *  nor a directory, or if there is some other problem.
257     */
258    private String[] _listFile(String source, String endsWith)
259            throws IllegalActionException {
260        try {
261            URL sourceURL = new URL(source);
262
263            if (sourceURL.getProtocol().equals("file")) {
264                // First, try opening the source as a file.
265                File file = new File(sourceURL.getFile());
266
267                if (file.isDirectory()) {
268                    if (!source.endsWith("/")) {
269                        source = source + "/";
270                    }
271
272                    // Note: we could use listFiles(FileFilter) here.
273                    // but since the filter is fairly simple, we don't
274                    File[] files = file.listFiles();
275                    List resultsList = new LinkedList();
276
277                    if (files != null) {
278                        for (File file2 : files) {
279                            String filename = file2.getName();
280
281                            if (endsWith == null || endsWith.length() == 0
282                                    || filename.endsWith(endsWith)) {
283                                resultsList.add(source + filename);
284                            }
285                        }
286                    }
287
288                    String[] results = new String[resultsList.size()];
289                    return (String[]) resultsList.toArray(results);
290                } else if (file.isFile()) {
291                    return new String[] { file.toString() };
292                } else {
293                    throw new IllegalActionException("'" + source
294                            + "' is neither a file " + "or a directory?");
295                }
296            } else {
297                // FIXME: handle urls here.
298                throw new IllegalActionException("'" + source + "' does not "
299                        + "have the file: protocol");
300            }
301        } catch (Exception ex) {
302            throw new IllegalActionException(
303                    "Could not open '" + source + "' :" + ex);
304        }
305    }
306
307    /** Return files and directories contained in the source url.
308     *  This method attempts to parse the html results returned by
309     *  reading a URL connection, so the parsing may fail.  If the URL
310     *  uses the http: protocol, then the remote webserver
311     *  configuration determines whether it is possible to read the
312     *  contents of a directory.  Usually, the server has to have
313     *  directory listing enabled, and the default html file
314     *  (index.htm, index.html, default.htm etc. ) must not be present.
315     *
316     *  @param source The source URL to query for files and subdirectories.
317     *  The source url must be a String using the "file:" protocol.
318     *  @param endsWith If non-null and of length greater than 0,
319     *  then only files or subdirectories that end with this string
320     *  are reported.
321     *  @return An array containing the files and subdirectories in
322     *  the source URL.
323     *  @exception IllegalActionException If the source does not have
324     *  the file: protocol, or if the source is neither a file
325     *  nor a directory, or if there is some other problem.  */
326    private static String[] _listFileOrURL(String source, String endsWith)
327            throws MalformedURLException, IOException {
328        // Follow redirects such as http -> https.
329        URL url = FileUtilities.followRedirects(new URL(source));
330
331        URLConnection urlConnection = url.openConnection();
332        String contentType = urlConnection.getContentType();
333
334        if (!contentType.startsWith("text/html")
335                && !contentType.startsWith("text/plain")) {
336            throw new RuntimeException("Could not parse '" + source
337                    + "', it is not \"text/html\", "
338                    + "or \"text/plain\", it is: "
339                    + urlConnection.getContentType());
340        }
341
342        List resultsList = new LinkedList();
343
344        BufferedReader in = null;
345
346        try {
347            in = new BufferedReader(
348                    new InputStreamReader(urlConnection.getInputStream()));
349
350            if (!contentType.startsWith("text/plain")
351                    && !urlConnection.getURL().toString().endsWith("/")) {
352                // text/plain urls need not end with /, but
353                // text/html urls _must_ end with / since the web server
354                // will rewrite them for us.
355                throw new RuntimeException("Could not parse '" + source
356                        + "', it does not end with '/'");
357            }
358
359            if (!source.endsWith("/")) {
360                source += "/";
361            }
362
363            // Parse the contents in a haphazard fashion.
364            // The idea is that we look for the <BODY> line and
365            // then looks for lines that contain HREF
366            // If we find a line like HREF="foo">foo, then we report
367            // foo as being a file.
368            // A more robust way would be to use a spider, see
369            // http://www.acme.com/java/software/WebList.html
370            String line;
371            String target = null;
372            boolean sawBody = false;
373            boolean sawHREF = false;
374
375            while ((line = in.readLine()) != null) {
376                line = line.trim();
377
378                if (line.startsWith("<BODY") || line.startsWith("<body")) {
379                    sawBody = true;
380                } else {
381                    if (sawBody) {
382                        StringTokenizer tokenizer = new StringTokenizer(line,
383                                "<\" >=");
384
385                        while (tokenizer.hasMoreTokens()) {
386                            String token = tokenizer.nextToken();
387
388                            if (token.compareToIgnoreCase("HREF") == 0) {
389                                sawHREF = true;
390                                target = null;
391                            } else {
392                                if (sawHREF) {
393                                    if (target == null) {
394                                        // Here, we should check that target
395                                        // is a relative pathname.
396                                        target = token;
397                                    } else {
398                                        // Check to see if the token is
399                                        // the same as the last token.
400                                        if (token.compareTo(target) != 0) {
401                                            sawHREF = false;
402                                        } else {
403                                            // If we were really brave, we
404                                            // could try opening a connection
405                                            // here to verify that the target
406                                            // exists.
407                                            if (endsWith == null
408                                                    || endsWith.length() == 0
409                                                    || target.endsWith(
410                                                            endsWith)) {
411                                                resultsList
412                                                        .add(source + target);
413                                            }
414
415                                            sawHREF = false;
416                                        }
417                                    }
418                                }
419                            }
420                        }
421                    }
422                }
423            }
424        } finally {
425            if (in != null) {
426                in.close();
427            }
428        }
429
430        String[] results = new String[resultsList.size()];
431        return (String[]) resultsList.toArray(results);
432    }
433
434    ///////////////////////////////////////////////////////////////////
435    ////                         private members                   ////
436    // If non-null and non-empty, then we only output file names and
437    // subdirectories that match this String.
438    private String _endsWithValue;
439
440    // Count of the iterations.
441    private int _iterationCount = 0;
442
443    // An array containing the files and subdirectories in the directory
444    // named by sourceURL.
445    // FIXME: Should we clone this?
446    private String[] _data;
447
448    // Flag to indicate whether or not to repeat the sequence.
449    private boolean _repeatFlag;
450}