001/* An actor that reads a URL naming a directory and outputs each 002 element of the directory one at a time. 003 004 @Copyright (c) 1998-2017 The Regents of the University of California. 005 All rights reserved. 006 007 Permission is hereby granted, without written agreement and without 008 license or royalty fees, to use, copy, modify, and distribute this 009 software and its documentation for any purpose, provided that the 010 above copyright notice and the following two paragraphs appear in all 011 copies of this software. 012 013 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 014 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 015 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 016 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 017 SUCH DAMAGE. 018 019 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 020 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 021 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 022 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 023 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 024 ENHANCEMENTS, OR MODIFICATIONS. 025 026 PT_COPYRIGHT_VERSION 2 027 COPYRIGHTENDKEY 028 */ 029package ptolemy.actor.lib; 030 031import java.io.BufferedReader; 032import java.io.File; 033import java.io.IOException; 034import java.io.InputStreamReader; 035import java.net.MalformedURLException; 036import java.net.URL; 037import java.net.URLConnection; 038import java.util.LinkedList; 039import java.util.List; 040import java.util.StringTokenizer; 041 042import ptolemy.data.BooleanToken; 043import ptolemy.data.StringToken; 044import ptolemy.data.expr.Parameter; 045import ptolemy.data.type.BaseType; 046import ptolemy.kernel.CompositeEntity; 047import ptolemy.kernel.util.Attribute; 048import ptolemy.kernel.util.IllegalActionException; 049import ptolemy.kernel.util.KernelException; 050import ptolemy.kernel.util.NameDuplicationException; 051import ptolemy.util.FileUtilities; 052 053/////////////////////////////////////////////////////////////////// 054//// URLDirectoryReader 055 056/** 057 This actor reads a URL and if the URL names a directory, it outputs 058 the name of each file or subdirectory contained in the directory. 059 If the URL names a file, then it outputs the name of that file. 060 061 <p>If the <i>repeat</i> flag is true, then the sequence of file 062 names is repeated indefinitely. 063 If the <i>refresh</i> flag is true, and the <i>repeat</i> flag is 064 true, then the directory is re-read before repeating the sequence of 065 files and subdirectories. 066 067 <p>If the <i>endsWith</i> String parameter is non-null and non-empty, 068 then only file names or subdirectories that end with the value 069 of the <i>endsWith</i> parameter are output. 070 071 <p>One alternative implementation would be that if the URL named a file, 072 then the actor would output the names of the files and subdirectories 073 in the directory that contains the file. 074 <br>Another alternative implementation would output the names of the 075 files and subdirectories in an array. 076 <br>An extension would be to include a filter parameter that could be 077 a regular expression that would allow us to filter the file names. 078 <br> Should this actor extend URLReader or SequenceActor? 079 080 @author Christopher Hylands 081 @version $Id$ 082 @since Ptolemy II 2.0 083 @Pt.ProposedRating Red (liuj) 084 @Pt.AcceptedRating Red (liuj) 085 @deprecated Use DirectoryListing instead. 086 */ 087@Deprecated 088public class URLDirectoryReader extends URLReader { 089 /** Construct an actor with the given container and name. 090 * @param container The container. 091 * @param name The name of this actor. 092 * @exception IllegalActionException If the actor cannot be contained 093 * by the proposed container. 094 * @exception NameDuplicationException If the container already has an 095 * actor with this name. 096 */ 097 public URLDirectoryReader(CompositeEntity container, String name) 098 throws IllegalActionException, NameDuplicationException { 099 super(container, name); 100 101 // Set the type of the output port. 102 output.setTypeEquals(BaseType.STRING); 103 104 // Set the endsWith String. 105 endsWith = new Parameter(this, "endsWith", new StringToken("")); 106 endsWith.setTypeEquals(BaseType.STRING); 107 attributeChanged(endsWith); 108 109 // Set the repeat Flag. 110 repeat = new Parameter(this, "repeat", new BooleanToken(false)); 111 repeat.setTypeEquals(BaseType.BOOLEAN); 112 attributeChanged(repeat); 113 } 114 115 /////////////////////////////////////////////////////////////////// 116 //// ports and parameters //// 117 118 /** If non-null and non-empty, then only output file names and sub 119 * directories that end with this String value. 120 * The default value of this parameter is the empty String "". 121 */ 122 public Parameter endsWith; 123 124 /** Repeat after outputting all elements of the directory. 125 * The default value of this parameter is a false BooleanToken. 126 */ 127 public Parameter repeat; 128 129 /////////////////////////////////////////////////////////////////// 130 //// public methods //// 131 132 /** If the specified attribute is <i>URL</i>, then close 133 * the current file (if there is one) and open the new one. 134 * @param attribute The attribute that has changed. 135 * @exception IllegalActionException If the specified attribute 136 * is <i>URL</i> and the file cannot be opened. 137 */ 138 @Override 139 public void attributeChanged(Attribute attribute) 140 throws IllegalActionException { 141 if (attribute == repeat) { 142 _repeatFlag = ((BooleanToken) repeat.getToken()).booleanValue(); 143 } else if (attribute == endsWith) { 144 StringToken endsWithToken = (StringToken) endsWith.getToken(); 145 146 if (endsWithToken == null) { 147 _endsWithValue = null; 148 } else { 149 _endsWithValue = endsWithToken.stringValue(); 150 } 151 } 152 153 super.attributeChanged(attribute); 154 } 155 156 /** Output the data read in the prefire. 157 * @exception IllegalActionException If there's no director. 158 */ 159 @Override 160 public void fire() throws IllegalActionException { 161 super.fire(); 162 output.broadcast(new StringToken(_data[_iterationCount])); 163 } 164 165 /** Open the file at the URL, and set the width of the output. 166 * @exception IllegalActionException Not thrown in this base class 167 */ 168 @Override 169 public void initialize() throws IllegalActionException { 170 super.initialize(); 171 _iterationCount = 0; 172 } 173 174 /** Update the iteration counter until it exceeds the number of 175 * elements in the directory. If the <i>repeat</i> parameter 176 * is true, then repeat the same sequence of directory elements 177 * again. If the <i>repeat</i> and <i>refresh</i> parameters 178 * are both true, then reread the directory before repeating 179 * the sequence of directory elements 180 * 181 * @exception IllegalActionException If the sourceURL is not valid. 182 */ 183 @Override 184 public boolean postfire() throws IllegalActionException { 185 _iterationCount++; 186 187 if (_iterationCount >= _data.length) { 188 if (!_repeatFlag) { 189 return false; 190 } else { 191 _iterationCount = 0; 192 193 if (_refreshFlag) { 194 _data = _list(_source, _endsWithValue); 195 } 196 } 197 } 198 199 return super.postfire(); 200 } 201 202 /** Read one row from the input and prepare for output them. 203 * @exception IllegalActionException If the <i>sourceURL</i> is invalid. 204 */ 205 @Override 206 public boolean prefire() throws IllegalActionException { 207 try { 208 _data = _list(_source, _endsWithValue); 209 return super.prefire(); 210 } catch (Exception ex) { 211 throw new IllegalActionException(this, ex, "prefire() failed"); 212 } 213 } 214 215 /////////////////////////////////////////////////////////////////// 216 //// private methods //// 217 218 /** If the URL names a directory return an array containing 219 * the names of the files and subdirectories contained in the 220 * directory. If the URL names a file, then return an array 221 * of size 1 containing the name of the file. If the URL 222 * names neither a file or directory, return null. 223 * 224 * @param source The filename or URL to open 225 * @param endsWith If non-null, then only files or subdirectories 226 * that end with this string are reported. 227 * @return An array of Strings where each element of the array 228 * names a file or subdirectory. 229 * @exception IllegalActionException If the source is a malformed 230 * URL 231 */ 232 private String[] _list(String source, String endsWith) 233 throws IllegalActionException { 234 if (source.startsWith("file:")) { 235 return _listFile(source, endsWith); 236 } else { 237 try { 238 return _listFileOrURL(source, endsWith); 239 } catch (Exception ex) { 240 throw new IllegalActionException("Could not open '" + source 241 + ": " + KernelException.stackTraceToString(ex)); 242 } 243 } 244 } 245 246 /** Return files and directories contained in the source url. 247 * @param source The source URL to query for files and subdirectories. 248 * The source url must be a String using the "file:" protocol. 249 * @param endsWith If non-null and of length greater than 0, 250 * then only files or subdirectories that end with this string 251 * are reported. 252 * @return An array containing the files and subdirectories in 253 * the source URL. 254 * @exception IllegalActionException If the source does not have 255 * the file: protocol, or if the source is neither a file 256 * nor a directory, or if there is some other problem. 257 */ 258 private String[] _listFile(String source, String endsWith) 259 throws IllegalActionException { 260 try { 261 URL sourceURL = new URL(source); 262 263 if (sourceURL.getProtocol().equals("file")) { 264 // First, try opening the source as a file. 265 File file = new File(sourceURL.getFile()); 266 267 if (file.isDirectory()) { 268 if (!source.endsWith("/")) { 269 source = source + "/"; 270 } 271 272 // Note: we could use listFiles(FileFilter) here. 273 // but since the filter is fairly simple, we don't 274 File[] files = file.listFiles(); 275 List resultsList = new LinkedList(); 276 277 if (files != null) { 278 for (File file2 : files) { 279 String filename = file2.getName(); 280 281 if (endsWith == null || endsWith.length() == 0 282 || filename.endsWith(endsWith)) { 283 resultsList.add(source + filename); 284 } 285 } 286 } 287 288 String[] results = new String[resultsList.size()]; 289 return (String[]) resultsList.toArray(results); 290 } else if (file.isFile()) { 291 return new String[] { file.toString() }; 292 } else { 293 throw new IllegalActionException("'" + source 294 + "' is neither a file " + "or a directory?"); 295 } 296 } else { 297 // FIXME: handle urls here. 298 throw new IllegalActionException("'" + source + "' does not " 299 + "have the file: protocol"); 300 } 301 } catch (Exception ex) { 302 throw new IllegalActionException( 303 "Could not open '" + source + "' :" + ex); 304 } 305 } 306 307 /** Return files and directories contained in the source url. 308 * This method attempts to parse the html results returned by 309 * reading a URL connection, so the parsing may fail. If the URL 310 * uses the http: protocol, then the remote webserver 311 * configuration determines whether it is possible to read the 312 * contents of a directory. Usually, the server has to have 313 * directory listing enabled, and the default html file 314 * (index.htm, index.html, default.htm etc. ) must not be present. 315 * 316 * @param source The source URL to query for files and subdirectories. 317 * The source url must be a String using the "file:" protocol. 318 * @param endsWith If non-null and of length greater than 0, 319 * then only files or subdirectories that end with this string 320 * are reported. 321 * @return An array containing the files and subdirectories in 322 * the source URL. 323 * @exception IllegalActionException If the source does not have 324 * the file: protocol, or if the source is neither a file 325 * nor a directory, or if there is some other problem. */ 326 private static String[] _listFileOrURL(String source, String endsWith) 327 throws MalformedURLException, IOException { 328 // Follow redirects such as http -> https. 329 URL url = FileUtilities.followRedirects(new URL(source)); 330 331 URLConnection urlConnection = url.openConnection(); 332 String contentType = urlConnection.getContentType(); 333 334 if (!contentType.startsWith("text/html") 335 && !contentType.startsWith("text/plain")) { 336 throw new RuntimeException("Could not parse '" + source 337 + "', it is not \"text/html\", " 338 + "or \"text/plain\", it is: " 339 + urlConnection.getContentType()); 340 } 341 342 List resultsList = new LinkedList(); 343 344 BufferedReader in = null; 345 346 try { 347 in = new BufferedReader( 348 new InputStreamReader(urlConnection.getInputStream())); 349 350 if (!contentType.startsWith("text/plain") 351 && !urlConnection.getURL().toString().endsWith("/")) { 352 // text/plain urls need not end with /, but 353 // text/html urls _must_ end with / since the web server 354 // will rewrite them for us. 355 throw new RuntimeException("Could not parse '" + source 356 + "', it does not end with '/'"); 357 } 358 359 if (!source.endsWith("/")) { 360 source += "/"; 361 } 362 363 // Parse the contents in a haphazard fashion. 364 // The idea is that we look for the <BODY> line and 365 // then looks for lines that contain HREF 366 // If we find a line like HREF="foo">foo, then we report 367 // foo as being a file. 368 // A more robust way would be to use a spider, see 369 // http://www.acme.com/java/software/WebList.html 370 String line; 371 String target = null; 372 boolean sawBody = false; 373 boolean sawHREF = false; 374 375 while ((line = in.readLine()) != null) { 376 line = line.trim(); 377 378 if (line.startsWith("<BODY") || line.startsWith("<body")) { 379 sawBody = true; 380 } else { 381 if (sawBody) { 382 StringTokenizer tokenizer = new StringTokenizer(line, 383 "<\" >="); 384 385 while (tokenizer.hasMoreTokens()) { 386 String token = tokenizer.nextToken(); 387 388 if (token.compareToIgnoreCase("HREF") == 0) { 389 sawHREF = true; 390 target = null; 391 } else { 392 if (sawHREF) { 393 if (target == null) { 394 // Here, we should check that target 395 // is a relative pathname. 396 target = token; 397 } else { 398 // Check to see if the token is 399 // the same as the last token. 400 if (token.compareTo(target) != 0) { 401 sawHREF = false; 402 } else { 403 // If we were really brave, we 404 // could try opening a connection 405 // here to verify that the target 406 // exists. 407 if (endsWith == null 408 || endsWith.length() == 0 409 || target.endsWith( 410 endsWith)) { 411 resultsList 412 .add(source + target); 413 } 414 415 sawHREF = false; 416 } 417 } 418 } 419 } 420 } 421 } 422 } 423 } 424 } finally { 425 if (in != null) { 426 in.close(); 427 } 428 } 429 430 String[] results = new String[resultsList.size()]; 431 return (String[]) resultsList.toArray(results); 432 } 433 434 /////////////////////////////////////////////////////////////////// 435 //// private members //// 436 // If non-null and non-empty, then we only output file names and 437 // subdirectories that match this String. 438 private String _endsWithValue; 439 440 // Count of the iterations. 441 private int _iterationCount = 0; 442 443 // An array containing the files and subdirectories in the directory 444 // named by sourceURL. 445 // FIXME: Should we clone this? 446 private String[] _data; 447 448 // Flag to indicate whether or not to repeat the sequence. 449 private boolean _repeatFlag; 450}