001/* An actor that produces an array that lists the contents of a directory. 002 003 @Copyright (c) 2003-2017 The Regents of the University of California. 004 All rights reserved. 005 006 Permission is hereby granted, without written agreement and without 007 license or royalty fees, to use, copy, modify, and distribute this 008 software and its documentation for any purpose, provided that the 009 above copyright notice and the following two paragraphs appear in all 010 copies of this software. 011 012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 016 SUCH DAMAGE. 017 018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 023 ENHANCEMENTS, OR MODIFICATIONS. 024 025 PT_COPYRIGHT_VERSION 2 026 COPYRIGHTENDKEY 027 */ 028package ptolemy.actor.lib.io; 029 030import java.io.BufferedReader; 031import java.io.File; 032import java.io.FilenameFilter; 033import java.io.IOException; 034import java.io.InputStreamReader; 035import java.net.URL; 036import java.net.URLConnection; 037import java.net.URLDecoder; 038import java.util.ArrayList; 039import java.util.Enumeration; 040import java.util.Iterator; 041import java.util.LinkedList; 042import java.util.List; 043import java.util.StringTokenizer; 044import java.util.jar.JarEntry; 045import java.util.jar.JarFile; 046 047import ptolemy.actor.lib.SequenceSource; 048import ptolemy.actor.parameters.FilePortParameter; 049import ptolemy.data.ArrayToken; 050import ptolemy.data.BooleanToken; 051import ptolemy.data.StringToken; 052import ptolemy.data.expr.Parameter; 053import ptolemy.data.expr.StringParameter; 054import ptolemy.data.type.ArrayType; 055import ptolemy.data.type.BaseType; 056import ptolemy.kernel.CompositeEntity; 057import ptolemy.kernel.util.Attribute; 058import ptolemy.kernel.util.IllegalActionException; 059import ptolemy.kernel.util.NameDuplicationException; 060import ptolemy.util.FileUtilities; 061import ptolemy.util.RecursiveFileFilter; 062 063/////////////////////////////////////////////////////////////////// 064//// DirectoryListing 065 066/** 067 Given a URL or directory name, produce an array of file names in that 068 directory that match an (optional) pattern. 069 070 <p>The file names include the complete path, unless <i>relative</i> 071 is set to true, in which case, the names are relative to the 072 directory. The pattern is a regular expression. For a reference on 073 regular expression syntax see: <a href="http://download.oracle.com/javase/tutorial/essential/regex/#in_browser">http://download.oracle.com/javase/tutorial/essential/regex/</a></p> 074 075 <p>If <i>directoryOrURL</i> is a local directory (not a URL), then you can 076 optionally list only contained files or directories. 077 If <i>listOnlyDirectories</i> is true, then only directories will be 078 listed on the output. If <i>listOnlyFiles</i> is true, then only 079 files will be listed on the output. If both are true, then an exception 080 is thrown.</p> 081 082 <p>If <i>directoryOrURL</i> is a URL, then this actor assumes that the 083 server will list the contents of the referenced directory in an 084 HTML file where each file listed will have the following form:</p> 085 086 <pre> 087 <a href="filename">filename</a> 088 </pre> 089 090 <p>If the filename is longer than 20 characters, then only the first 091 20 characters of the two appearances of the filename are compared, 092 since some servers truncate the file names.</p> 093 094 <p>If <i>allowEmptyDirectory</i> controls whether reading an empty directory 095 will throw an exception.</p> 096 097 <p>Note that DirectoryListing returns the contents of the directory 098 in a different order depending on whether one is using the Sun JVM 099 or the IBM JVM. Thus, you may want to connect the output to an 100 ArraySort actor.</p> 101 102 @author Christopher Hylands, Edward A. Lee, Contributor: Jianwu Wang 103 @version $Id$ 104 @since Ptolemy II 4.0 105 @Pt.ProposedRating Yellow (eal) 106 @Pt.AcceptedRating Red (liuj) 107 */ 108public class DirectoryListing extends SequenceSource implements FilenameFilter { 109 /** Construct an actor with the given container and name. 110 * @param container The container. 111 * @param name The name of this actor. 112 * @exception IllegalActionException If the actor cannot be contained 113 * by the proposed container. 114 * @exception NameDuplicationException If the container already has an 115 * actor with this name. 116 */ 117 public DirectoryListing(CompositeEntity container, String name) 118 throws IllegalActionException, NameDuplicationException { 119 super(container, name); 120 121 // Tell the file browser to allow only selection of directories. 122 directoryOrURL = new FilePortParameter(this, "directoryOrURL"); 123 new Parameter(directoryOrURL, "allowFiles", BooleanToken.FALSE); 124 new Parameter(directoryOrURL, "allowDirectories", BooleanToken.TRUE); 125 126 output.setTypeEquals(new ArrayType(BaseType.STRING)); 127 128 pattern = new StringParameter(this, "pattern"); 129 pattern.setExpression(""); 130 131 listOnlyDirectories = new Parameter(this, "listOnlyDirectories"); 132 listOnlyDirectories.setTypeEquals(BaseType.BOOLEAN); 133 listOnlyDirectories.setExpression("false"); 134 135 listOnlyFiles = new Parameter(this, "listOnlyFiles"); 136 listOnlyFiles.setTypeEquals(BaseType.BOOLEAN); 137 listOnlyFiles.setExpression("false"); 138 139 allowEmptyDirectory = new Parameter(this, "allowEmptyDirectory"); 140 allowEmptyDirectory.setTypeEquals(BaseType.BOOLEAN); 141 allowEmptyDirectory.setExpression("false"); 142 143 // Show the firingCountLimit parameter last. 144 firingCountLimit.moveToLast(); 145 146 recursive = new Parameter(this, "recursive"); 147 recursive.setTypeEquals(BaseType.BOOLEAN); 148 recursive.setExpression("false"); 149 150 relative = new Parameter(this, "relative"); 151 relative.setTypeEquals(BaseType.BOOLEAN); 152 relative.setExpression("false"); 153 } 154 155 /////////////////////////////////////////////////////////////////// 156 //// ports and parameters //// 157 158 /** The directory name or URL from which to read. This is a string with 159 * any form accepted by {@link ptolemy.actor.parameters.FilePortParameter}. 160 * By default, this is empty. An empty string is equivalent to specifying 161 * "$CWD", the current working directory. 162 */ 163 public FilePortParameter directoryOrURL; 164 165 /** If true, and <i>directoryOrURL</i> refers to a local directory (not a URL), 166 * then only directories will be listed on the output. If <i>directoryOrURL</i> 167 * is a URL, then this parameter is ignored (there appears to be no reliable 168 * way to tell whether the URL refers to a directory or file). 169 * This is a boolean that defaults to false. 170 */ 171 public Parameter listOnlyDirectories; 172 173 /** If true, and <i>directoryOrURL</i> refers to a local directory (not a URL), 174 * then only files will be listed on the output. If <i>directoryOrURL</i> 175 * is a URL, then this parameter is ignored (there appears to be no reliable 176 * way to tell whether the URL refers to a directory or file). 177 * This is a boolean that defaults to false. 178 */ 179 public Parameter listOnlyFiles; 180 181 /** If true, and <i>directoryOrURL</i> refers to a local directory 182 * (not a URL), that is empty, then the output will be empty 183 * string array and no exception is reported. If 184 * <i>directoryOrURL</i> is a URL, then this parameter is ignored 185 * (there appears to be no reliable way to tell whether the URL 186 * refers to a directory or file). This is a boolean that 187 * defaults to false. 188 */ 189 public Parameter allowEmptyDirectory; 190 191 /** If non-empty, then only output file and directory names that 192 * match the specified (regular expression) pattern. 193 * The default value of this parameter is the empty String "", 194 * which indicates that everything matches. 195 */ 196 public StringParameter pattern; 197 198 /** Whether files in the subdirectories should be searched as well. 199 * The recursive parameter only works if <i>directoryOrURL</i> 200 * refers to a local directory (not a URL). 201 * This is a boolean that defaults to false. 202 */ 203 public Parameter recursive; 204 205 /** If true, then produce an array with file names relative to the 206 * specified directory. This is a boolean that defaults to false, 207 * which causes the absolute (complete) path to be produced. 208 */ 209 public Parameter relative; 210 211 /////////////////////////////////////////////////////////////////// 212 //// public methods //// 213 214 /** Return true if the specified name matches the specified pattern, 215 * or if no pattern has been specified. 216 * @param directory The directory in which the file was found 217 * (ignored, but required by the FilenameFilter interface). 218 * @param name The name of the file or directory. 219 * @return True if the specified name matches. 220 */ 221 @Override 222 public boolean accept(File directory, String name) { 223 // The accept() method is here primarily for backward 224 // compatibility as the DirectoryList class implements 225 // FileFilter. It could be thet Kepler is using this. 226 if (_recursiveFileFilter == null) { 227 _recursiveFileFilter = new RecursiveFileFilter(_recursive, 228 true /*includeFiles*/, true /*includeDirectories*/, 229 _listOnlyFiles, _listOnlyDirectories, _pattern, 230 false /*escape*/); 231 } 232 return _recursiveFileFilter.accept(directory, name); 233 } 234 235 /** Override the base class to locally cache parameter values. 236 * @param attribute The attribute that has changed. 237 * @exception IllegalActionException If the specified attribute 238 * is <i>URL</i> and the file cannot be opened. 239 */ 240 @Override 241 public void attributeChanged(Attribute attribute) 242 throws IllegalActionException { 243 // Parameter values are cached to avoid code duplication. 244 if (attribute == pattern) { 245 _pattern = pattern.stringValue(); 246 } else if (attribute == listOnlyDirectories) { 247 _listOnlyDirectories = ((BooleanToken) listOnlyDirectories 248 .getToken()).booleanValue(); 249 } else if (attribute == listOnlyFiles) { 250 _listOnlyFiles = ((BooleanToken) listOnlyFiles.getToken()) 251 .booleanValue(); 252 } else if (attribute == recursive) { 253 _recursive = ((BooleanToken) recursive.getToken()).booleanValue(); 254 } else { 255 super.attributeChanged(attribute); 256 } 257 } 258 259 /** Output an array containing file and/or directory names. 260 * @exception IllegalActionException If there's no director or 261 * if the directory or URL is invalid. 262 */ 263 @Override 264 public void fire() throws IllegalActionException { 265 super.fire(); 266 267 directoryOrURL.update(); 268 269 URL sourceURL = directoryOrURL.asURL(); 270 271 if (sourceURL == null) { 272 // Nothing was specified. Default to the current working directory. 273 directoryOrURL.setExpression("$CWD"); 274 sourceURL = directoryOrURL.asURL(); 275 if (sourceURL == null) { 276 throw new IllegalActionException(this, 277 "Cannot determine current working directory."); 278 } 279 } 280 281 boolean emptyDirectoryAllow = ((BooleanToken) allowEmptyDirectory 282 .getToken()).booleanValue(); 283 284 if (sourceURL.getProtocol().equals("file")) { 285 File sourceFile = directoryOrURL.asFile(); 286 287 if (sourceFile.isDirectory()) { 288 if (_debugging) { 289 _debug("Reading directory."); 290 } 291 292 File[] listedFiles = RecursiveFileFilter.listFiles(sourceFile, 293 _recursive, true /*includeFiles*/, 294 true /*includeDirectories*/, _listOnlyFiles, 295 _listOnlyDirectories, _pattern, false /*escape*/); 296 297 ArrayList<StringToken> result = new ArrayList<StringToken>(); 298 for (File file : listedFiles) { 299 300 String path = file.getAbsolutePath(); 301 if (((BooleanToken) relative.getToken()).booleanValue()) { 302 // Strip off the directory name and trailing slash. 303 int slash = 1; 304 String directoryName = sourceFile.getAbsolutePath(); 305 if (directoryName.endsWith("/")) { 306 slash = 0; 307 } 308 path = path.substring(directoryName.length() + slash); 309 } 310 311 if (_debugging) { 312 _debug("Path: " + path); 313 } 314 315 result.add(new StringToken(path)); 316 } 317 318 if (!emptyDirectoryAllow) { 319 if (result.size() == 0) { 320 throw new IllegalActionException(this, 321 "No files or directories that match the pattern."); 322 } 323 } 324 325 StringToken[] resultArray = new StringToken[result.size()]; 326 327 for (int i = 0; i < resultArray.length; i++) { 328 resultArray[i] = result.get(i); 329 } 330 331 output.broadcast(new ArrayToken(BaseType.STRING, resultArray)); 332 } else if (sourceFile.isFile()) { 333 StringToken[] result = new StringToken[1]; 334 result[0] = new StringToken(sourceFile.toString()); 335 336 if (_debugging) { 337 _debug("Listing just the specified file: " 338 + result[0].stringValue()); 339 } 340 341 output.broadcast(new ArrayToken(BaseType.STRING, result)); 342 } else { 343 throw new IllegalActionException("'" + directoryOrURL 344 + "' is neither a file " + "nor a directory."); 345 } 346 } else { 347 try { 348 _readURL(sourceURL); 349 } catch (IOException ex) { 350 throw new IllegalActionException(this, ex, 351 "Error reading the URL \'" + directoryOrURL + "\'."); 352 } 353 } 354 } 355 356 /////////////////////////////////////////////////////////////////// 357 //// private methods //// 358 359 /** Read the URL and produce output. 360 * @param sourceURL The source URL. 361 */ 362 private void _readURL(URL sourceURL) 363 throws IOException, IllegalActionException { 364 // Handle urls here. 365 if (_debugging) { 366 _debug("Reading URL: " + sourceURL); 367 } 368 369 // Follow redirects such as http -> https. 370 sourceURL = FileUtilities.followRedirects(sourceURL); 371 372 List<StringToken> resultsList = new LinkedList<StringToken>(); 373 374 URLConnection urlConnection = sourceURL.openConnection(); 375 String contentType = urlConnection.getContentType(); 376 377 if (!contentType.startsWith("text/html") 378 && !contentType.startsWith("text/plain")) { 379 if (!sourceURL.toString().startsWith("jar:")) { 380 throw new IllegalActionException(this, 381 "Could not parse '" + directoryOrURL.stringValue() 382 + "'; as URL '" + sourceURL 383 + "', it is not \"text/html\", " 384 + "or \"text/plain\", it is: " 385 + urlConnection.getContentType()); 386 } else { 387 // Reading from a directory from a jar file. 388 389 if (_recursiveFileFilter == null) { 390 _recursiveFileFilter = new RecursiveFileFilter(_recursive, 391 true /*includeFiles*/, true /*includeDirectories*/, 392 _listOnlyFiles, _listOnlyDirectories, _pattern, 393 false /*escape*/); 394 } 395 396 // Every thing after the !/ 397 String path = sourceURL.getPath() 398 .substring(sourceURL.getPath().indexOf("!/") + 2); 399 400 // Get rid of the jar:file and read up to the ! 401 String jarPath = sourceURL.getPath().substring(5, 402 sourceURL.getPath().indexOf("!")); 403 JarFile jar = null; 404 try { 405 jar = new JarFile(URLDecoder.decode(jarPath, "UTF-8")); 406 Enumeration<JarEntry> entries = jar.entries(); 407 while (entries.hasMoreElements()) { 408 String name = entries.nextElement().getName(); 409 if (name.startsWith(path)) { 410 String entry = name.substring(path.length()); 411 int slashIndex = entry.indexOf("/"); 412 if (slashIndex >= 0) { 413 entry = entry.substring(0, slashIndex); 414 } 415 416 // FIXME: recursion on URLs not yet supported. 417 if (_recursiveFileFilter.accept(null, entry)) { 418 StringToken results = new StringToken(entry); 419 // Add the results here if it is not present. 420 // We want to preserve the order, otherwise we could 421 // use a Set. 422 if (!resultsList.contains(results)) { 423 resultsList.add(results); 424 } 425 } 426 } 427 } 428 } finally { 429 if (jar != null) { 430 jar.close(); 431 } 432 } 433 } 434 } else { 435 436 BufferedReader in = null; 437 try { 438 in = new BufferedReader( 439 new InputStreamReader(urlConnection.getInputStream())); 440 441 if (!contentType.startsWith("text/plain") 442 && !urlConnection.getURL().toString().endsWith("/")) { 443 // text/plain urls need not end with /, but 444 // text/html urls _must_ end with / since the web server 445 // will rewrite them for us. 446 throw new IllegalActionException(this, 447 "Could not parse '" + directoryOrURL.stringValue() 448 + "'; it needs to end with '/'"); 449 } 450 451 // Parse the contents in a haphazard fashion. 452 // The idea is that we look for the <BODY> line and 453 // then look for lines that contain HREF 454 // If we find a line like HREF="foo">foo, then we report 455 // foo as being a file. 456 // A more robust way would be to use a spider, see 457 // http://www.acme.com/java/software/WebList.html 458 String line; 459 String target = null; 460 boolean sawBody = false; 461 boolean sawHREF = false; 462 463 while ((line = in.readLine()) != null) { 464 line = line.trim(); 465 466 if (_debugging) { 467 _debug(line); 468 } 469 470 if (line.startsWith("<BODY") || line.startsWith("<body")) { 471 if (_debugging) { 472 _debug("Saw Body tag"); 473 } 474 sawBody = true; 475 } else { 476 if (sawBody) { 477 StringTokenizer tokenizer = new StringTokenizer( 478 line, "<\" >="); 479 480 while (tokenizer.hasMoreTokens()) { 481 String token = tokenizer.nextToken(); 482 483 if (token.compareToIgnoreCase("HREF") == 0) { 484 if (_debugging) { 485 _debug("Saw HREF"); 486 } 487 sawHREF = true; 488 target = null; 489 } else { 490 if (sawHREF) { 491 if (target == null) { 492 // FIXME: Here, we should check that target 493 // is a relative pathname. 494 target = token; 495 } else { 496 // Check to see whether the first 20 497 // characters of the token are 498 // the same as the last token. 499 String reference = target; 500 501 if (reference.length() > 20) { 502 reference = target.substring(0, 503 20); 504 } 505 506 if (!token.startsWith(reference)) { 507 if (_debugging) { 508 _debug("token \"" + token 509 + "\" does not start with href."); 510 } 511 sawHREF = false; 512 } else { 513 // Having the first argument be null causes 514 // accept to determine if target is a file or 515 // directory by checking for a trailing /. 516 if (accept(null, target)) { 517 if (_debugging) { 518 _debug("target \"" 519 + token 520 + "\" was accepted."); 521 } 522 523 // Make sure directoryOrURL ends with a slash. 524 String base = directoryOrURL 525 .stringValue(); 526 527 if (!base.endsWith("/")) { 528 base = base + "/"; 529 } 530 531 // FIXME: Is there any way to tell whether 532 // the result is a directory or file? 533 resultsList.add( 534 new StringToken(base 535 + target)); 536 } else { 537 if (_debugging) { 538 _debug("target \"" 539 + token 540 + "\" was not accepted."); 541 } 542 } 543 544 sawHREF = false; 545 } 546 } 547 } 548 } 549 } 550 } 551 } 552 } 553 } finally { 554 if (in != null) { 555 in.close(); 556 } 557 } 558 } 559 560 if (_debugging) { 561 _debug("----- end of listing."); 562 _debug("----- extracted results:"); 563 564 Iterator results = resultsList.iterator(); 565 566 while (results.hasNext()) { 567 _debug(((StringToken) results.next()).stringValue()); 568 } 569 } 570 571 StringToken[] results = new StringToken[resultsList.size()]; 572 output.broadcast( 573 new ArrayToken(BaseType.STRING, resultsList.toArray(results))); 574 } 575 576 /////////////////////////////////////////////////////////////////// 577 //// private members //// 578 579 /** Cached value of listOnlyDirectories parameter. */ 580 private boolean _listOnlyDirectories; 581 582 /** Cached value of listOnlyFiles parameter. */ 583 private boolean _listOnlyFiles; 584 585 /** Cached value of patternparameter. */ 586 private String _pattern; 587 588 /** Cached value of recursive parameter. */ 589 private boolean _recursive; 590 591 /** RecursiveFileFilter used by the accept() method of this class. */ 592 private RecursiveFileFilter _recursiveFileFilter; 593}