001/* 002 * Copyright (c) 2004-2010 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: welker $' 006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 007 * '$Revision: 24234 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.kepler.io; 031 032import java.io.ByteArrayOutputStream; 033import java.io.File; 034import java.text.ParsePosition; 035import java.text.SimpleDateFormat; 036import java.util.ArrayList; 037import java.util.Arrays; 038import java.util.Calendar; 039import java.util.Comparator; 040import java.util.Date; 041import java.util.Hashtable; 042 043import org.apache.commons.logging.Log; 044import org.apache.commons.logging.LogFactory; 045import org.kepler.ssh.ExecException; 046import org.kepler.ssh.ExecFactory; 047import org.kepler.ssh.ExecInterface; 048import org.kepler.util.FilenameFilter_RegularPattern; 049 050/** 051 * Get the actual list of files in a directory and provide a difference from the 052 * previous listing. Do the listing either on a local dir with Java or on a 053 * remote machine using SSH. Do the listing with a set of patterns. For each 054 * file, provide its - name, - size (in bytes) - date (in UTC seconds) 055 */ 056public class DirectoryListing { 057 058 private boolean localExec = true; // local directory or a remote site? 059 private File ldir; // the local directory (as File) 060 private FilenameFilter_RegularPattern localFilter; // the local filename filter 061 private ExecInterface execObj; // class for remote execution 062 private String target; // the remote machine (user@host:port) if not local 063 private String rdir; // the directory on the remote site (as String) 064 065 private String[] filemasks; // the file masks (as used in ls command) 066 private FileInfo[] currentList; // the list of files after the last listing 067 private Hashtable prevList; // the previous list of files (in hashTable for 068 // fast lookup) 069 070 private static final Log log = LogFactory.getLog(DirectoryListing.class 071 .getName()); 072 private static final boolean isDebugging = log.isDebugEnabled(); 073 074 private boolean sortByDate = true; 075 076 private SimpleDateFormat sdf = new SimpleDateFormat("MMM dd yyyy HH:mm"); 077 private SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm"); 078 private String currentYear; 079 080 /** 081 * Constructor only for local directories. Throws IllegalArgumentException 082 * if local directory does not exist. 083 */ 084 public DirectoryListing(File directory, String[] filemasks) 085 throws IllegalArgumentException { 086 this(null, directory.getAbsolutePath(), filemasks); 087 } 088 089 /** 090 * Constructor for remote or local directories. For local machine, specify 091 * target as null, "", or "local" 092 */ 093 public DirectoryListing(String target, String directory, String[] filemasks) 094 throws IllegalArgumentException { 095 096 if (target == null) 097 target = "local"; 098 this.target = target; // just for logging 099 100 // get the host from target string 101 String host; 102 int atPos = target.indexOf('@'); 103 int colonPos = target.indexOf(':'); 104 if (colonPos >= 0 && colonPos > atPos) 105 host = target.substring(atPos + 1, colonPos); 106 else 107 host = target.substring(atPos + 1); 108 109 // local or remote target? 110 if (host.trim().equals("") || host.trim().equals("local")) { 111 localExec = true; 112 ldir = new File(directory); 113 } else { 114 localExec = false; 115 try { 116 rdir = directory; 117 execObj = ExecFactory.getExecObject(target); 118 119 } catch (ExecException e) { 120 String errText = new String("Error at execution:\n" 121 + e.getMessage()); 122 123 log.error(errText); 124 } 125 } 126 127 this.filemasks = filemasks; 128 setMask(this.filemasks); 129 130 // set current year to parse output of 'ls -l' dates correctly 131 int year = Calendar.getInstance().get(Calendar.YEAR); 132 currentYear = new String("" + year); 133 // System.out.println("Current year is: "+currentYear); 134 135 } 136 137 /** 138 * (Re)set the file mask. This can be reset before any listing. Previous 139 * listing will be forgotten. 140 */ 141 public void setMask(String[] filemasks) { 142 // convert file masks pattern to regular expression 143 if (filemasks != null && filemasks.length > 0) { 144 localFilter = new FilenameFilter_RegularPattern(); 145 for (int i = 0; i < filemasks.length; i++) { 146 String p1 = filemasks[i].replaceAll("\\.", "\\\\."); 147 String p2 = p1.replaceAll("\\*", ".*"); 148 String p3 = p2.replaceAll("\\?", "."); 149 String p4 = p3.replaceAll("\\+", "\\\\+"); 150 localFilter.add(p4); 151 152 if (isDebugging) 153 log.debug("pattern conversion: [" + p4 + "] = [" + p1 154 + "] -> [" + p2 + "] -> [" + p3 + "]"); 155 } 156 } 157 158 prevList = new Hashtable(); // forget all past elements 159 currentList = null; // forget all last listed elements 160 } 161 162 /** 163 * Kept for compatibility with older codes. 164 */ 165 public int list() throws ExecException { 166 return list(false); 167 } 168 169 /** 170 * List the directory now. Returns the number of files, -1 on error; 171 * Input: useLsOnly: false -> execute 'ls -l' on remote machine 172 */ 173 public int list( boolean useLsOnly ) throws ExecException { 174 FileInfo[] newList; 175 if (localExec) { 176 if (!ldir.isDirectory()) { 177 throw new ExecException("org.kepler.io.DirectoryListing: " 178 + ldir + " is not an existing local directory."); 179 } 180 File[] files = ldir.listFiles(localFilter); 181 newList = new FileInfo[files.length]; 182 for (int i = 0; i < files.length; i++) 183 newList[i] = new FileInfo(files[i].getName(), 184 files[i].length(), files[i].lastModified() / 1000); 185 } else { 186 // String command = new String("ls -tr " + rdir + (pattern!=null ? 187 // " | egrep "+pattern : "")); 188 String command; 189 if (useLsOnly) { 190 command = new String("ls " + rdir); 191 } else { 192 StringBuffer cmd = new StringBuffer("cd " + rdir + "; ls -ld"); 193 for (int i = 0; i < filemasks.length; i++) { 194 cmd.append(" "+filemasks[i]); 195 } 196 command = cmd.toString(); 197 } 198 199 ByteArrayOutputStream streamOut = new ByteArrayOutputStream(); 200 ByteArrayOutputStream streamErr = new ByteArrayOutputStream(); 201 202 //System.out.println("Exec command: " + command); 203 int exitCode = execObj.executeCmd(command, streamOut, streamErr); 204 205 if (exitCode != 0 && exitCode != 2) { // 2: not all masks produced result 206 log.error("Error when making connection to " + target 207 + ": exitCode = " + exitCode + " Stdout= \n" 208 + streamOut + " Stderr= \n" + streamErr); 209 return -1; 210 } 211 212 //System.out.println("---- result from machine ----\n" + 213 // streamOut); 214 newList = filter(streamOut.toString().split("\n"), useLsOnly); 215 //System.out.println("---- result after filter: " + newList); 216 217 } 218 219 if (sortByDate && !useLsOnly) { 220 long stime = System.currentTimeMillis(); 221 Arrays.sort(newList, new Comparator() { 222 public int compare(Object a, Object b) { 223 FileInfo f1 = (FileInfo) a; 224 FileInfo f2 = (FileInfo) b; 225 226 long diff = (f1.getDate() - f2.getDate()); 227 int retval = 0; 228 if (diff < 0) 229 retval = -1; 230 else if (diff > 0) 231 retval = 1; 232 else 233 retval = f1.getName().compareTo(f2.getName()); 234 235 // System.out.println(f1.getDate() + "\t" + f1.getName() + 236 // "\t" + 237 // f2.getDate() + "\t" + f2.getName() + "\t" + 238 // (int) diff + "\t" + retval); 239 240 return retval; 241 242 // return f1.getName().compareTo(f2.getName()); 243 } 244 }); 245 246 long len = System.currentTimeMillis() - stime; 247 248 // System.out.println("--- Sorted list ---"); 249 // for (int i=0; i<files.length; i++) 250 // System.out.println(files[i].lastModified() + "\t" + 251 // files[i].getName() ); 252 // System.out.println("-----------------"); 253 // System.out.println("Time to sort: " + len); 254 // System.exit(1); 255 } 256 257 // first, put the previous list into a hash set 258 if (currentList != null) { 259 for (int i = 0; i < currentList.length; i++) { 260 prevList.put(currentList[i].getName(), currentList[i]); 261 } 262 if (isDebugging) 263 log.debug("\n" + currentList.length 264 + " items added to hash set of total size " 265 + prevList.size() + "\n"); 266 } 267 currentList = newList; 268 return currentList.length; 269 } 270 271 private FileInfo[] filter(String[] files, boolean lsOnly) { 272 ArrayList al = new ArrayList(); 273 // System.out.println("---- Filtering ----\n"); 274 275 if (files == null) 276 return new FileInfo[] {}; 277 278 for (int i = 0; i < files.length; i++) { 279 String[] fi = files[i].split(" +", 9); // split the a line of 280 // "ls -l" 281 // 1. ls output format is just a single file name per file 282 // 2. traditional ls -l output format 283 // -rw-r--r-- 1 pnb ORNL\dom 775280 Aug 21 14:45 coupling.pdf 284 // -rw-r--r-- 1 pnb ORNL\dom 988107 Jul 30 2008 xgcmonwf.pdf 285 // fi[4] is the size, fi[5]-fi[7] gives the date, fi[8] is the name 286 // 287 // 3. newer ls -l output format 288 // -rw-r--r-- 1 pnorbert ccsstaff 572 2008-02-01 13:15 sshterm.txt 289 // fi[4] is the size, fi[5]-fi[6] gives the date, fi[7] is the name 290 291 // System.out.println(" " + files[i]); 292 293 if (lsOnly) { 294 // 1. ls output 295 if (localFilter.accept(null, fi[0])) { 296 FileInfo fileInfo = new FileInfo( fi[0], -1, -1); 297 al.add(fileInfo); 298 } 299 300 } else if (fi.length >= 9 && localFilter.accept(null, fi[8])) { 301 // 2. traditional ls -l output format 302 // System.out.println("--- " + fi[0] + " | " + fi[1] + " | " + 303 // fi[2] + " | " + 304 // fi[3] + " | " + fi[4] + " | " + fi[5] + " | " + 305 // fi[6] + " | " + fi[7] + " | " + fi[8] ); 306 Long size = new Long(fi[4]); 307 FileInfo fileInfo = new FileInfo(fi[8], size.longValue(), 308 getUTC(fi[5], fi[6], fi[7])); 309 al.add(fileInfo); 310 // System.out.println("+ " + files[i]); 311 312 } else if ( fi.length == 8 && localFilter.accept(null, fi[7].trim() )) { 313 // 3. newer ls -l output format 314 //System.out.println("--- " + fi[0] + " | " + fi[1] + " | " + fi[2] + " | " + 315 // fi[3] + " | " + fi[4] + " | " + fi[5] + " | " + 316 // fi[6] + " | " + fi[7] ); 317 Long size = new Long(fi[4]); 318 FileInfo fileInfo = new FileInfo( fi[7].trim(), size.longValue(), 319 getUTC(fi[5], fi[6]) ); 320 al.add(fileInfo); 321 //System.out.println("+ " + files[i]); 322 } else { 323 // System.out.println(" " + files[i]); 324 } 325 } 326 327 FileInfo[] newList = new FileInfo[al.size()]; 328 for (int i = 0; i < al.size(); i++) { 329 newList[i] = (FileInfo) al.get(i); 330 } 331 return newList; 332 } 333 334 /* 335 * create the UTC seconds date from the 'ls -l' output (max minute 336 * resolution...) 337 */ 338 private long getUTC(String month, String day, String timeORyear) { 339 String dateStr; 340 if (timeORyear.indexOf(':') > -1) { // this is hh:mm 341 dateStr = month + " " + day + " " + currentYear + " " + timeORyear; 342 } else { // this is yyyy year 343 dateStr = month + " " + day + " " + timeORyear + " 00:00"; 344 } 345 346 ParsePosition pp = new ParsePosition(0); 347 Date d = sdf.parse(dateStr, pp); 348 // System.out.println(" Parsed " + dateStr + " to " + d); 349 long utc = d.getTime() / 1000; 350 return utc; 351 } 352 353/* create the UTC seconds date from the new 'ls -l' output (max minute resolution...) 354 -rw-r--r-- 1 pnorbert ccsstaff 572 2008-02-01 13:15 sshterm.txt 355*/ 356 private long getUTC( String date_, String time_) { 357 String dateStr = date_ + " " + time_; 358 ParsePosition pp = new ParsePosition(0); 359 Date d = sdf2.parse( dateStr, pp); 360 //System.out.println("getUTC: Parsed " + dateStr + " to " + d); 361 long utc = d.getTime() / 1000; 362 return utc; 363 } 364 365 /** Get the list itself. */ 366 public FileInfo[] getList() { 367 return currentList; 368 } 369 370 /** 371 * Get the list of 'new' files, i.e. that are in currentList and not in 372 * prevList. If parameter 'checkModifications' is false, the difference 373 * between currentList and prevList will be based on the file names only. If 374 * it is true, files' date and size is also checked, i.e. modified files 375 * will also be retruned. 376 * 377 * @return FileInfo[] of filenames 378 */ 379 public FileInfo[] getNewFiles(boolean checkModifications) { 380 381 if (currentList == null) 382 return null; 383 384 int initial_length = currentList.length - prevList.size(); 385 if (initial_length <= 0) 386 initial_length = 1; 387 388 ArrayList newList = new ArrayList(initial_length); 389 // the final size can be greater if modified files are inserted as well 390 391 boolean isNew; 392 for (int i = 0; i < currentList.length; i++) { 393 // it is a new file 394 // if the current element's name is not in the hash table 395 isNew = !prevList.containsKey(currentList[i].getName()); 396 397 if (checkModifications && !isNew) { 398 // file is in the hash table 399 // but we should check the size and date as well 400 FileInfo old = (FileInfo) prevList 401 .get(currentList[i].getName()); 402 if (old.getDate() != currentList[i].getDate() 403 || old.getSize() != currentList[i].getSize()) { 404 405 isNew = true; 406 log.debug("Modified file found: " 407 + currentList[i].getName() + "; size " 408 + currentList[i].getSize() + "/" + old.getSize() 409 + "; date " + currentList[i].getDate() + "/" 410 + old.getDate()); 411 } 412 413 } 414 415 if (isNew) { 416 newList.add(currentList[i]); 417 } 418 } 419 420 log.debug("Number of new files = " + newList.size()); 421 422 return (FileInfo[]) newList.toArray(new FileInfo[0]); 423 } 424 425}