001/*
002 * Copyright (c) 2004-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: welker $'
006 * '$Date: 2010-05-06 05:21:26 +0000 (Thu, 06 May 2010) $' 
007 * '$Revision: 24234 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.kepler.io;
031
032import java.io.ByteArrayOutputStream;
033import java.io.File;
034import java.text.ParsePosition;
035import java.text.SimpleDateFormat;
036import java.util.ArrayList;
037import java.util.Arrays;
038import java.util.Calendar;
039import java.util.Comparator;
040import java.util.Date;
041import java.util.Hashtable;
042
043import org.apache.commons.logging.Log;
044import org.apache.commons.logging.LogFactory;
045import org.kepler.ssh.ExecException;
046import org.kepler.ssh.ExecFactory;
047import org.kepler.ssh.ExecInterface;
048import org.kepler.util.FilenameFilter_RegularPattern;
049
050/**
051 * Get the actual list of files in a directory and provide a difference from the
052 * previous listing. Do the listing either on a local dir with Java or on a
053 * remote machine using SSH. Do the listing with a set of patterns. For each
054 * file, provide its - name, - size (in bytes) - date (in UTC seconds)
055 */
056public class DirectoryListing {
057
058        private boolean localExec = true; // local directory or a remote site?
059        private File ldir; // the local directory (as File)
060        private FilenameFilter_RegularPattern localFilter; // the local filename filter
061        private ExecInterface execObj; // class for remote execution
062        private String target; // the remote machine (user@host:port) if not local
063        private String rdir; // the directory on the remote site (as String)
064
065        private String[] filemasks;     // the file masks (as used in ls command)
066        private FileInfo[] currentList; // the list of files after the last listing
067        private Hashtable prevList; // the previous list of files (in hashTable for
068                                                                // fast lookup)
069
070        private static final Log log = LogFactory.getLog(DirectoryListing.class
071                        .getName());
072        private static final boolean isDebugging = log.isDebugEnabled();
073
074        private boolean sortByDate = true;
075
076        private SimpleDateFormat sdf = new SimpleDateFormat("MMM dd yyyy HH:mm");
077        private SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm");
078        private String currentYear;
079
080        /**
081         * Constructor only for local directories. Throws IllegalArgumentException
082         * if local directory does not exist.
083         */
084        public DirectoryListing(File directory, String[] filemasks)
085                        throws IllegalArgumentException {
086                this(null, directory.getAbsolutePath(), filemasks);
087        }
088
089        /**
090         * Constructor for remote or local directories. For local machine, specify
091         * target as null, "", or "local"
092         */
093        public DirectoryListing(String target, String directory, String[] filemasks)
094                        throws IllegalArgumentException {
095
096                if (target == null)
097                        target = "local";
098                this.target = target; // just for logging
099
100                // get the host from target string
101                String host;
102                int atPos = target.indexOf('@');
103                int colonPos = target.indexOf(':');
104                if (colonPos >= 0 && colonPos > atPos)
105                        host = target.substring(atPos + 1, colonPos);
106                else
107                        host = target.substring(atPos + 1);
108
109                // local or remote target?
110                if (host.trim().equals("") || host.trim().equals("local")) {
111                        localExec = true;
112                        ldir = new File(directory);
113                } else {
114                        localExec = false;
115                        try {
116                                rdir = directory;
117                                execObj = ExecFactory.getExecObject(target);
118
119                        } catch (ExecException e) {
120                                String errText = new String("Error at execution:\n"
121                                        + e.getMessage());
122
123                            log.error(errText);
124                        }
125                }
126
127                this.filemasks = filemasks;
128                setMask(this.filemasks);
129
130                // set current year to parse output of 'ls -l' dates correctly
131                int year = Calendar.getInstance().get(Calendar.YEAR);
132                currentYear = new String("" + year);
133                // System.out.println("Current year is: "+currentYear);
134
135        }
136
137        /**
138         * (Re)set the file mask. This can be reset before any listing. Previous
139         * listing will be forgotten.
140         */
141        public void setMask(String[] filemasks) {
142                // convert file masks pattern to regular expression
143                if (filemasks != null && filemasks.length > 0) {
144                        localFilter = new FilenameFilter_RegularPattern();
145                        for (int i = 0; i < filemasks.length; i++) {
146                                String p1 = filemasks[i].replaceAll("\\.", "\\\\.");
147                                String p2 = p1.replaceAll("\\*", ".*");
148                                String p3 = p2.replaceAll("\\?", ".");
149                                String p4 = p3.replaceAll("\\+", "\\\\+");
150                                localFilter.add(p4);
151
152                                if (isDebugging)
153                                        log.debug("pattern conversion: [" + p4 + "] = [" + p1
154                                                        + "] -> [" + p2 + "] -> [" + p3 + "]");
155                        }
156                }
157
158                prevList = new Hashtable(); // forget all past elements
159                currentList = null; // forget all last listed elements
160        }
161
162        /**
163         * Kept for compatibility with older codes. 
164         */
165        public int list() throws ExecException {
166                return list(false);
167        }
168        
169        /**
170         * List the directory now. Returns the number of files, -1 on error;
171         * Input: useLsOnly: false -> execute 'ls -l' on remote machine
172         */
173        public int list( boolean useLsOnly ) throws ExecException {
174                FileInfo[] newList;
175                if (localExec) {
176                        if (!ldir.isDirectory()) {
177                                throw new ExecException("org.kepler.io.DirectoryListing: "
178                                                + ldir + " is not an existing local directory.");
179                        }
180                        File[] files = ldir.listFiles(localFilter);
181                        newList = new FileInfo[files.length];
182                        for (int i = 0; i < files.length; i++)
183                                newList[i] = new FileInfo(files[i].getName(),
184                                                files[i].length(), files[i].lastModified() / 1000);
185                } else {
186                        // String command = new String("ls -tr " + rdir + (pattern!=null ?
187                        // " | egrep "+pattern : ""));
188                        String command;
189                        if (useLsOnly) {
190                                command = new String("ls " + rdir);
191                        } else {
192                                StringBuffer cmd = new StringBuffer("cd " + rdir + "; ls -ld");
193                                for (int i = 0; i < filemasks.length; i++) {
194                                        cmd.append(" "+filemasks[i]);
195                                }
196                                command = cmd.toString();
197                        }
198                        
199                        ByteArrayOutputStream streamOut = new ByteArrayOutputStream();
200                        ByteArrayOutputStream streamErr = new ByteArrayOutputStream();
201
202                        //System.out.println("Exec command: " + command);
203                        int exitCode = execObj.executeCmd(command, streamOut, streamErr);
204
205                        if (exitCode != 0 && exitCode != 2) { // 2: not all masks produced result
206                                log.error("Error when making connection to " + target
207                                                + ": exitCode = " + exitCode + "  Stdout= \n"
208                                                + streamOut + "  Stderr= \n" + streamErr);
209                                return -1;
210                        }
211
212                        //System.out.println("---- result from machine ----\n" +
213                        //     streamOut);
214                        newList = filter(streamOut.toString().split("\n"), useLsOnly);
215                        //System.out.println("---- result after filter: " + newList);
216
217                }
218
219                if (sortByDate && !useLsOnly) {
220                        long stime = System.currentTimeMillis();
221                        Arrays.sort(newList, new Comparator() {
222                                public int compare(Object a, Object b) {
223                                        FileInfo f1 = (FileInfo) a;
224                                        FileInfo f2 = (FileInfo) b;
225
226                                        long diff = (f1.getDate() - f2.getDate());
227                                        int retval = 0;
228                                        if (diff < 0)
229                                                retval = -1;
230                                        else if (diff > 0)
231                                                retval = 1;
232                                        else
233                                                retval = f1.getName().compareTo(f2.getName());
234
235                                        // System.out.println(f1.getDate() + "\t" + f1.getName() +
236                                        // "\t" +
237                                        // f2.getDate() + "\t" + f2.getName() + "\t" +
238                                        // (int) diff + "\t" + retval);
239
240                                        return retval;
241
242                                        // return f1.getName().compareTo(f2.getName());
243                                }
244                        });
245
246                        long len = System.currentTimeMillis() - stime;
247
248                        // System.out.println("--- Sorted list ---");
249                        // for (int i=0; i<files.length; i++)
250                        // System.out.println(files[i].lastModified() + "\t" +
251                        // files[i].getName() );
252                        // System.out.println("-----------------");
253                        // System.out.println("Time to sort: " + len);
254                        // System.exit(1);
255                }
256
257                // first, put the previous list into a hash set
258                if (currentList != null) {
259                        for (int i = 0; i < currentList.length; i++) {
260                                prevList.put(currentList[i].getName(), currentList[i]);
261                        }
262                        if (isDebugging)
263                                log.debug("\n" + currentList.length
264                                                + " items added to hash set of total size "
265                                                + prevList.size() + "\n");
266                }
267                currentList = newList;
268                return currentList.length;
269        }
270
271        private FileInfo[] filter(String[] files, boolean lsOnly) {
272                ArrayList al = new ArrayList();
273                // System.out.println("---- Filtering ----\n");
274
275                if (files == null)
276                        return new FileInfo[] {};
277
278                for (int i = 0; i < files.length; i++) {
279                        String[] fi = files[i].split(" +", 9); // split the a line of
280                                                                                                        // "ls -l"
281                        // 1. ls output format is just a single file name per file
282                        // 2. traditional ls -l output format
283                        //  -rw-r--r--   1 pnb  ORNL\dom  775280 Aug 21 14:45 coupling.pdf
284                        //  -rw-r--r--   1 pnb  ORNL\dom  988107 Jul 30  2008 xgcmonwf.pdf
285                        // fi[4] is the size, fi[5]-fi[7] gives the date, fi[8] is the name
286                        //
287                        // 3. newer ls -l output format
288                        // -rw-r--r--  1 pnorbert ccsstaff       572 2008-02-01 13:15 sshterm.txt
289                        // fi[4] is the size, fi[5]-fi[6] gives the date, fi[7] is the name
290
291                        // System.out.println("    " + files[i]);
292
293                        if (lsOnly) {
294                                // 1. ls output
295                                if (localFilter.accept(null, fi[0])) {
296                                        FileInfo fileInfo = new FileInfo( fi[0], -1, -1);
297                                        al.add(fileInfo);
298                                }
299
300                        } else if (fi.length >= 9 && localFilter.accept(null, fi[8])) {
301                                // 2. traditional ls -l output format
302                                // System.out.println("--- " + fi[0] + " | " + fi[1] + " | " +
303                                // fi[2] + " | " +
304                                // fi[3] + " | " + fi[4] + " | " + fi[5] + " | " +
305                                // fi[6] + " | " + fi[7] + " | " + fi[8] );
306                                Long size = new Long(fi[4]);
307                                FileInfo fileInfo = new FileInfo(fi[8], size.longValue(),
308                                                getUTC(fi[5], fi[6], fi[7]));
309                                al.add(fileInfo);
310                                // System.out.println("+ " + files[i]);
311
312                        } else if ( fi.length == 8 && localFilter.accept(null, fi[7].trim() )) {
313                                // 3. newer ls -l output format
314                                //System.out.println("--- " + fi[0] + " | " + fi[1] + " | " + fi[2] + " | " +
315                                //                              fi[3] +  " | " + fi[4] + " | " + fi[5] + " | " +
316                                //                              fi[6] +  " | " + fi[7] );
317                Long size = new Long(fi[4]);
318                FileInfo fileInfo = new FileInfo( fi[7].trim(), size.longValue(),
319                                                  getUTC(fi[5], fi[6]) );
320                al.add(fileInfo);
321                                //System.out.println("+ " + files[i]);
322                        } else {
323                                // System.out.println("  " + files[i]);
324                        }
325                }
326
327                FileInfo[] newList = new FileInfo[al.size()];
328                for (int i = 0; i < al.size(); i++) {
329                        newList[i] = (FileInfo) al.get(i);
330                }
331                return newList;
332        }
333
334        /*
335         * create the UTC seconds date from the 'ls -l' output (max minute
336         * resolution...)
337         */
338        private long getUTC(String month, String day, String timeORyear) {
339                String dateStr;
340                if (timeORyear.indexOf(':') > -1) { // this is hh:mm
341                        dateStr = month + " " + day + " " + currentYear + " " + timeORyear;
342                } else { // this is yyyy year
343                        dateStr = month + " " + day + " " + timeORyear + " 00:00";
344                }
345
346                ParsePosition pp = new ParsePosition(0);
347                Date d = sdf.parse(dateStr, pp);
348                // System.out.println(" Parsed " + dateStr + " to " + d);
349                long utc = d.getTime() / 1000;
350                return utc;
351        }
352
353/* create the UTC seconds date from the new 'ls -l' output (max minute resolution...)
354        -rw-r--r--  1 pnorbert ccsstaff       572 2008-02-01 13:15 sshterm.txt
355*/
356        private long getUTC( String date_, String time_) {
357                String dateStr = date_ + " " + time_;
358                ParsePosition pp = new ParsePosition(0);
359                Date d = sdf2.parse( dateStr, pp);
360                //System.out.println("getUTC: Parsed " + dateStr + " to " + d);
361                long utc = d.getTime() / 1000;
362                return utc;
363        }
364
365        /** Get the list itself. */
366        public FileInfo[] getList() {
367                return currentList;
368        }
369
370        /**
371         * Get the list of 'new' files, i.e. that are in currentList and not in
372         * prevList. If parameter 'checkModifications' is false, the difference
373         * between currentList and prevList will be based on the file names only. If
374         * it is true, files' date and size is also checked, i.e. modified files
375         * will also be retruned.
376         * 
377         * @return FileInfo[] of filenames
378         */
379        public FileInfo[] getNewFiles(boolean checkModifications) {
380
381                if (currentList == null)
382                        return null;
383
384                int initial_length = currentList.length - prevList.size();
385                if (initial_length <= 0)
386                        initial_length = 1;
387
388                ArrayList newList = new ArrayList(initial_length);
389                // the final size can be greater if modified files are inserted as well
390
391                boolean isNew;
392                for (int i = 0; i < currentList.length; i++) {
393                        // it is a new file
394                        // if the current element's name is not in the hash table
395                        isNew = !prevList.containsKey(currentList[i].getName());
396
397                        if (checkModifications && !isNew) {
398                                // file is in the hash table
399                                // but we should check the size and date as well
400                                FileInfo old = (FileInfo) prevList
401                                                .get(currentList[i].getName());
402                                if (old.getDate() != currentList[i].getDate()
403                                                || old.getSize() != currentList[i].getSize()) {
404
405                                        isNew = true;
406                                        log.debug("Modified file found: "
407                                                        + currentList[i].getName() + "; size "
408                                                        + currentList[i].getSize() + "/" + old.getSize()
409                                                        + "; date " + currentList[i].getDate() + "/"
410                                                        + old.getDate());
411                                }
412
413                        }
414
415                        if (isNew) {
416                                newList.add(currentList[i]);
417                        }
418                }
419
420                log.debug("Number of new files = " + newList.size());
421
422                return (FileInfo[]) newList.toArray(new FileInfo[0]);
423        }
424
425}