001/*
002 * Copyright (c) 2004-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: crawl $'
006 * '$Date: 2012-07-27 18:35:29 +0000 (Fri, 27 Jul 2012) $' 
007 * '$Revision: 30295 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.kepler.job;
031
032import org.apache.commons.logging.Log;
033import org.apache.commons.logging.LogFactory;
034
035/**
036 * Support class for IBM's LoadLeveler job manager support Class JobManager uses
037 * the methods of a supporter class to submit jobs and check status
038 */
039public class JobSupportLoadLeveler implements JobSupport {
040
041        private static final Log log = LogFactory
042                        .getLog(JobSupportLoadLeveler.class.getName());
043        private static final boolean isDebugging = log.isDebugEnabled();
044
045        public JobSupportLoadLeveler() {
046        }
047
048        public void init(String llBinPath) {
049                if (llBinPath != null && !llBinPath.trim().equals("")) {
050                        String binPath = new String(llBinPath);
051                        if (!llBinPath.endsWith("/"))
052                                binPath += "/";
053                        _llSubmitCmd = binPath + _llSubmitCmd;
054                        _llStatusCmd = binPath + _llStatusCmd;
055                        _llDeleteCmd = binPath + _llDeleteCmd;
056                }
057        }
058
059        /**
060         * Create a submission file for the specific job manager, based on the
061         * information available in Job: - executable name - input files - output
062         * files - arguments for the job
063         */
064        public boolean createSubmitFile(String filename, Job job) {
065
066                return false;
067        }
068
069        /**
070         * Submit command for LoadLeveler return: the command for submission
071         */
072    public String getSubmitCmd(String submitFile, String options, Job job) throws JobException {
073
074        if(job.getDependentJobs() != null) {
075            throw new JobException("Support for job dependencies with Load Leveler has not been implemented.");
076        }
077        
078                String _commandStr;
079                if (options != null)
080                        _commandStr = _llSubmitCmd + " " + options + " " + submitFile;
081                else
082                        _commandStr = _llSubmitCmd + " " + submitFile;
083
084                return _commandStr;
085        }
086
087        /**
088         * Parse output of submission and get information: jobID return String jobID
089         * on success throws JobException at failure (will contain the error stream
090         * or output stream)
091         */
092        public String parseSubmitOutput(String output, String error)
093                        throws JobException {
094
095                // System.out.println("====LoadLeveler parse: picking the jobid from output...");
096                /*
097                 * LoadLeveler llsubmit output is several lines: on success, there is a
098                 * line: "llsubmit: The job "jobID" has been submitted." if submitfile
099                 * does not exists or other error:??
100                 */
101                String jobID = null;
102
103                String sa[] = output.split("\n"); // cut up lines
104                int idx;
105                for (int i = 0; i < sa.length; i++) {
106                        // if (isDebugging) log.debug("LoadLeveler status string " + i +
107                        // " = "+ sa[i]);
108                        idx = sa[i].indexOf(" has been submitted");
109                        if (idx > -1) {
110                                // Successful job submission, jobID is in this line.
111                                // Cut to the second quote, excluding the quote.
112                                String temp = output.substring(0, idx - 1);
113                                // start of jobid string after the first quote
114                                int qidx = output.indexOf("\"");
115                                if (qidx > -1) {
116                                        // cut from the first quote, excluding the qoute
117                                        jobID = temp.substring(qidx + 1);
118                                        if (isDebugging)
119                                                log.debug("LoadLeveler parse: jobID = " + jobID
120                                                                + " temp = " + temp);
121                                }
122                        }
123                }
124
125                if (jobID == null) {
126                        if (error != null && error.length() > 0)
127                                throw new JobException(
128                                                "Error at submission of LoadLeveler job: " + error);
129                        else
130                                throw new JobException(
131                                                "Error at submission of LoadLeveler job: " + output);
132                }
133                return jobID;
134        } // end-of-submit
135
136        /**
137         * Get the command to ask the status of the job return: the String of
138         * command
139         */
140        public String getStatusCmd(String jobID) {
141                String _commandStr = _llStatusCmd + jobID;
142                return _commandStr;
143        }
144
145        /**
146         * Parse output of status check command and get status info return: a
147         * JobStatusInfo object, or throws an JobException with the error output
148         */
149        public JobStatusInfo parseStatusOutput(String jobID, int exitCode,
150                        String output, String error) throws JobException {
151
152                // LoadLeveler status prints to stdout always, and never to stderror.
153                // exitCode != 0 is error, but exitCode==0 still may mean that job is
154                // not in queue.
155                // If job is in queue, the formatted report looks like:
156                // Step Id Owner Queue Date ST
157                // ------------------------ ----------- ----------- --
158                // s00601.287247.0 jxhan 07/12 09:10 NQ
159                //
160                // 1 job step(s) in query, 0 waiting, 0 pending, 0 running, 1 held, 0
161                // preempted
162                //
163                // If job is not in the queue anymore, the message is
164                // ""llq: There is currently no job status to report."
165
166                // System.out.println("+++++ status: picking the status from output" );
167                JobStatusInfo stat = new JobStatusInfo();
168                stat.statusCode = JobStatusCode.NotInQueue;
169
170                if (exitCode != 0) {
171                        // error case, error text in output
172                        throw new JobException("LoadLeveler status query error:\n" + output);
173                }
174
175                // now we have 0 exitCode, so either get status info, or no job message
176
177                boolean foundStatus = false;
178                String localJobID = createLocalJobID(jobID); // a trick for LoadLeveler
179
180                String sa[] = output.split("\n");
181                for (int i = 0; i < sa.length; i++) {
182                        // if (isDebugging) log.debug("LoadLeveler status string " + i +
183                        // " = "+ sa[i]);
184                        if (sa[i].trim().startsWith(localJobID)) {
185                                String vals[] = sa[i].trim().split("( )+", 9);
186                                if (vals.length >= 5) {
187                                        String reportedJobID = vals[0].trim();
188                                        stat.owner = vals[1].trim();
189                                        stat.submissionTime = vals[2].trim() + " " + vals[3].trim();
190                                        stat.runTime = new String("N/A");
191                                        String sts = vals[4].trim();
192
193                                        if (sts.equals("R") || // running
194                                                        sts.equals("ST") || // starting
195                                                        sts.equals("P") || // pending
196                                                        sts.equals("CK") || // checkpointing
197                                                        sts.equals("CP") || // prepare to complete
198                                                        sts.equals("C") || // completed
199                                                        sts.equals("E") || // preempted
200                                                        sts.equals("EP") || // preempt pending
201                                                        sts.equals("MP") // resume pending
202                                        ) {
203
204                                                stat.statusCode = JobStatusCode.Running;
205
206                                        } else if (sts.equals("I") || // idle
207                                                        sts.equals("NQ") || // not queued (for running)
208                                                        sts.equals("HU") || // user hold
209                                                        sts.equals("H") || // user hold
210                                                        sts.equals("HS") || // system hold
211                                                        sts.equals("S") || // system hold
212                                                        sts.equals("D") || // deferred
213                                                        sts.equals("V") || // vacated
214                                                        sts.equals("VP") || // vacated pending
215                                                        sts.equals("RP") // remove pending
216                                        ) {
217
218                                                stat.statusCode = JobStatusCode.Wait;
219
220                                        } else if (sts.equals("CA") || // cancelled
221                                                        sts.equals("TX") || // terminated
222                                                        sts.equals("RM") // removed
223                                        ) {
224
225                                                stat.statusCode = JobStatusCode.NotInQueue;
226
227                                        } else {
228                                                /*
229                                                 * possible states: NR never run X rejected XP reject
230                                                 * pending
231                                                 */
232                                                stat.statusCode = JobStatusCode.Error;
233                                        }
234                                        foundStatus = true;
235                                        if (isDebugging)
236                                                log.debug("LoadLeveler status Values: jobid="
237                                                                + stat.jobID + " owner=" + stat.owner
238                                                                + " submissionTime=" + stat.submissionTime
239                                                                + " status=[" + sts + "]");
240                                }
241                        }
242                }
243                // System.out.println("LoadLeveler status = " + stat.statusCode);
244
245                if (!foundStatus) {
246                        if (output != null && output.length() > 0) {
247                                // it can be the message: llq: There is currently no job status
248                                // to report.
249                                if (output
250                                                .startsWith("llq: There is currently no job status to report.")) {
251                                        stat.statusCode = JobStatusCode.NotInQueue;
252                                } else {
253                                        log.warn("Output string = [" + output + "] len="
254                                                        + output.length());
255                                        stat.statusCode = JobStatusCode.Error;
256                                }
257                        } else { // unknown thing happened, output is null
258                                throw new JobException(
259                                                "LoadLeveler status produced an unknown situation for job "
260                                                                + jobID);
261                        }
262                }
263
264                return stat;
265        }
266
267        /**
268         * Get the command to remove a job from queue (either running or waiting
269         * jobs). return: the String of command
270         */
271        public String getDeleteCmd(String jobID) {
272                String _commandStr = _llDeleteCmd + jobID;
273                return _commandStr;
274        }
275
276        /**
277         * Parse output of delete command. return: true or false indicating that the
278         * command was successful or not
279         */
280        public boolean parseDeleteOutput(String jobID, int exitCode, String output,
281                        String error) throws JobException {
282                if (exitCode == 0)
283                        return true;
284                else
285                        return false;
286        }
287
288        /**
289         * Create the usable jobID "host.job.step" from the "fullhostname.job.step".
290         * Submission reports jobID with the full hostname, e.g.
291         * s00509.nersc.gov.410337.0 Status query / delete works for such ID,
292         * however, they report the id with short hostname, e.g. s00509.410337.0 so
293         * we need that short id to get the status.
294         */
295        private String createLocalJobID(String fullJobID) {
296                String vals[] = fullJobID.trim().split("\\.");
297                if (vals.length <= 3) {
298                        // our theory does not fit reality. Not NERSC? Just return as it is.
299                        return fullJobID;
300                }
301
302                String id = new String ();
303                if (vals.length > 4)    // for format s00509.nersc.gov.410337.0
304                        id = vals[0] + "." + vals[vals.length - 2] + "." + vals[vals.length - 1];
305                else                                    // for format s00509.nersc.gov.410337
306                        id = vals[0] + "." + vals[vals.length - 1];
307                
308                //System.out.println("full id = " + fullJobID + "   job id = " + id);
309                return id;
310        }
311
312        // ////////////////////////////////////////////////////////////////////
313        // // private variables ////
314
315        // The combined command to execute.
316        private String _llSubmitCmd = "llsubmit ";
317  //some machines may NOT support -j option
318  //private String _llStatusCmd = "llq -f %id %o %dq %st -j ";
319  //-j looks like an optional option even on machines that support it   
320        private String _llStatusCmd = "llq -f %id %o %dq %st ";  
321        private String _llDeleteCmd = "llcancel ";
322
323        public String getTaskStatusCmd(String jobID) throws NotSupportedException {
324                throw new NotSupportedException("Task parallel jobs are not supported");
325        }
326
327        public TaskParallelJobStatusInfo parseTaskStatusOutput(String jobID,
328                        int numTasks, int exitCode, String output, String error)
329                        throws JobException, NotSupportedException {
330                throw new NotSupportedException("Task parallel jobs are not supported");
331        }
332
333} // end-of-class-JobSupportLoadLeveler