001/*
002 * Copyright (c) 2004-2010 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * 
006 * 
007 * Permission is hereby granted, without written agreement and without
008 * license or royalty fees, to use, copy, modify, and distribute this
009 * software and its documentation for any purpose, provided that the above
010 * copyright notice and the following two paragraphs appear in all copies
011 * of this software.
012 *
013 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
014 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
015 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
016 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
017 * SUCH DAMAGE.
018 *
019 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
020 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
021 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
022 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
023 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
024 * ENHANCEMENTS, OR MODIFICATIONS.
025 *
026 */
027
028package org.kepler.job;
029
030import org.apache.commons.logging.Log;
031import org.apache.commons.logging.LogFactory;
032
/**
 * Support class for the SLURM job manager. Class Job uses the methods of a
 * supporter class to submit jobs and check their status.
 */
037public class JobSupportSLURM implements JobSupport {
038
039        private static final Log log = LogFactory.getLog(JobSupportSLURM.class
040                        .getName());
041        private static final boolean isDebugging = log.isDebugEnabled();
042
043        public JobSupportSLURM() {
044        }
045
046        public void init(String slurmBinPath) {
047                if (slurmBinPath != null && !slurmBinPath.trim().equals("")) {
048                        String binPath = new String(slurmBinPath);
049                        if (!slurmBinPath.endsWith("/"))
050                                binPath += "/";
051                        _slurmSubmitCmd = binPath + _slurmSubmitCmd;
052                        _slurmStatusCmd = binPath + _slurmStatusCmd;
053                        _slurmDeleteCmd = binPath + _slurmDeleteCmd;
054                }
055        }
056
057        /**
058         * Create a submission file for the specific job manager, based on the
059         * information available in Job: - executable name - input files - output
060         * files - arguments for the job
061         */
062        public boolean createSubmitFile(String filename, Job job) {
063
064                return false;
065        }
066
067        /**
068         * Submit command for SLURM return: the command for submission
069         */
070    public String getSubmitCmd(String submitFile, String options, Job job) throws JobException {
071
072                StringBuffer _commandStr = new StringBuffer(_slurmSubmitCmd);
073                
074                // see if there are any dependent jobs
075                Job[] dependentJobs = job.getDependentJobs();
076                if(dependentJobs != null) {
077                    _commandStr.append("--dependency=afterok");
078                    for(Job dependentJob : dependentJobs) {
079                        _commandStr.append(":" + dependentJob.status.jobID);
080                    }
081                }
082                
083                if (options != null) {
084                        _commandStr.append(" " + options);
085                }
086                
087                _commandStr.append(" " + submitFile);
088
089                return _commandStr.toString();
090        }
091
092        /**
093         * Parse output of submission and get information: jobID return String jobID
094         * on success throws JobException at failure (will contain the error stream
095         * or output stream)
096         */
097        public String parseSubmitOutput(String output, String error)
098                        throws JobException {
099
100        
101                
102                // System.out.println("====SLURM parse: picking the jobid from output...");
103                /*
104                 * SLURM sbatch output: on success, it is: Submitted batch job 8125
105                 * Your job 102368 ("sge.cmd") has been submitted.
106                 * on error, messages are printed on stderr, stdout is
107                 * empty
108                 */
109                
110                String jobID = null;
111                String lines[] = output.split("\\r?\\n");
112                int max = lines.length;
113                String jobIDLine = null;
114                for(int i=0; i<max; i++) {
115                        if (lines[i].matches("Submitted batch job [0-9]*.*")) {
116                                jobIDLine = lines[i];
117                            System.out.println("Debug Print JOBIDLine " + jobIDLine);
118                                }
119                        }
120                
121
122                //if (idx > -1) {
123                        
124                        //String firstrow = output.substring(0, idx);
125                        //System.out.println("Debugging :::" + output.substring(output.length()-8, output.length()) +"<<<<");
126                        //if (firstrow.matches("Submitted batch job [0-9]*.*")) {
127                        
128                                String s = jobIDLine.substring(20, jobIDLine.length());
129                                //int toIdx = idx;
130                                jobID = s;
131                                //test
132                                //System.out.println("JOBID debug --s print " + s);
133                                //System.out.println("JOBID rectify --idx print " + idx);
134                                //System.out.println("JOBID rectify --jobID print " + jobID);
135                                //System.out.println("JOBID rectify --firstrow print " + firstrow);
136                        //test
137                        
138                        if (isDebugging)
139                                log.debug("SLURM parse: jobID = " + jobID + " jobIDLine = "
140                                                + jobIDLine);
141                
142
143                if (jobID == null) {
144                        if (error != null && error.length() > 0)
145                                throw new JobException("Error at submission of SLURM job: "
146                                                + error);
147                        else
148                                throw new JobException("Error at submission of SLURM job: "
149                                                + output);
150                }
151                
152                //test
153                                //System.out.println("JOBID rectify --jobID print " + jobID);
154
155            //test
156                                
157                return jobID;
158        } // end-of-submit
159
160        /**
161         * Get the command to ask the status of the job return: the String of
162         * command
163         */
164        public String getStatusCmd(String jobID) {
165                String _commandStr = _slurmStatusCmd + jobID;
166                return _commandStr;
167        }
168
169        /**
170         * Parse output of status check command and get status info return: a
171         * JobStatusInfo object, or throws an JobException with the error output
172         */
173        public JobStatusInfo parseStatusOutput(String jobID, int exitCode,
174                        String output, String error) throws JobException {
175
176                // SLURM status does not use exitCode. It can show error, but in real it
177                // can mean only that
178                // job is not in the queue anymore, which is good...
179
180                // System.out.println("+++++ status: picking the status from output" );
181                JobStatusInfo stat = new JobStatusInfo();
182                stat.statusCode = JobStatusCode.NotInQueue;
183
184                boolean foundStatus = false;
185
186                String sa[] = output.split("\n");
187                
188                int idx;
189                for (int i = 0; i < sa.length; i++) {
190                        // System.out.println("SLURM status string " + i + " = "+ sa[i]);
191                        String vals[] = sa[i].trim().split("( )+", 9);
192                        if (jobID.startsWith(vals[0].trim())) { // jobID may be longer than
193                                                                                                        // the first field which is
194                                                                                                        // limited in length
195                                if (vals.length >= 5) {
196                                        stat.jobID = jobID;
197                                        String jobName = vals[2].trim();
198                                        stat.owner = vals[3].trim();
199                                        stat.runTime = vals[5].trim();
200                                        String sts = vals[4].trim();
201                                        switch (sts) {
202                                        case "CD":
203                                                stat.statusCode = JobStatusCode.NotInQueue;
204                                                break;
205                                        case "CG":
206                                        case "R":
207                                                stat.statusCode = JobStatusCode.Running;
208                                                break;
209                                        case "S":
210                                        case "Q":
211                                        case "W":
212                                        case "PD":
213                                                stat.statusCode = JobStatusCode.Wait;
214                                                break;
215                                        default:
216                                                stat.statusCode = JobStatusCode.Wait;
217                                        }
218                                        foundStatus = true;
219                                        if (isDebugging)
220                                                log.debug("SLURM status Values: jobid=" + stat.jobID
221                                                                + " owner=" + stat.owner + " runTime="
222                                                                + stat.runTime + " status=[" + sts + "]");
223                                }
224                        }
225                }
226                // System.out.println("SLURM status = " + stat.statusCode);
227
228                if (!foundStatus) {
229                        if (error != null && error.length() > 0) {
230                                // it can be the message: qstat: Unknown Job Id ...
231                                if (error.startsWith("SLURM: Unknown Job Id")) {
232                                        stat.jobID = jobID;
233                                        stat.statusCode = JobStatusCode.NotInQueue;
234                                } else {
235                                        log.warn("Error string = [" + error + "] len="
236                                                        + error.length());
237                                        stat.jobID = jobID;
238                                        stat.statusCode = JobStatusCode.Error;
239                                }
240                        } else { // not an error, just job is not in the job queue now
241                                stat.jobID = jobID;
242                                stat.statusCode = JobStatusCode.NotInQueue;
243                        }
244                }
245
246                return stat;
247        }
248
249        /**
250         * Get the command to remove a job from queue (either running or waiting
251         * jobs). return: the String of command
252         */
253        public String getDeleteCmd(String jobID) {
254                String _commandStr = _slurmDeleteCmd + jobID;
255                return _commandStr;
256        }
257
258        /**
259         * Parse output of delete command. return: true or false indicating that the
260         * command was successful or not
261         */
262        public boolean parseDeleteOutput(String jobID, int exitCode, String output,
263                        String error) throws JobException {
264                if (exitCode == 0)
265                        return true;
266                else
267                        return false;
268        }
269
270        // ////////////////////////////////////////////////////////////////////
271        // // private variables ////
272
273        // The combined command to execute.
274        private String _slurmSubmitCmd = "sbatch ";
275        private String _slurmStatusCmd = "squeue --job=";
276        private String _slurmDeleteCmd = "scancel ";
277
278        public String getTaskStatusCmd(String jobID) throws NotSupportedException {
279                //return job status command as SLURM doesn't support task status command
280                return getStatusCmd(jobID);
281        }
282
283        public TaskParallelJobStatusInfo parseTaskStatusOutput(String jobID,
284                        int numTasks, int exitCode, String output, String error)
285                        throws JobException, NotSupportedException {
286                JobStatusInfo jobstatus = parseStatusOutput(jobID, exitCode, output, error);
287                TaskParallelJobStatusInfo taskStatus = new TaskParallelJobStatusInfo(jobstatus);
288                for(int i=0;i<numTasks;i++){
289                        taskStatus.taskStatusCodes.put(Integer.toString(i), jobstatus.statusCode);
290                }
291                return taskStatus;
292        }
293
294} // end-of-class-JobSupportSLURM