about | summary | refs | log | tree | commit | diff
path: root/src/gui_backend_slurm.c
diff options
context:
space:
mode:
author: Thomas White <taw@physics.org> 2020-09-02 12:11:28 +0200
committer: Thomas White <taw@physics.org> 2020-09-02 12:11:28 +0200
commit: dbe2734d875f9dec5a3b28b9619eb5eeb9446121 (patch)
tree: eabbcd12c24b582584ed34c2c6082eb6533e36b4 /src/gui_backend_slurm.c
parent: 5bd6c58c82eba7109cb6365440aa43f4d577ed67 (diff)
Fix job tracking logic
Diffstat (limited to 'src/gui_backend_slurm.c')
-rw-r--r-- src/gui_backend_slurm.c 22
1 files changed, 18 insertions, 4 deletions
diff --git a/src/gui_backend_slurm.c b/src/gui_backend_slurm.c
index f51af9e9..504bf3d4 100644
--- a/src/gui_backend_slurm.c
+++ b/src/gui_backend_slurm.c
@@ -97,7 +97,20 @@ static int job_running(uint32_t job_id)
/* FIXME: Distinguish error cond from job complete */
}
- /* FIXME: Check that job is actually still running */
+ switch ( job_info->job_array[0].job_state & JOB_STATE_BASE ) {
+
+ /* Only the following states are reasons to keep on watching
+ * the job */
+ case JOB_PENDING :
+ case JOB_RUNNING :
+ case JOB_SUSPENDED :
+ running = 1;
+ break;
+
+ default :
+ running = 0;
+ break;
+ }
slurm_free_job_info_msg(job_info);
@@ -118,9 +131,9 @@ static int get_task_status(void *job_priv,
n_proc += read_number_processed(job->stderr_filenames[i]);
- if ( (job->job_ids[i] != 0)
- && !job_running(job->job_ids[i]) )
- {
+ if ( job->job_ids[i] == 0 ) continue;
+
+ if ( !job_running(job->job_ids[i]) ) {
job->job_ids[i] = 0;
} else {
all_complete = 0;
@@ -138,6 +151,7 @@ static void cancel_task(void *job_priv)
int i;
struct slurm_job *job = job_priv;
for ( i=0; i<job->n_blocks; i++) {
+ if ( job->job_ids[i] == 0 ) continue;
STATUS("Stopping SLURM job %i\n", job->job_ids[i]);
if ( slurm_kill_job(job->job_ids[i], SIGINT, 0) ) {
ERROR("Couldn't stop job: %s\n",