Helper script: Batch job monitoring

ICON Training - Hands-on Session

Helper script: Batch job monitoring


This Jupyter notebook provides a method for continuously monitoring the status of your HPC jobs. The notebook cell below executes a loop that refreshes its output until the kernel is manually interrupted. It is important to choose an appropriate polling interval to avoid excessive load on the scheduler.

import asyncio
import getpass
import os
import subprocess

from IPython.display import display, clear_output

async def poll_command(cmd: str, interval: float = 10) -> None:
    """Run a shell command repeatedly and show its latest output in the cell.

    Each iteration launches ``cmd`` through the shell, waits for it to
    finish, clears the previous cell output and prints the command line
    followed by its stdout and, if there is any, its stderr. The loop has no
    exit condition of its own: it ends only when the task running this
    coroutine is cancelled or an exception propagates out of it.

    Args:
        cmd: Shell command line to execute on every iteration.
        interval: Seconds to sleep after one run before starting the next.
    """
    while True:
        # Execute the command and capture both output streams.
        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()

        # Decode leniently: a stray non-UTF-8 byte in the command output must
        # not raise UnicodeDecodeError and silently end the polling loop.
        output = stdout.decode(errors="replace") if stdout else ''
        error = stderr.decode(errors="replace") if stderr else ''

        # Replace the previous snapshot with the fresh one.
        clear_output(wait=True)
        print(f"Command: {cmd}\n")
        print(output)
        if error:
            print("Error:", error)

        # Yield to the event loop while waiting, so other work can run
        # between polls.
        await asyncio.sleep(interval)

# Get the current user from the environment. Fall back to getpass.getuser()
# when USER is unset or empty, so the query never becomes "squeue -u None".
user = os.environ.get('USER') or getpass.getuser()
cmd = f'squeue -u {user}; date'

# Start the polling coroutine. Keep a reference to the returned future: the
# asyncio documentation advises this so the task cannot disappear
# mid-execution, and the handle lets the monitor be stopped later with
# polling_task.cancel().
polling_task = asyncio.ensure_future(poll_command(cmd, interval=10))