ICON Training - Hands-on Session
Helper script: Batch job monitoring
This Jupyter notebook provides a method for continuously monitoring the status of your HPC jobs. The notebook cell below executes a loop that refreshes its output until the kernel is manually interrupted. It is important to choose an appropriate polling interval to avoid excessive load on the scheduler.
import asyncio
import getpass
import os
import subprocess

from IPython.display import display, clear_output
async def poll_command(cmd, interval=10):
    """Run a shell command repeatedly and refresh the cell output each time.

    The coroutine never returns on its own; it loops until the task that
    runs it is cancelled.

    Args:
        cmd: Shell command line, passed as-is to the shell.
        interval: Seconds to wait between two consecutive runs.
    """
    while True:
        # Execute the command and capture both output streams.
        proc = await asyncio.create_subprocess_shell(
            cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()

        # Decode leniently: a single undecodable byte in the command output
        # would otherwise raise UnicodeDecodeError and end the polling task
        # without any visible error in the notebook.
        output = stdout.decode(errors="replace") if stdout else ''
        error = stderr.decode(errors="replace") if stderr else ''

        # wait=True delays clearing until new output arrives (less flicker).
        clear_output(wait=True)
        print(f"Command: {cmd}\n")
        print(output)
        if error:
            print("Error:", error)

        # Non-blocking sleep: yields to the event loop so the kernel stays
        # responsive between polls.
        await asyncio.sleep(interval)
# Resolve the account name for squeue. $USER is still preferred; if it is
# unset or empty, fall back to getpass.getuser() so the command never
# degrades to the literal "squeue -u None".
user = os.environ.get('USER') or getpass.getuser()
cmd = f'squeue -u {user}; date'

# Start the polling coroutine on the running event loop. Keep a reference to
# the returned task: the asyncio documentation advises saving it so the task
# cannot disappear mid-execution, and the handle lets you stop the polling
# with `poll_task.cancel()`.
poll_task = asyncio.ensure_future(poll_command(cmd, interval=10))