Job History#
by Silvia Mazzoni, DesignSafe, 2025
Using the job ID (uuid), you can access detailed metadata on a job via Tapis’s getJobHistory method.
The command is used to retrieve detailed information about each stage of a job.
You can retrieve information about the duration of each stage.
We will use a Python function for all of this; it collects the job-history output, prints a summary, and can optionally return the data.
Using local utilities library
Connect to Tapis#
# Connect to Tapis; the returned client `t` is passed to the job-history calls below.
t=OpsUtils.connect_tapis()
-- Checking Tapis token --
Token loaded from file. Token is still valid!
Token expires at: 2025-08-21T02:49:32+00:00
Token expires in: 3:39:29.216361
-- LOG IN SUCCESSFUL! --
Get Job History from Tapis#
Purpose: Get a timestamped history of job status changes.
This is valuable data because it echoes all stages of your job
get_tapis_job_history_data.py
# ../OpsUtils/OpsUtils/Tapis/get_tapis_job_history_data.py
def get_tapis_job_history_data(
    t,
    jobUuid: str,
    print_out: bool = True,
    return_data: bool = False,
    get_job_error_message: bool = False,
):
    """
    Retrieve and summarize a TACC/Tapis job's history, including step durations,
    data-transfer metrics, and any job error messages. Optionally prints a
    readable summary (Jupyter-friendly if ipywidgets is available) and/or
    returns structured data for further analysis.

    What it does
    ------------
    - Calls `t.jobs.getJobHistory(jobUuid=...)`.
    - Computes time spent in each job status (e.g., QUEUED, RUNNING) as the gap
      between consecutive JOB_NEW_STATUS events.
    - Extracts transfer metrics for input/archive staging events.
    - Collects JOB_ERROR_MESSAGE entries (if present).
    - Prints accordion-style sections in Jupyter (if ipywidgets is available),
      otherwise prints plain text sections.
    - Optionally returns structured dictionaries.

    Parameters
    ----------
    t : tapipy.tapis.Tapis
        Authenticated Tapis client.
    jobUuid : str
        Job UUID to inspect.
    print_out : bool, default True
        If True, print all summaries (accordion in Jupyter, plain text otherwise).
    return_data : bool, default False
        If True, return structured data dictionaries (see Returns).
    get_job_error_message : bool, default False
        If True, print the job-error section whenever error messages are found,
        even when `print_out` is False.

    Returns
    -------
    dict | int | None
        - When `return_data=True`, returns:
            {
              "StepsMetricsDict": { "created": {...}, "duration": {..., "TOTAL": seconds} },
              "DataTransfersDict": { EVENT_NAME: {metrics...}, ... },
              "JobHistory": the raw history object returned by Tapis,
              "JobErrorList": [ {event, created, eventDetail, jobStatus, message}, ... ]
            }
        - If an API error occurs (fetching history), returns `-1`.
        - Otherwise returns `None`.

    Notes
    -----
    - The status reached by the last JOB_NEW_STATUS event has no following
      event, so it gets no duration entry and is not part of TOTAL.
    - A status (or transfer event type) that occurs more than once keeps only
      its most recent entry in the returned dictionaries.

    Example
    -------
    # Print to screen and get data back:
    info = get_tapis_job_history_data(t, jobUuid, print_out=True, return_data=True)
    print(info["StepsMetricsDict"]["duration"].get("RUNNING", 0))

    Author
    ------
    Silvia Mazzoni, DesignSafe (silviamazzoni@yahoo.com)

    Date
    ----
    2025-08-14

    Version
    -------
    1.0
    """
    # Silvia Mazzoni, 2025
    from datetime import datetime, timezone
    from OpsUtils import OpsUtils

    RULE = "++++++++++++++++++++++++++++++++++++++++++++++++++"

    # --- Helpers -------------------------------------------------------------
    def _parse_tacc_time(ts: str) -> float:
        """
        Parse Tapis/TACC ISO timestamps to a Unix epoch (float seconds).
        Accepts microseconds or no microseconds; 'Z' treated as UTC.
        Returns 0.0 when the value is empty or cannot be parsed.
        """
        if not ts:
            return 0.0
        s = ts.strip().replace("Z", "+00:00")
        try:
            dt = datetime.fromisoformat(s)
        except ValueError:
            # Best-effort fallback: drop the fractional seconds (and whatever
            # followed them) and assume UTC.
            if "." not in s:
                return 0.0
            try:
                dt = datetime.fromisoformat(s.split(".", 1)[0] + "+00:00")
            except ValueError:
                return 0.0
        # Make sure it's timezone-aware UTC
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.timestamp()

    # --- Fetch history -------------------------------------------------------
    try:
        JobHistory = t.jobs.getJobHistory(jobUuid=jobUuid)
    except Exception as e:
        # Callers rely on the -1 sentinel rather than an exception.
        print(e)
        return -1

    # --- Print toggles -------------------------------------------------------
    printStepDurations = bool(print_out)
    printInput = bool(print_out)
    printAllSteps = bool(print_out)
    printLastStep = bool(print_out)
    printJobErrorMessage = bool(print_out) or bool(get_job_error_message)

    # --- Accumulators --------------------------------------------------------
    STATdur = [
        "\n" + RULE,
        "+ STEP DURATION +",
        RULE,
    ]
    INPUTdur = []
    AllStepList = [
        "\n" + RULE,
        "+ STEP DETAILS +",
        RULE,
    ]
    LastStepList = [
        "\n" + RULE,
        "+ LAST-STEP DETAILS +",
        RULE,
    ]
    totalT = 0.0
    stepDict = {"created": {}, "duration": {}}
    transfersDict = {}
    JobErrorList = []

    # --- Iterate through history --------------------------------------------
    NHistoryLines = len(JobHistory)
    prev_created_ts = 0.0
    prev_status = None
    prev_created_str = None
    for idx, thisHistoryLine in enumerate(JobHistory):
        is_last = idx == NHistoryLines - 1
        step_header = [f"Step {idx+1} of {NHistoryLines}", "-" * 40]
        AllStepList.extend(step_header)
        if is_last:
            LastStepList.extend(step_header)

        flat_hist = OpsUtils.flatten_dict(thisHistoryLine.__dict__)
        event = flat_hist.get("event", "")
        created = flat_hist.get("created", "")
        created_ts = _parse_tacc_time(created)

        if event == "JOB_NEW_STATUS":
            # The time spent in the previous status is the gap between its
            # status-change event and this one.
            new_status = flat_hist.get("description.newJobStatus", "")
            # Both timestamps must have parsed; a 0.0 placeholder on either
            # side would yield a meaningless (hugely negative) duration.
            if prev_created_ts > 0 and created_ts > 0 and prev_status:
                dTime = round(created_ts - prev_created_ts, 1)
                pad = " " * max(1, 20 - len(prev_status))
                STATdur.append(f" {prev_status}:{pad} {dTime} sec \t created: {prev_created_str}")
                stepDict["created"][prev_status] = prev_created_str
                stepDict["duration"][prev_status] = dTime
                totalT += dTime
            # advance previous pointers
            prev_created_ts = created_ts
            prev_created_str = created
            prev_status = new_status

        elif event in ("JOB_INPUT_TRANSACTION_ID", "JOB_ARCHIVE_TRANSACTION_ID"):
            # Label section by event type
            header_label = "INPUT TRANSFER" if event == "JOB_INPUT_TRANSACTION_ID" else "ARCHIVE TRANSFER"
            INPUTdur.append(f"\n----------------------\n {header_label}\n----------------------")
            INPUTdur.append(" ------------- Transfer Summary -------------")
            metrics = {}
            transfersDict[event] = metrics
            # Summary numbers (None when the field is absent)
            for k in ("estimatedTotalBytes", "totalBytesTransferred", "completeTransfers", "totalTransfers"):
                v = flat_hist.get(f"transferSummary.{k}")
                pad = " " * max(1, 25 - len(k))
                INPUTdur.append(f" {k}:{pad} {v}")
                metrics[k] = v
            # Durations (None when either endpoint is missing or unparseable)
            INPUTdur.append(" ------------- Transfer Duration -------------")
            created_t = _parse_tacc_time(flat_hist.get("transferSummary.created", ""))
            start_t = _parse_tacc_time(flat_hist.get("transferSummary.startTime", ""))
            end_t = _parse_tacc_time(flat_hist.get("transferSummary.endTime", ""))
            d1 = round(start_t - created_t, 1) if (start_t and created_t) else None
            d2 = round(end_t - start_t, 1) if (end_t and start_t) else None
            d3 = round(end_t - created_t, 1) if (end_t and created_t) else None
            INPUTdur.append(f" Create-to-TransferStart Duration: {d1} sec")
            INPUTdur.append(f" TransferStart-to-TransferEnd Duration: {d2} sec")
            INPUTdur.append(f" Create-to-TransferEnd Duration: {d3} sec")
            metrics["Create-to-TransferStart Duration"] = d1
            metrics["TransferStart-to-TransferEnd Duration"] = d2
            metrics["Create-to-TransferEnd Duration"] = d3

        elif event == "JOB_ERROR_MESSAGE":
            JobErrorList.append(
                {
                    "event": event,
                    "created": flat_hist.get("created"),
                    "eventDetail": flat_hist.get("eventDetail"),
                    "jobStatus": flat_hist.get("description.jobStatus"),
                    "message": flat_hist.get("description.message"),
                }
            )

        # Append all non-empty flattened fields (nice for inspection)
        for k, n in flat_hist.items():
            if n is None or str(n) == "":
                continue
            line = f"{k:<35} : {n}"
            AllStepList.append(line)
            if is_last:
                LastStepList.append(line)
        AllStepList.append("-" * 40)
        if is_last:
            LastStepList.append("-" * 40)

    # Finalize totals (note: if job is still RUNNING/QUEUED the last segment is open)
    STATdur.append(f" Total Duration: {round(totalT, 1)} sec")
    stepDict["duration"]["TOTAL"] = round(totalT, 1)

    # --- Printing (Jupyter accordion if available) ---------------------------
    def _print_error_entries(with_banner: bool):
        """Print every collected error record, optionally under a per-record banner."""
        for d in JobErrorList:
            if with_banner:
                print("\n" + RULE)
                print("+ JOB_ERROR_MESSAGE +")
                print(RULE)
            for k, v in d.items():
                print(f"{k}:\t{v}")
            print("------")

    def _print_sections_plain():
        """Print the enabled sections as plain text."""
        if printJobErrorMessage and JobErrorList:
            print("\n++++++++++++++++++++++++++++")
            print("++++++ JOB-ERROR MESSAGE ++++++")
            print("++++++++++++++++++++++++++++")
            _print_error_entries(with_banner=False)
        if printStepDurations:
            for line in STATdur:
                print(line)
        if printInput and INPUTdur:
            print("\n".join(INPUTdur))
        if printAllSteps:
            print("\n".join(AllStepList))
        if printLastStep:
            print("\n".join(LastStepList))

    def _print_sections_widgets():
        """Print the enabled sections inside nested ipywidgets accordions."""
        import ipywidgets as widgets
        from IPython.display import display

        history_out = widgets.Output()
        acc = widgets.Accordion(children=[history_out])
        acc.set_title(0, f"Job History Data ({jobUuid})")
        acc.selected_index = 0
        display(acc)
        with history_out:
            print("\n++++++++++++++++++++++++++++")
            print("++++++ JOB-HISTORY DATA ++++++")
            print("++++++++++++++++++++++++++++++")
            print(f"++++++ jobUuid: {jobUuid}")
            print("+++++++++++++++++++++++++")

        def _new_section(title, expand=False):
            """Nest a titled accordion in the main output; return its Output widget."""
            out = widgets.Output()
            section = widgets.Accordion(children=[out])
            section.set_title(0, title)
            if expand:
                section.selected_index = 0
            with history_out:
                display(section)
            return out

        if printJobErrorMessage and JobErrorList:
            # Errors are the one section that is opened explicitly.
            with _new_section("Job ERROR MESSAGE", expand=True):
                _print_error_entries(with_banner=True)

        line_sections = (
            (printStepDurations, "Steps Duration", STATdur),
            (printInput and INPUTdur, "Job-Stagings Info", INPUTdur),
            (printAllSteps, "ALL Steps Info", AllStepList),
            (printLastStep, "Last Step Info", LastStepList),
        )
        for enabled, title, lines in line_sections:
            if not enabled:
                continue
            with _new_section(title):
                for line in lines:
                    print(line)
                print(RULE)

    # The error section must also appear when only get_job_error_message is set.
    errors_requested = printJobErrorMessage and bool(JobErrorList)
    if print_out or errors_requested:
        try:
            _print_sections_widgets()
        except Exception:
            # Deliberate best-effort: any widget/IPython problem falls back to text.
            _print_sections_plain()

    # --- Return data ---------------------------------------------------------
    if return_data:
        return {
            "StepsMetricsDict": stepDict,
            "DataTransfersDict": transfersDict,
            "JobHistory": JobHistory,
            "JobErrorList": JobErrorList,
        }
    return None
FINISHED JOB#
# UUID of the job this page uses as its finished-job example.
jobUuid = '4dfa35e1-15cd-48fd-a090-f348544dee1f-007'
# Defaults apply: print_out=True prints the report; return_data=False means no data is returned.
OpsUtils.get_tapis_job_history_data(t, jobUuid)
SAME JOB#
Print nothing; return the data instead.
# print_out=False suppresses the report; return_data=True hands back the result dictionary.
JobHistoryData = OpsUtils.get_tapis_job_history_data(t, jobUuid, print_out=False,return_data=True)
FAILED JOB#
# UUID of a job this page labels as failed.
jobUuid = '0ee27b36-13aa-4eba-a8d2-7ca52d134cae-007'
# With the default print_out=True, any JOB_ERROR_MESSAGE events are printed in their own section.
OpsUtils.get_tapis_job_history_data(t, jobUuid)
ONE MORE FAILED JOB#
# UUID of a second job this page labels as failed.
jobUuid = 'fdd58183-173d-4a97-b67f-76467f4e9ec3-007'
# Same default call: prints the full report, including any job error messages found.
OpsUtils.get_tapis_job_history_data(t, jobUuid)
YET ANOTHER FAILED JOB#
# UUID of a third job this page labels as failed.
jobUuid = '487fa9b9-c73b-48fc-bda8-1a0f47d5d483-007'
# Same default call: prints the full report, including any job error messages found.
OpsUtils.get_tapis_job_history_data(t, jobUuid)
YES ANOTHER FAILED JOB#
# UUID of a fourth job this page labels as failed.
jobUuid = 'b35ab3c2-0436-4c98-9066-367b9db67f9c-007'
# Same default call: prints the full report, including any job error messages found.
OpsUtils.get_tapis_job_history_data(t, jobUuid)