Download All Job Output#
Get list of ALL job-output files recursively with OPTIONAL download
by Silvia Mazzoni, DesignSafe, 2025
I have created a single Python function that drills into the job-output files recursively and returns both relative and full paths (the full paths are paths within Tapis, not absolute paths on the local file system).
This function gives the user the option to download the files locally.
In this notebook we will copy the files over to our local project directory within DesignSafe.
Using local utilities library
Connect to Tapis#
Yes, you need to connect to Tapis first; this step authenticates you.
# Authenticate with Tapis; `t` is the client object handed to the job-file utility used later in this notebook.
t=OpsUtils.connect_tapis()
-- Checking Tapis token --
Token loaded from file. Token is still valid!
Token expires at: 2025-08-21T02:49:32+00:00
Token expires in: 3:38:32.074526
-- LOG IN SUCCESSFUL! --
User Input – job id#
# UUID of the Tapis job whose output files will be listed and downloaded.
jobUuid = '4dfa35e1-15cd-48fd-a090-f348544dee1f-007'
Download Files#
get_tapis_job_all_files.py
# ../OpsUtils/OpsUtils/Tapis/get_tapis_job_all_files.py
def get_tapis_job_all_files(
    t, jobUuid,
    displayIt=10,
    target_dir=False,
    overwrite=False,
    display_file_content=True
):
    """
    Recursively retrieve all output files of a Tapis job, optionally downloading them.

    The job's output directory tree is walked with ``t.jobs.getJobOutputList``.
    At every level files are handled before sub-directories. For each file the
    function collects:
      - a relative path (usable to recreate the directory structure on disk),
      - the ``path`` attribute reported by Tapis for the item,
      - the raw item object returned by Tapis.
    When a download directory is requested, every file is fetched with
    ``t.jobs.getJobOutputDownload`` and written below that directory with the
    same folder hierarchy.

    Parameters
    ----------
    t : Tapis
        An authenticated Tapis client object (typically created with connect_tapis()).
    jobUuid : str
        The UUID of the Tapis job whose output files you want to inspect or download.
    displayIt : bool or int, optional
        Controls on-screen output (ipywidgets accordions, one per directory):
          - False or 0: completely silent; ipywidgets/IPython are not imported.
          - True or 1: shows all files in all directories.
          - int >= 2: shows at most `displayIt` files per directory; a
            suppression note is printed when further files are hidden.
        Any other non-zero int behaves like True. Values that are neither
        bool nor int are treated as silent.
    target_dir : bool, None, or str, optional
        Determines whether files are downloaded:
          - False, None or '': files are only listed.
          - True: download into 'OutFiles_{jobUuid}'.
          - str: download into the given directory.
        A leading '~' is expanded, and relative directories are resolved
        against the user's home directory (not the current working directory).
    overwrite : bool, optional
        If True, existing local files are replaced. If False (default), files
        that already exist locally are skipped.
    display_file_content : bool, optional
        Only used when displaying without downloading. If True (default) each
        displayed file is downloaded and shown in a text area inside its own
        accordion ('.zip' files are not rendered). If False only a
        'FILE: <path>' line is printed.

    Returns
    -------
    dict
        {
            'Nfiles': total number of files found,
            'LocalPath': list of relative paths (like 'results/output.txt'),
            'FullPath': list of the `path` values reported by Tapis,
            'Items': list of raw Tapis file objects (metadata)
        }
        The three lists are index-aligned.

    Examples
    --------
    # Just list files, show up to 5 per directory
    >>> outputs = get_tapis_job_all_files(t, jobUuid, displayIt=5)
    # List all files without printing anything
    >>> outputs = get_tapis_job_all_files(t, jobUuid, displayIt=False)
    # Download into the default 'OutFiles_{jobUuid}' directory
    >>> outputs = get_tapis_job_all_files(t, jobUuid, target_dir=True)
    # Download into a custom directory, overwriting if needed
    >>> outputs = get_tapis_job_all_files(t, jobUuid, target_dir="my_results", overwrite=True)

    Notes
    -----
    - Downloads replicate the Tapis directory structure inside the chosen local folder.
    - Use 'LocalPath' and 'FullPath' together to pair local save paths with original remote locations.
    - The 'Items' list provides the Tapis metadata for each file, which can be useful for logs.
    """
    # Silvia Mazzoni, 2025
    import os
    import posixpath

    # ---- normalize displayIt ------------------------------------------------
    # bool is a subclass of int, so it has to be tested first.
    if isinstance(displayIt, bool):
        show = displayIt
        displayLimit = None
    elif isinstance(displayIt, int):
        show = displayIt != 0          # 0 is documented as silent
        displayLimit = displayIt if displayIt >= 2 else None
    else:
        show = False
        displayLimit = None

    # ---- determine local download dir ---------------------------------------
    if target_dir is True:
        download_dir = f"./OutFiles_{jobUuid}"
    elif isinstance(target_dir, str) and target_dir:
        download_dir = target_dir
    else:
        download_dir = None  # no download

    download_root = None
    if download_dir:
        # os.path.join drops the home prefix when the second part is absolute,
        # so absolute targets are used as given and relative ones land in '~'.
        download_root = os.path.normpath(
            os.path.join(os.path.expanduser('~'), os.path.expanduser(download_dir))
        )

    # ---- display scaffolding (imported lazily so silent mode needs no widgets)
    if show:
        import ipywidgets as widgets
        from IPython.display import display, clear_output

        filedata_out = widgets.Output()
        filedata_accordion = widgets.Accordion(children=[filedata_out])
        filedata_accordion.set_title(0, f'Job Filedata ({jobUuid})')
        filedata_accordion.selected_index = 0
        display(filedata_accordion)
        with filedata_out:
            print('----------------------------')
            print(f'JOB: {jobUuid}')
            print('----------------------------')
            if download_dir:
                print('----------------------------')
                print(f'TARGET DIR: {download_dir}')
                print('----------------------------')

    def _as_text(data):
        # NOTE(review): the download call is assumed to return bytes; a text
        # widget needs str, so decode leniently and pass str through unchanged.
        if isinstance(data, (bytes, bytearray)):
            return bytes(data).decode('utf-8', errors='replace')
        return data if isinstance(data, str) else str(data)

    def view_tapis_file_in_accordion(selected_path):
        # Show one remote file inside its own collapsed accordion (display mode only).
        view_select_out = widgets.Output()
        view_select_out_acc = widgets.Accordion(children=[view_select_out])
        view_select_out_acc.set_title(0, f" View File: {selected_path}")
        display(view_select_out_acc)
        with view_select_out:
            if os.path.splitext(selected_path)[-1].lower() == '.zip':
                print("can't display content")
                return
            clear_output()
            if not selected_path:
                print(" No output file selected to download.")
                return
            data = t.jobs.getJobOutputDownload(jobUuid=jobUuid, outputPath=selected_path)
            print(f" Viewing: {selected_path}")
            textarea = widgets.Textarea(
                value=_as_text(data),
                placeholder='',
                description='',
                disabled=False,
                layout=widgets.Layout(width='100%', height='500px')
            )
            display(textarea)

    def get_files_recursive(view_direct_out, path=""):
        # Walk one remote directory; `view_direct_out` is the Output widget for
        # this directory (None in silent mode).
        returnFiles = []
        returnFilesPath = []
        returnItems = []

        output_path = path if path else "."
        output_items = t.jobs.getJobOutputList(jobUuid=jobUuid, outputPath=output_path)

        # split into dirs vs files (files are processed first)
        output_items_dirs = [item for item in output_items if getattr(item, "type", "") == "dir"]
        output_items_files = [item for item in output_items if getattr(item, "type", "") != "dir"]

        if show:
            with view_direct_out:
                if output_items_files:
                    print(f' {os.path.dirname(output_items_files[0].path)}')
                print(f' {len(output_items_files)} files & {len(output_items_dirs)} directories:')

        printed_count = 0
        note_shown = False
        for item in output_items_files:
            # Remote (Tapis) paths always use '/', independent of the local OS.
            remote_path = posixpath.join(path, item.name) if path else item.name
            returnFiles.append(remote_path)
            returnFilesPath.append(item.path)
            returnItems.append(item)

            visible = show and (displayLimit is None or printed_count < displayLimit)
            if visible:
                printed_count += 1
                if not download_dir:
                    with view_direct_out:
                        if display_file_content:
                            view_tapis_file_in_accordion(remote_path)
                        else:
                            print(f' FILE: {remote_path}')
            elif show and not note_shown:
                # Announce suppression only when a file is actually hidden.
                print(f'\n ........(suppressing additional-file display beyond {displayLimit})')
                note_shown = True

            # download if needed
            if download_root:
                local_file_path = os.path.join(download_root, *remote_path.split('/'))
                os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
                if os.path.exists(local_file_path) and not overwrite:
                    if visible:
                        print(f" [SKIP] {local_file_path} (already exists)")
                    continue
                if visible:
                    print(f" [DOWNLOADING] {remote_path} -> {local_file_path}")
                data = t.jobs.getJobOutputDownload(jobUuid=jobUuid, outputPath=remote_path)
                if isinstance(data, str):
                    data = data.encode('utf-8')  # file is opened in binary mode
                with open(local_file_path, "wb") as f:
                    f.write(data)

        for item in output_items_dirs:
            remote_path = posixpath.join(path, item.name) if path else item.name
            child_out = None
            if show:
                child_out = widgets.Output()
                child_acc = widgets.Accordion(children=[child_out])
                child_acc.set_title(0, f"DIRECTORY: {remote_path}")
                display(child_acc)
            _, hereFiles, hereFilesPath, hereItems = get_files_recursive(child_out, remote_path)
            returnFiles.extend(hereFiles)
            returnFilesPath.extend(hereFilesPath)
            returnItems.extend(hereItems)

        return len(returnFiles), returnFiles, returnFilesPath, returnItems

    root_out = None
    if show:
        with filedata_out:
            print('----------------------------')
            print('DIRECTORY: "."')
        root_out = widgets.Output()
        root_acc = widgets.Accordion(children=[root_out])
        root_acc.set_title(0, 'DIRECTORY: "."')
        display(root_acc)

    Nfiles, FileList, FilesPathList, itemsList = get_files_recursive(root_out)

    if show:
        with filedata_out:
            print(f"\nA total of {Nfiles} job-output files have been found"
                  f"{' and downloaded' if download_dir else ''}"
                  "!")

    return {
        'Nfiles': Nfiles,
        'LocalPath': FileList,
        'FullPath': FilesPathList,
        'Items': itemsList
    }
# Temporary working directory for this demo; created on first use.
projectDir = os.path.expanduser('~/MyData/tmp_removeME')
if not os.path.exists(projectDir):
    print(f"Path does not exist: {projectDir}")
    os.makedirs(projectDir)
    print(f"Create directory: {projectDir}")
else:
    print(f"The path exists: {projectDir}")
Path does not exist: /home/jupyter/MyData/tmp_removeME
Create directory: /home/jupyter/MyData/tmp_removeME
# Download the job's output files into projectDir (showing at most 10 files per directory,
# replacing files that already exist); the returned dict has the keys Nfiles, LocalPath, FullPath and Items.
TapisJobFilesDict = OpsUtils.get_tapis_job_all_files(t, jobUuid, displayIt=10, target_dir=projectDir, overwrite=True)
List files in new location#
# os.listdir returns only the top-level entries; it does not descend into sub-directories.
AllContents = os.listdir(projectDir)
listing_message = f"\n Get contents using: os.listdir('{projectDir}'): {AllContents}"
print(listing_message)
Get contents using: os.listdir('/home/jupyter/MyData/tmp_removeME'): ['opensees.zip', 'tapisjob.env', 'tapisjob.out', 'tapisjob.sh', 'tapisjob_app.sh', '.ipynb_checkpoints', 'inputDirectory']
List files recursively#
get_files_recursive.py
# ../OpsUtils/OpsUtils/Misc/get_files_recursive.py
def get_files_recursive(path: str = "", displayIt=10, returnItems: bool = False, displayLevel: int = 0):
    """
    Recursively list files under a local directory with optional on-screen
    display and a structured return payload.

    Features
    --------
    - Recursively walks `path`, handling files before directories at each
      level; names are processed in sorted order so results are reproducible.
    - Optional display, applied uniformly to every directory of the tree:
        * If `displayIt` is `True`, displays all files.
        * If `displayIt` is `False`, displays nothing (at any depth).
        * If `displayIt` is an `int >= 2`, displays up to that many files per
          directory and prints a suppression note when further files are hidden.
    - Returns counts and paths when `returnItems=True`.

    Parameters
    ----------
    path : str, default=""
        Directory to traverse. Empty string means current working directory (`"."`).
    displayIt : bool | int, default=10
        Controls on-screen printing:
        * `True`  -> print everything
        * `False` -> print nothing
        * `int` (>=2) -> print up to that many files per directory
        * `int` (0 or 1) -> treated as no limit (prints everything)
        Any other type prints nothing.
    returnItems : bool, default=False
        If `True`, return a dict with counts and path lists (see Returns).
    displayLevel : int, default=0
        Recursion depth, passed along by the recursive calls. Kept for
        backward compatibility; it no longer influences what is displayed.

    Returns
    -------
    dict | None
        If `returnItems=True`, returns:
        {
            'Nfiles':    total number of files found,
            'LocalPath': file paths built by joining `path` with the location
                         below it (no prefix when `path` is empty),
            'FullPath':  absolute file paths,
            'Items':     basenames of files
        }
        Otherwise returns `None`. A directory that is missing, unreadable or
        not a directory yields the empty payload (or `None`).

    Notes
    -----
    - Entries named `.ipynb_checkpoints` are skipped and are not counted in
      the "N files & M directories" summary.
    - Files are displayed before directories at each level for readability.

    Example
    -------
    # Print up to 10 files per directory and also capture the results:
    results = get_files_recursive("data", displayIt=10, returnItems=True)
    print("Total files:", results['Nfiles'])

    Author
    ------
    Silvia Mazzoni, DesignSafe (silviamazzoni@yahoo.com)

    Date
    ----
    2025-08-14

    Version
    -------
    1.0
    """
    import os

    # Interpret displayIt. bool is a subclass of int, so it is tested first.
    # Display is decided from displayIt alone so every depth behaves the same.
    if isinstance(displayIt, bool):
        show = displayIt
        displayLimit = None
    elif isinstance(displayIt, int):
        show = True
        displayLimit = displayIt if displayIt >= 2 else None
    else:
        show = False
        displayLimit = None

    root = path if path else "."

    # Each level prints its own header, so the parent must not print it too.
    if show:
        print('----------------------------')
        print(f'\nDIRECTORY: {root}')

    try:
        entries = sorted(os.listdir(root))
    except FileNotFoundError:
        failure = 'Directory not found'
    except NotADirectoryError:
        failure = 'Not a directory'
    except PermissionError:
        failure = 'Permission denied'
    else:
        failure = None

    if failure is not None:
        if show:
            print(f' [Error] {failure}: {root}')
        return {'Nfiles': 0, 'LocalPath': [], 'FullPath': [], 'Items': []} if returnItems else None

    # Classify once, dropping notebook checkpoints before anything is counted.
    files = []
    dirs = []
    for name in entries:
        if name == '.ipynb_checkpoints':
            continue
        if os.path.isdir(os.path.join(root, name)):
            dirs.append(name)
        else:
            files.append(name)

    if show:
        print(f' {len(files)} files & {len(dirs)} directories:')

    local_paths = []
    full_paths = []
    item_names = []

    printed_count = 0
    suppressed_note_shown = False
    for name in files:
        local = os.path.join(path, name) if path else name
        local_paths.append(local)
        full_paths.append(os.path.abspath(os.path.join(root, name)))
        item_names.append(name)

        if not show:
            continue
        if displayLimit is None or printed_count < displayLimit:
            print(f' FILE: {local}')
            printed_count += 1
        elif not suppressed_note_shown:
            print(f'\n ........(suppressing additional-file display beyond {displayLimit})')
            suppressed_note_shown = True

    for name in dirs:
        local = os.path.join(path, name) if path else name
        ret = get_files_recursive(local, displayIt=displayIt, returnItems=True,
                                  displayLevel=displayLevel + 1)
        local_paths.extend(ret['LocalPath'])
        full_paths.extend(ret['FullPath'])
        item_names.extend(ret['Items'])

    if returnItems:
        return {
            'Nfiles': len(local_paths),
            'LocalPath': local_paths,
            'FullPath': full_paths,
            'Items': item_names
        }
    return None
# returnItems=True is required to capture the listing; with the default (False)
# the function only prints and returns None, so AllFiles would be None.
AllFiles = OpsUtils.get_files_recursive(path=projectDir, returnItems=True)
----------------------------
DIRECTORY: /home/jupyter/MyData/tmp_removeME
5 files & 2 directories:
FILE: /home/jupyter/MyData/tmp_removeME/opensees.zip
FILE: /home/jupyter/MyData/tmp_removeME/tapisjob.env
FILE: /home/jupyter/MyData/tmp_removeME/tapisjob.out
FILE: /home/jupyter/MyData/tmp_removeME/tapisjob.sh
FILE: /home/jupyter/MyData/tmp_removeME/tapisjob_app.sh
----------------------------
DIRECTORY: /home/jupyter/MyData/tmp_removeME/inputDirectory
Delete directory#
Clean up: this was just a temporary directory
# shutil.rmtree removes a directory together with everything inside it. BE CAREFUL!
import shutil

if not os.path.exists(projectDir):
    print(f"Directory not found: {projectDir}")
else:
    shutil.rmtree(projectDir)
    print(f"Deleted directory and all contents: {projectDir}")
Deleted directory and all contents: /home/jupyter/MyData/tmp_removeME