NIM Containers – Code Snippet Appendix

See the Air-Gap configuration guide.

#!/usr/bin/env python3
"""
Processes a local NIM cache and (optionally) uploads it to S3/MinIO.

## Steps
1. Rename each cached file to its URL-encoded NIM key (worked example below):
       <model>:<snapshot>?file=<relative/path>
2. Move the renamed files into a staging directory, resolving symlinks so
   everything is self-contained for offline transfer.
3. (Optional) Upload the staged files to S3/MinIO.
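
Worked example (hypothetical model name and snapshot ID): the cached file
    models--nvidia--example-llm/snapshots/abc123/tokenizer/vocab.json
is staged under --dest-dir as
    nvidia%2Fexample-llm%3Aabc123%3Ffile%3Dtokenizer%252Fvocab.json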

## Environment variables
NIM_CACHE_DIR          Path to the local NIM cache.        (required)
NIM_BUCKET_NAME        Destination bucket name.            (required for upload)
AWS_ACCESS_KEY_ID      AWS/MinIO access key.               (required for upload)
AWS_SECRET_ACCESS_KEY  AWS/MinIO secret key.               (required for upload)
AWS_ENDPOINT_URL       Custom endpoint for MinIO.          (optional)
AWS_REGION             Defaults to "us-east-1".            (optional)
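
Example exports (placeholder values; adjust for your deployment):
    export NIM_CACHE_DIR=/path/to/nim/cache
    export NIM_BUCKET_NAME=<bucket-name>
    export AWS_ACCESS_KEY_ID=<access-key>
    export AWS_SECRET_ACCESS_KEY=<secret-key>
    export AWS_ENDPOINT_URL=https://<minio-host>:9000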

## Usage examples
python process_nim_cache.py --process                 [--dest-dir ./nim-model-profiles]
python process_nim_cache.py --upload                  [--dest-dir ./nim-model-profiles] [--insecure]
python process_nim_cache.py --process-and-upload      [--dest-dir ./nim-model-profiles] [--insecure]
"""
import argparse
import os
import shutil
import urllib.parse
from pathlib import Path

import urllib3


def process_files(dest_dir="nim-model-profiles"):
    cache = Path(os.getenv("NIM_CACHE_DIR", "")) / "ngc/hub"
    dest = Path(dest_dir)

    if not cache.exists():
        raise SystemExit("NIM_CACHE_DIR is missing or incorrect")

    dest.mkdir(parents=True, exist_ok=True)
    count = 0

    # Cache entries follow a hub-style layout:
    #   models--<org>--<name>/snapshots/<snapshot-id>/<files>
    for snap in cache.glob("models--*/snapshots/*"):
        model = snap.parents[1].name[len("models--"):].replace("--", "/")
        sid = snap.name

        for f in filter(Path.is_file, snap.rglob("*")):
            # Slashes in the relative path are double-encoded: "/" becomes %2F
            # here, and quote() below turns that %2F into %252F.
            file_path = f"{f.relative_to(snap)}".replace("/", "%2F")
            key = urllib.parse.quote(f"{model}:{sid}?file={file_path}", safe="")
            tgt = dest / key
            tgt.parent.mkdir(parents=True, exist_ok=True)
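            # Move the resolved target (the actual blob when the entry is a
            # symlink), then remove the dangling link left behind.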
            shutil.move(f.resolve(), tgt)
            if f.is_symlink():
                f.unlink()
            count += 1

    print(f"Processed {count} files to {dest}")
    return dest


def s3_client(verify_ssl=True):
    import boto3
    from botocore.config import Config

    ep = os.getenv("AWS_ENDPOINT_URL")
    config_params = {}
    if ep:
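        # A custom endpoint typically means MinIO or another S3-compatible
        # store, so force SigV4 signing; "virtual" addressing needs DNS for
        # <bucket>.<endpoint>, and some deployments require "path" instead.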
        config_params = {"signature_version": "s3v4", "s3": {"addressing_style": "virtual"}}
    config = Config(**config_params)

    return boto3.client("s3", endpoint_url=ep, region_name=os.getenv("AWS_REGION", "us-east-1"),
                        verify=verify_ssl, config=config)


def upload_files(source_dir="nim-model-profiles", verify_ssl=True):
    bucket = os.getenv("NIM_BUCKET_NAME")
    source = Path(source_dir)

    if not bucket:
        raise SystemExit("NIM_BUCKET_NAME must be set")
    if not source.exists():
        raise SystemExit(f"Source directory {source} does not exist")

    s3 = s3_client(verify_ssl)
    files = [f for f in source.rglob("*") if f.is_file()]
    total = len(files)

    print(f"Uploading {total} files to bucket '{bucket}'")

    count = 0
    for file_path in files:
        key = str(file_path.relative_to(source))
        s3.upload_file(str(file_path), bucket, key)
        count += 1
        print(f"Progress: {count}/{total} files")

    print(f"Uploaded {count} files to bucket '{bucket}'")


if __name__ == "__main__":
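    # Suppress the InsecureRequestWarning that urllib3 emits when --insecure
    # disables TLS certificate verification.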
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    p = argparse.ArgumentParser()
    p.add_argument("--dest-dir", default="nim-model-profiles", help="Staging directory")
    p.add_argument("--insecure", action="store_true", default=False, help="Skip SSL certificate verification")

    action = p.add_mutually_exclusive_group()
    action.add_argument("--process", action="store_true", help="Process files from cache to destination")
    action.add_argument("--upload", action="store_true", help="Upload files from destination to S3/MinIO")
    action.add_argument("--process-and-upload", action="store_true", help="Process and upload in one step")

    args = p.parse_args()

    if not (args.process or args.upload or args.process_and_upload):
        args.process = True

    if args.process or args.process_and_upload:
        process_files(args.dest_dir)
    if args.upload or args.process_and_upload:
        upload_files(args.dest_dir, verify_ssl=not args.insecure)
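
To spot-check an upload, a minimal listing sketch along these lines can be used. It is not part of the script above; it assumes the same NIM_BUCKET_NAME and AWS_* environment variables, and the MaxKeys limit of 10 is arbitrary.

import os

import boto3

# List a few staged objects to confirm the upload reached the bucket.
s3 = boto3.client(
    "s3",
    endpoint_url=os.getenv("AWS_ENDPOINT_URL"),
    region_name=os.getenv("AWS_REGION", "us-east-1"),
)
resp = s3.list_objects_v2(Bucket=os.environ["NIM_BUCKET_NAME"], MaxKeys=10)
for obj in resp.get("Contents", []):
    print(obj["Key"], obj["Size"])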