SageMaker V3 Hyperparameter Training Example

# SageMaker V3 Hyperparameter Training Example

This notebook demonstrates hyperparameter handling in SageMaker V3 ModelTrainer using JSON and YAML files.

import os
import json
import yaml
import tempfile
import shutil

from sagemaker.train.model_trainer import ModelTrainer
from sagemaker.train.configs import SourceCode
from sagemaker.core.helper.session_helper import Session, get_execution_role
from sagemaker.core import image_uris

## Step 1: Set Up Session and Create Test Files

Initialize the SageMaker session, define the expected hyperparameters, resolve the training image, and create the temporary directory that will hold the test files.

# Session-derived context shared by the cells that follow.
sagemaker_session = Session()
role = get_execution_role()
region = sagemaker_session.boto_region_name

# Reference values used throughout the example. They cover scalars, a list,
# and a nested dict that itself repeats every value type.
EXPECTED_HYPERPARAMETERS = {
    "integer": 1,
    "boolean": True,
    "float": 3.14,
    "string": "Hello World",
    "list": [1, 2, 3],
    "dict": {
        "string": "value",
        "integer": 3,
        "float": 3.14,
        "list": [1, 2, 3],
        "dict": {"key": "value"},
        "boolean": True,
    },
}

# Look up the PyTorch 2.0.0 / py310 training image for a CPU instance type
# in the session's region.
DEFAULT_CPU_IMAGE = image_uris.retrieve(
    framework="pytorch",
    version="2.0.0",
    py_version="py310",
    image_scope="training",
    instance_type="ml.m5.xlarge",
    region=region,
)

# Scratch workspace: everything the training job needs is written under
# <temp_dir>/source.
temp_dir = tempfile.mkdtemp()
source_dir = os.path.join(temp_dir, "source")
os.makedirs(source_dir, exist_ok=True)

print(f"Created temporary directory: {temp_dir}")

## Step 2: Create Hyperparameter Files and Training Script

Create JSON and YAML hyperparameter files and a training script that validates them.

# Persist the expected hyperparameters in both file formats so the same
# values can later be handed to ModelTrainer as a JSON path and a YAML path.
# The encoding is set explicitly so the bytes on disk do not depend on the
# platform's default locale encoding.
json_file = os.path.join(source_dir, "hyperparameters.json")
with open(json_file, "w", encoding="utf-8") as f:
    json.dump(EXPECTED_HYPERPARAMETERS, f, indent=2)

# safe_dump only emits standard YAML tags, so the resulting file can always
# be read back by a safe loader. For the plain types used here its output is
# the same as yaml.dump's.
yaml_file = os.path.join(source_dir, "hyperparameters.yaml")
with open(yaml_file, "w", encoding="utf-8") as f:
    yaml.safe_dump(EXPECTED_HYPERPARAMETERS, f, default_flow_style=False, indent=2)

print("Created hyperparameter files")

# Source of the entry script (train.py) that validates the hyperparameters it
# receives. It checks them twice: once as command-line flags (non-scalar
# values are decoded with json.loads) and once through the SM_HPS environment
# variable, then writes a small results.json into the model directory.
#
# The script is a standalone file, so it carries its own copy of the expected
# values. Keep that copy in sync with EXPECTED_HYPERPARAMETERS in this
# notebook, otherwise the assertions inside the job will fail.
training_script = '''
import argparse
import json
import os

EXPECTED_HYPERPARAMETERS = {
    "integer": 1,
    "boolean": True,
    "float": 3.14,
    "string": "Hello World",
    "list": [1, 2, 3],
    "dict": {
        "string": "value",
        "integer": 3,
        "float": 3.14,
        "list": [1, 2, 3],
        "dict": {"key": "value"},
        "boolean": True,
    },
}

def parse_args():
    parser = argparse.ArgumentParser(description="Test Hyperparameters")
    parser.add_argument("--string", type=str, required=True)
    parser.add_argument("--integer", type=int, required=True)
    parser.add_argument("--float", type=float, required=True)
    parser.add_argument("--boolean", type=lambda x: json.loads(x), required=True)
    parser.add_argument("--list", type=lambda x: json.loads(x), required=True)
    parser.add_argument("--dict", type=lambda x: json.loads(x), required=True)
    return parser.parse_args()

def main():
    args = parse_args()
    print(f"Received hyperparameters: {args}")

    # Validate hyperparameters
    assert args.string == EXPECTED_HYPERPARAMETERS["string"]
    assert args.integer == EXPECTED_HYPERPARAMETERS["integer"]
    assert args.boolean == EXPECTED_HYPERPARAMETERS["boolean"]
    assert args.float == EXPECTED_HYPERPARAMETERS["float"]
    assert args.list == EXPECTED_HYPERPARAMETERS["list"]
    assert args.dict == EXPECTED_HYPERPARAMETERS["dict"]

    # Validate environment variables
    params = json.loads(os.environ["SM_HPS"])
    print(f"SM_HPS: {params}")
    assert params == EXPECTED_HYPERPARAMETERS

    print("All hyperparameter validations passed!")
    
    # Save results
    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
    os.makedirs(model_dir, exist_ok=True)
    
    results = {"status": "success", "hyperparameters": params}
    with open(os.path.join(model_dir, "results.json"), "w") as f:
        json.dump(results, f, indent=2)

if __name__ == "__main__":
    main()
'''

# Write the script next to the hyperparameter files. The encoding is explicit
# so the file content does not depend on the platform's locale default.
with open(os.path.join(source_dir, "train.py"), "w", encoding="utf-8") as f:
    f.write(training_script)

# Requirements file referenced by SourceCode(requirements=...).
# NOTE(review): train.py does not import omegaconf; presumably this entry only
# exercises dependency installation. Confirm before removing it.
with open(os.path.join(source_dir, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write("omegaconf\n")

print("Created training script and requirements")

## Step 3: Training with JSON Hyperparameters

Train a model using hyperparameters loaded from a JSON file.

# Point the trainer at the generated source directory; train.py and
# requirements.txt were both written there in Step 2.
source_code = SourceCode(
    entry_script="train.py",
    requirements="requirements.txt",
    source_dir=source_dir,
)

# hyperparameters is given as the path of the JSON file, not as a dict.
json_trainer = ModelTrainer(
    base_job_name="hp-contract-hyperparameter-json",
    training_image=DEFAULT_CPU_IMAGE,
    source_code=source_code,
    hyperparameters=json_file,
    sagemaker_session=sagemaker_session,
)

print("ModelTrainer created with JSON hyperparameters!")
print(f"Hyperparameters loaded: {json_trainer.hyperparameters}")

# Check, before any job is launched, that the trainer exposes the file
# contents as the expected dict.
assert json_trainer.hyperparameters == EXPECTED_HYPERPARAMETERS
print("✓ JSON hyperparameters match expected values!")

print("Starting training with JSON hyperparameters...")
json_trainer.train()

# NOTE: _latest_training_job is a private attribute of the trainer.
print(f"JSON hyperparameters training completed: {json_trainer._latest_training_job.training_job_name}")

## Step 4: Training with YAML Hyperparameters

Train a model using hyperparameters loaded from a YAML file.

# Same configuration as the JSON run, except that hyperparameters is the path
# of the YAML file and the job name prefix identifies the YAML variant.
yaml_trainer = ModelTrainer(
    base_job_name="hp-contract-hyperparameter-yaml",
    training_image=DEFAULT_CPU_IMAGE,
    source_code=source_code,
    hyperparameters=yaml_file,
    sagemaker_session=sagemaker_session,
)

print("ModelTrainer created with YAML hyperparameters!")
print(f"Hyperparameters loaded: {yaml_trainer.hyperparameters}")

# Check, before any job is launched, that the trainer exposes the file
# contents as the expected dict.
assert yaml_trainer.hyperparameters == EXPECTED_HYPERPARAMETERS
print("✓ YAML hyperparameters match expected values!")

print("Starting training with YAML hyperparameters...")
yaml_trainer.train()

# NOTE: _latest_training_job is a private attribute of the trainer.
print(f"YAML hyperparameters training completed: {yaml_trainer._latest_training_job.training_job_name}")

## Step 5: Compare Training Results

Compare the results from both hyperparameter approaches.

# Pair each trainer with a human-readable label for the report below.
training_jobs = [
    ("JSON File", json_trainer),
    ("YAML File", yaml_trainer),
]

print("Training Job Comparison:")
print("=" * 40)

for approach_name, trainer in training_jobs:
    # Look the job resource up once; it is a private attribute of the trainer.
    latest_job = trainer._latest_training_job
    job_name = latest_job.training_job_name
    model_artifacts = latest_job.model_artifacts

    print(f"\n{approach_name}:")
    print(f"  Job Name: {job_name}")
    print(f"  Model Artifacts: {model_artifacts}")
    # Report the status recorded on the job resource rather than a fixed
    # "Completed" literal, so the line stays truthful if a job ended in (or
    # is still in) another state.
    print(f"  Status: {latest_job.training_job_status}")

    # Both approaches must have loaded exactly the same hyperparameters.
    assert trainer.hyperparameters == EXPECTED_HYPERPARAMETERS

print("\n✓ All training jobs completed successfully with identical hyperparameters!")

## Step 6: Clean Up

Clean up temporary files.

# Best-effort removal of the temporary directory created in Step 1.
# shutil.rmtree reports failures as OSError (missing path, permissions, ...),
# so only that is caught; anything else is a real bug and should surface.
try:
    shutil.rmtree(temp_dir)
except OSError as e:
    print(f"Could not clean up temp directory: {e}")
else:
    # Announce completion only when the directory was actually removed, so
    # the output never claims success right after reporting a failure.
    print(f"Cleaned up temporary directory: {temp_dir}")
    print("Cleanup completed!")

## Summary

This notebook demonstrated:

- JSON hyperparameters: Loading from JSON files
- YAML hyperparameters: Loading from YAML files
- Validation: Ensuring loaded hyperparameters match expected values
- File-based configuration: Managing hyperparameters as external files

File-based hyperparameters provide better version control, reproducibility, and support for complex nested structures.