SageMaker V3 Hyperparameter Training Example
This notebook demonstrates how the SageMaker V3 ModelTrainer handles hyperparameters that are supplied as JSON and YAML files.
import os
import json
import yaml
import tempfile
import shutil
from sagemaker.train.model_trainer import ModelTrainer
from sagemaker.train.configs import SourceCode
from sagemaker.core.helper.session_helper import Session, get_execution_role
from sagemaker.core import image_uris
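Step 1: Setup Session and Create Test Files
Initialize the SageMaker session, define the expected hyperparameters, look up the training image, and create the temporary directory that will hold the test files (the hyperparameter files and training script themselves are written in Step 2).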
# A single session object is shared by every ModelTrainer created in this notebook.
sagemaker_session = Session()

# Region configured on the session; used when looking up the training image.
region = sagemaker_session.boto_region_name

# Execution role for the current environment.
# NOTE(review): `role` is not passed to the trainers in the visible cells.
role = get_execution_role()
# Reference hyperparameters: one value of every supported kind (int, bool,
# float, str, list, and a nested dict that itself repeats each kind).
# The nested mapping is spelled out first to keep the top-level literal flat.
_NESTED_HYPERPARAMETERS = {
    "string": "value",
    "integer": 3,
    "float": 3.14,
    "list": [1, 2, 3],
    "dict": {"key": "value"},
    "boolean": True,
}

EXPECTED_HYPERPARAMETERS = {
    "integer": 1,
    "boolean": True,
    "float": 3.14,
    "string": "Hello World",
    "list": [1, 2, 3],
    "dict": _NESTED_HYPERPARAMETERS,
}
# Look up the PyTorch 2.0.0 / Python 3.10 training image URI for the session's
# region, sized for an ml.m5.xlarge instance.
DEFAULT_CPU_IMAGE = image_uris.retrieve(
    framework="pytorch",
    version="2.0.0",
    py_version="py310",
    image_scope="training",
    instance_type="ml.m5.xlarge",
    region=region,
)
# Scratch workspace for this notebook: a fresh temporary directory with a
# "source" subdirectory that receives the files written in later cells.
temp_dir = tempfile.mkdtemp()
source_dir = os.path.join(temp_dir, "source")
os.makedirs(source_dir, exist_ok=True)

print(f"Created temporary directory: {temp_dir}")
Step 2: Create Hyperparameter Files and Training Script
Create the JSON and YAML hyperparameter files, a training script that validates the hyperparameters it receives, and a requirements file.
# Write the expected hyperparameters to a JSON file in the source directory.
# The encoding is pinned to UTF-8 so the file content does not depend on the
# platform's locale default.
json_file = os.path.join(source_dir, "hyperparameters.json")
with open(json_file, "w", encoding="utf-8") as f:
    json.dump(EXPECTED_HYPERPARAMETERS, f, indent=2)

# Write the same hyperparameters as block-style (non-flow) YAML.
yaml_file = os.path.join(source_dir, "hyperparameters.yaml")
with open(yaml_file, "w", encoding="utf-8") as f:
    yaml.dump(EXPECTED_HYPERPARAMETERS, f, default_flow_style=False, indent=2)

print("Created hyperparameter files")
# Source text of the training entry script (written to train.py in the next
# step). The script parses the hyperparameters from its command line, checks
# them against the expected values, cross-checks the SM_HPS environment
# variable, and writes a results.json file into the model directory.
# The body must be valid, properly indented Python: it is written to disk
# verbatim and executed as a standalone script.
training_script = '''
import argparse
import json
import os

EXPECTED_HYPERPARAMETERS = {
    "integer": 1,
    "boolean": True,
    "float": 3.14,
    "string": "Hello World",
    "list": [1, 2, 3],
    "dict": {
        "string": "value",
        "integer": 3,
        "float": 3.14,
        "list": [1, 2, 3],
        "dict": {"key": "value"},
        "boolean": True,
    },
}


def parse_args():
    parser = argparse.ArgumentParser(description="Test Hyperparameters")
    parser.add_argument("--string", type=str, required=True)
    parser.add_argument("--integer", type=int, required=True)
    parser.add_argument("--float", type=float, required=True)
    parser.add_argument("--boolean", type=lambda x: json.loads(x), required=True)
    parser.add_argument("--list", type=lambda x: json.loads(x), required=True)
    parser.add_argument("--dict", type=lambda x: json.loads(x), required=True)
    return parser.parse_args()


def main():
    args = parse_args()
    print(f"Received hyperparameters: {args}")

    # Validate hyperparameters
    assert args.string == EXPECTED_HYPERPARAMETERS["string"]
    assert args.integer == EXPECTED_HYPERPARAMETERS["integer"]
    assert args.boolean == EXPECTED_HYPERPARAMETERS["boolean"]
    assert args.float == EXPECTED_HYPERPARAMETERS["float"]
    assert args.list == EXPECTED_HYPERPARAMETERS["list"]
    assert args.dict == EXPECTED_HYPERPARAMETERS["dict"]

    # Validate environment variables
    params = json.loads(os.environ["SM_HPS"])
    print(f"SM_HPS: {params}")
    assert params == EXPECTED_HYPERPARAMETERS

    print("All hyperparameter validations passed!")

    # Save results
    model_dir = os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
    os.makedirs(model_dir, exist_ok=True)
    results = {"status": "success", "hyperparameters": params}
    with open(os.path.join(model_dir, "results.json"), "w") as f:
        json.dump(results, f, indent=2)


if __name__ == "__main__":
    main()
'''
# Write the entry script into the source directory. Python reads source files
# as UTF-8 by default, so the encoding is stated explicitly instead of being
# left to the platform's locale default.
with open(os.path.join(source_dir, "train.py"), "w", encoding="utf-8") as f:
    f.write(training_script)

# Requirements file that sits next to the entry script in the source directory.
with open(os.path.join(source_dir, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write("omegaconf\n")

print("Created training script and requirements")
Step 3: Training with JSON Hyperparameters
Train a model using hyperparameters loaded from the JSON file.
# Describe the training code: the source directory, the entry script to run,
# and the requirements file, both given relative to that directory.
source_code = SourceCode(
    entry_script="train.py",
    requirements="requirements.txt",
    source_dir=source_dir,
)

# The hyperparameters argument is the path to the JSON file, not a dict.
json_trainer = ModelTrainer(
    base_job_name="hp-contract-hyperparameter-json",
    training_image=DEFAULT_CPU_IMAGE,
    source_code=source_code,
    hyperparameters=json_file,
    sagemaker_session=sagemaker_session,
)
print("ModelTrainer created with JSON hyperparameters!")
print(f"Hyperparameters loaded: {json_trainer.hyperparameters}")

# Check, before launching a job, that the trainer's hyperparameters equal the
# expected dict.
assert json_trainer.hyperparameters == EXPECTED_HYPERPARAMETERS
print("✓ JSON hyperparameters match expected values!")

print("Starting training with JSON hyperparameters...")
json_trainer.train()
print(f"JSON hyperparameters training completed: {json_trainer._latest_training_job.training_job_name}")
Step 4: Training with YAML Hyperparameters
Train a model using hyperparameters loaded from the YAML file.
# Same configuration as the JSON run, except that the hyperparameters argument
# is the path to the YAML file.
yaml_trainer = ModelTrainer(
    base_job_name="hp-contract-hyperparameter-yaml",
    training_image=DEFAULT_CPU_IMAGE,
    source_code=source_code,
    hyperparameters=yaml_file,
    sagemaker_session=sagemaker_session,
)
print("ModelTrainer created with YAML hyperparameters!")
print(f"Hyperparameters loaded: {yaml_trainer.hyperparameters}")

# Check, before launching a job, that the trainer's hyperparameters equal the
# expected dict.
assert yaml_trainer.hyperparameters == EXPECTED_HYPERPARAMETERS
print("✓ YAML hyperparameters match expected values!")

print("Starting training with YAML hyperparameters...")
yaml_trainer.train()
print(f"YAML hyperparameters training completed: {yaml_trainer._latest_training_job.training_job_name}")
Step 5: Compare Training Results
Print the job name and model artifacts of both training jobs and confirm that each trainer holds the expected hyperparameters.
# Pair a display label with each trainer so both runs are reported the same way.
training_jobs = [
    ("JSON File", json_trainer),
    ("YAML File", yaml_trainer),
]

print("Training Job Comparison:")
print("=" * 40)

for approach_name, trainer in training_jobs:
    # Fetch the job handle once and read both reported fields from it.
    latest_job = trainer._latest_training_job
    job_name = latest_job.training_job_name
    model_artifacts = latest_job.model_artifacts

    print(f"\n{approach_name}:")
    print(f" Job Name: {job_name}")
    print(f" Model Artifacts: {model_artifacts}")
    # Fixed label with no placeholders, so a plain string is used.
    # NOTE(review): the status is not read from the job object.
    print(" Status: Completed")

    # Checked per trainer, inside the loop, so that every run is verified
    # rather than only the last one.
    assert trainer.hyperparameters == EXPECTED_HYPERPARAMETERS

print("\n✓ All training jobs completed successfully with identical hyperparameters!")
Step 6: Clean Up
Remove the temporary directory and the files created in it.
# Best-effort removal of the notebook's temporary directory.
# Only OSError is caught: that is how shutil.rmtree reports filesystem
# problems (missing path, permissions). Unrelated errors, such as temp_dir
# never having been defined, are left to surface instead of being masked.
try:
    shutil.rmtree(temp_dir)
except OSError as e:
    print(f"Could not clean up temp directory: {e}")
else:
    print(f"Cleaned up temporary directory: {temp_dir}")

print("Cleanup completed!")
Summary
This notebook demonstrated:
- JSON hyperparameters: Loading from JSON files
- YAML hyperparameters: Loading from YAML files
- Validation: Ensuring loaded hyperparameters match expected values
- File-based configuration: Managing hyperparameters as external files
File-based hyperparameters provide better version control, reproducibility, and support for complex nested structures.