SageMaker V3 Local Container Mode Example#
This notebook demonstrates how to use SageMaker V3 ModelBuilder in Local Container mode for testing models in Docker containers locally.
# Import required libraries
import json
import uuid
import tempfile
import os
import shutil
import torch
import torch.nn as nn
from sagemaker.serve.model_builder import ModelBuilder
from sagemaker.serve.spec.inference_spec import InferenceSpec
from sagemaker.serve.builder.schema_builder import SchemaBuilder
from sagemaker.serve.utils.types import ModelServer
from sagemaker.serve.mode.function_pointers import Mode
# NOTE: Local mode requires Docker to be installed and running.
# If Docker is not in your system PATH, you may need to define the Docker path in one of the top cells.
# Here is an example (`os` is already imported above, so a second import is unnecessary):
# Prepend common Docker binary locations so the SDK can shell out to the `docker` CLI.
os.environ['PATH'] = '/usr/local/bin:/Applications/Docker.app/Contents/Resources/bin:' + os.environ['PATH']
Step 1: Create a PyTorch Model#
Create and save a simple PyTorch model for local container testing.
class SimpleModel(nn.Module):
    """Tiny 4-feature-in / 2-class-out network used to exercise the local container flow."""

    def __init__(self):
        super().__init__()
        # Single affine layer; weights are randomly initialized at construction.
        self.linear = nn.Linear(4, 2)

    def forward(self, x):
        # Project the features to two logits, then normalize to class probabilities.
        logits = self.linear(x)
        return torch.softmax(logits, dim=1)
def save_pytorch_model(model_path: str):
    """Trace a SimpleModel and persist it as TorchScript under *model_path*.

    Returns the full path of the written ``model.pt`` file.
    """
    example = torch.tensor([[1.0, 2.0, 3.0, 4.0]], dtype=torch.float32)
    # torch.jit.trace records the forward pass so the saved artifact is self-contained.
    scripted = torch.jit.trace(SimpleModel(), example)
    target = os.path.join(model_path, "model.pt")
    torch.jit.save(scripted, target)
    return target
# Create temporary model directory and save model
temp_model_path = tempfile.mkdtemp()  # cleaned up explicitly in Step 9
model_file = save_pytorch_model(temp_model_path)
print(f"Model saved to: {model_file}")
Step 2: Define PyTorch InferenceSpec#
Create an InferenceSpec that can load and run our PyTorch model.
class PyTorchInferenceSpec(InferenceSpec):
    """InferenceSpec that serves the traced SimpleModel inside a local container."""

    def __init__(self, model_path=None):
        # Directory holding a pre-built model.pt, if any.
        self.model_path = model_path

    def prepare(self, model_dir: str):
        """Copy the model artifact into the serving directory, when one exists."""
        if not self.model_path:
            return
        source = os.path.join(self.model_path, "model.pt")
        target = os.path.join(model_dir, "model.pt")
        if os.path.exists(source) and source != target:
            shutil.copy2(source, target)

    def load(self, model_dir: str):
        """Load the TorchScript artifact, falling back to a fresh SimpleModel."""
        artifact = os.path.join(model_dir, "model.pt")
        loaded = (
            torch.jit.load(artifact, map_location='cpu')
            if os.path.exists(artifact)
            else SimpleModel()
        )
        loaded.eval()
        return loaded

    def invoke(self, input_object, model):
        """Run a forward pass and return the probabilities as nested lists."""
        # Unwrap {"data": ...} envelopes; pass anything else through as-is.
        if isinstance(input_object, dict) and "data" in input_object:
            payload = input_object["data"]
        else:
            payload = input_object
        # Coerce array-likes (anything exposing tolist()) to plain lists first.
        if not isinstance(payload, list) and hasattr(payload, 'tolist'):
            payload = payload.tolist()
        batch = torch.tensor(payload, dtype=torch.float32)
        with torch.no_grad():
            probabilities = model(batch)
        return probabilities.tolist()
print("PyTorch InferenceSpec defined successfully!")
Step 3: Create Schema Builder#
Define the input/output schema for our PyTorch model.
# Create PyTorch schema builder
# Sample payloads show SchemaBuilder how to (de)serialize requests/responses:
# a 1x4 float feature row in, a 1x2 probability row out.
sample_input = [[1.0, 2.0, 3.0, 4.0]]
sample_output = [[0.6, 0.4]]
schema_builder = SchemaBuilder(sample_input, sample_output)
print("Schema builder created successfully!")
Step 4: Configure ModelBuilder for Local Container Mode#
Set up ModelBuilder to run in LOCAL_CONTAINER mode with Docker.
# Configuration
MODEL_NAME_PREFIX = "pytorch-local"
ENDPOINT_NAME_PREFIX = "pytorch-local"
# Generate unique identifiers
unique_id = str(uuid.uuid4())[:8]  # short suffix keeps names readable yet unique per run
model_name = f"{MODEL_NAME_PREFIX}-{unique_id}"
endpoint_name = f"{ENDPOINT_NAME_PREFIX}-{unique_id}"
# Create ModelBuilder in LOCAL_CONTAINER mode
inference_spec = PyTorchInferenceSpec(model_path=temp_model_path)
model_builder = ModelBuilder(
    inference_spec=inference_spec,
    model_server=ModelServer.TORCHSERVE,  # serve with TorchServe inside the container
    schema_builder=schema_builder,
    mode=Mode.LOCAL_CONTAINER  # This enables Docker container mode
)
print(f"ModelBuilder configured for local container model: {model_name}")
print(f"Target endpoint: {endpoint_name}")
print("Note: This will use Docker containers locally!")
Step 5: Build the Model#
Build the model artifacts for containerized deployment.
# Build the model
# build() packages the inference spec + schema into deployable model artifacts.
local_model = model_builder.build(model_name=model_name)
print(f"Model Successfully Created: {local_model.model_name}")
Step 6: Deploy in Local Container#
Deploy the model in a local Docker container. The first run may take a few minutes while the container image is pulled and the container is health-checked until it reports live; this is a normal part of the deployment process.
# Deploy locally in container mode
print("Starting local container deployment...")
print("Note: This may take a few minutes to pull the Docker image on first run.")
local_endpoint = model_builder.deploy_local(
    endpoint_name=endpoint_name,
    wait=True,  # block until the container responds to health pings
    container_timeout_in_seconds=1200  # 20 minutes timeout
)
print(f"Local Container Endpoint Successfully Created: {endpoint_name}")
print("Container is now running and ready for inference!")
Step 7: Test the Containerized Model#
Send test requests to the model running in the local container.
def _invoke_and_parse(payload):
    """POST *payload* as JSON to the local endpoint and return the decoded reply."""
    response = local_endpoint.invoke(
        body=json.dumps(payload),
        content_type="application/json"
    )
    return json.loads(response.body.read().decode('utf-8'))

# Test 1: Single prediction
test_input_1 = [[1.0, 2.0, 3.0, 4.0]]
parsed_response_1 = _invoke_and_parse(test_input_1)
print(f"Test 1 - Single prediction: {parsed_response_1}")

# Test 2: Batch prediction
test_input_2 = [
    [1.0, 2.0, 3.0, 4.0],
    [0.5, 1.5, 2.5, 3.5],
    [2.0, 3.0, 4.0, 5.0]
]
parsed_response_2 = _invoke_and_parse(test_input_2)
print(f"Test 2 - Batch prediction: {parsed_response_2}")

# Test 3: Edge case - different input ranges
test_input_3 = [[0.1, 0.2, 0.3, 0.4]]
parsed_response_3 = _invoke_and_parse(test_input_3)
print(f"Test 3 - Edge case: {parsed_response_3}")
Step 8: Container Information#
Get information about the running container.
# Display container information
print("Container Information:")
print(f"- Endpoint Name: {local_endpoint.endpoint_name}")
# Constant strings need no f-prefix (was a pointless f-string with no placeholders).
print("- Model Server: TorchServe")
print("- Container Mode: LOCAL_CONTAINER")
print(f"- Model Path: {temp_model_path}")
# You can also check Docker containers running from a shell:
print("\nTo see the running container, you can run:")
print("docker ps")
Step 9: Clean Up#
Clean up the local container and temporary files.
# Clean up temporary model files
# ignore_errors makes this cell safe to re-run after the directory is already gone.
shutil.rmtree(temp_model_path, ignore_errors=True)
print("Temporary model files cleaned up!")
# Note: Local container will be automatically cleaned up when the process ends
print("Local container will be automatically stopped when this notebook session ends.")
print("No AWS resources were created, so no cloud cleanup needed.")
Summary#
This notebook demonstrated:
Creating and saving a PyTorch model
Defining a PyTorch InferenceSpec with prepare(), load(), and invoke() methods
Configuring ModelBuilder for LOCAL_CONTAINER mode
Building and deploying models in local Docker containers
Testing containerized models with various inputs
Proper cleanup of local resources
Benefits of Local Container Mode:#
Container parity: Same environment as SageMaker endpoints
No AWS costs: Runs entirely locally
Realistic testing: Uses actual model serving containers
Debugging friendly: Can inspect container logs and behavior
Dependency isolation: Container handles all dependencies
When to Use Local Container Mode:#
Testing models before deploying to SageMaker
Debugging inference issues
Validating custom inference code
Development with realistic serving environment
CI/CD pipeline testing
Local container mode provides the perfect balance between local development speed and production environment fidelity!