Source code for sagemaker.core.remote_function.errors
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Definitions for reomote job errors and error handling"""
from __future__ import absolute_import
import os
from tblib import pickling_support
from sagemaker.core.s3 import s3_path_join
import sagemaker.core.remote_function.core.serialization as serialization
# Exit code used when no usable exit code can be derived from the raised error.
DEFAULT_FAILURE_CODE = 1
# Path of the file that `_write_failure_reason_file` writes the failure message to.
FAILURE_REASON_PATH = "/opt/ml/output/failure"
[docs]
@pickling_support.install
class RemoteFunctionError(Exception):
    """Root of the exception hierarchy for remote function failures.

    The class is registered with tblib's ``pickling_support`` (presumably so
    that instances keep their traceback when pickled -- confirm against the
    tblib documentation).

    Attributes:
        message: The human-readable error description passed to the constructor.
    """

    def __init__(self, message):
        # Hand the message to ``Exception`` so it also ends up in ``args``.
        super().__init__(message)
        self.message = message
[docs]
@pickling_support.install
class ServiceError(RemoteFunctionError):
    """Signals a failure while interacting with SageMaker or S3 service APIs."""
[docs]
@pickling_support.install
class SerializationError(RemoteFunctionError):
    """Signals a failure while serializing remote function objects."""
[docs]
@pickling_support.install
class DeserializationError(RemoteFunctionError):
    """Signals a failure while deserializing remote function objects."""
def _get_valid_failure_exit_code(exit_code) -> int:
    """Normalize exit code for terminating the process after a failure.

    Args:
        exit_code: Candidate exit code; any value ``int()`` may or may not accept.

    Returns:
        int: ``int(exit_code)`` when the conversion succeeds and the result is
        non-zero, otherwise ``DEFAULT_FAILURE_CODE``.
    """
    try:
        valid_exit_code = int(exit_code)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers infinite floats, e.g. int(float("inf")).
        return DEFAULT_FAILURE_CODE
    # Zero conventionally signals success, so it must never be reported as a
    # failure exit code; fall back to the default instead.
    return valid_exit_code or DEFAULT_FAILURE_CODE
def _write_failure_reason_file(failure_msg):
    """Create a file 'failure' with failure reason written if remote function execution failed.

    The file is only created when it does not exist yet; an already recorded
    failure reason is left untouched.

    See: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-training-algo.html

    Args:
        failure_msg (str): The content of file to be written.
    """
    try:
        # Mode "x" creates the file exclusively, which removes the race between
        # an existence check and the open. UTF-8 is set explicitly so that a
        # non-ASCII failure message cannot fail under a restrictive locale.
        with open(FAILURE_REASON_PATH, "x", encoding="utf-8") as f:
            f.write(failure_msg)
    except FileExistsError:
        # A failure reason has already been written; keep the first one.
        pass
[docs]
def handle_error(error, sagemaker_session, s3_base_uri, s3_kms_key) -> int:
    """Record and upload an exception raised during remote function execution.

    The error's ``repr`` is written to the failure reason file, and the
    exception is serialized to ``<s3_base_uri>/exception``.

    Args:
        error (Exception): The error to be handled.
        sagemaker_session (sagemaker.core.helper.session.Session): The underlying Boto3
            session which AWS service calls are delegated to.
        s3_base_uri (str): S3 root uri to which resulting serialized exception will be uploaded.
        s3_kms_key (str): KMS key used to encrypt artifacts uploaded to S3.

    Returns:
        int: Exit code to terminate current job. ``DEFAULT_FAILURE_CODE`` for a
        ``RemoteFunctionError``; otherwise a code derived from the error's
        ``errno`` attribute when it has one.
    """
    reason_text = repr(error)

    # Library-defined errors always map to the default code; any other error
    # may carry an ``errno`` that is normalized into the exit code.
    exit_code = (
        DEFAULT_FAILURE_CODE
        if isinstance(error, RemoteFunctionError)
        else _get_valid_failure_exit_code(getattr(error, "errno", DEFAULT_FAILURE_CODE))
    )

    _write_failure_reason_file(reason_text)

    exception_s3_uri = s3_path_join(s3_base_uri, "exception")
    serialization.serialize_exception_to_s3(
        exc=error,
        sagemaker_session=sagemaker_session,
        s3_uri=exception_s3_uri,
        s3_kms_key=s3_kms_key,
    )
    return exit_code