Skip to content

Create Processing Job

sagemaker_create_processing_job R Documentation

Creates a processing job

Description

Creates a processing job.

Usage

sagemaker_create_processing_job(ProcessingInputs,
  ProcessingOutputConfig, ProcessingJobName, ProcessingResources,
  StoppingCondition, AppSpecification, Environment, NetworkConfig,
  RoleArn, Tags, ExperimentConfig)

Arguments

ProcessingInputs

An array of inputs configuring the data to download into the processing container.

ProcessingOutputConfig

Output configuration for the processing job.

ProcessingJobName

[required] The name of the processing job. The name must be unique within an Amazon Web Services Region in the Amazon Web Services account.

ProcessingResources

[required] Identifies the resources, ML compute instances, and ML storage volumes to deploy for a processing job. For distributed processing, specify more than one instance.

StoppingCondition

The time limit for how long the processing job is allowed to run.

AppSpecification

[required] Configures the processing job to run a specified Docker container image.

Environment

The environment variables to set in the Docker container. Up to 100 key-value entries in the map are supported.

Do not include any security-sensitive information including account access IDs, secrets, or tokens in any environment fields. As part of the shared responsibility model, you are responsible for any potential exposure, unauthorized access, or compromise of your sensitive data if caused by security-sensitive information included in the request environment variable or plain text fields.

NetworkConfig

Networking options for a processing job, such as whether to allow inbound and outbound network calls to and from processing containers, and the VPC subnets and security groups to use for VPC-enabled processing jobs.

RoleArn

[required] The Amazon Resource Name (ARN) of an IAM role that Amazon SageMaker can assume to perform tasks on your behalf.

Tags

(Optional) An array of key-value pairs. For more information, see Using Cost Allocation Tags in the Amazon Web Services Billing and Cost Management User Guide.

Do not include any security-sensitive information including account access IDs, secrets, or tokens in any tags. As part of the shared responsibility model, you are responsible for any potential exposure, unauthorized access, or compromise of your sensitive data if caused by security-sensitive information included in the request tag variable or plain text fields.

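ExperimentConfig

Experiment configuration for the processing job. A list with the fields ExperimentName, TrialName, TrialComponentDisplayName, and RunName, as shown under Request syntax.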

Value

A list with the following syntax:

list(
  ProcessingJobArn = "string"
)

Request syntax

svc$create_processing_job(
  ProcessingInputs = list(
    list(
      InputName = "string",
      AppManaged = TRUE|FALSE,
      S3Input = list(
        S3Uri = "string",
        LocalPath = "string",
        S3DataType = "ManifestFile"|"S3Prefix",
        S3InputMode = "Pipe"|"File",
        S3DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
        S3CompressionType = "None"|"Gzip"
      ),
      DatasetDefinition = list(
        AthenaDatasetDefinition = list(
          Catalog = "string",
          Database = "string",
          QueryString = "string",
          WorkGroup = "string",
          OutputS3Uri = "string",
          KmsKeyId = "string",
          OutputFormat = "PARQUET"|"ORC"|"AVRO"|"JSON"|"TEXTFILE",
          OutputCompression = "GZIP"|"SNAPPY"|"ZLIB"
        ),
        RedshiftDatasetDefinition = list(
          ClusterId = "string",
          Database = "string",
          DbUser = "string",
          QueryString = "string",
          ClusterRoleArn = "string",
          OutputS3Uri = "string",
          KmsKeyId = "string",
          OutputFormat = "PARQUET"|"CSV",
          OutputCompression = "None"|"GZIP"|"BZIP2"|"ZSTD"|"SNAPPY"
        ),
        LocalPath = "string",
        DataDistributionType = "FullyReplicated"|"ShardedByS3Key",
        InputMode = "Pipe"|"File"
      )
    )
  ),
  ProcessingOutputConfig = list(
    Outputs = list(
      list(
        OutputName = "string",
        S3Output = list(
          S3Uri = "string",
          LocalPath = "string",
          S3UploadMode = "Continuous"|"EndOfJob"
        ),
        FeatureStoreOutput = list(
          FeatureGroupName = "string"
        ),
        AppManaged = TRUE|FALSE
      )
    ),
    KmsKeyId = "string"
  ),
  ProcessingJobName = "string",
  ProcessingResources = list(
    ClusterConfig = list(
      InstanceCount = 123,
      InstanceType = "ml.t3.medium"|"ml.t3.large"|"ml.t3.xlarge"|"ml.t3.2xlarge"|"ml.m4.xlarge"|"ml.m4.2xlarge"|"ml.m4.4xlarge"|"ml.m4.10xlarge"|"ml.m4.16xlarge"|"ml.c4.xlarge"|"ml.c4.2xlarge"|"ml.c4.4xlarge"|"ml.c4.8xlarge"|"ml.p2.xlarge"|"ml.p2.8xlarge"|"ml.p2.16xlarge"|"ml.p3.2xlarge"|"ml.p3.8xlarge"|"ml.p3.16xlarge"|"ml.c5.xlarge"|"ml.c5.2xlarge"|"ml.c5.4xlarge"|"ml.c5.9xlarge"|"ml.c5.18xlarge"|"ml.m5.large"|"ml.m5.xlarge"|"ml.m5.2xlarge"|"ml.m5.4xlarge"|"ml.m5.12xlarge"|"ml.m5.24xlarge"|"ml.r5.large"|"ml.r5.xlarge"|"ml.r5.2xlarge"|"ml.r5.4xlarge"|"ml.r5.8xlarge"|"ml.r5.12xlarge"|"ml.r5.16xlarge"|"ml.r5.24xlarge"|"ml.g4dn.xlarge"|"ml.g4dn.2xlarge"|"ml.g4dn.4xlarge"|"ml.g4dn.8xlarge"|"ml.g4dn.12xlarge"|"ml.g4dn.16xlarge"|"ml.g5.xlarge"|"ml.g5.2xlarge"|"ml.g5.4xlarge"|"ml.g5.8xlarge"|"ml.g5.16xlarge"|"ml.g5.12xlarge"|"ml.g5.24xlarge"|"ml.g5.48xlarge"|"ml.r5d.large"|"ml.r5d.xlarge"|"ml.r5d.2xlarge"|"ml.r5d.4xlarge"|"ml.r5d.8xlarge"|"ml.r5d.12xlarge"|"ml.r5d.16xlarge"|"ml.r5d.24xlarge"|"ml.g6.xlarge"|"ml.g6.2xlarge"|"ml.g6.4xlarge"|"ml.g6.8xlarge"|"ml.g6.12xlarge"|"ml.g6.16xlarge"|"ml.g6.24xlarge"|"ml.g6.48xlarge"|"ml.g6e.xlarge"|"ml.g6e.2xlarge"|"ml.g6e.4xlarge"|"ml.g6e.8xlarge"|"ml.g6e.12xlarge"|"ml.g6e.16xlarge"|"ml.g6e.24xlarge"|"ml.g6e.48xlarge"|"ml.m6i.large"|"ml.m6i.xlarge"|"ml.m6i.2xlarge"|"ml.m6i.4xlarge"|"ml.m6i.8xlarge"|"ml.m6i.12xlarge"|"ml.m6i.16xlarge"|"ml.m6i.24xlarge"|"ml.m6i.32xlarge"|"ml.c6i.xlarge"|"ml.c6i.2xlarge"|"ml.c6i.4xlarge"|"ml.c6i.8xlarge"|"ml.c6i.12xlarge"|"ml.c6i.16xlarge"|"ml.c6i.24xlarge"|"ml.c6i.32xlarge",
      VolumeSizeInGB = 123,
      VolumeKmsKeyId = "string"
    )
  ),
  StoppingCondition = list(
    MaxRuntimeInSeconds = 123
  ),
  AppSpecification = list(
    ImageUri = "string",
    ContainerEntrypoint = list(
      "string"
    ),
    ContainerArguments = list(
      "string"
    )
  ),
  Environment = list(
    "string"
  ),
  NetworkConfig = list(
    EnableInterContainerTrafficEncryption = TRUE|FALSE,
    EnableNetworkIsolation = TRUE|FALSE,
    VpcConfig = list(
      SecurityGroupIds = list(
        "string"
      ),
      Subnets = list(
        "string"
      )
    )
  ),
  RoleArn = "string",
  Tags = list(
    list(
      Key = "string",
      Value = "string"
    )
  ),
  ExperimentConfig = list(
    ExperimentName = "string",
    TrialName = "string",
    TrialComponentDisplayName = "string",
    RunName = "string"
  )
)