# Transient EMR cluster with Spark installed that runs one spark-submit step
# and is configured to shut down once no steps remain.
resource "aws_emr_cluster" "example" {
  name          = "emr-transform"
  release_label = "emr-6.10.0"
  applications  = ["Spark"]
  service_role  = aws_iam_role.emr_service.arn
  log_uri       = "s3://my-bucket/emr-logs/"

  # Replaces the unsupported `auto_terminate = true`: the provider expresses
  # "terminate when the step queue is empty" by disabling keep-alive.
  keep_job_flow_alive_when_no_steps = false

  ec2_attributes {
    key_name         = "my-key"
    instance_profile = aws_iam_instance_profile.emr_profile.arn
  }

  # Instance sizing uses the *_instance_group blocks; the flat
  # master_instance_type / core_instance_type / core_instance_count
  # arguments were removed from the AWS provider in v3.0.
  master_instance_group {
    instance_type = "m5.xlarge"
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 2
  }

  step {
    name = "Transform CSV to Parquet"

    # CONTINUE: a failed step does not by itself terminate the cluster.
    action_on_failure = "CONTINUE"

    hadoop_jar_step {
      # command-runner.jar executes the argument list as a command on the cluster.
      jar = "command-runner.jar"
      args = [
        "spark-submit",
        "--deploy-mode", "cluster",
        "s3://my-bucket/scripts/transform_data.py",
        # NOTE(review): presumably the script's input and output locations —
        # confirm against transform_data.py.
        "s3://my-bucket/input/",
        "s3://my-bucket/output/"
      ]
    }
  }
}
No comments:
Post a Comment