# main.tf
# AWS provider configuration; the target region is supplied through var.region.
provider "aws" {
  region = var.region
}
# Service role shared by the Glue crawler and the Glue job defined in this file.
# The trust policy lets only the Glue service (glue.amazonaws.com) assume it.
resource "aws_iam_role" "glue_role" {
  name = "glue-service-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Sid    = "",
        Effect = "Allow",
        Action = "sts:AssumeRole",
        Principal = {
          Service = "glue.amazonaws.com"
        }
      }
    ]
  })
}
# Attach the AWS-managed baseline policy for Glue (catalog, logging, and the
# service-owned "aws-glue-*" buckets) to the Glue role.
resource "aws_iam_role_policy_attachment" "glue_policy" {
  role       = aws_iam_role.glue_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
}

# The managed policy above only grants S3 object access on buckets whose names
# start with "aws-glue-". The crawler target (raw/) and the job script
# (scripts/) live in var.bucket_name, so access to that bucket must be granted
# explicitly or both the crawler and the job fail with AccessDenied.
# NOTE(review): PutObject is included on the assumption that the transform
# script writes its output to the same bucket - tighten if it does not.
resource "aws_iam_role_policy" "glue_s3_access" {
  name = "glue-s3-data-access"
  role = aws_iam_role.glue_role.id

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect   = "Allow",
        Action   = ["s3:ListBucket"],
        Resource = "arn:aws:s3:::${var.bucket_name}"
      },
      {
        Effect   = "Allow",
        Action   = ["s3:GetObject", "s3:PutObject"],
        Resource = "arn:aws:s3:::${var.bucket_name}/*"
      }
    ]
  })
}
# Glue Data Catalog database that the crawler writes its tables into.
resource "aws_glue_catalog_database" "glue_db" {
  name = var.glue_db
}
# Crawler over the raw/ prefix of the data bucket. Tables it creates are
# registered in the catalog database above with a "raw_" name prefix.
resource "aws_glue_crawler" "crawler" {
  name          = var.crawler_name
  database_name = aws_glue_catalog_database.glue_db.name
  role          = aws_iam_role.glue_role.arn
  table_prefix  = "raw_"

  s3_target {
    path = "s3://${var.bucket_name}/raw/"
  }
}
# Spark ETL job ("glueetl") that runs the transform script stored under
# scripts/ in the data bucket, using the shared Glue service role.
resource "aws_glue_job" "transform" {
  name     = var.glue_job_name
  role_arn = aws_iam_role.glue_role.arn

  command {
    name            = "glueetl"
    script_location = "s3://${var.bucket_name}/scripts/transform_script.py"
    python_version  = "3"
  }

  glue_version = "4.0"

  # Glue 2.0 and later do not accept max_capacity for Spark jobs; capacity is
  # expressed as a worker type plus a worker count instead. Two G.1X workers
  # provide the same 2 DPUs the original max_capacity = 2 asked for.
  worker_type       = "G.1X"
  number_of_workers = 2

  execution_property {
    # Only one run of this job may be in flight at any time.
    max_concurrent_runs = 1
  }
}
# Scheduled Glue trigger that starts the transform job once a day at 00:00 UTC.
# NOTE(review): the EventBridge rule in this file uses the same cron expression
# to start the Step Functions workflow, which also runs this job. Because the
# job allows only one concurrent run, the two starts look likely to collide -
# confirm whether this trigger is still wanted alongside the workflow.
resource "aws_glue_trigger" "daily_trigger" {
  name     = "daily-trigger"
  type     = "SCHEDULED"
  enabled  = true
  schedule = "cron(0 0 * * ? *)" # daily at midnight UTC

  actions {
    job_name = aws_glue_job.transform.name
  }
}
# Execution role for the Step Functions state machine. The trust policy allows
# only the Step Functions service (states.amazonaws.com) to assume it.
resource "aws_iam_role" "step_function_role" {
  name = "step-function-glue-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Action = "sts:AssumeRole",
        Effect = "Allow",
        Principal = {
          Service = "states.amazonaws.com"
        }
      }
    ]
  })
}
# Inline permissions for the state machine's execution role: drive the Glue
# crawler, run and monitor the Glue job, and invoke the notification Lambda.
resource "aws_iam_role_policy" "step_function_policy" {
  name = "step-function-policy"
  role = aws_iam_role.step_function_role.id

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = [
          "glue:GetCrawler",
          "glue:StartCrawler",
          "glue:StartJobRun",
          # The "glue:startJobRun.sync" integration polls the run until it
          # finishes and stops it if the execution is aborted; without these
          # three actions the RunTransformJob state fails with AccessDenied.
          "glue:GetJobRun",
          "glue:GetJobRuns",
          "glue:BatchStopJobRun",
          "lambda:InvokeFunction",
          "states:StartExecution"
        ],
        # NOTE(review): "*" is broader than this workflow needs; consider
        # scoping to the specific crawler, job, function and state machine.
        Resource = "*"
      }
    ]
  })
}
# Dedicated execution role for the notification Lambda. A Lambda function's
# role must be assumable by lambda.amazonaws.com; the Step Functions role used
# previously trusts only states.amazonaws.com, so the function could not be
# created with it.
resource "aws_iam_role" "notify_lambda_role" {
  name = "notify-complete-lambda-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = "sts:AssumeRole",
        Principal = {
          Service = "lambda.amazonaws.com"
        }
      }
    ]
  })
}

# Lets the function write its logs to CloudWatch Logs.
resource "aws_iam_role_policy_attachment" "notify_lambda_logs" {
  role       = aws_iam_role.notify_lambda_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
}

# Notification function invoked as the last step of the ETL workflow.
# The deployment package is read from the local file
# lambda_function_payload.zip; its hash is tracked so that a changed archive
# triggers a redeploy.
resource "aws_lambda_function" "notify_lambda" {
  function_name    = "notify-complete-lambda"
  role             = aws_iam_role.notify_lambda_role.arn
  handler          = "index.lambda_handler"
  runtime          = "python3.9"
  filename         = "lambda_function_payload.zip"
  source_code_hash = filebase64sha256("lambda_function_payload.zip")
}
# ETL workflow: start the crawler, poll every 30 seconds until it reports
# READY, run the transform job synchronously, then invoke the notification
# Lambda.
#
# Crawler and job names are taken from the managed resources rather than from
# the input variables, so Terraform orders this state machine after them and
# the definition cannot drift from what is actually deployed.
#
# NOTE(review): no state declares Retry/Catch and the polling loop has no upper
# bound, so a failed StartCrawler call fails the execution and a crawler that
# never returns to READY keeps the loop running - consider adding both.
resource "aws_sfn_state_machine" "glue_workflow" {
  name     = "glue-etl-workflow"
  role_arn = aws_iam_role.step_function_role.arn

  definition = jsonencode({
    Comment = "ETL with Glue and Lambda",
    StartAt = "StartCrawler",
    States = {
      # Kick off the crawler through the AWS SDK integration.
      StartCrawler = {
        Type     = "Task",
        Resource = "arn:aws:states:::aws-sdk:glue:startCrawler",
        Parameters = {
          Name = aws_glue_crawler.crawler.name
        },
        Next = "Wait30Seconds"
      },

      # Pause between status polls.
      Wait30Seconds = {
        Type    = "Wait",
        Seconds = 30,
        Next    = "GetCrawlerStatus"
      },

      # Fetch the crawler description; its state is read by the Choice below.
      GetCrawlerStatus = {
        Type     = "Task",
        Resource = "arn:aws:states:::aws-sdk:glue:getCrawler",
        Parameters = {
          Name = aws_glue_crawler.crawler.name
        },
        Next = "CheckCrawlerState"
      },

      # READY moves on to the job; any other state goes back to waiting.
      CheckCrawlerState = {
        Type = "Choice",
        Choices = [
          {
            Variable     = "$.Crawler.State",
            StringEquals = "READY",
            Next         = "RunTransformJob"
          }
        ],
        Default = "Wait30Seconds"
      },

      # ".sync" makes the state wait for the job run to complete.
      RunTransformJob = {
        Type     = "Task",
        Resource = "arn:aws:states:::glue:startJobRun.sync",
        Parameters = {
          JobName = aws_glue_job.transform.name
        },
        Next = "NotifyLambda"
      },

      # Final step: report success for the job to the notification function.
      NotifyLambda = {
        Type     = "Task",
        Resource = "arn:aws:states:::lambda:invoke",
        Parameters = {
          FunctionName = aws_lambda_function.notify_lambda.arn,
          Payload = {
            job    = aws_glue_job.transform.name,
            status = "SUCCESS"
          }
        },
        End = true
      }
    }
  })
}
# EventBridge schedule rule that fires once a day at 00:00 UTC; its target
# starts the ETL state machine.
resource "aws_cloudwatch_event_rule" "daily_etl_rule" {
  name                = "daily-etl-schedule"
  schedule_expression = "cron(0 0 * * ? *)"
}
# Role EventBridge assumes to start the state machine. The Step Functions
# execution role used previously trusts only states.amazonaws.com, so
# EventBridge could not assume it and the scheduled start would fail.
resource "aws_iam_role" "eventbridge_sfn_role" {
  name = "eventbridge-start-etl-workflow-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect = "Allow",
        Action = "sts:AssumeRole",
        Principal = {
          Service = "events.amazonaws.com"
        }
      }
    ]
  })
}

# The only permission EventBridge needs: start executions of this workflow.
resource "aws_iam_role_policy" "eventbridge_sfn_policy" {
  name = "eventbridge-start-etl-workflow"
  role = aws_iam_role.eventbridge_sfn_role.id

  policy = jsonencode({
    Version = "2012-10-17",
    Statement = [
      {
        Effect   = "Allow",
        Action   = "states:StartExecution",
        Resource = aws_sfn_state_machine.glue_workflow.arn
      }
    ]
  })
}

# Connects the daily schedule rule to the ETL state machine.
resource "aws_cloudwatch_event_target" "etl_target" {
  rule      = aws_cloudwatch_event_rule.daily_etl_rule.name
  target_id = "StepFunctionTrigger"
  arn       = aws_sfn_state_machine.glue_workflow.arn
  role_arn  = aws_iam_role.eventbridge_sfn_role.arn
}
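# Prerequisites before running `terraform apply`:
# - Upload glue_job/transform_script.py to s3://${var.bucket_name}/scripts/
# - Place the Lambda package, lambda_function_payload.zip, in this directory
#   (it is read from local disk, not from S3)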
# End of main.tf