Unboxing AWS DataSync

Andreas Wittig – 23 Dec 2020

Your toolbox should contain AWS DataSync, a service to synchronize data between all kinds of locations. Copy data between S3, EFS, and FSx. On top of that, DataSync works with on-premises locations like NFS, SMB, and more.

Unboxing AWS DataSync

In this week’s video, Andreas unboxes AWS DataSync for you, explains the core concepts, and demos how to copy data between S3 and EFS.

Here is the Terraform configuration that we used in our example to synchronize data between S3 and EFS. The Terraform configuration creates the following resources:

  • S3 bucket
  • EFS file system
  • DataSync configuration
  • EC2 instance allowing you to access the EFS file system
  • Security Groups for DataSync and EC2
  • IAM roles for DataSync and EC2

Use the AWS Systems Manager Session Manager to connect to the EC2 instance, then mount and inspect the EFS file system.

# Provider requirements: use the HashiCorp AWS provider, constrained to the
# 3.x release line by the pessimistic version operator.
terraform {
  required_providers {
    aws = {
      version = "~> 3.0"
      source  = "hashicorp/aws"
    }
  }
}

# Every resource and data source below is resolved in the eu-west-1 region.
provider "aws" {
  region = "eu-west-1"
}

# Look up the default VPC of the configured region; all networking resources
# in this configuration are placed in it.
data "aws_vpc" "default" {
  default = true
}

# IDs of all subnets in the default VPC.
# Note: no filter is applied here, so despite the "public" label this returns
# every subnet of the VPC.
data "aws_subnet_ids" "public" {
  vpc_id = data.aws_vpc.default.id
}

# A single subnet used by both DataSync and the EC2 instance.
# The subnet IDs are sorted and the first one is taken so that the choice is
# stable between runs.
data "aws_subnet" "selected" {
  id     = sort(data.aws_subnet_ids.public.ids)[0]
  vpc_id = data.aws_vpc.default.id
}

# Newest image whose name matches the Amazon Linux 2 (HVM, x86_64, gp2)
# naming pattern, restricted to a single owner account.
data "aws_ami" "amzn2" {
  # NOTE(review): presumably the Amazon-owned account that publishes
  # Amazon Linux images — confirm before reusing in other partitions.
  owners      = ["137112412989"]
  most_recent = true

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
  }
}

# DataSync location pointing at the root ("/") of the demo S3 bucket.
# DataSync accesses the bucket by assuming the "datasync" IAM role.
resource "aws_datasync_location_s3" "demo" {
  s3_bucket_arn = aws_s3_bucket.demo.arn
  subdirectory  = "/"

  s3_config {
    bucket_access_role_arn = aws_iam_role.datasync.arn
  }

  # This block only references the role's ARN, so without an explicit
  # dependency Terraform may create the location before the role's inline
  # policy is attached. Creating the location while the role still lacks its
  # S3 permissions can fail, so wait for the policy first.
  depends_on = [aws_iam_role_policy.datasync]
}

# DataSync location for the demo EFS file system. DataSync reaches the file
# system from the selected subnet, using the "datasync" security group.
resource "aws_datasync_location_efs" "demo" {
  efs_file_system_arn = aws_efs_file_system.demo.arn

  ec2_config {
    security_group_arns = [aws_security_group.datasync.arn]
    subnet_arn          = data.aws_subnet.selected.arn
  }

  # The location can only be created once the file system has a mount target
  # in the chosen subnet. Nothing above references the mount targets, so the
  # ordering has to be declared explicitly to avoid a creation race.
  depends_on = [aws_efs_mount_target.demo]
}

# The DataSync task itself: copies from the S3 location (source) to the
# EFS location (destination).
resource "aws_datasync_task" "demo" {
  name = "demo-s3-to-efs"

  destination_location_arn = aws_datasync_location_efs.demo.arn
  source_location_arn      = aws_datasync_location_s3.demo.arn
}


# Source bucket for the demo. Only a prefix is given, so Terraform generates
# a unique bucket name starting with "demo-".
resource "aws_s3_bucket" "demo" {
  bucket_prefix = "demo-"
}

# Destination EFS file system, created with default settings and tagged
# with the name "Demo".
resource "aws_efs_file_system" "demo" {
  tags = { Name = "Demo" }
}

# IAM role that the DataSync service assumes to access the S3 bucket.
resource "aws_iam_role" "datasync" {
  name = "demo-datasync"

  # Trust policy: only the DataSync service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action    = "sts:AssumeRole"
        Effect    = "Allow"
        Principal = { Service = "datasync.amazonaws.com" }
      },
    ]
  })
}

# Inline policy on the DataSync role granting access to the demo bucket.
# The first statement covers bucket-level actions, the second covers
# object-level actions on every key in the bucket.
resource "aws_iam_role_policy" "datasync" {
  role = aws_iam_role.datasync.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Bucket-level permissions (resource is the bucket ARN itself).
      {
        Effect   = "Allow"
        Resource = aws_s3_bucket.demo.arn
        Action = [
          "s3:GetBucketLocation",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:HeadBucket"
        ]
      },
      # Object-level permissions (resource is "<bucket ARN>/*").
      {
        Effect   = "Allow"
        Resource = "${aws_s3_bucket.demo.arn}/*"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:DeleteObject",
          "s3:GetObject",
          "s3:ListMultipartUploadParts",
          "s3:GetObjectTagging",
          "s3:PutObjectTagging",
          "s3:PutObject"
        ]
      }
    ]
  })
}

# One EFS mount target per subnet of the default VPC, each protected by the
# "efs" security group.
resource "aws_efs_mount_target" "demo" {
  for_each = data.aws_subnet_ids.public.ids

  subnet_id       = each.value
  file_system_id  = aws_efs_file_system.demo.id
  security_groups = [aws_security_group.efs.id]
}

# Security group attached to the EFS mount targets. Its ingress rules are
# declared as separate aws_security_group_rule resources.
resource "aws_security_group" "efs" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-efs-"
}

# Allow inbound TCP 2049 on the EFS security group from members of the
# DataSync security group.
resource "aws_security_group_rule" "efs_datasync" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.datasync.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Allow inbound TCP 2049 on the EFS security group from members of the
# EC2 security group, so the demo instance can mount the file system.
resource "aws_security_group_rule" "efs_ec2" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.ec2.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Security group handed to DataSync for the EFS location. Its single egress
# rule is declared as a separate aws_security_group_rule resource.
resource "aws_security_group" "datasync" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-datasync-"
}

# Allow outbound TCP 2049 from the DataSync security group to the EFS
# security group. For an egress rule, source_security_group_id identifies
# the peer (destination) group.
resource "aws_security_group_rule" "datasync" {
  security_group_id        = aws_security_group.datasync.id
  source_security_group_id = aws_security_group.efs.id

  type      = "egress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Small EC2 instance in the same subnet that DataSync uses. It runs the
# looked-up Amazon Linux 2 image with the "ec2" instance profile and
# security group.
resource "aws_instance" "demo" {
  instance_type = "t3.micro"
  ami           = data.aws_ami.amzn2.id

  subnet_id              = data.aws_subnet.selected.id
  vpc_security_group_ids = [aws_security_group.ec2.id]
  iam_instance_profile   = aws_iam_instance_profile.ec2.name

  tags = { Name = "demo-datasync" }
}

# Security group for the demo instance: no ingress rules are defined, and
# all outbound traffic (any protocol, any port, any IPv4 destination) is
# allowed.
resource "aws_security_group" "ec2" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-ec2-"

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# IAM role for the demo EC2 instance.
resource "aws_iam_role" "ec2" {
  name = "demo-ec2"

  # Trust policy: only the EC2 service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action    = "sts:AssumeRole"
        Effect    = "Allow"
        Principal = { Service = "ec2.amazonaws.com" }
      },
    ]
  })
}

# Instance profile that hands the "ec2" IAM role to the demo instance.
resource "aws_iam_instance_profile" "ec2" {
  role = aws_iam_role.ec2.name
  name = "demo-ec2"
}

# Inline policy on the EC2 role: grants the Systems Manager related actions
# (ec2messages, ssmmessages and a few ssm calls) on all resources, which the
# instance needs in order to be reachable through Session Manager.
resource "aws_iam_role_policy" "ec2" {
  role = aws_iam_role.ec2.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = "*"
        Action = [
          "ec2messages:*",
          "ssmmessages:*",
          "ssm:UpdateInstanceInformation",
          "ssm:GetDeployablePatchSnapshotForInstance",
          "ssm:ListAssociations"
        ]
      }
    ]
  })
}

Andreas Wittig

Andreas Wittig

I’ve been building on AWS since 2012 together with my brother Michael. We are sharing our insights into all things AWS on cloudonaut and have written the book AWS in Action. Besides that, we’re currently working on bucketAV, HyperEnv for GitHub Actions, and marbot.

Here are the contact options for feedback and questions.