Unboxing AWS DataSync

Andreas Wittig – 23 Dec 2020

Your toolbox should contain AWS DataSync, a service to synchronize data between all kinds of locations. Copy data between S3, EFS, and FSx. On top of that, DataSync works with on-premises locations like NFS, SMB, and more.

Unboxing AWS DataSync

In this week’s video Andreas unboxes AWS DataSync for you, explains the core concepts, and demos how to copy data between S3 and EFS.


Looking for a new challenge?

  • tecRacer

    Cloud Consultant

    tecRacer • Premier AWS Consulting Partner • Germany, Austria, Spain, and Switzerland
    AWS only Infrastructure as Code EC2 Containers Serverless
  • tecRacer

    Cloud Migration Specialist

    tecRacer • Premier AWS Consulting Partner • Germany, Austria, Spain, and Switzerland
    Lift&Shift Transformation EC2 RDS VPC

Here is the Terraform configuration that we used in our example to synchronize data between S3 and EFS. The Terraform configuration creates the following resources:

  • S3 bucket
  • EFS file system
  • DataSync configuration
  • EC2 instance allowing you to access the EFS file system
  • Security Groups for DataSync and EC2
  • IAM roles for DataSync and EC2

Use the AWS Systems Manager Session Manager to connect to the EC2 instance to mount and inspect the EFS file system.

# Pin the AWS provider to the 3.x release line from the HashiCorp registry.
terraform {
  required_providers {
    aws = {
      version = "~> 3.0"
      source  = "hashicorp/aws"
    }
  }
}

# All resources in this configuration are created in the eu-west-1 region.
provider "aws" {
  region = "eu-west-1"
}

# Look up the account's default VPC; everything below is placed inside it.
data "aws_vpc" "default" {
  default = true
}

# IDs of every subnet that belongs to the default VPC.
data "aws_subnet_ids" "public" {
  vpc_id = data.aws_vpc.default.id
}

# Pick one subnet deterministically: the first ID after sorting the set.
# This subnet is used by the DataSync EFS location and the EC2 instance.
data "aws_subnet" "selected" {
  id     = sort(data.aws_subnet_ids.public.ids)[0]
  vpc_id = data.aws_vpc.default.id
}

# Most recent image owned by account 137112412989 whose name matches the
# amzn2 HVM x86_64 gp2 pattern.
data "aws_ami" "amzn2" {
  owners      = ["137112412989"]
  most_recent = true

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
  }
}

# DataSync location for the root ("/") of the demo S3 bucket.
# DataSync accesses the bucket by assuming the demo-datasync IAM role.
resource "aws_datasync_location_s3" "demo" {
  s3_bucket_arn = aws_s3_bucket.demo.arn
  subdirectory  = "/"

  s3_config {
    bucket_access_role_arn = aws_iam_role.datasync.arn
  }

  # The role ARN reference alone only orders this resource after the role
  # itself. Wait for the inline policy as well, so the role already carries
  # its S3 permissions when the location is created.
  depends_on = [aws_iam_role_policy.datasync]
}

# DataSync location for the demo EFS file system, attached through the
# selected subnet and the DataSync security group.
resource "aws_datasync_location_efs" "demo" {
  efs_file_system_arn = aws_efs_file_system.demo.arn

  ec2_config {
    security_group_arns = [aws_security_group.datasync.arn]
    subnet_arn          = data.aws_subnet.selected.arn
  }

  # An EFS location can only be created once the file system has a mount
  # target. Nothing above references the mount targets, so order this
  # resource after them explicitly.
  depends_on = [aws_efs_mount_target.demo]
}

# DataSync task that copies from the S3 location to the EFS location.
resource "aws_datasync_task" "demo" {
  name = "demo-s3-to-efs"

  destination_location_arn = aws_datasync_location_efs.demo.arn
  source_location_arn      = aws_datasync_location_s3.demo.arn
}


# Source bucket; Terraform generates a unique name starting with "demo-".
resource "aws_s3_bucket" "demo" {
  bucket_prefix = "demo-"
}

# Destination EFS file system, tagged with the name "Demo".
resource "aws_efs_file_system" "demo" {
  tags = { Name = "Demo" }
}

# IAM role that the DataSync service principal is allowed to assume.
resource "aws_iam_role" "datasync" {
  name = "demo-datasync"

  # Trust policy: only datasync.amazonaws.com may call sts:AssumeRole.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = { Service = "datasync.amazonaws.com" }
        Action    = "sts:AssumeRole"
      },
    ]
  })
}

# Inline policy on the DataSync role granting access to the demo bucket.
resource "aws_iam_role_policy" "datasync" {
  role = aws_iam_role.datasync.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Bucket-level actions, scoped to the bucket ARN itself.
      {
        Effect = "Allow"
        Action = [
          "s3:GetBucketLocation",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:HeadBucket"
        ]
        Resource = aws_s3_bucket.demo.arn
      },
      # Object-level actions, scoped to every key inside the bucket.
      {
        Effect = "Allow"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:DeleteObject",
          "s3:GetObject",
          "s3:ListMultipartUploadParts",
          "s3:GetObjectTagging",
          "s3:PutObjectTagging",
          "s3:PutObject"
        ]
        Resource = format("%s/*", aws_s3_bucket.demo.arn)
      }
    ]
  })
}

# One mount target per subnet of the default VPC, each protected by the
# EFS security group.
resource "aws_efs_mount_target" "demo" {
  for_each = data.aws_subnet_ids.public.ids

  subnet_id       = each.value
  file_system_id  = aws_efs_file_system.demo.id
  security_groups = [aws_security_group.efs.id]
}

# Security group attached to the EFS mount targets. Its ingress rules are
# declared as separate aws_security_group_rule resources.
resource "aws_security_group" "efs" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-efs-"
}

# Allow TCP 2049 into the EFS security group from the DataSync security group.
resource "aws_security_group_rule" "efs_datasync" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.datasync.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Allow TCP 2049 into the EFS security group from the EC2 security group.
resource "aws_security_group_rule" "efs_ec2" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.ec2.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Security group referenced by the DataSync EFS location. Its egress rule
# is declared as a separate aws_security_group_rule resource.
resource "aws_security_group" "datasync" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-datasync-"
}

# Allow TCP 2049 out of the DataSync security group towards the EFS
# security group (for an egress rule, source_security_group_id names the peer).
resource "aws_security_group_rule" "datasync" {
  security_group_id        = aws_security_group.datasync.id
  source_security_group_id = aws_security_group.efs.id

  type      = "egress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# t3.micro instance in the selected subnet, launched from the amzn2 AMI
# with the demo-ec2 instance profile and the EC2 security group.
resource "aws_instance" "demo" {
  instance_type = "t3.micro"
  ami           = data.aws_ami.amzn2.id

  subnet_id              = data.aws_subnet.selected.id
  vpc_security_group_ids = [aws_security_group.ec2.id]
  iam_instance_profile   = aws_iam_instance_profile.ec2.name

  tags = { Name = "demo-datasync" }
}

# Security group for the EC2 instance: no ingress rules, and unrestricted
# egress (all protocols and ports to 0.0.0.0/0).
resource "aws_security_group" "ec2" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-ec2-"

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# IAM role that the EC2 service principal is allowed to assume.
resource "aws_iam_role" "ec2" {
  name = "demo-ec2"

  # Trust policy: only ec2.amazonaws.com may call sts:AssumeRole.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = { Service = "ec2.amazonaws.com" }
        Action    = "sts:AssumeRole"
      },
    ]
  })
}

# Instance profile that hands the demo-ec2 role to the EC2 instance.
resource "aws_iam_instance_profile" "ec2" {
  role = aws_iam_role.ec2.name
  name = "demo-ec2"
}

# Inline policy on the EC2 role with the ec2messages, ssmmessages and ssm
# actions listed below, allowed on all resources.
resource "aws_iam_role_policy" "ec2" {
  role = aws_iam_role.ec2.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = "*"
        Action = [
          "ec2messages:*",
          "ssmmessages:*",
          "ssm:UpdateInstanceInformation",
          "ssm:GetDeployablePatchSnapshotForInstance",
          "ssm:ListAssociations"
        ]
      }
    ]
  })
}

Become a cloudonaut supporter

Andreas Wittig

Andreas Wittig ( Email, Twitter, or LinkedIn )

We launched the cloudonaut blog in 2015. Since then, we have published 345 articles, 45 podcast episodes, and 37 videos. It's all free and means a lot of work in our spare time. We enjoy sharing our AWS knowledge with you.

Please support us

Have you learned something new by reading, listening, or watching our content? With your help, we can spend enough time to keep publishing great content in the future. Learn more

$
Amount must be a multiple of 5, e.g., 5, 10, 15.

Thanks to Alan Leech, Alex DeBrie, ANTHONY RAITI, Jaap-Jan Frans, Jason Yorty, Jeff Finley, Jens Gehring, jhoadley, Johannes Grumböck, John Culkin, Jonas Mellquist, Juraj Martinka, Kamil Oboril, Ken Snyder, Ross Mohan, Ross Mohan, sam onaga, Shawn Tolidano, Thorsten Hoeger, Todd Valentine, and all anonymous supporters for your help! We also want to thank all supporters who purchased a cloudonaut t-shirt.