🎉 We are launching a new weekly show: Hot off the Cloud

🎉 We are launching a new weekly show

Unboxing AWS DataSync

Andreas Wittig – 23 Dec 2020

Your toolbox should contain AWS DataSync, a service to synchronize data between all kinds of locations. Copy data between S3, EFS, and FSx. On top of that, DataSync works with on-premises locations like NFS, SMB, and more.

Unboxing AWS DataSync

In this week’s video Andreas unboxes AWS DataSync for you, explains the core concepts, and demos how to copy data between S3 and EFS.


Looking for a new challenge?

  • tecRacer

    Cloud Consultant • AWS Migrations

    tecRacer • Premier AWS Consulting Partner • Germany, Austria, Portugal, and Switzerland
    Assessment Transformation Change Management
  • DEMICON

    Senior Lead Full Stack Developer

    DEMICON • AWS Advanced Consulting Partner • Remote
    AWS JavaScript/TypeScript Angular React

Here is the Terraform configuration that we used in our example to synchronize data between S3 and EFS. The Terraform configuration creates the following resources:

  • S3 bucket
  • EFS file system
  • DataSync configuration
  • EC2 instance allowing you to access the EFS file system
  • Security Groups for DataSync and EC2
  • IAM roles for DataSync and EC2

Use the AWS Systems Manager Session Manager to connect to the EC2 instance, then mount and inspect the EFS file system.

# Provider requirements: the AWS provider from the hashicorp namespace,
# constrained to the 3.x release line.
terraform {
  required_providers {
    aws = {
      version = "~> 3.0"
      source  = "hashicorp/aws"
    }
  }
}

# Every resource below is managed in the eu-west-1 region.
provider "aws" {
  region = "eu-west-1"
}

# Look up the account's default VPC in the configured region; all security
# groups and subnets in this configuration are resolved relative to it.
data "aws_vpc" "default" {
  default = true
}

# IDs of all subnets in the default VPC (no additional filter is applied).
# NOTE(review): the label "public" presumably relies on default-VPC subnets
# being public - confirm for accounts with a customized default VPC.
data "aws_subnet_ids" "public" {
  vpc_id = data.aws_vpc.default.id
}

# Pick one subnet deterministically: the lexicographically smallest subnet ID.
# This subnet hosts both the DataSync network interfaces and the EC2 instance.
data "aws_subnet" "selected" {
  id     = sort(data.aws_subnet_ids.public.ids)[0]
  vpc_id = data.aws_vpc.default.id
}

# Newest AMI whose name matches the Amazon Linux 2 (HVM, x86_64, gp2) pattern.
data "aws_ami" "amzn2" {
  # Only consider images published by this account.
  # NOTE(review): presumably Amazon's own AMI-publishing account - confirm.
  owners      = ["137112412989"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
  }

  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# DataSync location pointing at the root ("/") of the demo S3 bucket.
# DataSync accesses the bucket by assuming the demo-datasync IAM role.
resource "aws_datasync_location_s3" "demo" {
  s3_bucket_arn = aws_s3_bucket.demo.arn
  subdirectory  = "/"

  s3_config {
    bucket_access_role_arn = aws_iam_role.datasync.arn
  }

  # The role ARN reference above only orders this resource after the role
  # itself. DataSync checks bucket access when the location is created, so the
  # inline policy granting the S3 permissions must exist first; otherwise
  # creation can fail intermittently with an access error.
  depends_on = [aws_iam_role_policy.datasync]
}

# DataSync location for the demo EFS file system. DataSync attaches network
# interfaces in the selected subnet, guarded by the DataSync security group.
resource "aws_datasync_location_efs" "demo" {
  efs_file_system_arn = aws_efs_file_system.demo.arn

  ec2_config {
    security_group_arns = [aws_security_group.datasync.arn]
    subnet_arn          = data.aws_subnet.selected.arn
  }

  # The file system ARN alone does not order this resource after the mount
  # targets or the NFS ingress rule. DataSync needs a reachable mount target
  # in the chosen subnet when the location is created, so wait for both.
  depends_on = [
    aws_efs_mount_target.demo,
    aws_security_group_rule.efs_datasync,
  ]
}

# The transfer task itself: copies from the S3 location to the EFS location.
resource "aws_datasync_task" "demo" {
  name = "demo-s3-to-efs"

  source_location_arn      = aws_datasync_location_s3.demo.arn
  destination_location_arn = aws_datasync_location_efs.demo.arn
}


# Source bucket for the sync. Terraform generates a unique bucket name that
# starts with "demo-".
resource "aws_s3_bucket" "demo" {
  bucket_prefix = "demo-"
}

# Destination file system for the sync; all settings are provider defaults
# apart from the Name tag.
resource "aws_efs_file_system" "demo" {
  tags = {
    Name = "Demo"
  }
}

# IAM role handed to DataSync for S3 bucket access. The trust policy lets
# only the DataSync service principal assume it.
resource "aws_iam_role" "datasync" {
  name = "demo-datasync"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "datasync.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}

# Inline policy on the DataSync role, scoped to the demo bucket only.
resource "aws_iam_role_policy" "datasync" {
  role = aws_iam_role.datasync.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      # Bucket-level actions apply to the bucket ARN itself.
      {
        Effect = "Allow"
        Action = [
          "s3:GetBucketLocation",
          "s3:ListBucket",
          "s3:ListBucketMultipartUploads",
          "s3:HeadBucket"
        ]
        Resource = aws_s3_bucket.demo.arn
      },
      # Object-level actions apply to every key inside the bucket ("/*").
      {
        Effect = "Allow"
        Action = [
          "s3:AbortMultipartUpload",
          "s3:DeleteObject",
          "s3:GetObject",
          "s3:ListMultipartUploadParts",
          "s3:GetObjectTagging",
          "s3:PutObjectTagging",
          "s3:PutObject"
        ]
        Resource = "${aws_s3_bucket.demo.arn}/*"
      }
    ]
  })
}

# One mount target per subnet of the default VPC, each protected by the EFS
# security group.
resource "aws_efs_mount_target" "demo" {
  for_each = data.aws_subnet_ids.public.ids

  subnet_id       = each.value
  file_system_id  = aws_efs_file_system.demo.id
  security_groups = [aws_security_group.efs.id]
}

# Security group attached to the EFS mount targets. Its ingress rules are
# declared as separate aws_security_group_rule resources.
resource "aws_security_group" "efs" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-efs-"
}

# Allow inbound TCP 2049 (NFS) on the EFS security group from members of the
# DataSync security group.
resource "aws_security_group_rule" "efs_datasync" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.datasync.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Allow inbound TCP 2049 (NFS) on the EFS security group from members of the
# EC2 security group, so the demo instance can mount the file system.
resource "aws_security_group_rule" "efs_ec2" {
  security_group_id        = aws_security_group.efs.id
  source_security_group_id = aws_security_group.ec2.id

  type      = "ingress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Security group referenced by the DataSync EFS location. Its single egress
# rule is declared as a separate aws_security_group_rule resource.
resource "aws_security_group" "datasync" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-datasync-"
}

# Allow outbound TCP 2049 (NFS) from the DataSync security group to the EFS
# security group. For an egress rule, source_security_group_id names the
# destination group.
resource "aws_security_group_rule" "datasync" {
  security_group_id        = aws_security_group.datasync.id
  source_security_group_id = aws_security_group.efs.id

  type      = "egress"
  protocol  = "tcp"
  from_port = 2049
  to_port   = 2049
}

# Helper instance for inspecting the EFS file system. It runs in the same
# subnet that the DataSync EFS location uses.
resource "aws_instance" "demo" {
  instance_type = "t3.micro"
  ami           = data.aws_ami.amzn2.id

  subnet_id              = data.aws_subnet.selected.id
  vpc_security_group_ids = [aws_security_group.ec2.id]

  # Instance profile wrapping the demo-ec2 role and its inline policy.
  iam_instance_profile = aws_iam_instance_profile.ec2.name

  tags = {
    Name = "demo-datasync"
  }
}

# Security group for the demo instance: no ingress rules, unrestricted egress.
resource "aws_security_group" "ec2" {
  vpc_id      = data.aws_vpc.default.id
  name_prefix = "demo-ec2-"

  # protocol "-1" with ports 0-0 means all protocols and all ports.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# IAM role for the demo instance. The trust policy lets only the EC2 service
# principal assume it.
resource "aws_iam_role" "ec2" {
  name = "demo-ec2"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "ec2.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}

# Instance profile that attaches the demo-ec2 role to the EC2 instance.
resource "aws_iam_instance_profile" "ec2" {
  role = aws_iam_role.ec2.name
  name = "demo-ec2"
}

# Inline policy on the EC2 role granting Systems Manager messaging and
# instance-registration actions.
# NOTE(review): presumably the permission set the SSM agent needs for
# Session Manager access - confirm against the current AWS documentation.
resource "aws_iam_role_policy" "ec2" {
  role = aws_iam_role.ec2.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "ec2messages:*",
          "ssmmessages:*",
          "ssm:UpdateInstanceInformation",
          "ssm:GetDeployablePatchSnapshotForInstance",
          "ssm:ListAssociations"
        ]
        Resource = "*"
      }
    ]
  })
}

Become a cloudonaut supporter

Andreas Wittig

Andreas Wittig ( Email, Twitter, or LinkedIn )

We launched the cloudonaut blog in 2015. Since then, we have published 360 articles, 49 podcast episodes, and 48 videos. It's all free and means a lot of work in our spare time. We enjoy sharing our AWS knowledge with you.

Please support us

Have you learned something new by reading, listening, or watching our content? With your help, we can spend enough time to keep publishing great content in the future. Learn more

$
Amount must be a multiple of 5, e.g., 5, 10, 15.

Thanks to Alan Leech, Alex DeBrie, ANTHONY RAITI, Christopher Hipwell, Jaap-Jan Frans, Jason Yorty, Jeff Finley, Jens Gehring, jhoadley, Johannes Grumböck, Johannes Konings, John Culkin, Jonas Mellquist, Juraj Martinka, Kamil Oboril, Ken Snyder, Markus Ellers, Ross Mohan, Ross Mohan, sam onaga, Satyendra Sharma, Shawn Tolidano, Simon Devlin, Thorsten Hoeger, Todd Valentine, Victor Grenu, and all anonymous supporters for your help! We also want to thank all supporters who purchased a cloudonaut t-shirt.