0
我想在EMR中自动执行集群创建任务。我有一个json文件,其中包含需要应用于新集群的配置,我想写一个shell脚本来自动执行此任务。是否有可能通过从json文件提供所有配置来创建EMR集群?
是否有可能通过从json文件中提供所有配置来创建EMR集群?
例如,我有这个文件
{
"Cluster": {
"Ec2InstanceAttributes": {
"EmrManagedMasterSecurityGroup": "sg-00b10b71",
"RequestedEc2AvailabilityZones": [],
"AdditionalSlaveSecurityGroups": [],
"AdditionalMasterSecurityGroups": [],
"RequestedEc2SubnetIds": [
"subnet-02291b3e"
],
"Ec2SubnetId": "subnet-02291b3e",
"IamInstanceProfile": "EMR_EC2_DefaultRole",
"Ec2KeyName": "perf_key_pair",
"Ec2AvailabilityZone": "us-east-1e",
"EmrManagedSlaveSecurityGroup": "sg-f2b30983"
},
"Name": "NitinJ-Perf",
"ServiceRole": "EMR_DefaultRole",
"Tags": [
{
"Value": "Perf-Nitink",
"Key": "Qubole"
}
],
"Applications": [
{
"Version": "3.7.2",
"Name": "Ganglia"
},
{
"Version": "2.7.3",
"Name": "Hadoop"
},
{
"Version": "2.1.1",
"Name": "Hive"
},
{
"Version": "0.16.0",
"Name": "Pig"
},
{
"Version": "0.8.4",
"Name": "Tez"
}
],
"MasterPublicDnsName": "ec2-34-229-254-217.compute-1.amazonaws.com",
"ScaleDownBehavior": "TERMINATE_AT_INSTANCE_HOUR",
"InstanceGroups": [
{
"RequestedInstanceCount": 4,
"Status": {
"Timeline": {
"ReadyDateTime": 1499150835.979,
"CreationDateTime": 1499150533.99
},
"State": "RUNNING",
"StateChangeReason": {
"Message": ""
}
},
"Name": "Core Instance Group",
"InstanceGroupType": "CORE",
"EbsBlockDevices": [],
"ShrinkPolicy": {},
"Id": "ig-34P3CVF8ZL5CW",
"Configurations": [],
"InstanceType": "r3.4xlarge",
"Market": "ON_DEMAND",
"RunningInstanceCount": 4
},
{
"RequestedInstanceCount": 1,
"Status": {
"Timeline": {
"ReadyDateTime": 1499150804.591,
"CreationDateTime": 1499150533.99
},
"State": "RUNNING",
"StateChangeReason": {
"Message": ""
}
},
"Name": "Master Instance Group",
"InstanceGroupType": "MASTER",
"EbsBlockDevices": [],
"ShrinkPolicy": {},
"Id": "ig-3V7EHQ36187PY",
"Configurations": [],
"InstanceType": "r3.4xlarge",
"Market": "ON_DEMAND",
"RunningInstanceCount": 1
}
],
"Configurations": [
{
"Properties": {
"hive.vectorized.execution.enabled": "true"
},
"Classification": "hive-site"
}
]
}
}
我能否使用类似下面的命令来创建EMR集群:
aws emr create-cluster --cli-input-json file://$(pwd)/emr_cluster_up.json
但在这份文档 http://docs.aws.amazon.com/cli/latest/reference/emr/create-cluster.html#examples 的示例5中,他们使用了configuration.json来配置集群。 –
该选项用于指定Hadoop MapReduce的配置,而不是基础架构。 – sudheerchamarthi