Skip to content
This repository has been archived by the owner on Mar 18, 2024. It is now read-only.

Commit

Permalink
Tfhartmann/plat 2145 drain instances when terminating (#3)
Browse files Browse the repository at this point in the history
* Adding Checks and Handlers for draining ECS instances

* changed args to handler

* Update JSON for watches

* Watch handler must be a string

* Updates to handler

I had the handler set to watch a service rather than the check; these checks
are part of the $cluster service.

* More handler updates

* Doh... watches try to run on the parent container

* some updates
  • Loading branch information
tfhartmann authored and hakamadare committed Mar 6, 2018
1 parent 71e3ab6 commit e80566e
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 3 deletions.
23 changes: 23 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# Base image: Alpine Linux, pinned to the 3.6 release.
FROM alpine:3.6
# This is the release of Consul to pull in.
ENV CONSUL_VERSION=1.0.6
# This is the location of the releases.
ENV HASHICORP_RELEASES=https://releases.hashicorp.com
RUN apk -v --update add \
bash \
python \
Expand All @@ -8,11 +12,30 @@ RUN apk -v --update add \
mailcap \
jq \
curl \
ca-certificates \
gnupg libcap \
openssl \
su-exec \
dumb-init \
&& \
pip install --upgrade awscli==1.14.5 s3cmd==2.0.1 python-magic && \
apk -v --purge del py-pip && \
rm /var/cache/apk/*
# Download, verify and install the Consul binary.
#   1. Import the signing key from pgp.mit.edu (presumably HashiCorp's release
#      signing key -- confirm the fingerprint against HashiCorp's security page).
#   2. Fetch the linux_amd64 zip, the SHA256SUMS file and its detached signature
#      for ${CONSUL_VERSION} from ${HASHICORP_RELEASES}.
#   3. Verify the signature over SHA256SUMS, then check the zip against the
#      matching SHA256SUMS entry.
#   4. Unzip the archive into /bin.
#   5. Remove the build directory, the gnupg and openssl packages, and the
#      root GnuPG home directory.
RUN gpg --keyserver pgp.mit.edu --recv-keys 91A6E7F85D05C65630BEF18951852D87348FFC4C && \
mkdir -p /tmp/build && \
cd /tmp/build && \
wget ${HASHICORP_RELEASES}/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip && \
wget ${HASHICORP_RELEASES}/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS && \
wget ${HASHICORP_RELEASES}/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS.sig && \
gpg --batch --verify consul_${CONSUL_VERSION}_SHA256SUMS.sig consul_${CONSUL_VERSION}_SHA256SUMS && \
grep consul_${CONSUL_VERSION}_linux_amd64.zip consul_${CONSUL_VERSION}_SHA256SUMS | sha256sum -c && \
unzip -d /bin consul_${CONSUL_VERSION}_linux_amd64.zip && \
cd /tmp && \
rm -rf /tmp/build && \
apk del gnupg openssl && \
rm -rf /root/.gnupg

# Copy the helper scripts, the check definitions and the entrypoint script
# into /usr/local/bin inside the image.
COPY scripts/*.sh /usr/local/bin/
COPY check_definitions/*.sh /usr/local/bin/check_definitions/
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
Expand Down
13 changes: 11 additions & 2 deletions check_definitions/ecs-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ _SERVICE=$(cat <<EOT
"docker_container_id": "${DOCKER_ID}",
"shell": "/bin/bash",
"args": ["/usr/local/bin/ami_up2date.sh"],
"interval": "10s",
"interval": "15s",
"status": "passing"
},
{
Expand All @@ -31,8 +31,17 @@ _SERVICE=$(cat <<EOT
"args": ["/usr/local/bin/ecs-cloudwatch-metrics.sh"],
"interval": "60s",
"status": "passing"
},
{
"id": "instance-status",
"name": "Instance Status",
"notes": "Instance and ECS Instance Status",
"docker_container_id": "${DOCKER_ID}",
"shell": "/bin/bash",
"args": ["/usr/local/bin/instance-status.sh"],
"interval": "60s",
"status": "passing"
}
]
}
}
Expand Down
4 changes: 3 additions & 1 deletion scripts/ecs-cloudwatch-metrics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ ECS_CLUSTER=$(curl -s http://localhost:51678/v1/metadata | jq -r .Cluster)
CONTAINERINSTANCES=$(aws --region ${REGION} ecs describe-clusters --clusters ${ECS_CLUSTER} | jq .clusters[0].registeredContainerInstancesCount)
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

aws --region ${REGION} cloudwatch put-metric-data --metric-name ContainerInstancesCount --namespace /AWS/ECS --dimensions Cluster=${ECS_CLUSTER} --unit Count --value 1 --timestamp ${TIMESTAMP}
# Commenting out to save CPU Cycles on smaller instances.
#aws --region ${REGION} cloudwatch put-metric-data --metric-name ContainerInstancesCount --namespace /AWS/ECS --dimensions Cluster=${ECS_CLUSTER} --unit Count --value 1 --timestamp ${TIMESTAMP}
aws --region ${REGION} cloudwatch put-metric-data --metric-name registeredContainerInstances --namespace /AWS/ECS --dimensions Cluster=${ECS_CLUSTER} --unit Count --value ${CONTAINERINSTANCES} --timestamp ${TIMESTAMP}
echo "Sending Metrics to CloudWatch"
exit 0
18 changes: 18 additions & 0 deletions scripts/instance-draining-handler.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
set -e
# >>> Currently NOT USED <<<<
#
# Handler that puts this host's ECS container instance into DRAINING once the
# Auto Scaling group reports the EC2 instance as "Terminating:Wait".
#
# Inputs (all discovered at runtime, no arguments):
#   - EC2 instance metadata (169.254.169.254): availability zone, instance id
#   - ECS agent introspection API (localhost:51678): container instance ARN,
#     cluster name
# Exit status: 0 in every lifecycle state; non-zero only when one of the
# commands below fails and `set -e` aborts the script.
#
# TODO: We probably want to replace this with something that consumes the JSON
# that the Consul watch passes to the handler. Reading the status from that
# input would save wall-clock time compared to querying the AWS APIs here.

# The region is the availability zone with its trailing letter stripped.
REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/ | sed 's/.$//')
ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)

# Query the ECS agent once and extract both fields from the same document.
# `|| true` is intentional: the previous `curl | jq` pipelines also ignored a
# curl failure (only jq's status counted), so an unreachable agent still
# yields empty values here instead of aborting under `set -e`.
ECS_METADATA=$(curl -s http://localhost:51678/v1/metadata || true)
ARN=$(jq -r .ContainerInstanceArn <<<"$ECS_METADATA")
ECS_CLUSTER=$(jq -r .Cluster <<<"$ECS_METADATA")

STATUS=$(aws --region "$REGION" autoscaling describe-auto-scaling-instances --instance-ids "$ID" \
  | jq -r '.AutoScalingInstances[0].LifecycleState')

# Quoted comparison inside [[ ]]: an empty STATUS compares as "not equal"
# instead of producing a "unary operator expected" error.
if [[ "$STATUS" == "Terminating:Wait" ]]; then
  aws --region "$REGION" ecs update-container-instances-state \
    --cluster "$ECS_CLUSTER" --container-instances "$ARN" --status DRAINING
fi

exit 0
24 changes: 24 additions & 0 deletions scripts/instance-status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
set -e
#
# Check script: reports the Auto Scaling lifecycle state of this EC2 instance
# together with the status of its ECS container instance, and requests
# DRAINING when the instance is waiting to be terminated.
#
# Inputs (all discovered at runtime, no arguments):
#   - EC2 instance metadata (169.254.169.254): availability zone, instance id
#   - ECS agent introspection API (localhost:51678): container instance ARN,
#     cluster name
#
# Exit status (Consul script checks treat 0 as passing, 1 as warning and any
# other code as critical -- see the Consul check documentation):
#   0    lifecycle state is "InService"
#   255  lifecycle state is "Terminating:Wait"; the container instance has
#        been asked to drain
#   1    any other lifecycle state (including an empty or "null" value)
# Any command failing under `set -e` aborts with that command's status.

# Print the two status lines shared by every outcome. The ECS status shown is
# the value read before any DRAINING request made by this run.
report_status() {
  printf '%s\n' "Status is Lifecycle State : ${STATUS}"
  printf '%s\n' "ECS Instance Status : ${CONTAINER_INSTANCE_STATUS}"
}

# The region is the availability zone with its trailing letter stripped.
REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/ | sed 's/.$//')
ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)

# Query the ECS agent once and extract both fields from the same document.
# `|| true` is intentional: the previous `curl | jq` pipelines also ignored a
# curl failure (only jq's status counted), so an unreachable agent still
# yields empty values here instead of aborting under `set -e`.
ECS_METADATA=$(curl -s http://localhost:51678/v1/metadata || true)
ARN=$(jq -r .ContainerInstanceArn <<<"$ECS_METADATA")
ECS_CLUSTER=$(jq -r .Cluster <<<"$ECS_METADATA")

#ASG=$(aws ec2 describe-instances --region $REGION --instance-ids $ID | jq -r '.Reservations[0].Instances[0].Tags[1].Value')

# The jq filters are single-quoted so the shell never treats "[0]" as a glob.
CONTAINER_INSTANCE_STATUS=$(aws --region "$REGION" ecs describe-container-instances \
  --cluster "$ECS_CLUSTER" --container-instances "$ARN" \
  | jq -r '.containerInstances[0].status')
STATUS=$(aws --region "$REGION" autoscaling describe-auto-scaling-instances --instance-ids "$ID" \
  | jq -r '.AutoScalingInstances[0].LifecycleState')

case "$STATUS" in
  "InService")
    report_status
    ;;
  "Terminating:Wait")
    # Ask ECS to drain this instance before reporting, as before.
    aws --region "$REGION" ecs update-container-instances-state \
      --cluster "$ECS_CLUSTER" --container-instances "$ARN" --status DRAINING
    report_status
    exit 255
    ;;
  *)
    report_status
    exit 1
    ;;
esac

0 comments on commit e80566e

Please sign in to comment.