diff --git a/README.md b/README.md index 6205409..8b6a614 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,11 @@ The picture shows the dependencies within the modules of the cos-stack and the d ## Troubleshooting +### Monitoring Server and Nodes + +- `nomad monitor -log-level error|warn|info|debug|trace -node-id <node-id> | -server-id <server-id>` +- supported since [nomad 0.10.2](https://www.nomadproject.io/docs/commands/monitor.html) + ### Nomad CLI complains about invalid Certificate If you have deployed the cluster with https endpoints for the ui-albs and have created a selfsigned certificate you might get errors from the nomad cli complanig about an invalid certificate (`x509: certificate is..`). To fix this you have to integrate your custom root-CA you used for signing your certificate apropriately into your system. diff --git a/examples/jobs/consulconnect/README.md b/examples/jobs/consulconnect/README.md new file mode 100644 index 0000000..e3f9992 --- /dev/null +++ b/examples/jobs/consulconnect/README.md @@ -0,0 +1,83 @@ +# Usage Example: Consul Connect - NOT WORKING + +This folder provides an example on how to use Consul Connect in the cluster. +It is derived from this [tutorial](https://www.hashicorp.com/blog/consul-connect-integration-in-hashicorp-nomad/). + +## !!! Errors !!! + +Image: https://console.cloud.google.com/gcr/images/google-containers/GLOBAL/pause-amd64@sha256:163ac025575b775d1c0f9bf0bdd0f086883171eb475b5068e7defa4ca9e76516/details?tab=info + +Local Pull Working: +- `docker pull gcr.io/google-containers/pause-amd64:3.0` + +""" +failed to setup alloc: + pre-run hook "network" failed: + failed to create network for alloc: + Failed to pull `gcr.io/google_containers/pause-amd64:3.0`: + API error (500): + Get https://gcr.io/v2/: -> Google Container Registry + net/http: + request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers) +""" + +### State of Investigation + +Reason currently unclear. Further debugging necessary. 
+ +Checked: +- docker.config - ecr helper only limited to amazon, + -> all other Docker registries should be supported as well. +- Security Group Configuration - outbound traffic for nodes in private datacenter fully open + +## Usage + +This usage example was tested with `examples/root-example/README.md`. + +- `nomad run api_service.nomad` +- `nomad run dashboard_service.nomad` + +- `curl ???` + +### Prerequisites + +- consul >=1.6 +- CNI plugins installed on the instance + +### Limitations +- [Consul Connect Native](https://www.consul.io/docs/connect/native.html) is not yet supported. + - -> Integration into the application without sidecar not usable. +- Consul Connect HTTP and gRPC checks are not yet supported. + - -> No [health check](https://www.consul.io/docs/agent/checks.html) support. + - -> __Fabio usage unclear__. +- [Consul ACLs](https://learn.hashicorp.com/consul/security-networking/production-acls) are not yet supported. + - -> No additional access management, only network separation. +- __Variable interpolation for group services and checks is not yet supported.__ ??? + +## Background + +- envoy via [Docker](https://hub.docker.com/r/envoyproxy/envoy) + +> Hashicorp: +> Prior to Nomad 0.10, Nomad’s networking model ran all applications in _host networking mode_. +> Applications running on the same host could communicate with each other over localhost! +> +> Nomad 0.10 introduces network namespace support. +> This is a new network model within Nomad +> where task groups are a single network endpoint and +> share a network namespace. + +### Job Specification + +- `connect` +- `sidecar_service` + +#### network stanza - New Networking Modes + +- _none_ + - isolated network without any network interfaces +- _bridge_ + - isolated network namespace with an interface that is bridged with the host +- _host_ + - join the host network namespace and a shared network namespace is not created. 
+ - _This matches the behavior in Nomad 0.9_ diff --git a/examples/jobs/consulconnect/connect.nomad b/examples/jobs/consulconnect/connect.nomad new file mode 100644 index 0000000..68c851e --- /dev/null +++ b/examples/jobs/consulconnect/connect.nomad @@ -0,0 +1,63 @@ +job "countdash" { + datacenters = ["private-services"] + + group "api" { + network { + mode = "bridge" + } + + service { + name = "count-api" + port = "9001" + + connect { + sidecar_service {} + } + } + + task "web" { + driver = "docker" + config { + image = "hashicorpnomad/counter-api:v1" + } + } + } + + group "dashboard" { + network { + mode ="bridge" + port "http" { + static = 9002 + to = 9002 + } + } + + service { + name = "count-dashboard" + port = "9002" + + tags = ["urlprefix-/dashboard"] # fabio + + connect { + sidecar_service { + proxy { + upstreams { + destination_name = "count-api" + local_bind_port = 8080 + } + } + } + } + } + + task "dashboard" { + driver = "docker" + env { + COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}" + } + config { + image = "hashicorpnomad/counter-dashboard:v1" + } + } + } + } diff --git a/examples/jobs/fabio.nomad b/examples/jobs/fabio.nomad index 0eb7ac2..189e871 100644 --- a/examples/jobs/fabio.nomad +++ b/examples/jobs/fabio.nomad @@ -10,11 +10,11 @@ job "fabio" { task "fabio" { driver = "exec" # Linux only! 
config { - command = "fabio-1.5.10-go1.11.1-linux_amd64" + command = "fabio-1.5.13-go1.13.4-linux_amd64" } artifact { - source = "https://github.com/fabiolb/fabio/releases/download/v1.5.10/fabio-1.5.10-go1.11.1-linux_amd64" + source = "https://github.com/fabiolb/fabio/releases/download/v1.5.13/fabio-1.5.13-go1.13.4-linux_amd64" } resources { diff --git a/modules/ami/setup_nomad_consul.sh b/modules/ami/setup_nomad_consul.sh deleted file mode 100755 index 253ca3c..0000000 --- a/modules/ami/setup_nomad_consul.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -set -e - -# Environment variables are set by packer -# NOTE: Script will try to update all installed packages -/tmp/install-nomad/install-nomad --version "${NOMAD_VERSION}" - -# FIXME: This is so ugly! Why not catching the binaries directly? -# NOTE: Consul will try to update all installed packages -git clone --branch "${CONSUL_MODULE_VERSION}" https://github.com/hashicorp/terraform-aws-consul.git /tmp/terraform-aws-consul -/tmp/terraform-aws-consul/modules/install-consul/install-consul --version "${CONSUL_VERSION}" diff --git a/modules/ami2/Makefile b/modules/ami2/Makefile index 20bc64c..381260a 100644 --- a/modules/ami2/Makefile +++ b/modules/ami2/Makefile @@ -13,6 +13,7 @@ aws_region:=us-east-1 .PHONY: ami .PHONY: nomad-consul-docker-ecr.json .PHONY: nomad-consul-docker.json +.PHONY: nomad-consul-docker-ecr-cni.json ami: nomad-consul-docker-ecr.json @@ -21,3 +22,6 @@ nomad-consul-docker-ecr.json: nomad-consul-docker.json: packer build -var aws_region=$(aws_region) nomad-consul-docker.json + +nomad-consul-docker-ecr-cni.json: + packer build -var aws_region=$(aws_region) nomad-consul-docker-ecr-cni.json diff --git a/modules/ami2/install-nomad/install-nomad b/modules/ami2/install-nomad/install-nomad index dbed6ac..17c5b57 100755 --- a/modules/ami2/install-nomad/install-nomad +++ b/modules/ami2/install-nomad/install-nomad @@ -13,9 +13,6 @@ readonly DEFAULT_NOMAD_USER="nomad" readonly SCRIPT_DIR="$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd)" readonly SYSTEM_BIN_DIR="/usr/local/bin" -readonly SUPERVISOR_DIR="/etc/supervisor" -readonly SUPERVISOR_CONF_DIR="$SUPERVISOR_DIR/conf.d" - readonly SCRIPT_NAME="$(basename "$0")" function print_usage { @@ -68,71 +65,16 @@ function assert_not_empty { fi } -# Install steps are based on: http://unix.stackexchange.com/a/291098/215969 -function install_supervisord_debian { - sudo apt-get install -y supervisor - sudo update-rc.d supervisor defaults - - create_supervisor_config - sudo systemctl enable supervisor -} - function install_systemd_nomad_amazon_linux2 { log_info "Installing systemd nomad.service" # TODO: Add check for systemd? # Moving service description script to the system sudo cp "$SCRIPT_DIR/nomad.service" "/etc/systemd/system/" - # NOTE: Nomad is not yet configured - hence it makes no sense to try to start it! sudo systemctl enable nomad.service - #sudo systemctl start nomad.service - #sudo systemctl status nomad.service - - # FIXME: Adapt run-nomad script to reflect the configuration change - # Remove supervisord configration and adjust configuration file loaded in nomad.service -} - - -# Install steps are based on: http://stackoverflow.com/a/31576473/483528 -function install_supervisord_amazon_linux2 { - echo "[DEBUG] [install-nomad] - supervisor - installing supervisor" - sudo pip install supervisor - - echo "[DEBUG] [install-nomad] - supervisor - setup path" - - sudo pip install supervisor - # On Amazon Linux, /usr/local/bin is not in PATH for the root user, so we add symlinks to /usr/bin, which is in PATH - if [[ ! -f "/usr/bin/supervisorctl" ]]; then - sudo ln -s /usr/local/bin/supervisorctl /usr/bin/supervisorctl - fi - if [[ ! 
-f "/usr/bin/supervisord" ]]; then - sudo ln -s /usr/local/bin/supervisord /usr/bin/supervisord - fi - - echo "[DEBUG] [install-nomad] - supervisor - setup systemctl" - ls -l "/etc/systemd/" - sudo cp "$SCRIPT_DIR/supervisor.service" "/etc/systemd/system/" - echo "[DEBUG] [install-nomad] - supervisor - setup systemctl - enable" - sudo systemctl enable supervisor.service - - - echo "[DEBUG] [install-nomad] - supervisor - create supervisor config" - create_supervisor_config - #sudo chkconfig --add supervisor - #sudo chkconfig supervisor on - - #echo "[DEBUG] [install-nomad] - supervisor - systemctl" - sudo systemctl start supervisor.service - # FIXME: DEBUG information - sudo systemctl status supervisor.service - - #echo "[DEBUG] [install-nomad] - supervisor - journald" - #sudo journalctrl -xe -} - -function create_supervisor_config { - sudo mkdir -p "$SUPERVISOR_CONF_DIR" - sudo cp "$SCRIPT_DIR/supervisord.conf" "$SUPERVISOR_DIR/supervisord.conf" + # NOTE: Nomad is not yet configured - hence it makes no sense to try to start it! + # DEBUG: sudo systemctl start nomad.service + # DEBUG: sudo systemctl status nomad.service } function has_yum { @@ -149,11 +91,9 @@ function install_dependencies { if $(has_apt_get); then sudo apt-get update -y sudo apt-get install -y awscli curl unzip jq - install_supervisord_debian elif $(has_yum); then # Amazon Linux 2 comes without pip sudo yum install -y aws curl unzip jq python2-pip - install_supervisord_amazon_linux2 install_systemd_nomad_amazon_linux2 else log_error "Could not find apt-get or yum. Cannot install dependencies on this OS." 
diff --git a/modules/ami2/install-nomad/supervisor-initd-script.sh b/modules/ami2/install-nomad/supervisor-initd-script.sh deleted file mode 100755 index 171b916..0000000 --- a/modules/ami2/install-nomad/supervisor-initd-script.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash -# -# supervisord Startup script for the Supervisor process control system -# -# Author: Mike McGrath (based off yumupdatesd) -# Jason Koppe adjusted to read sysconfig, -# use supervisord tools to start/stop, conditionally wait -# for child processes to shutdown, and startup later -# Erwan Queffelec -# make script LSB-compliant -# -# chkconfig: 345 83 04 -# description: Supervisor is a client/server system that allows \ -# its users to monitor and control a number of processes on \ -# UNIX-like operating systems. -# processname: supervisord -# config: /etc/supervisord.conf -# config: /etc/sysconfig/supervisord -# pidfile: /var/run/supervisord.pid -# -### BEGIN INIT INFO -# Provides: supervisord -# Required-Start: $all -# Required-Stop: $all -# Short-Description: start and stop Supervisor process control system -# Description: Supervisor is a client/server system that allows -# its users to monitor and control a number of processes on -# UNIX-like operating systems. -### END INIT INFO - -# Source function library -. /etc/rc.d/init.d/functions - -# Source system settings -if [ -f /etc/sysconfig/supervisord ]; then - . /etc/sysconfig/supervisord -fi - -# Path to the supervisorctl script, server binary, -# and short-form for messages. -supervisorctl=/usr/local/bin/supervisorctl -supervisord=${SUPERVISORD-/usr/local/bin/supervisord} -prog=supervisord -pidfile=${PIDFILE-/tmp/supervisord.pid} -lockfile=${LOCKFILE-/var/lock/subsys/supervisord} -STOP_TIMEOUT=${STOP_TIMEOUT-60} -OPTIONS="${OPTIONS--c /etc/supervisor/supervisord.conf}" -RETVAL=0 - -start() { - echo -n $"Starting $prog: " - daemon --pidfile=${pidfile} $supervisord $OPTIONS - RETVAL=$? 
- echo - if [ $RETVAL -eq 0 ]; then - touch ${lockfile} - $supervisorctl $OPTIONS status - fi - return $RETVAL -} - -stop() { - echo -n $"Stopping $prog: " - killproc -p ${pidfile} -d ${STOP_TIMEOUT} $supervisord - RETVAL=$? - echo - [ $RETVAL -eq 0 ] && rm -rf ${lockfile} ${pidfile} -} - -reload() { - echo -n $"Reloading $prog: " - LSB=1 killproc -p $pidfile $supervisord -HUP - RETVAL=$? - echo - if [ $RETVAL -eq 7 ]; then - failure $"$prog reload" - else - $supervisorctl $OPTIONS status - fi -} - -restart() { - stop - start -} - -case "$1" in - start) - start - ;; - stop) - stop - ;; - status) - status -p ${pidfile} $supervisord - RETVAL=$? - [ $RETVAL -eq 0 ] && $supervisorctl $OPTIONS status - ;; - restart) - restart - ;; - condrestart|try-restart) - if status -p ${pidfile} $supervisord >&/dev/null; then - stop - start - fi - ;; - force-reload|reload) - reload - ;; - *) - echo $"Usage: $prog {start|stop|restart|condrestart|try-restart|force-reload|reload}" - RETVAL=2 - esac - - exit $RETVAL \ No newline at end of file diff --git a/modules/ami2/install-nomad/supervisor.service b/modules/ami2/install-nomad/supervisor.service deleted file mode 100644 index 88874ce..0000000 --- a/modules/ami2/install-nomad/supervisor.service +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=Supervisor process control system for UNIX -Documentation=http://supervisord.org -After=network.target - -[Service] -#User=ec2-user -#Group=user -ExecStart=/usr/bin/supervisord -n -c /etc/supervisor/supervisord.conf -ExecStop=/usr/bin/supervisorctl $OPTIONS shutdown -ExecReload=/usr/local/bin/supervisorctl -c /etc/supervisor/supervisord.conf $OPTIONS reload -KillMode=process -Restart=on-failure -RestartSec=50s - -[Install] -WantedBy=multi-user.target diff --git a/modules/ami2/install-nomad/supervisord.conf b/modules/ami2/install-nomad/supervisord.conf deleted file mode 100644 index d96beb0..0000000 --- a/modules/ami2/install-nomad/supervisord.conf +++ /dev/null @@ -1,39 +0,0 @@ -; supervisor 
config file -; -; For more information on the config file, please see: -; http://supervisord.org/configuration.html -; -; Notes: -; - Shell expansion ("~" or "$HOME") is not supported. Environment -; variables can be expanded using this syntax: "%(ENV_HOME)s". -; - Comments must have a leading space: "a=b ;comment" not "a=b;comment". - -[unix_http_server] -file=/var/run/supervisor.sock ; (the path to the socket file) -chmod=0700 ; sockef file mode (default 0700) - -[supervisord] -logfile=/var/log/supervisor/supervisord.log ; (main log file;default $CWD/supervisord.log) -pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid) -childlogdir=/var/log/supervisor ; ('AUTO' child log dir, default $TEMP) -logfile_maxbytes=50MB ; (max main logfile bytes b4 rotation;default 50MB) -logfile_backups=10 ; (num of main logfile rotation backups;default 10) -loglevel=info ; (log level;default info; others: debug,warn,trace) - -; the below section must remain in the config file for RPC -; (supervisorctl/web interface) to work, additional interfaces may be -; added by defining them in separate rpcinterface: sections -[rpcinterface:supervisor] -supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface - -[supervisorctl] -serverurl=unix:///var/run/supervisor.sock ; use a unix:// URL for a unix socket - -; The [include] section can just contain the "files" setting. This -; setting can list multiple files (separated by whitespace or -; newlines). It can also contain wildcards. The filenames are -; interpreted as relative to this file. Included files *cannot* -; include files themselves. 
- -[include] -files = /etc/supervisor/conf.d/*.conf diff --git a/modules/ami2/nomad-consul-docker-ecr-cni.json b/modules/ami2/nomad-consul-docker-ecr-cni.json new file mode 100644 index 0000000..a58a573 --- /dev/null +++ b/modules/ami2/nomad-consul-docker-ecr-cni.json @@ -0,0 +1,138 @@ +{ + "min_packer_version": "0.12.0", + "variables": { + "nomad_version": "0.10.2", + "cni_version": "0.8.2", + "consul_module_version": "v0.7.3", + "consul_version": "1.6.2", + "aws_account_ids": "" + }, + "builders": [ + { + "ami_name": "nomad-consul-docker-ecr-cni-amazon-linux2-{{isotime | clean_ami_name}}", + "ami_description": "An Amazon Linux 2 AMI that has Nomad ({{user `nomad_version`}}), Consul ({{user `consul_version`}}), CNI-Plugins ({{user `cni_version`}}) and Docker, ECR Helper installed.", + "instance_type": "t2.micro", + "name": "amazon-linux-ami2", + "region": "{{user `aws_region`}}", + "ami_regions": "{{user `ami_regions`}}", + "type": "amazon-ebs", + "source_ami_filter": { + "filters": { + "virtualization-type": "hvm", + "architecture": "x86_64", + "name": "*amzn2-ami-hvm-*gp2", + "block-device-mapping.volume-type": "gp2", + "root-device-type": "ebs" + }, + "owners": ["amazon"], + "most_recent": true + }, + "ssh_username": "ec2-user", + "ami_users": "{{user `aws_account_ids`}}", + "tags": { + "Consul Version": "{{user `consul_version`}}", + "Consul Module Version": "{{user `consul_module_version`}}", + "Nomad Version": "{{user `nomad_version`}}", + "Base_AMI_Name": "{{ .SourceAMIName }}" + } + } + ], + "provisioners": [ + { + "type": "shell", + "script": "{{template_dir}}/setup_amazon-linux.sh", + "only": ["amazon-linux-ami2"] + }, + { + "type": "file", + "source": "{{template_dir}}/dnsmasq_10-consul.conf", + "destination": "/tmp/dnsmasq_10-consul.conf" + }, + { + "type": "file", + "source": "{{template_dir}}/install-nomad", + "destination": "/tmp/install-nomad/", + "pause_before": "10s" + }, + { + "type": "file", + "source": "{{template_dir}}/run-nomad", + 
"destination": "/tmp/run-nomad/", + "pause_before": "10s" + }, + { + "type": "shell", + "environment_vars": [ + "NOMAD_VERSION={{user `nomad_version`}}", + "CONSUL_VERSION={{user `consul_version`}}", + "CONSUL_MODULE_VERSION={{user `consul_module_version`}}" + ], + "script": "{{template_dir}}/setup_nomad_consul.sh" + }, + { + "type": "shell", + "environment_vars": [ + "CNI_VERSION={{user `cni_version`}}" + ], + "script": "{{template_dir}}/setup_cni.sh" + }, + + { + "type": "shell", + "inline": ["mkdir -p ~/.docker"] + }, + { + "type": "file", + "source": "{{template_dir}}/docker_config.json", + "destination": "$HOME/.docker/config.json" + }, + { + "type": "shell", + "inline": [ + "sudo mkdir -p /etc/docker/", + "sudo cp $HOME/.docker/config.json /etc/docker/config.json", + "sudo chmod a+rwX /etc/docker/" + ] + }, + { + "type": "shell", + "script": "{{template_dir}}/setup_ecr_helper.sh" + }, + { + "type": "shell", + "inline": [ + "cp /etc/sysctl.conf /tmp/sysctl.conf", + "echo 'vm.max_map_count=262144' >> /tmp/sysctl.conf", + "sudo cp /tmp/sysctl.conf /etc/sysctl.conf" + ] + }, + { + "type": "file", + "source": "{{template_dir}}/mount_efs.sh", + "destination": "/tmp/mount_efs.sh" + }, + { + "type": "shell", + "inline": [ + "sudo mv /tmp/mount_efs.sh /usr/bin", + "sudo chmod u+x /usr/bin/mount_efs.sh" + ] + }, + { + "type": "file", + "source": "{{template_dir}}/copy_map_efs.sh", + "destination": "/tmp/copy_map_efs.sh" + }, + { + "type": "shell", + "inline": [ + "sudo mv /tmp/copy_map_efs.sh /usr/bin", + "sudo chmod u+x /usr/bin/copy_map_efs.sh" + ] + }, + { + "type": "shell", + "script": "{{template_dir}}/test.sh" + } + ] +} diff --git a/modules/ami2/nomad-consul-docker-ecr.json b/modules/ami2/nomad-consul-docker-ecr.json index b1602a4..fbf05d4 100644 --- a/modules/ami2/nomad-consul-docker-ecr.json +++ b/modules/ami2/nomad-consul-docker-ecr.json @@ -1,9 +1,9 @@ { "min_packer_version": "1.5.1", "variables": { - "nomad_version": "0.9.0", - "consul_module_version": 
"v0.4.4", - "consul_version": "1.4.4", + "nomad_version": "0.10.2", + "consul_module_version": "v0.7.3", + "consul_version": "1.6.2", "aws_account_ids": "" }, "builders": [ diff --git a/modules/ami2/run-nomad/run-nomad b/modules/ami2/run-nomad/run-nomad index e0ed1cd..d45e28d 100755 --- a/modules/ami2/run-nomad/run-nomad +++ b/modules/ami2/run-nomad/run-nomad @@ -4,7 +4,6 @@ set -e readonly NOMAD_CONFIG_FILE="default.hcl" -readonly SUPERVISOR_CONFIG_PATH="/etc/supervisor/conf.d/run-nomad.conf" readonly EC2_INSTANCE_METADATA_URL="http://169.254.169.254/latest/meta-data" readonly EC2_INSTANCE_DYNAMIC_DATA_URL="http://169.254.169.254/latest/dynamic" @@ -200,35 +199,6 @@ EOF chown "$user:$user" "$config_path" } -# OUTDATED -function generate_supervisor_config { - local readonly supervisor_config_path="$1" - local readonly nomad_config_dir="$2" - local readonly nomad_data_dir="$3" - local readonly nomad_bin_dir="$4" - local readonly nomad_log_dir="$5" - local readonly nomad_user="$6" - local readonly use_sudo="$7" - - if [[ "$use_sudo" == "true" ]]; then - log_info "The --use-sudo flag is set, so running Nomad as the root user" - nomad_user="root" - fi - - log_info "Creating Supervisor config file to run Nomad in $supervisor_config_path" - cat > "$supervisor_config_path" <