Skip to content

Commit

Permalink
test: integration test for tls slurm collector
Browse files Browse the repository at this point in the history
  • Loading branch information
raghuvar-vijay committed Jan 23, 2025
1 parent 2601e5b commit 6b0bc30
Show file tree
Hide file tree
Showing 5 changed files with 436 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/slurm-collector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,7 @@ jobs:

- name: Test Slurm collector
run: SKIP_COMPILATION=true ./scripts/test_slurm_collector.sh

- name: Test tls Slurm collector
run: SKIP_COMPILATION=true ./scripts/test_tls_slurm_collector.sh

56 changes: 56 additions & 0 deletions containers/docker-centos7-slurm/collector_tls_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Slurm collector configuration with TLS enabled (see tls_config at the end).
# NOTE(review): the nesting/indentation of this file is not visible in this
# view; the comments below only describe groups of adjacent keys.

# Endpoint the collector connects to.
# NOTE(review): presumably the Auditor instance running on the Docker host — confirm.
addr: "host.docker.internal"
port: 8000
record_prefix: "slurm"
job_filter:
status: # A list of acceptable job statuses
- "completed"
sacct_frequency: 2 # in seconds
sender_frequency: 1 # in seconds
# Site entries, each selected by a regex on the "Partition" key:
# part1 -> SiteA, part2 -> SiteB.
sites:
- name: "SiteA"
only_if:
key: "Partition"
matches: "^part1$"
- name: "SiteB"
only_if:
key: "Partition"
matches: "^part2$"
# Meta entry: the "Comment" key is declared with key_type Json.
meta:
- name: Comment
key: "Comment"
key_type: Json
# Component entries. "Cores" carries two HEPSPEC06 scores, 1.1 for part1 and
# 1.2 for part2, each guarded by a Partition regex.
components:
- name: "Cores"
key: "NCPUS"
scores:
- name: "HEPSPEC06"
value: 1.1
only_if:
key: "Partition"
matches: "^part1$"
- name: "HEPSPEC06"
value: 1.2
only_if:
key: "Partition"
matches: "^part2$"
# SystemCPU and UserCPU are restricted to part1 via only_if.
- name: "SystemCPU"
key: "SystemCPU"
key_type: Time
only_if:
key: "Partition"
matches: "^part1$"
- name: "UserCPU"
key: "UserCPU"
key_type: Time
only_if:
key: "Partition"
matches: "^part1$"
- name: "Memory"
key: "ReqMem"
key_type: IntegerMega
# TLS client settings; all certificate paths point into /client_certs.
tls_config:
use_tls: true
ca_cert_path: "/client_certs/rootCA.pem"
client_cert_path: "/client_certs/client-cert.pem"
client_key_path: "/client_certs/client-key.pem"

21 changes: 21 additions & 0 deletions containers/docker-centos7-slurm/tls_auditor_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Auditor configuration with TLS enabled (see tls_config at the end).
# NOTE(review): the nesting/indentation of this file is not visible in this
# view; the comments below only describe groups of adjacent keys.

# Application port.
application:
port: 8000
# Database connection settings.
# NOTE(review): credentials are hard-coded here; presumably test-only — confirm.
database:
host: "localhost"
port: 5432
username: "postgres"
password: "password"
database_name: "auditor"
# Database metrics: a frequency value and the list of metrics to report.
# NOTE(review): the unit of `frequency` is not stated in this file — confirm.
metrics:
database:
frequency: 30
metrics:
- RecordCount
- RecordCountPerSite
- RecordCountPerGroup
- RecordCountPerUser
# TLS server settings; all certificate paths point into /server_certs.
tls_config:
use_tls: true
ca_cert_path: "/server_certs/rootCA.pem"
server_cert_path: "/server_certs/server-cert.pem"
server_key_path: "/server_certs/server-key.pem"
86 changes: 84 additions & 2 deletions scripts/test_slurm_collector.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,28 @@ function start_container() {
--project-directory=$DOCKER_PROJECT_DIR \
--project-name="$COMPOSE_PROJECT_NAME" \
cp ./containers/docker-centos7-slurm/collector_config.yaml slurm:/collector_config.yaml
# Copy tls_config for collector
docker compose \
--file $DOCKER_COMPOSE_FILE \
--project-directory=$DOCKER_PROJECT_DIR \
--project-name="$COMPOSE_PROJECT_NAME" \
cp ./containers/docker-centos7-slurm/collector_tls_config.yaml slurm:/collector_tls_config.yaml

# Copy config for auditor
docker compose \
--file $DOCKER_COMPOSE_FILE \
--project-directory=$DOCKER_PROJECT_DIR \
--project-name="$COMPOSE_PROJECT_NAME" \
cp ./containers/docker-centos7-slurm/tls_auditor_config.yaml slurm:/tls_auditor_config.yaml

# Copy client tls certs
docker compose \
--file $DOCKER_COMPOSE_FILE \
--project-directory=$DOCKER_PROJECT_DIR \
--project-name="$COMPOSE_PROJECT_NAME" \
cp ./scripts/certs slurm:/client_certs


# Copy basic batch script
docker compose \
--file $DOCKER_COMPOSE_FILE \
Expand Down Expand Up @@ -125,6 +147,32 @@ function start_auditor() {
done
}

#######################################
# Launch the Auditor binary with the TLS configuration file in the
# background and block until its health-check endpoint answers.
# Globals:
#   SKIP_COMPILATION    (read)    - when empty, compile_auditor runs first
#   RELEASE_MODE        (read)    - "true" selects ./target/release, else debug
#   DB_NAME             (read)    - exported as AUDITOR_DATABASE__DATABASE_NAME
#   AUDITOR_SERVER_PID  (written) - PID of the background Auditor process
#   COUNTER             (written) - number of failed health-check attempts
# Outputs:
#   Progress and failure messages on stderr.
# Exits:
#   With status 1 (after calling stop_auditor) once more than 30 health
#   checks have failed.
# NOTE(review): the health check uses plain http although the server is
# started with tls_config.yaml - confirm this endpoint is reachable
# without TLS.
#######################################
function start_tls_auditor() {
  if [[ -z "${SKIP_COMPILATION}" ]]; then
    compile_auditor
  fi

  # Only the build profile directory differs between the two launch modes.
  local build_profile="debug"
  if [[ "$RELEASE_MODE" == true ]]; then
    build_profile="release"
  fi

  AUDITOR_APPLICATION__ADDR=0.0.0.0 \
    AUDITOR_DATABASE__DATABASE_NAME=$DB_NAME \
    "./target/${build_profile}/auditor" auditor/configuration/tls_config.yaml &
  AUDITOR_SERVER_PID=$!

  COUNTER=0
  while ! curl http://localhost:8000/health_check; do
    echo "Auditor is still unavailable - sleeping" >&2
    COUNTER=$((COUNTER + 1))
    if ((COUNTER > 30)); then
      echo "Auditor did not come up in time." >&2
      stop_auditor $AUDITOR_SERVER_PID
      echo "Exiting." >&2
      exit 1
    fi
    sleep 1
  done
}


function start_slurm_collector() {
if [[ -z "${SKIP_COMPILATION}" ]]
then
Expand All @@ -133,16 +181,30 @@ function start_slurm_collector() {
docker exec "${COMPOSE_PROJECT_NAME}-slurm-1" /auditor-slurm-collector /collector_config.yaml &
}

#######################################
# Start the Slurm collector inside the slurm container using the TLS
# collector configuration. The collector runs in the background.
# Globals:
#   SKIP_COMPILATION      (read) - when empty, compile_collector runs first
#   COMPOSE_PROJECT_NAME  (read) - prefix of the container name
#######################################
function start_tls_slurm_collector() {
  if [[ -z "${SKIP_COMPILATION}" ]]; then
    compile_collector
  fi
  # The config is copied to the container root (slurm:/collector_tls_config.yaml),
  # so reference it by absolute path, as the non-TLS collector does with
  # /collector_config.yaml. A relative path would depend on the container's
  # working directory.
  docker exec "${COMPOSE_PROJECT_NAME}-slurm-1" /auditor-slurm-collector /collector_tls_config.yaml &
}



#######################################
# Stop a running Auditor process by sending it the default kill signal.
# Globals:
#   AUDITOR_SERVER_PID (read) - used when no PID argument is given
# Arguments:
#   $1 - optional PID to stop; defaults to AUDITOR_SERVER_PID
# Outputs:
#   Status messages on stderr.
# Returns:
#   1 when no PID is available, otherwise the exit status of kill.
#######################################
function stop_auditor() {
  local pid="${1:-${AUDITOR_SERVER_PID:-}}"
  echo >&2 "Stopping Auditor"
  # Without a PID, a bare `kill` would only print a usage error.
  if [[ -z "$pid" ]]; then
    echo >&2 "No Auditor PID recorded; nothing to stop."
    return 1
  fi
  kill "$pid"
}

function test_collector() {
#######################################
# Submit the first test job (partition part1, with a job comment) through
# sbatch inside the slurm container, then pause for 20 seconds.
# Globals:
#   COMPOSE_PROJECT_NAME (read) - prefix of the container name
#   COMMENT              (read) - value passed to sbatch --comment
#######################################
function submit_test_job_1() {
  local slurm_container="${COMPOSE_PROJECT_NAME}-slurm-1"
  # The comment is wrapped in literal double quotes for the inner shell.
  local sbatch_cmd="sbatch --job-name=test_part1 --partition=part1 --comment=\"$COMMENT\" /batch.sh"

  docker exec "$slurm_container" sh -c "$sbatch_cmd"
  sleep 20
}

function test_collector_1() {

TEST1=$(curl -X GET http://localhost:8000/records | jq)

if [ "$(echo $TEST1 | jq '. | length')" != 1 ]
Expand Down Expand Up @@ -189,9 +251,15 @@ function test_collector() {
exit 1
fi

}

#######################################
# Submit the second test job (partition part2, no comment) through sbatch
# inside the slurm container, then pause for 20 seconds.
# Globals:
#   COMPOSE_PROJECT_NAME (read) - prefix of the container name
#######################################
function submit_test_job_2(){
  local slurm_container="${COMPOSE_PROJECT_NAME}-slurm-1"
  local sbatch_cmd="sbatch --job-name=test_part2 --partition=part2 /batch.sh"

  docker exec "$slurm_container" sh -c "$sbatch_cmd"
  sleep 20
}

function test_collector_2(){

TEST2=$(curl -X GET http://localhost:8000/records | jq)

Expand Down Expand Up @@ -236,11 +304,25 @@ if [[ -z "${SKIP_COMPILATION}" ]]
then
compile_collector
fi

# --- Phase 1: bring up the container, Auditor and the Slurm collector ---
start_container
start_auditor
start_slurm_collector

# NOTE(review): `test_collector` is invoked here in addition to the split
# submit/test helpers below; confirm it is still defined and still wanted.
test_collector
# Submit a job, then check the recorded results, once per partition.
submit_test_job_1
test_collector_1
submit_test_job_2
test_collector_2

# Tear down phase 1 before the TLS run.
stop_container
stop_auditor

# Testing TLS setup
# --- Phase 2: same stack, started through the TLS variants ---
start_container
start_tls_auditor
start_tls_slurm_collector

# NOTE(review): no job is submitted in this phase before test_collector_2
# runs; presumably it relies on records left over from phase 1 - confirm
# that this actually exercises the TLS collector path.
test_collector_2

stop_container
stop_auditor
Expand Down
Loading

0 comments on commit 6b0bc30

Please sign in to comment.