Skip to content

Commit

Permalink
Issue #1295: Enable slurm telemetry execution without awx
Browse files Browse the repository at this point in the history
Signed-off-by: DeepikaKrishnaiah <[email protected]>
  • Loading branch information
DeepikaKrishnaiah committed Jun 30, 2022
2 parents 14695ef + 3e5cd61 commit cf2034d
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 54 deletions.
3 changes: 2 additions & 1 deletion .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
"review",
"maintenance",
"projectManagement",
"mentoring"
"mentoring",
"talk"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Our thanks go to everyone who makes Omnia possible ([emoji key](https://allcontr
<tr>
<td align="center"><a href="http://johnlockman.com"><img src="https://avatars.githubusercontent.com/u/912987?v=4?s=100" width="100px;" alt=""/><br /><sub><b>John Lockman</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=j0hnL" title="Tests">⚠️</a> <a href="https://github.com/dellhpc/omnia/commits?author=j0hnL" title="Code">💻</a> <a href="#blog-j0hnL" title="Blogposts">📝</a> <a href="#ideas-j0hnL" title="Ideas, Planning, & Feedback">🤔</a> <a href="#maintenance-j0hnL" title="Maintenance">🚧</a> <a href="#mentoring-j0hnL" title="Mentoring">🧑‍🏫</a> <a href="#design-j0hnL" title="Design">🎨</a> <a href="https://github.com/dellhpc/omnia/pulls?q=is%3Apr+reviewed-by%3Aj0hnL" title="Reviewed Pull Requests">👀</a> <a href="#talk-j0hnL" title="Talks">📢</a> <a href="https://github.com/dellhpc/omnia/issues?q=author%3Aj0hnL" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/lwilson"><img src="https://avatars.githubusercontent.com/u/1236922?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Lucas A. Wilson</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=lwilson" title="Code">💻</a> <a href="#design-lwilson" title="Design">🎨</a> <a href="#maintenance-lwilson" title="Maintenance">🚧</a> <a href="#ideas-lwilson" title="Ideas, Planning, & Feedback">🤔</a> <a href="#blog-lwilson" title="Blogposts">📝</a> <a href="https://github.com/dellhpc/omnia/commits?author=lwilson" title="Documentation">📖</a> <a href="#mentoring-lwilson" title="Mentoring">🧑‍🏫</a> <a href="#projectManagement-lwilson" title="Project Management">📆</a> <a href="https://github.com/dellhpc/omnia/pulls?q=is%3Apr+reviewed-by%3Alwilson" title="Reviewed Pull Requests">👀</a> <a href="#talk-lwilson" title="Talks">📢</a> <a href="https://github.com/dellhpc/omnia/issues?q=author%3Alwilson" title="Bug reports">🐛</a></td>
<td align="center"><a href="https://github.com/sujit-jadhav"><img src="https://avatars.githubusercontent.com/u/73123831?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Sujit Jadhav</b></sub></a><br /><a href="#ideas-sujit-jadhav" title="Ideas, Planning, & Feedback">🤔</a> <a href="https://github.com/dellhpc/omnia/commits?author=sujit-jadhav" title="Documentation">📖</a> <a href="https://github.com/dellhpc/omnia/commits?author=sujit-jadhav" title="Code">💻</a> <a href="https://github.com/dellhpc/omnia/pulls?q=is%3Apr+reviewed-by%3Asujit-jadhav" title="Reviewed Pull Requests">👀</a> <a href="#maintenance-sujit-jadhav" title="Maintenance">🚧</a> <a href="#projectManagement-sujit-jadhav" title="Project Management">📆</a> <a href="#mentoring-sujit-jadhav" title="Mentoring">🧑‍🏫</a></td>
<td align="center"><a href="https://github.com/sujit-jadhav"><img src="https://avatars.githubusercontent.com/u/73123831?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Sujit Jadhav</b></sub></a><br /><a href="#ideas-sujit-jadhav" title="Ideas, Planning, & Feedback">🤔</a> <a href="https://github.com/dellhpc/omnia/commits?author=sujit-jadhav" title="Documentation">📖</a> <a href="https://github.com/dellhpc/omnia/commits?author=sujit-jadhav" title="Code">💻</a> <a href="https://github.com/dellhpc/omnia/pulls?q=is%3Apr+reviewed-by%3Asujit-jadhav" title="Reviewed Pull Requests">👀</a> <a href="#maintenance-sujit-jadhav" title="Maintenance">🚧</a> <a href="#projectManagement-sujit-jadhav" title="Project Management">📆</a> <a href="#mentoring-sujit-jadhav" title="Mentoring">🧑‍🏫</a> <a href="#talk-sujit-jadhav" title="Talks">📢</a></td>
<td align="center"><a href="https://github.com/DeepikaKrishnaiah"><img src="https://avatars.githubusercontent.com/u/73213880?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Deepika K</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=DeepikaKrishnaiah" title="Code">💻</a> <a href="https://github.com/dellhpc/omnia/commits?author=DeepikaKrishnaiah" title="Tests">⚠️</a> <a href="https://github.com/dellhpc/omnia/issues?q=author%3ADeepikaKrishnaiah" title="Bug reports">🐛</a> <a href="#security-DeepikaKrishnaiah" title="Security">🛡️</a> <a href="#talk-DeepikaKrishnaiah" title="Talks">📢</a></td>
<td align="center"><a href="https://github.com/sakshiarora13"><img src="https://avatars.githubusercontent.com/u/73195862?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Sakshi Arora</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=sakshiarora13" title="Code">💻</a> <a href="https://github.com/dellhpc/omnia/issues?q=author%3Asakshiarora13" title="Bug reports">🐛</a> <a href="#talk-sakshiarora13" title="Talks">📢</a></td>
<td align="center"><a href="https://github.com/abhishek-sa1"><img src="https://avatars.githubusercontent.com/u/94038029?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Abhishek SA</b></sub></a><br /><a href="https://github.com/dellhpc/omnia/commits?author=abhishek-sa1" title="Code">💻</a> <a href="https://github.com/dellhpc/omnia/issues?q=author%3Aabhishek-sa1" title="Bug reports">🐛</a> <a href="https://github.com/dellhpc/omnia/commits?author=abhishek-sa1" title="Documentation">📖</a> <a href="https://github.com/dellhpc/omnia/commits?author=abhishek-sa1" title="Tests">⚠️</a> <a href="#maintenance-abhishek-sa1" title="Maintenance">🚧</a> <a href="#talk-abhishek-sa1" title="Talks">📢</a></td>
Expand Down
25 changes: 25 additions & 0 deletions telemetry/roles/common/tasks/validate_base_vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,28 @@
that: mysqldb_name | length > 1
success_msg: "{{ mysqldb_success_msg }}"
fail_msg: "{{ mysqldb_fail_msg }}"

# Inventory sanity checks for running slurm telemetry without AWX.
# The block only runs when awx_web_support is false and
# slurm_telemetry_support is true; any_errors_fatal makes a failure here
# stop the play for every host.
- name: Verify Inventory file based on awx_web_support
  block:
    # Fail with the readable message when EITHER group is missing. Listing
    # both conditions under `when:` would AND them, letting a half-populated
    # inventory through to the asserts below, where the missing group raises
    # an undefined-key templating error instead of this message.
    - name: Verify Inventory file when awx_web_support is false
      fail:
        msg: "{{ inventory_absent_message }}"
      when: >-
        groups['manager'] is not defined or
        groups['compute'] is not defined

    # Exactly one manager node is required.
    - name: Verify manager group in inventory when awx_web_support is false
      assert:
        that: "groups['manager'] | length | int == 1"
        fail_msg: "{{ telemetry_manager_group_fail_msg }}"
        success_msg: "{{ telemetry_manager_group_success_msg }}"

    # At least one compute node is required.
    - name: Verify compute group in inventory when awx_web_support is false
      assert:
        that: "groups['compute'] | length | int >= 1"
        fail_msg: "{{ telemetry_compute_group_fail_msg }}"
        success_msg: "{{ telemetry_compute_group_success_msg }}"
  when:
    - not awx_web_support
    - slurm_telemetry_support
  any_errors_fatal: true
8 changes: 7 additions & 1 deletion telemetry/roles/common/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ timescaledb_fail_msg: "Timescale DB name should have minimum length of 2"
mysqldb_success_msg: "MySQL DB name validated successfully"
mysqldb_fail_msg: "MySQL DB name should have minimum length of 2"

# Messages for the manager/compute inventory group checks that run when
# slurm telemetry is enabled without AWX
telemetry_manager_group_fail_msg: "Failed. manager group should contain only 1 node in inventory for slurm telemetry"
telemetry_manager_group_success_msg: "Manager group validated in inventory for slurm telemetry"
telemetry_compute_group_fail_msg: "compute group should contain at least 1 node in inventory for slurm telemetry"
telemetry_compute_group_success_msg: "Compute group validated in inventory for slurm telemetry"
inventory_absent_message: "Failed. Inventory file absent for slurm telemetry / manager or compute group not present in inventory"

# Usage: validate_login_vars.yml
vault_filename: "{{ role_path }}/../../input_params/.login_vault_key"
login_vars_fail_msg: "Username/password validation in telemetry_login_vars.yml failed. Please check the requirements."
Expand All @@ -77,4 +83,4 @@ secrets_name: credentials

# Usage: set_docker_os.yml
docker_rocky_os: "8.5"
slurm_telemetry_dockerfile_path: "{{ playbook_dir }}/roles/slurm_telemetry/files/Dockerfile"
slurm_telemetry_dockerfile_path: "{{ playbook_dir }}/roles/slurm_telemetry/files/Dockerfile"
109 changes: 58 additions & 51 deletions telemetry/roles/slurm_telemetry/tasks/get_node_inventory.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,65 +32,72 @@
state: present
executable: pip3

- name: Get AWX service IP
command: kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.clusterIP}'
changed_when: false
failed_when: false
register: awx_svc_ip
- name: Check the awx status on control plane
include_vars: "{{ playbook_dir }}/../control_plane/input_params/base_vars.yml"
no_log: true

- name: AWX needs to be installed
fail:
msg: "{{ awx_fail_msg }}"
when: not awx_svc_ip.stdout
- name: Get awx inventory details when awx is installed on control plane
block:
- name: Get AWX service IP
command: kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.clusterIP}'
changed_when: false
failed_when: false
register: awx_svc_ip

- name: Get AWX service port
command: kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.ports[0].port}'
changed_when: false
register: awx_svc_port
- name: AWX needs to be installed
fail:
msg: "{{ awx_fail_msg }}"
when: not awx_svc_ip.stdout

- name: Get AWX secret
shell: >
set -o pipefail && \
kubectl get secret awx-admin-password -n {{ awx_namespace }} -o jsonpath="{.data.password}" | base64 --decode
changed_when: false
register: awx_secret
- name: Get AWX service port
command: kubectl get svc awx-ui -n {{ awx_namespace }} -o=jsonpath='{.spec.ports[0].port}'
changed_when: false
register: awx_svc_port

- name: Get node_inventory id
shell: >
set -o pipefail && \
awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }} --conf.username {{ awx_username }} \
--conf.password {{ awx_secret.stdout }} --conf.insecure inventory list -f human | grep node_inventory
changed_when: false
register: inventory_id
- name: Get AWX secret
shell: >
set -o pipefail && \
kubectl get secret awx-admin-password -n {{ awx_namespace }} -o jsonpath="{.data.password}" | base64 --decode
changed_when: false
register: awx_secret

- name: Node inventory not found in AWX
fail:
msg: "{{ node_inventory_fail_msg }}"
when: not inventory_id.stdout
- name: Get node_inventory id
shell: >
set -o pipefail && \
awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }} --conf.username {{ awx_username }} \
--conf.password {{ awx_secret.stdout }} --conf.insecure inventory list -f human | grep node_inventory
changed_when: false
register: inventory_id

- name: Get node_inventory
command: awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }} --conf.username {{ awx_username }} \
--conf.password {{ awx_secret.stdout }} --conf.insecure hosts list --inventory {{ inventory_id.stdout[0] }}
changed_when: false
register: node_inventory_output
- name: Node inventory not found in AWX
fail:
msg: "{{ node_inventory_fail_msg }}"
when: not inventory_id.stdout

- name: Save the json data
set_fact:
node_inventory_jsondata: "{{ node_inventory_output.stdout | from_json }}"
# List the hosts of the AWX inventory matched by the previous grep.
# inventory_id.stdout is the matched line of the human-readable listing;
# its first whitespace-separated field is used as the inventory id
# (presumably the id column - confirm against `awx inventory list -f human`).
# Indexing the string with [0] would keep only the first character and
# truncate any id of 10 or more.
- name: Get node_inventory
  command: >-
    awx --conf.host http://{{ awx_svc_ip.stdout }}:{{ awx_svc_port.stdout }}
    --conf.username {{ awx_username }}
    --conf.password {{ awx_secret.stdout }}
    --conf.insecure hosts list
    --inventory {{ inventory_id.stdout.split()[0] }}
  changed_when: false
  register: node_inventory_output

- name: Add temporary hosts
add_host:
name: "{{ node_inventory_jsondata['results'][node_index].name }}"
groups: "{{ node_inventory_jsondata['results'][node_index].summary_fields.groups.results[0].name }}"
ansible_user: "{{ os_username }}"
ansible_password: "{{ provision_password }}"
ansible_become_pass: "{{ provision_password }}"
ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
with_items: "{{ node_inventory_jsondata['results'] }}"
loop_control:
index_var: node_index
when: node_inventory_jsondata['results'][node_index].summary_fields.groups.count > 0
no_log: true
- name: Save the json data
set_fact:
node_inventory_jsondata: "{{ node_inventory_output.stdout | from_json }}"

- name: Add temporary hosts
add_host:
name: "{{ node_inventory_jsondata['results'][node_index].name }}"
groups: "{{ node_inventory_jsondata['results'][node_index].summary_fields.groups.results[0].name }}"
ansible_user: "{{ os_username }}"
ansible_password: "{{ provision_password }}"
ansible_become_pass: "{{ provision_password }}"
ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
with_items: "{{ node_inventory_jsondata['results'] }}"
loop_control:
index_var: node_index
when: node_inventory_jsondata['results'][node_index].summary_fields.groups.count > 0
no_log: true
when: awx_web_support

- name: Copy input_config file
copy:
Expand Down

0 comments on commit cf2034d

Please sign in to comment.