Skip to content

Commit

Permalink
Merge pull request #93 from cyclinder/mccl
Browse files Browse the repository at this point in the history
support mccl test
  • Loading branch information
weizhoublue authored Jan 16, 2025
2 parents a7a4351 + ba66f55 commit 46f5afa
Show file tree
Hide file tree
Showing 14 changed files with 735 additions and 512 deletions.
4 changes: 2 additions & 2 deletions rdma-tools/chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ home: "https://spidernet-io.github.io/charts"
# application or library
type: application
# No need to modify this version; CI automatically updates it from /VERSION.
version: 12.5.3
version: 12.5.4
# This field is informational and has no impact on chart version calculations.
# Leaving it unquoted can lead to parsing issues in some cases
# No need to modify this version; CI automatically updates it from /VERSION.
appVersion: "12.5.3"
appVersion: "12.5.4"
kubeVersion: ">= 1.16.0-0"
description: rdma test
sources:
Expand Down
10 changes: 5 additions & 5 deletions rdma-tools/chart/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ spec:
volumeMounts:
- mountPath: /dev/shm
name: dshm
{{- if .Values.extraVolumes }}
{{- include "tplvalues.render" ( dict "value" .Values.extraVolumes "context" $ ) | nindent 12 }}
{{- if .Values.extraVolumeMounts }}
{{- include "tplvalues.render" ( dict "value" .Values.extraVolumeMounts "context" $ ) | nindent 12 }}
{{- end }}
volumes:
- emptyDir:
medium: Memory
name: dshm
{{- if .Values.extraVolumeMounts }}
{{- include "tplvalues.render" ( dict "value" .Values.extraVolumeMounts "context" $ ) | nindent 6 }}
{{- end }}
{{- if .Values.extraVolumes }}
{{- include "tplvalues.render" ( dict "value" .Values.extraVolumes "context" $ ) | nindent 8 }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
# Namespaced Role, created in the Helm release namespace, that grants
# read-only access (get, list) to Services and Endpoints.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
# Name and label are the project name truncated to 63 characters with any
# trailing "-" removed.
name: {{ include "project.name" . | trunc 63 | trimSuffix "-" | quote }}
namespace: {{ .Release.Namespace }}
labels:
app: {{ include "project.name" . | trunc 63 | trimSuffix "-" | quote }}
rules:
# The empty string selects the core API group.
- apiGroups:
- ""
resources:
- services
- endpoints
# No write verbs are granted.
verbs:
- get
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "project.name" . | trunc 63 | trimSuffix "-" | quote }}
Expand Down
16 changes: 0 additions & 16 deletions rdma-tools/chart/templates/role.yaml

This file was deleted.

94 changes: 49 additions & 45 deletions rdma-tools/image/install-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ set -o errexit
set -o pipefail
set -o nounset

InstallNccl(){
InstallNccl() {
echo " install nccl"

cd /tmp
Expand All @@ -21,23 +21,23 @@ InstallNccl(){
apt install --allow-change-held-packages -y libnccl2 libnccl-dev
rm * -rf || true

echo "ulimit -l 2000000" >> /etc/bash.bashrc
echo "* soft memlock unlimited" >> /etc/security/limits.conf
echo "* hard memlock unlimited" >> /etc/security/limits.conf
echo "ulimit -l 2000000" >>/etc/bash.bashrc
echo "* soft memlock unlimited" >>/etc/security/limits.conf
echo "* hard memlock unlimited" >>/etc/security/limits.conf
}

#######################################
# Prepare password-less SSH for root (used by mpirun): generate an ed25519
# key pair, authorize it for root itself, and relax host-key / mode checks.
# Globals:   none
# Arguments: none
# Outputs:   ssh-keygen progress on stdout
# Returns:   non-zero (or aborts under errexit) if any step fails
#######################################
InstallSSH() {
    # for mpirun
    # -p: do not abort under `set -o errexit` when the directory already exists.
    mkdir -p /root/.ssh
    chmod 700 /root/.ssh
    ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N ""
    cat ~/.ssh/id_ed25519.pub >>~/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys

    # Client side: force "StrictHostKeyChecking no" and discard known hosts.
    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config
    echo " UserKnownHostsFile /dev/null" >>/etc/ssh/ssh_config
    # Server side: un-comment StrictModes and set it to "no".
    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
}

InstallOfedRepo(){
InstallOfedRepo() {
# required by perftest
echo " install ofed lib"
# Mellanox OFED (latest)
Expand All @@ -46,52 +46,53 @@ InstallOfedRepo(){
wget ${ENV_DOWNLOAD_OFED_DEB_SOURCE}
apt-get update

for ITEM in "infiniband-diags" "rdmacm-utils" "ibverbs-utils" ; do
VERSION=$( apt-cache show ${ITEM} | grep Version | grep mlnx | awk '{print $2}' )
[ -n "${VERSION}" ] || { echo "error, failed to find mlnx version "; exit 1 ; }
apt-get install -y --no-install-recommends ${ITEM}=${VERSION}
for ITEM in "infiniband-diags" "rdmacm-utils" "ibverbs-utils"; do
VERSION=$(apt-cache show ${ITEM} | grep Version | grep mlnx | awk '{print $2}')
[ -n "${VERSION}" ] || {
echo "error, failed to find mlnx version "
exit 1
}
apt-get install -y --no-install-recommends ${ITEM}=${VERSION}
done

}

#######################################
# Load the HPC-X environment and persist it for login shells, sudo and ld.
# Globals:   LD_LIBRARY_PATH (read; presumably set by hpcx_load - confirm)
# Arguments: none
# Outputs:   writes /etc/profile.d/hpcx-env.sh, /etc/sudoers.d/hpcx-env and
#            /etc/ld.so.conf.d/hpcx.conf; removes /printpaths.sh
# Returns:   non-zero (or aborts under errexit) if any step fails
#######################################
InstallEnv() {
    echo " install enviroment for hpc-x"
    chmod +x /printpaths.sh
    # HPC-X Environment variables
    # NOTE(review): hpcx_load is presumably defined by hpcx-init.sh - confirm.
    source /opt/hpcx/hpcx-init.sh
    hpcx_load

    # test
    /printpaths.sh ENV

    # Bash does not expand aliases in non-interactive scripts (and never inside
    # an already-parsed function body), so `alias install=...` silently dropped
    # the ownership flags. Keep the command and its flags in an array instead.
    local -a install_root=(install --owner=0 --group=0)

    # Preserve environment variables in new login shells
    /printpaths.sh export |
        "${install_root[@]}" --mode=644 /dev/stdin /etc/profile.d/hpcx-env.sh

    # Preserve environment variables (except *PATH*) when sudoing
    "${install_root[@]}" -d --mode=0755 /etc/sudoers.d
    # The sed program keeps only the text before '=', drops every name that
    # contains PATH, and wraps the rest as: Defaults env_keep += "NAME"
    /printpaths.sh |
        sed -E -e '{ s:^([^=]+)=.*$:\1:g ; /PATH/d ; s:^.*$:Defaults env_keep += "\0":g }' |
        "${install_root[@]}" --mode=440 /dev/stdin /etc/sudoers.d/hpcx-env

    # Register shared libraries with ld regardless of LD_LIBRARY_PATH
    printf '%s\n' "${LD_LIBRARY_PATH}" | tr ':' '\n' |
        "${install_root[@]}" --mode=644 /dev/stdin /etc/ld.so.conf.d/hpcx.conf

    rm /printpaths.sh
    ldconfig
}


#######################################
# Install every gdrcopy .deb package found in /buildGdrcopy, then remove
# that directory.
# Globals:   none
# Arguments: none
# Outputs:   progress on stdout, error message on stderr
# Returns:   exits 1 when no .deb package is present; otherwise the status
#            of dpkg / rm
#######################################
InstallGdrCopy() {
    echo "install gdrcopy library"
    local -r build_dir=/buildGdrcopy

    # Expand the glob with an absolute prefix: no `cd` into a directory that is
    # deleted below, and no file name can be mistaken for a dpkg option.
    local -a debs=("${build_dir}"/*.deb)
    # An unmatched glob stays literal, so test that the first entry exists.
    [ -e "${debs[0]}" ] || {
        echo "error, no .deb package found in ${build_dir}" >&2
        exit 1
    }

    dpkg -i "${debs[@]}"
    rm -rf "${build_dir}"
}


packages=(
iproute2
smc-tools
Expand Down Expand Up @@ -123,6 +124,9 @@ packages=(
# ibdiagnet ibnetdiscover
ibutils2
ibdump
libelf1
libltdl7
libnuma1
)

export DEBIAN_FRONTEND=noninteractive
Expand Down
Loading

0 comments on commit 46f5afa

Please sign in to comment.