Skip to content
This repository has been archived by the owner on Mar 30, 2023. It is now read-only.

Commit

Permalink
Merge pull request #41 from RSE-Cambridge/rsync-fixes
Browse files Browse the repository at this point in the history
Rsync fixes
  • Loading branch information
JohnGarbutt authored Jan 3, 2019
2 parents 663f065 + 8e1f08c commit 90b482f
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 32 deletions.
11 changes: 1 addition & 10 deletions dac-ansible/roles/data-acc/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
---
- name: check for installation of data-acc
become: yes
stat:
path: '{{data_acc_install_dir}}/{{data_acc_name}}/bin'
changed_when: false
register: data_acc_binary_dir

- when: not data_acc_binary_dir.stat.exists
block:
- block:
- name: download data_acc
become: yes
become_user: root
Expand All @@ -33,7 +25,6 @@
remote_src: yes
src: /tmp/{{data_acc_tgz}}
dest: '{{data_acc_install_dir}}/{{data_acc_name}}'
creates: '{{data_acc_install_dir}}/{{data_acc_name}}/bin'
always:
- name: delete archive
become: yes
Expand Down
1 change: 0 additions & 1 deletion docker-slurm/burst_buffer.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
AllowUsers=root,slurm
Flags=EnablePersistent,PrivateData

StageInTimeout=3600
Expand Down
2 changes: 0 additions & 2 deletions internal/pkg/dacctl/persistent.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,6 @@ func CreateVolumesAndJobs(volReg registry.VolumeRegistry, poolRegistry registry.
JobVolume: volume.Name, // Even though its a persistent buffer, we add it here to ensure we delete buffer
Paths: make(map[string]string),
}
job.Paths[fmt.Sprintf("DW_PERSISTENT_STRIPED_%s", volume.Name)] = fmt.Sprintf(
"/mnt/dac/job/%s/multijob/%s", job.Name, volume.Name)

err = volReg.AddJob(job)
if err != nil {
Expand Down
41 changes: 34 additions & 7 deletions internal/pkg/pfsprovider/ansible/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,36 @@ import (
"fmt"
"github.com/RSE-Cambridge/data-acc/internal/pkg/registry"
"log"
"path"
"strings"
)

// processDataCopy runs the data copy described by request for the given volume.
//
// The command comes from generateDataCopyCmd. An empty command means there is
// nothing to copy: that case is logged and reported as success. Otherwise the
// volume's "global" directory under /mnt/lustre/<volume UUID> is prepared on
// localhost (mkdir, then fixUpOwnership with the volume's owner and group)
// and the command is executed on localhost.
//
// It returns the first error from command generation, directory setup or
// command execution.
func processDataCopy(volume registry.Volume, request registry.DataCopyRequest) error {
	cmd, err := generateDataCopyCmd(volume, request)
	if err != nil {
		return err
	}
	if cmd == "" {
		log.Println("No files to copy for:", volume.Name)
		return nil
	}

	log.Printf("Doing copy: %s", cmd)

	// Make sure the global dir exists with the right ownership before copying.
	// TODO: share this setup code with the mount path.
	// TODO: this should probably all be set up in fs-ansible instead.
	mountDir := fmt.Sprintf("/mnt/lustre/%s", volume.UUID)
	sharedDir := path.Join(mountDir, "/global")
	if err := mkdir("localhost", sharedDir); err != nil {
		return err
	}
	if err := fixUpOwnership("localhost", volume.Owner, volume.Group, sharedDir); err != nil {
		return err
	}

	// Do the copy.
	return runner.Execute("localhost", cmd)
}

func generateDataCopyCmd(volume registry.Volume, request registry.DataCopyRequest) (string, error) {
Expand All @@ -22,24 +42,31 @@ func generateDataCopyCmd(volume registry.Volume, request registry.DataCopyReques
return "", err
}

cmd := fmt.Sprintf("sudo su `getent passwd %d | cut -d: -f1` %s", volume.Owner, rsync)
cmd := fmt.Sprintf("sudo -g '#%d' -u '#%d' %s", volume.Group, volume.Owner, rsync)
dacHostBufferPath := fmt.Sprintf("/mnt/lustre/%s/global", volume.UUID)
cmd = fmt.Sprintf("bash -c \"export DW_JOB_STRIPED='%s' && %s\"", dacHostBufferPath, cmd)
return cmd, nil
}

// generateRsyncCmd builds the rsync command line for a data copy request.
//
// It returns an empty command and a nil error when both request.Source and
// request.Destination are empty; the caller is expected to treat that as
// "nothing to copy". Only registry.Directory and registry.File source types
// are supported; any other type yields an error naming the type and volume.
//
// Source and destination are passed through escapePath so that a literal
// $DW_JOB_STRIPED reference is kept intact in the generated command.
func generateRsyncCmd(volume registry.Volume, request registry.DataCopyRequest) (string, error) {
	if request.Source == "" && request.Destination == "" {
		return "", nil
	}

	var flags string
	switch request.SourceType {
	case registry.Directory:
		// Directories additionally need recursion.
		flags = "-r -ospgu --stats"
	case registry.File:
		flags = "-ospgu --stats"
	default:
		return "", fmt.Errorf("unsupported source type %s for volume: %s", request.SourceType, volume.Name)
	}

	return fmt.Sprintf("rsync %s %s %s", flags,
		escapePath(request.Source),
		escapePath(request.Destination)), nil
}

// escapePath backslash-escapes every occurrence of $DW_JOB_STRIPED in p so
// the variable reference survives as literal text when the result is embedded
// in a double-quoted shell command. Paths without the variable are returned
// unchanged.
func escapePath(p string) string {
	// n = -1 replaces all occurrences; escaping only the first would leave
	// any later reference unescaped.
	return strings.Replace(p, "$DW_JOB_STRIPED", "\\$DW_JOB_STRIPED", -1)
}
10 changes: 6 additions & 4 deletions internal/pkg/pfsprovider/ansible/copy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ func Test_GenerateDataCopy(t *testing.T) {
testVolume := registry.Volume{
Name: registry.VolumeName("asdf"),
Owner: 1001,
Group: 1002,
UUID: "fsuuid",
}
request := registry.DataCopyRequest{}

Expand All @@ -18,11 +20,11 @@ func Test_GenerateDataCopy(t *testing.T) {
assert.Empty(t, cmd)

request.SourceType = registry.File
request.Source = "source"
request.Source = "$DW_JOB_STRIPED/source"
request.Destination = "dest"
cmd, err = generateDataCopyCmd(testVolume, request)
assert.Nil(t, err)
assert.Equal(t, "sudo su `getent passwd 1001 | cut -d: -f1` rsync source dest", cmd)
assert.Equal(t, "bash -c \"export DW_JOB_STRIPED='/mnt/lustre/fsuuid/global' && sudo -g '#1002' -u '#1001' rsync -ospgu --stats \\$DW_JOB_STRIPED/source dest\"", cmd)

request.SourceType = registry.List
request.Source = "list_filename"
Expand All @@ -47,14 +49,14 @@ func Test_GenerateRsyncCmd(t *testing.T) {
request.Destination = "dest"
cmd, err = generateRsyncCmd(testVolume, request)
assert.Nil(t, err)
assert.Equal(t, "rsync source dest", cmd)
assert.Equal(t, "rsync -ospgu --stats source dest", cmd)

request.SourceType = registry.Directory
request.Source = "source"
request.Destination = "dest"
cmd, err = generateRsyncCmd(testVolume, request)
assert.Nil(t, err)
assert.Equal(t, "rsync -r source dest", cmd)
assert.Equal(t, "rsync -r -ospgu --stats source dest", cmd)

request.SourceType = registry.List
request.Source = "list_filename"
Expand Down
4 changes: 2 additions & 2 deletions tools/dac-reset.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ set +a
/usr/local/bin/etcdctl --key /etc/data-acc/pki/`hostname`.dac.hpc.cam.ac.uk-key.pem --cert /etc/data-acc/pki/`hostname`.dac.hpc.cam.ac.uk.pem --cacert /etc/data-acc/pki/ca.pem del --prefix ''

# Kill all lustre filesystems
ssh slurm-cpu1 sudo umount -atl lustre
ssh slurm-cpu2 sudo umount -atl lustre
#ssh slurm-cpu1 sudo umount -atl lustre
#ssh slurm-cpu2 sudo umount -atl lustre
ssh dac1 sudo umount -at lustre
ssh dac2 sudo umount -at lustre
ssh dac3 sudo umount -at lustre
Expand Down
21 changes: 15 additions & 6 deletions tools/slurm-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ echo "#!/bin/bash
#DW jobdw capacity=2TB access_mode=striped,private type=scratch
#DW persistentdw name=mytestbuffer
#DW swap 5MB
#DW stage_in source=/global/cscratch1/filename1 destination=\$DW_JOB_STRIPED/filename1 type=file
#DW stage_out source=\$DW_JOB_STRIPED/outdir destination=/global/scratch1/outdir type=directory
#DW stage_in source=/usr/local/bin/dacd destination=\$DW_JOB_STRIPED/filename1 type=file
#DW stage_out source=\$DW_JOB_STRIPED/outdir destination=/tmp type=directory
env
df -h
swapon
mkdir \$DW_JOB_STRIPED/outdir
df -h > \$DW_JOB_STRIPED/outdir/dfoutput
ls -al \$DW_JOB_STRIPED > \$DW_JOB_STRIPED/outdir/lsoutput
echo \$HOSTNAME
" > use-persistent.sh

Expand Down Expand Up @@ -45,11 +51,14 @@ scontrol show burstbuffer
squeue

echo "***Use persistent buffer***"
adduser centos
cat use-persistent.sh
su slurm -c 'sbatch use-persistent.sh'
su slurm -c 'sbatch use-persistent.sh'
su slurm -c 'sbatch use-persistent.sh'
su slurm -c 'sbatch use-persistent.sh'
su centos -c 'sbatch use-persistent.sh'
su centos -c 'sbatch use-persistent.sh'
su centos -c 'sbatch use-persistent.sh'
su centos -c 'sbatch use-persistent.sh'
su centos -c 'sbatch --array=1-10 test-persistent.sh'

squeue

sleep $SLEEP_INTERVAL
Expand Down

0 comments on commit 90b482f

Please sign in to comment.