Skip to content

Commit

Permalink
Merge pull request #181 from xcp-ng/xcpng-fs-check
Browse files Browse the repository at this point in the history
Add a filesystem diff script
  • Loading branch information
stormi authored Dec 8, 2023
2 parents fc9cc8f + f62c38e commit 50a253c
Showing 1 changed file with 315 additions and 0 deletions.
315 changes: 315 additions & 0 deletions scripts/xcpng-fs-diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
#!/usr/bin/env python3

#
# Data structure format:
#
# The files to be checked are stored in a Python dictionary with 4 main entries:
# - 'file': standard files and their md5 sum
# - 'file_symlink': valid symlinks to standard files and the md5 sum of the file
# - 'dir_symlink': valid symlinks to directories and the directory pointed at
# - 'broken_symlink': broken symlinks and the (missing) target pointed at
#
# {
# "file": {
# "/opt/pbis/docs/pbis-open-installation-and-administration-guide.pdf": "7f9eaec58fd3422b79c88126cefdf503",
# "/opt/pbis/docs/pbis-quick-start-guide-for-linux.pdf": "8bbd04e4c73cedefeeb35ed74a8a3d4b",
# ...
# },
# "file_symlink": {
# "/opt/pbis/lib/libldap-2.4.so.2": "a417e1e86cec9735a76059b72d8e1cbf",
# "/opt/pbis/lib/liblsaclient_ntlm.so.0": "9601bfc4827f0b9cff50cc591b2b6f11",
# ...
# },
# "dir_symlink": {
# "/usr/tmp": "../var/tmp",
# "/usr/lib/kbd/keymaps/legacy/ppc": "mac",
# ...
# },
# "broken_symlink": {
# "/usr/lib/firmware/a300_pfp.fw": "qcom/a300_pfp.fw",
# "/usr/lib/firmware/a300_pm4.fw": "qcom/a300_pm4.fw",
# ...
# }
# }
#

import argparse
import sys
import subprocess
import json
import tempfile
import os
from fnmatch import fnmatch
from enum import Enum

class FileType(Enum):
    """Kinds of filesystem entries that are collected from a host."""

    # Regular file
    FILE = 0
    # Symlink that resolves to a regular file
    FILE_SYMLINK = 1
    # Symlink that resolves to a directory
    DIR_SYMLINK = 2
    # Symlink whose target cannot be resolved
    BROKEN_SYMLINK = 3

def ignore_file(filename):
    """Tell whether `filename` matches one of the ignore patterns.

    The patterns are shell-style wildcards evaluated with fnmatch().
    Returns True on the first match, False when nothing matches.
    """
    patterns = (
        '/boot/initrd-*',
        '/boot/grub/*',
        '/boot/vmlinuz-fallback',
        '/boot/xen-fallback.gz',
        '/etc/chrony.conf',
        '/etc/firstboot.d/data/default-storage.conf',
        '/etc/firstboot.d/data/iqn.conf',
        '/etc/fstab',
        '/etc/group*',
        '/etc/grub.cfg',
        '/etc/gshadow*',
        '/etc/hostname',
        '/etc/iscsi/initiatorname.iscsi',
        '/etc/issue',
        '/etc/krb5.conf',
        '/etc/lvm/backup/*',
        '/etc/mtab',
        '/etc/machine-id',
        '/etc/passwd*',
        '/etc/pki/ca-trust/extracted/java/cacerts',
        '/etc/pki/java/cacerts',
        '/etc/shadow*',
        '/etc/ssh/ssh_host_*_key.pub',
        '/etc/ssh/ssh_host_*_key',
        '/etc/sysconfig/network',
        '/etc/sysconfig/network-scripts/interface-rename-data/*',
        '/etc/sysconfig/xencommons',
        '/etc/vconsole.conf',
        '/etc/xensource-inventory',
        '/etc/xensource/boot_time_cpus',
        '/etc/xensource/ptoken',
        '/etc/xensource/xapi-ssl.pem',
        '/opt/xensource/gpg/trustdb.gpg',
    )

    return any(fnmatch(filename, pattern) for pattern in patterns)

def ssh_cmd(host, cmd):
    """Run `cmd` on `host` over ssh as root and return its standard output.

    Raises Exception, carrying the captured stderr text, when the ssh
    process exits with a non-zero status.
    """
    completed = subprocess.run(
        ["ssh", f"root@{host}", cmd],
        capture_output=True,
        text=True,
    )
    if completed.returncode != 0:
        raise Exception(completed.stderr)

    return completed.stdout

def ssh_get_files(host, file_type, folders):
md5sum = False
readlink = False
folders = " ".join(folders)

match file_type:
case FileType.FILE:
find_type = "-type f"
md5sum = True
case FileType.FILE_SYMLINK:
find_type = "-type l -xtype f"
md5sum = True
case FileType.DIR_SYMLINK:
find_type = "-type l -xtype d"
readlink = True
case FileType.BROKEN_SYMLINK:
find_type = "-xtype l"
readlink = True
case _:
print("Unknown file type: ", file=sys.stderr)
return None

find_cmd = "find {} {}".format(folders, find_type)
if readlink:
find_cmd += " -exec readlink -n {} \\; -exec echo -n ' ' \\; -print"
elif md5sum:
# This will make one call to md5sum with all files passed as parameter
# This is much more efficient than using find '-exec md5sum {}'
find_cmd += " | xargs md5sum"

rawres = ssh_cmd(host, find_cmd)

res = dict()
for line in rawres.splitlines():
entry = line.split()
res[entry[1]] = entry[0]

return res

def get_files(host, folders):
    """Gather every kind of entry found under `folders` on `host`.

    Returns a dict with the keys 'file', 'file_symlink', 'dir_symlink' and
    'broken_symlink', each holding the result of ssh_get_files() for the
    matching FileType.

    If any of the remote queries raises, the error is printed on stderr and
    the program exits with status -1.
    """
    # Result key -> kind of entry to query, in the order they are fetched
    kinds = (
        ('file', FileType.FILE),
        ('file_symlink', FileType.FILE_SYMLINK),
        ('dir_symlink', FileType.DIR_SYMLINK),
        ('broken_symlink', FileType.BROKEN_SYMLINK),
    )

    ref_files = dict()
    try:
        for key, file_type in kinds:
            ref_files[key] = ssh_get_files(host, file_type, folders)
    except Exception as e:
        print(e, file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is only meant
        # for the interactive interpreter
        sys.exit(-1)

    return ref_files

def sftp_get(host, remote_file, local_file):
    """Download `remote_file` from `host` into `local_file` with sftp.

    sftp is run in batch mode as root@host, with host key checking disabled,
    and is fed a single "get" command on its standard input.

    Returns the subprocess.CompletedProcess of the sftp run (stderr is merged
    into stdout). Raises Exception when sftp exits with a non-zero status.
    """
    def quote(path):
        # sftp splits batch commands on whitespace: wrap the path in double
        # quotes (escaping embedded ones) so that it stays a single argument.
        return '"{}"'.format(str(path).replace('"', '\\"'))

    # Argument list instead of a shell string: the host name is never
    # interpreted by a local shell.
    args = [
        "sftp",
        "-o", "StrictHostKeyChecking no",
        "-o", "LogLevel ERROR",
        "-o", "UserKnownHostsFile /dev/null",
        "-b", "-",
        "root@{}".format(host),
    ]
    batch = "get {} {}".format(quote(remote_file), quote(local_file)).encode('utf-8')

    res = subprocess.run(
        args,
        input=batch,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False
    )

    if res.returncode:
        raise Exception("Failed to get file from host: {}".format(res.returncode))

    return res

def remote_diff(host1, host2, filename):
    """Print a unified diff of `filename` as found on `host1` and `host2`.

    The file type is probed with file(1) on host1 only; when it is not
    reported as text, a notice is printed and no diff is attempted.
    Otherwise both copies are downloaded to temporary files with sftp_get()
    and compared with "diff -u".

    Nothing is returned. Any error is printed on stderr instead of being
    raised, and the temporary files are always removed.
    """
    import shlex

    file1 = None
    file2 = None
    try:
        # Check the remote file is a text file. file(1) uses the word "text"
        # in the description of text files whatever their flavour ("ASCII
        # text", "UTF-8 Unicode text", "... script, ASCII text executable"),
        # so look for that word rather than for an "ASCII" prefix.
        # The name is quoted because the command goes through a remote shell.
        cmd = "file -b {}".format(shlex.quote(filename))
        file_type = ssh_cmd(host1, cmd)
        if "text" not in file_type.lower().replace(",", " ").split():
            print("Binary file. Not showing diff")
            return

        fd, file1 = tempfile.mkstemp()
        os.close(fd)
        sftp_get(host1, filename, file1)

        fd, file2 = tempfile.mkstemp()
        os.close(fd)
        sftp_get(host2, filename, file2)

        # errors="replace" keeps a stray undecodable byte from aborting the
        # diff output
        diff_res = subprocess.run(
            ["diff", "-u", file1, file2],
            capture_output=True,
            text=True,
            errors="replace",
        )

        # diff exit status: 0 = identical, 1 = differences, 2 = trouble
        if diff_res.returncode == 1:
            print(diff_res.stdout)
        elif diff_res.returncode == 2:
            raise Exception(diff_res.stderr)

    except Exception as e:
        # Showing a diff is best effort: report the problem and carry on
        print(e, file=sys.stderr)
    finally:
        for path in (file1, file2):
            if path is not None and os.path.exists(path):
                os.remove(path)

def compare_files(ref, test, show_diff):
    """Print the differences between a reference and a tested host.

    ref, test -- dicts with a 'files' entry (category -> {path: value}) and
                 a 'host' entry (host name, only used to fetch diffs)
    show_diff -- when true, call remote_diff() for every path whose value
                 differs between the two hosts

    Paths for which ignore_file() returns True are skipped. The input
    dictionaries are left unmodified, and a category that is present on one
    side only is treated as empty on the other side.
    """
    ref_files = ref['files']
    ref_host = ref['host']
    test_files = test['files']
    test_host = test['host']

    for ftype, test_entries in test_files.items():
        ref_entries = ref_files.get(ftype) or {}
        for file, test_val in (test_entries or {}).items():
            if ignore_file(file):
                continue

            if file not in ref_entries:
                print("{} doesn't exist on reference host: {}".format(ftype, file))
                continue

            if ref_entries[file] != test_val:
                print("{} differs: {}".format(ftype, file))
                if show_diff:
                    remote_diff(ref_host, test_host, file)

    # Check for files that only exist on the reference host
    for ftype, ref_entries in ref_files.items():
        test_entries = test_files.get(ftype) or {}
        for file in (ref_entries or {}):
            if ignore_file(file):
                continue

            if file not in test_entries:
                print("{} doesn't exist on tested host: {}".format(ftype, file))

def load_reference_files(filename):
    """Load a previously saved JSON file containing the reference files.

    Returns the decoded JSON content of `filename`. If the file cannot be
    read or parsed, the error is printed on stderr and the program exits
    with status -1.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as fd:
            return json.load(fd)
    except Exception as e:
        print("Error: {}".format(e), file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is only meant
        # for the interactive interpreter
        sys.exit(-1)

def save_reference_files(files, filename):
    """Save files from a reference host in JSON format.

    `files` is written to `filename` as JSON indented by 4 spaces. If the
    file cannot be written or the data cannot be serialized, the error is
    printed on stderr and the program exits with status -1.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as fd:
            json.dump(files, fd, indent=4)
    except Exception as e:
        print("Error: {}".format(e), file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is only meant
        # for the interactive interpreter
        sys.exit(-1)

def main():
    """Command line entry point.

    Gets the reference file information (loaded from a file with -l, or
    collected from the reference host with -r), optionally saves it with -s,
    then collects the same information from the tested host (-t) and prints
    the differences.

    Returns 0 on success, including a run that only saves the reference
    (-s without -t), and -1 when required parameters are missing.
    """
    folders = ["/boot", "/etc", "/opt", "/usr"]

    parser = argparse.ArgumentParser(description='Spot filesystem differences between 2 XCP-ng hosts')
    parser.add_argument('--reference-host', '-r', dest='ref_host',
                        help='The XCP-ng host used as reference')
    parser.add_argument('--test-host', '-t', dest='test_host',
                        help='The XCP-ng host to be tested after install or upgrade')
    parser.add_argument('--save-reference', '-s', dest='save_ref',
                        help='Save filesystem information of the reference host to a file')
    parser.add_argument('--load-reference', '-l', dest='load_ref',
                        help='Load reference filesystem information from a file')
    parser.add_argument('--show-diff', '-d', action='store_true', dest='show_diff',
                        help='Show diff of text files that differ. A reference host must be supplied with -r')
    # action='append' combined with a list default extends that default
    parser.add_argument('--add-folder', '-f', action='append', dest='folders', default=folders,
                        help='Add folders to the default searched folders (/boot, /etc, /opt, and /usr). '
                             'Can be specified multiple times')
    args = parser.parse_args(sys.argv[1:])

    if args.ref_host is None and args.show_diff:
        print("Missing parameters. -d must be used with -r. Try --help", file=sys.stderr)
        return -1

    ref_files = None
    if args.load_ref:
        print("Get reference files from {}".format(args.load_ref))
        ref_files = load_reference_files(args.load_ref)
    elif args.ref_host:
        print("Get reference files from {}".format(args.ref_host))
        ref_files = get_files(args.ref_host, args.folders)

    # Without a reference there is nothing to save nor to compare against;
    # bail out before -s gets a chance to write an empty reference file.
    if ref_files is None:
        print("\nMissing parameters. Try --help", file=sys.stderr)
        return -1

    if args.save_ref:
        print("Saving reference files to {}".format(args.save_ref))
        save_reference_files(ref_files, args.save_ref)

    if args.test_host is None:
        if args.save_ref:
            # Saving the reference was the whole job: not an error
            return 0
        print("\nMissing parameters. Try --help", file=sys.stderr)
        return -1

    print("Get test host files from {}".format(args.test_host))
    test_files = get_files(args.test_host, args.folders)

    ref = {'files': ref_files, 'host': args.ref_host}
    test = {'files': test_files, 'host': args.test_host}

    print("\nResults:")
    compare_files(ref, test, args.show_diff)

    return 0

# Run main() only when executed as a script, and hand its return value to the
# interpreter as the exit status. sys.exit is used because the bare exit()
# helper is injected by the site module for interactive use only.
if __name__ == '__main__':
    sys.exit(main())

0 comments on commit 50a253c

Please sign in to comment.