Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a filesystem diff script #181

Merged
merged 3 commits into from
Dec 8, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
315 changes: 315 additions & 0 deletions scripts/xcpng-fs-diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
#!/usr/bin/env python3

#
# Data structure format:
#
# The files to be checked are stored in Python dictionaries with 4 main entries:
# - 'file': regular files, mapped to their md5 sum
# - 'file_symlink': valid symlinks to regular files, mapped to the md5 sum of the file
# - 'dir_symlink': valid symlinks to directories, mapped to the directory pointed at
# - 'broken_symlink': broken symlinks, mapped to the path they point at
#
# {
#     "file": {
#         "/opt/pbis/docs/pbis-open-installation-and-administration-guide.pdf": "7f9eaec58fd3422b79c88126cefdf503",
#         "/opt/pbis/docs/pbis-quick-start-guide-for-linux.pdf": "8bbd04e4c73cedefeeb35ed74a8a3d4b",
#         ...
#     },
#     "file_symlink": {
#         "/opt/pbis/lib/libldap-2.4.so.2": "a417e1e86cec9735a76059b72d8e1cbf",
#         "/opt/pbis/lib/liblsaclient_ntlm.so.0": "9601bfc4827f0b9cff50cc591b2b6f11",
#         ...
#     },
#     "dir_symlink": {
#         "/usr/tmp": "../var/tmp",
#         "/usr/lib/kbd/keymaps/legacy/ppc": "mac",
#         ...
#     },
#     "broken_symlink": {
#         "/usr/lib/firmware/a300_pfp.fw": "qcom/a300_pfp.fw",
#         "/usr/lib/firmware/a300_pm4.fw": "qcom/a300_pm4.fw",
#         ...
#     }
# }
#

import argparse
import json
import os
import shlex
import subprocess
import sys
import tempfile
from enum import Enum
from fnmatch import fnmatch

class FileType(Enum):
    """Kinds of filesystem entries that can be listed on a host."""

    # Regular file
    FILE = 0
    # Symbolic link that resolves to a regular file
    FILE_SYMLINK = 1
    # Symbolic link that resolves to a directory
    DIR_SYMLINK = 2
    # Symbolic link whose target cannot be resolved
    BROKEN_SYMLINK = 3

def ignore_file(filename):
    """Return True when *filename* matches one of the ignored path patterns.

    Patterns use fnmatch (shell-style) wildcards, so ``*`` also matches
    across ``/`` separators.
    """
    excluded_patterns = (
        '/boot/initrd-*',
        '/boot/grub/*',
        '/boot/vmlinuz-fallback',
        '/boot/xen-fallback.gz',
        '/etc/chrony.conf',
        '/etc/firstboot.d/data/default-storage.conf',
        '/etc/firstboot.d/data/iqn.conf',
        '/etc/fstab',
        '/etc/group*',
        '/etc/grub.cfg',
        '/etc/gshadow*',
        '/etc/hostname',
        '/etc/iscsi/initiatorname.iscsi',
        '/etc/issue',
        '/etc/krb5.conf',
        '/etc/lvm/backup/*',
        '/etc/mtab',
        '/etc/machine-id',
        '/etc/passwd*',
        '/etc/pki/ca-trust/extracted/java/cacerts',
        '/etc/pki/java/cacerts',
        '/etc/shadow*',
        '/etc/ssh/ssh_host_*_key.pub',
        '/etc/ssh/ssh_host_*_key',
        '/etc/sysconfig/network',
        '/etc/sysconfig/network-scripts/interface-rename-data/*',
        '/etc/sysconfig/xencommons',
        '/etc/vconsole.conf',
        '/etc/xensource-inventory',
        '/etc/xensource/boot_time_cpus',
        '/etc/xensource/ptoken',
        '/etc/xensource/xapi-ssl.pem',
        '/opt/xensource/gpg/trustdb.gpg',
    )

    return any(fnmatch(filename, pattern) for pattern in excluded_patterns)

def ssh_cmd(host, cmd):
    """Run *cmd* on *host* as root over ssh and return its standard output.

    The command is passed to ssh as a single argument. Output is captured
    and decoded as text.

    Raises:
        Exception: carrying the captured standard error, when ssh exits with
            a non-zero status.
    """
    completed = subprocess.run(
        ["ssh", "root@{}".format(host), cmd],
        capture_output=True,
        text=True,
    )

    if completed.returncode != 0:
        raise Exception(completed.stderr)

    return completed.stdout

def ssh_get_files(host, file_type, folders):
md5sum = False
readlink = False
folders = " ".join(folders)

match file_type:
case FileType.FILE:
find_type = "-type f"
md5sum = True
case FileType.FILE_SYMLINK:
find_type = "-type l -xtype f"
md5sum = True
case FileType.DIR_SYMLINK:
find_type = "-type l -xtype d"
readlink = True
case FileType.BROKEN_SYMLINK:
find_type = "-xtype l"
readlink = True
case _:
print("Unknown file type: ", file=sys.stderr)
return None

find_cmd = "find {} {}".format(folders, find_type)
if readlink:
find_cmd += " -exec readlink -n {} \\; -exec echo -n ' ' \\; -print"
elif md5sum:
# This will make one call to md5sum with all files passed as parameter
# This is much more efficient than using find '-exec md5sum {}'
find_cmd += " | xargs md5sum"

rawres = ssh_cmd(host, find_cmd)

res = dict()
for line in rawres.splitlines():
entry = line.split()
res[entry[1]] = entry[0]

return res

def get_files(host, folders):
    """Collect every kind of entry from *host* under *folders*.

    Returns:
        A dict with the keys 'file', 'file_symlink', 'dir_symlink' and
        'broken_symlink', each holding the result of ``ssh_get_files`` for
        the matching ``FileType``.

    Any error raised while querying the host is printed to stderr and the
    program exits with status -1.
    """
    kinds = (
        ('file', FileType.FILE),
        ('file_symlink', FileType.FILE_SYMLINK),
        ('dir_symlink', FileType.DIR_SYMLINK),
        ('broken_symlink', FileType.BROKEN_SYMLINK),
    )
    ref_files = dict()

    try:
        for key, file_type in kinds:
            ref_files[key] = ssh_get_files(host, file_type, folders)
    except Exception as e:
        print(e, file=sys.stderr)
        # sys.exit rather than the exit() helper, which only exists when the
        # site module has been loaded
        sys.exit(-1)

    return ref_files

def sftp_get(host, remote_file, local_file):
    """Copy *remote_file* from *host* to *local_file* using sftp in batch mode.

    The ``get`` command is fed to sftp on standard input (``-b -``). Host key
    checking is disabled and the known hosts file is set to /dev/null.

    Returns:
        The ``subprocess.CompletedProcess`` of the sftp call; its ``stdout``
        holds the combined standard output and standard error as bytes.

    Raises:
        Exception: when sftp exits with a non-zero status.
    """
    # Argument list instead of a shell string: nothing in the host name can be
    # interpreted by a local shell.
    args = [
        "sftp",
        "-o", "StrictHostKeyChecking no",
        "-o", "LogLevel ERROR",
        "-o", "UserKnownHostsFile /dev/null",
        "-b", "-",
        "root@{}".format(host),
    ]
    batch = "get {} {}".format(remote_file, local_file).encode('utf-8')
    res = subprocess.run(
        args,
        input=batch,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False
    )

    if res.returncode:
        raise Exception("Failed to get file from host: {}".format(res.returncode))

    return res

def remote_diff(host1, host2, filename):
    """Print a unified diff of *filename* as found on *host1* and *host2*.

    The file is first inspected on *host1* with ``file``; when it is reported
    as binary, a notice is printed and no diff is attempted. Otherwise both
    copies are fetched into temporary files with ``sftp_get`` and compared
    with ``diff -u``.

    Errors are printed to stderr rather than raised. The temporary files are
    always removed.
    """
    file1 = None
    file2 = None

    try:
        # Check the remote file is a text file. --mime-encoding answers
        # "binary" for non-text content and a charset name otherwise, which
        # also recognises text whose description does not start with "ASCII"
        # (scripts, XML, UTF-8 text...). The filename is quoted because the
        # command is interpreted by the remote shell.
        cmd = "file -b --mime-encoding {}".format(shlex.quote(filename))
        encoding = ssh_cmd(host1, cmd).strip().lower()
        if encoding == "binary":
            print("Binary file. Not showing diff")
            return

        fd, file1 = tempfile.mkstemp()
        os.close(fd)
        sftp_get(host1, filename, file1)

        fd, file2 = tempfile.mkstemp()
        os.close(fd)
        sftp_get(host2, filename, file2)

        # errors="replace" keeps undecodable bytes from aborting the diff
        diff_res = subprocess.run(
            ["diff", "-u", file1, file2],
            capture_output=True,
            text=True,
            errors="replace",
        )

        # diff exits with 0 when identical, 1 when different, 2 on trouble
        if diff_res.returncode == 1:
            print(diff_res.stdout)
        elif diff_res.returncode == 2:
            raise Exception(diff_res.stderr)

    except Exception as e:
        print(e, file=sys.stderr)
    finally:
        if file1 is not None and os.path.exists(file1):
            os.remove(file1)
        if file2 is not None and os.path.exists(file2):
            os.remove(file2)

def compare_files(ref, test, show_diff):
    """Print the differences between a reference and a tested file listing.

    Args:
        ref: dict with a 'files' entry (listing of the reference host, keyed
            by entry type then by path) and a 'host' entry.
        test: dict with the same layout for the tested host.
        show_diff: when true, ``remote_diff`` is called for each entry whose
            value differs between the two listings.

    Paths accepted by ``ignore_file`` are skipped. Neither listing is
    modified.
    """
    ref_files = ref['files']
    ref_host = ref['host']
    test_files = test['files']
    test_host = test['host']

    for ftype, tested in test_files.items():
        # A reference without this type has no entry of that type at all
        reference = ref_files.get(ftype, {})

        for file, test_val in tested.items():
            if ignore_file(file):
                continue

            if file not in reference:
                print("{} doesn't exist on reference host: {}".format(ftype, file))
                continue

            if reference[file] != test_val:
                print("{} differs: {}".format(ftype, file))
                if show_diff:
                    remote_diff(ref_host, test_host, file)

    # Check for files that only exist on the reference host
    for ftype, reference in ref_files.items():
        tested = test_files.get(ftype, {})

        for file in reference:
            if ignore_file(file):
                continue

            if file not in tested:
                print("{} doesn't exist on tested host: {}".format(ftype, file))

# Load a previously saved json file containing the reference files
def load_reference_files(filename):
    """Return the data parsed from the JSON file *filename*.

    When the file cannot be read or does not contain valid JSON, the error is
    printed to stderr and the program exits with status -1.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as fd:
            return json.load(fd)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding errors
        print("Error: {}".format(e), file=sys.stderr)
        sys.exit(-1)

# Save files from a reference host in json format
def save_reference_files(files, filename):
    """Write *files* to *filename* as JSON indented with 4 spaces.

    When the file cannot be written or *files* cannot be serialized, the
    error is printed to stderr and the program exits with status -1.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as fd:
            json.dump(files, fd, indent=4)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError are what json.dump raises for data it cannot
        # serialize
        print("Error: {}".format(e), file=sys.stderr)
        sys.exit(-1)

def main():
    """Parse the command line and compare a tested host against a reference.

    The reference listing is loaded from a file (-l) or collected from a
    reference host (-r), and can be saved to a file (-s). When a test host
    (-t) is given, its listing is collected and compared to the reference.

    Returns:
        0 on success, -1 when required parameters are missing.
    """
    ref_files = None
    folders = ["/boot", "/etc", "/opt", "/usr"]

    parser = argparse.ArgumentParser(description='Spot filesystem differences between 2 XCP-ng hosts')
    parser.add_argument('--reference-host', '-r', dest='ref_host',
                        help='The XCP-ng host used as reference')
    parser.add_argument('--test-host', '-t', dest='test_host',
                        help='The XCP-ng host to be tested after install or upgrade')
    parser.add_argument('--save-reference', '-s', dest='save_ref',
                        help='Save filesystem information of the reference host to a file')
    parser.add_argument('--load-reference', '-l', dest='load_ref',
                        help='Load reference filesystem information from a file')
    parser.add_argument('--show-diff', '-d', action='store_true', dest='show_diff',
                        help='Show diff of text files that differ. A reference host must be supplied with -r')
    parser.add_argument('--add-folder', '-f', action='append', dest='folders', default=folders,
                        help='Add folders to the default searched folders (/boot, /etc, /opt, and /usr). '
                             'Can be specified multiple times')
    args = parser.parse_args(sys.argv[1:])

    if args.ref_host is None and args.show_diff:
        print("Missing parameters. -d must be used with -r. Try --help", file=sys.stderr)
        return -1

    # A reference loaded from a file takes precedence over the reference host
    if args.load_ref:
        print("Get reference files from {}".format(args.load_ref))
        ref_files = load_reference_files(args.load_ref)
    elif args.ref_host:
        print("Get reference files from {}".format(args.ref_host))
        ref_files = get_files(args.ref_host, args.folders)

    if args.save_ref:
        if ref_files is None:
            # Without a reference there is nothing to save; refuse instead of
            # writing an empty ("null") reference file
            print("Missing parameters. -s needs a reference given with -r or -l. Try --help",
                  file=sys.stderr)
            return -1

        print("Saving reference files to {}".format(args.save_ref))
        save_reference_files(ref_files, args.save_ref)

        if args.test_host is None:
            # Saving the reference is a complete task on its own
            return 0

    if ref_files is None or args.test_host is None:
        print("\nMissing parameters. Try --help", file=sys.stderr)
        return -1

    print("Get test host files from {}".format(args.test_host))
    test_files = get_files(args.test_host, args.folders)

    ref = {'files': ref_files, 'host': args.ref_host}
    test = {'files': test_files, 'host': args.test_host}

    print("\nResults:")
    compare_files(ref, test, args.show_diff)

    return 0

# Script entry point: the value returned by main() becomes the exit status.
# sys.exit is used rather than the exit() helper, which only exists when the
# site module has been loaded.
if __name__ == '__main__':
    sys.exit(main())
Loading