Skip to content

Commit

Permalink
[git] Add recovery functionality to fetch method
Browse files Browse the repository at this point in the history
This commit implements the recovery functionality in the fetch method
of the Git backend, enabling the recovery of Perceval execution from
a specific commit. Now, users can specify a recovery commit to start
fetching commits from that point, ensuring robustness against failures.

Signed-off-by: Jose Javier Merchante <[email protected]>
  • Loading branch information
jjmerchante committed Mar 1, 2024
1 parent 1a3b337 commit 6bb1f96
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 2 deletions.
84 changes: 82 additions & 2 deletions perceval/backends/core/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def __init__(self, uri, gitpath, tag=None, archive=None, ssl_verify=True):
self.gitpath = gitpath

def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DEFAULT_LAST_DATETIME,
branches=None, latest_items=False, no_update=False):
branches=None, latest_items=False, recovery_commit=None, no_update=False):
"""Fetch commits.
The method retrieves from a Git repository or a log file
Expand Down Expand Up @@ -118,6 +118,7 @@ def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DE
:param branches: names of branches to fetch from (default: None)
:param latest_items: sync with the repository to fetch only the
newest commits
:param recovery_commit: recover from this commit without updating the repo
:param no_update: if enabled, don't update the repo with the latest changes
:returns: a generator of commits
Expand All @@ -132,6 +133,7 @@ def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DE
'to_date': to_date,
'branches': branches,
'latest_items': latest_items,
'recovery_commit': recovery_commit,
'no_update': no_update
}
items = super().fetch(category, **kwargs)
Expand All @@ -151,11 +153,14 @@ def fetch_items(self, category, **kwargs):
branches = kwargs['branches']
latest_items = kwargs['latest_items']
no_update = kwargs['no_update']
recovery_commit = kwargs['recovery_commit']

ncommits = 0

try:
if os.path.isfile(self.gitpath):
if recovery_commit:
commits = self._recovery(recovery_commit, from_date, to_date, branches)
elif os.path.isfile(self.gitpath):
commits = self._fetch_from_log()
else:
commits = self._fetch_from_repo(from_date, to_date, branches,
Expand Down Expand Up @@ -321,6 +326,50 @@ def _fetch_newest_commits_from_repo(self, repo):
gitshow = repo.show(hashes)
return self.parse_git_log_from_iter(gitshow)

def _recovery(self, from_commit, from_date, to_date, branches):
    """Recover the Perceval execution from a specific commit.

    The source of the commits depends on the state of `self.gitpath`:

      * when it is a file, commits are read with `_fetch_from_log`;
      * when the repository has packfiles, the hashes found in them
        starting at `from_commit` are shown and parsed;
      * otherwise, commits are fetched from the repository without
        updating it.

    Whatever the source is, only `from_commit` and the commits that
    come after it are produced.

    :param from_commit: hash of the commit to resume from (included
        in the output)
    :param from_date: passed to `_fetch_from_repo` when the repository
        has no packfiles
    :param to_date: passed to `_fetch_from_repo` when the repository
        has no packfiles
    :param branches: passed to `_fetch_from_repo` when the repository
        has no packfiles

    :returns: a generator of commits; nothing is generated when
        `from_commit` is not found
    """
    # Recover from log file, packfiles or first execution without packfiles
    if os.path.isfile(self.gitpath):
        commits = self._fetch_from_log()
    else:
        repo = self._create_git_repository()
        packs = repo.packs_by_date()
        if packs:
            hashes = repo.get_commits_from_packs(packs, from_commit)
            gitshow = repo.show(hashes)
            commits = self.parse_git_log_from_iter(gitshow)
        else:
            commits = self._fetch_from_repo(from_date=from_date, to_date=to_date,
                                            branches=branches, no_update=True)

    # The sources above are not guaranteed to start at from_commit,
    # so everything that comes before it is dropped here.
    found = False
    for commit in commits:
        if not found and commit['commit'] != from_commit:
            continue
        found = True
        yield commit

def _recovery_from_repo(self, from_commit, from_date, to_date, branches):
    """Fetch commits from the repository starting at a specific commit.

    Commits are obtained with `_fetch_from_repo` without updating the
    repository; those produced before `from_commit` are discarded.

    :param from_commit: hash of the commit to resume from (included
        in the output)
    :param from_date: passed as it is to `_fetch_from_repo`
    :param to_date: passed as it is to `_fetch_from_repo`
    :param branches: passed as it is to `_fetch_from_repo`

    :returns: a generator of commits; nothing is generated when
        `from_commit` is not found
    """
    commits = self._fetch_from_repo(from_date=from_date, to_date=to_date,
                                    branches=branches, no_update=True)

    found = False
    for commit in commits:
        # The hash is read from the top-level 'commit' key, the same
        # way `_recovery` reads the items returned by this very call.
        if not found and commit['commit'] != from_commit:
            continue
        found = True
        yield commit

def _create_git_repository(self):
if not os.path.exists(self.gitpath):
repo = GitRepository.clone(self.uri, self.gitpath, self.ssl_verify)
Expand Down Expand Up @@ -380,6 +429,8 @@ def setup_cmd_parser(cls):
exgroup_fetch.add_argument('--latest-items', dest='latest_items',
action='store_true',
help="Fetch latest commits added to the repository")
exgroup_fetch.add_argument('--recovery', dest='recovery_commit',
help="Recover the last execution from a commit")
exgroup_fetch.add_argument('--no-update', dest='no_update',
action='store_true',
help="Fetch all commits without updating the repository")
Expand Down Expand Up @@ -1112,6 +1163,35 @@ def show(self, commits=None, encoding='utf-8'):
logger.debug("Git show fetched from %s repository (%s)",
self.uri, self.dirpath)

def get_commits_from_packs(self, packs, from_commit):
    """Collect the commits stored in packfiles, starting at a given one.

    The packs are read one after another, in the order given. Commits
    are ignored until `from_commit` shows up; that commit and every
    commit read after it (also from the following packs) are kept.

    :param packs: identifiers of the packs to read, in reading order
    :param from_commit: first commit to include in the result

    :returns: list of commits; empty when `from_commit` is in none
        of the packs
    """
    collected = []
    reached = False

    for pack_name in packs:
        for sha in self._read_commits_from_pack(pack_name):
            if not reached:
                reached = from_commit == sha
            if reached:
                collected.append(sha)

    return collected

def packs_by_date(self):
    """Get all packs ordered by date.

    Packs are looked up in the `objects/pack/` directory of the
    repository (`self.dirpath`). Only the files ending with `.idx`
    are taken into account; each of them is reported by the part of
    its name found between the first `-` and the first `.`
    (i.e. `<id>` for a file named `pack-<id>.idx`).

    :returns: list of pack identifiers sorted by the modification
        time of their `.idx` file, from older to newer
    """
    packs_dir = os.path.join(self.dirpath, 'objects/pack/')

    # Filter before sorting: the other entries of the directory are
    # never returned, so there is no reason to stat them (and to fail
    # if one of them cannot be stat-ed).
    indexes = [f for f in os.listdir(packs_dir) if f.endswith('.idx')]

    # Sort by date, from older to newer
    indexes.sort(key=lambda f: os.path.getmtime(os.path.join(packs_dir, f)))

    return [f.split('.')[0].split('-')[1] for f in indexes]

def _fetch_pack(self):
"""Fetch changes and store them in a pack."""

Expand Down
113 changes: 113 additions & 0 deletions tests/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,95 @@ def test_fetch_from_file(self):
self.assertEqual(commit['category'], 'commit')
self.assertEqual(commit['tag'], 'http://example.com.git')

def test_fetch_recovery(self):
    """Test whether recovery from a commit works"""

    origin_path = os.path.join(self.tmp_repo_path, 'gittest')
    editable_path = os.path.join(self.tmp_path, 'editgit')
    new_path = os.path.join(self.tmp_path, 'newgit')
    new_file = os.path.join(editable_path, 'newfile')

    def run_git(*args):
        # Run a git command inside the editable copy of the repository
        cmd = ['git'] + list(args)
        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                cwd=editable_path, env={'LANG': 'C'})

    def check_commits(fetched, expected_commits):
        # Fetched items must match, one by one, the expected hashes
        self.assertEqual(len(fetched), len(expected_commits))

        for commit, (sha, _) in zip(fetched, expected_commits):
            expected_uuid = uuid(editable_path, sha)
            self.assertEqual(commit['uuid'], expected_uuid)
            self.assertEqual(commit['data']['commit'], sha)

    shutil.copytree(origin_path, editable_path)

    git = Git(editable_path, new_path)
    commits = list(git.fetch())

    # Commits available in the repository before adding new ones
    expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
                ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
                ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
                ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
                ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
                ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
                ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
                ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
                ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]

    check_commits(commits, expected)

    # Check we can recover from a commit
    from_commit = 'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb'
    commits_recovery = list(git.fetch(recovery_commit=from_commit))

    # The recovery commit is the fifth one; it is part of the result
    expected_recovery = expected[4:]

    check_commits(commits_recovery, expected_recovery)

    # Create some new commits
    run_git('checkout', '-b', 'mybranch')

    with open(new_file, 'w') as f:
        f.write("Testing sync method")

    run_git('add', new_file)
    run_git('-c', 'user.name="mock"',
            '-c', 'user.email="[email protected]"',
            'commit', '-m', 'Testing sync')

    run_git('rm', new_file)
    run_git('-c', 'user.name="mock"',
            '-c', 'user.email="[email protected]"',
            'commit', '-m', 'Removing testing file for sync')

    # Two new commits should have been fetched
    commits = list(git.fetch(latest_items=True))
    self.assertEqual(len(commits), 2)

    # Check if we can recover from the last packfile
    from_commit = commits[0]['data']['commit']
    commits_recovery = list(git.fetch(recovery_commit=from_commit))
    self.assertEqual(len(commits_recovery), 2)
    self.assertEqual(commits_recovery[0]['uuid'], commits[0]['uuid'])
    self.assertEqual(commits_recovery[1]['uuid'], commits[1]['uuid'])

    # Cleanup
    shutil.rmtree(editable_path)
    shutil.rmtree(new_path)

def test_git_parser(self):
"""Test if the static method parses a git log file"""

Expand Down Expand Up @@ -873,6 +962,18 @@ def test_setup_cmd_parser(self):
self.assertEqual(parsed_args.uri, 'http://example.com/')
self.assertFalse(parsed_args.ssl_verify)

args = ['http://example.com/',
'--git-path', '/tmp/gitpath',
'--recovery', 'foocommit']

parsed_args = parser.parse(*args)
self.assertEqual(parsed_args.git_path, '/tmp/gitpath')
self.assertEqual(parsed_args.uri, 'http://example.com/')
self.assertEqual(parsed_args.recovery_commit, 'foocommit')
self.assertFalse(parsed_args.no_update)
self.assertFalse(parsed_args.latest_items)
self.assertTrue(parsed_args.ssl_verify)

def test_mutual_exclusive_update(self):
"""Test whether an exception is thrown when no-update and latest-items flags are set"""

Expand All @@ -885,6 +986,18 @@ def test_mutual_exclusive_update(self):
with self.assertRaises(SystemExit):
_ = parser.parse(*args)

def test_mutual_exclusive_recovery(self):
    """Check that the parser exits when both recovery and latest-items flags are given"""

    cmd_parser = GitCommand.setup_cmd_parser()

    # Both options are passed at once; the parser is expected to
    # refuse the combination by exiting.
    conflicting_args = (
        'http://example.com/',
        '--git-path', '/tmp/gitpath',
        '--recovery', 'foocommit',
        '--latest-items',
    )

    with self.assertRaises(SystemExit):
        cmd_parser.parse(*conflicting_args)


class TestGitParser(TestCaseGit):
"""Git parser tests"""
Expand Down

0 comments on commit 6bb1f96

Please sign in to comment.