diff --git a/perceval/backends/core/git.py b/perceval/backends/core/git.py
index 284406e0c..e094aeac6 100644
--- a/perceval/backends/core/git.py
+++ b/perceval/backends/core/git.py
@@ -83,7 +83,7 @@ def __init__(self, uri, gitpath, tag=None, archive=None, ssl_verify=True):
         self.gitpath = gitpath
 
     def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DEFAULT_LAST_DATETIME,
-              branches=None, latest_items=False, no_update=False):
+              branches=None, latest_items=False, no_update=False, recovery_commit=None):
         """Fetch commits.
 
         The method retrieves from a Git repository or a log file
@@ -118,6 +118,7 @@ def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DE
         :param branches: names of branches to fetch from (default: None)
         :param latest_items: sync with the repository to fetch only the
             newest commits
+        :param recovery_commit: recover from this commit without updating the repo
         :param no_update: if enabled, don't update the repo with the latest changes
 
         :returns: a generator of commits
@@ -132,6 +133,7 @@ def fetch(self, category=CATEGORY_COMMIT, from_date=DEFAULT_DATETIME, to_date=DE
             'to_date': to_date,
             'branches': branches,
             'latest_items': latest_items,
+            'recovery_commit': recovery_commit,
             'no_update': no_update
         }
         items = super().fetch(category, **kwargs)
@@ -151,11 +153,14 @@ def fetch_items(self, category, **kwargs):
         branches = kwargs['branches']
         latest_items = kwargs['latest_items']
         no_update = kwargs['no_update']
+        recovery_commit = kwargs.get('recovery_commit')
 
         ncommits = 0
 
         try:
-            if os.path.isfile(self.gitpath):
+            if recovery_commit:
+                commits = self._recovery(recovery_commit, from_date, to_date, branches)
+            elif os.path.isfile(self.gitpath):
                 commits = self._fetch_from_log()
             else:
                 commits = self._fetch_from_repo(from_date, to_date, branches,
@@ -321,6 +326,49 @@ def _fetch_newest_commits_from_repo(self, repo):
         gitshow = repo.show(hashes)
         return self.parse_git_log_from_iter(gitshow)
 
+    def _recovery(self, from_commit, from_date, to_date, branches):
+        """Recover a previous execution from a specific commit.
+
+        The repository is never updated. Commits are read from the log
+        file when `gitpath` is a file. Otherwise, they are read from the
+        packfiles of the repository or, when `from_commit` is not stored
+        in any of them, from the whole log of the repository.
+
+        :param from_commit: hash of the first commit to return
+        :param from_date: passed to `_fetch_from_repo` when the whole log is read
+        :param to_date: passed to `_fetch_from_repo` when the whole log is read
+        :param branches: passed to `_fetch_from_repo` when the whole log is read
+
+        :returns: a generator of commits that starts at `from_commit`; it
+            is empty when the commit is not found
+        """
+
+        if os.path.isfile(self.gitpath):
+            commits = self._fetch_from_log()
+        else:
+            repo = self._create_git_repository()
+            hashes = repo.get_commits_from_packs(repo.packs_by_date(), from_commit)
+
+            if hashes:
+                commits = self.parse_git_log_from_iter(repo.show(hashes))
+            else:
+                # No packfile stores the commit (e.g. there are no packfiles
+                # yet), so it is searched in the whole log of the repository
+                commits = self._fetch_from_repo(from_date=from_date, to_date=to_date,
+                                                branches=branches, no_update=True)
+
+        # Skip the commits before 'from_commit'; it is the first one returned
+        found = False
+
+        for commit in commits:
+            found = found or commit['commit'] == from_commit
+            if found:
+                yield commit
+
+        if not found:
+            logger.warning("Commit %s not found in %s; nothing was recovered",
+                           from_commit, self.gitpath)
+
     def _create_git_repository(self):
         if not os.path.exists(self.gitpath):
             repo = GitRepository.clone(self.uri, self.gitpath, self.ssl_verify)
@@ -380,6 +428,8 @@ def setup_cmd_parser(cls):
         exgroup_fetch.add_argument('--latest-items', dest='latest_items',
                                    action='store_true',
                                    help="Fetch latest commits added to the repository")
+        exgroup_fetch.add_argument('--recovery', dest='recovery_commit',
+                                   help="Recover the last execution from a commit")
         exgroup_fetch.add_argument('--no-update', dest='no_update',
                                    action='store_true',
                                    help="Fetch all commits without updating the repository")
@@ -1112,6 +1162,48 @@ def show(self, commits=None, encoding='utf-8'):
         logger.debug("Git show fetched from %s repository (%s)",
                      self.uri, self.dirpath)
 
+    def get_commits_from_packs(self, packs, from_commit):
+        """Get the commits stored in packfiles from a specific one.
+
+        Packfiles are read in the given order; the commits read
+        before `from_commit` are discarded.
+
+        :param packs: packfiles to read, as returned by `packs_by_date`
+        :param from_commit: hash of the first commit to return
+
+        :returns: a list of hashes that starts with `from_commit`; it
+            is empty when no packfile stores that commit
+        """
+
+        hashes = []
+
+        for pack in packs:
+            for commit in self._read_commits_from_pack(pack):
+                # Once 'from_commit' shows up, every commit is kept
+                if hashes or commit == from_commit:
+                    hashes.append(commit)
+
+        return hashes
+
+    def packs_by_date(self):
+        """Get the packfiles of the repository, from older to newer.
+
+        Packfiles are identified by the second dash-separated field
+        of the name of their index files (NAME in 'pack-NAME.idx'),
+        and sorted by the modification time of those files.
+
+        :returns: a list with the identifier of each packfile
+        """
+
+        packs_dir = os.path.join(self.dirpath, 'objects/pack/')
+
+        # Only index files are needed, so the rest are not even sorted
+        indexes = [f for f in os.listdir(packs_dir) if f.endswith('.idx')]
+        # Sort by date, from older to newer
+        indexes.sort(key=lambda x: os.path.getmtime(os.path.join(packs_dir, x)))
+
+        return [f.split('.')[0].split('-')[1] for f in indexes]
+
     def _fetch_pack(self):
         """Fetch changes and store them in a pack."""
 
diff --git a/tests/test_git.py b/tests/test_git.py
index b783ac29f..4dab49d20 100644
--- a/tests/test_git.py
+++ b/tests/test_git.py
@@ -706,6 +706,95 @@ def test_fetch_from_file(self):
         self.assertEqual(commit['category'], 'commit')
         self.assertEqual(commit['tag'], 'http://example.com.git')
 
+    def test_fetch_recovery(self):
+        """Test whether recovery from a commit works"""
+
+        origin_path = os.path.join(self.tmp_repo_path, 'gittest')
+        editable_path = os.path.join(self.tmp_path, 'editgit')
+        new_path = os.path.join(self.tmp_path, 'newgit')
+        new_file = os.path.join(editable_path, 'newfile')
+
+        shutil.copytree(origin_path, editable_path)
+
+        git = Git(editable_path, new_path)
+        commits = [commit for commit in git.fetch()]
+
+        # Count the number of commits before adding some new
+        expected = [('bc57a9209f096a130dcc5ba7089a8663f758a703', 1344965413.0),
+                    ('87783129c3f00d2c81a3a8e585eb86a47e39891a', 1344965535.0),
+                    ('7debcf8a2f57f86663809c58b5c07a398be7674c', 1344965607.0),
+                    ('c0d66f92a95e31c77be08dc9d0f11a16715d1885', 1344965702.0),
+                    ('c6ba8f7a1058db3e6b4bc6f1090e932b107605fb', 1344966351.0),
+                    ('589bb080f059834829a2a5955bebfd7c2baa110a', 1344967441.0),
+                    ('ce8e0b86a1e9877f42fe9453ede418519115f367', 1392185269.0),
+                    ('51a3b654f252210572297f47597b31527c475fb8', 1392185366.0),
+                    ('456a68ee1407a77f3e804a30dff245bb6c6b872f', 1392185439.0)]
+
+        self.assertEqual(len(commits), len(expected))
+
+        for x in range(len(commits)):
+            expected_uuid = uuid(editable_path, expected[x][0])
+            commit = commits[x]
+            self.assertEqual(commit['uuid'], expected_uuid)
+            self.assertEqual(commit['data']['commit'], expected[x][0])
+
+        # Check we can recover from a commit
+        from_commit = 'c6ba8f7a1058db3e6b4bc6f1090e932b107605fb'
+        commits_recovery = [commit for commit in git.fetch(recovery_commit=from_commit)]
+
+        expected_recovery = expected[4:]
+
+        self.assertEqual(len(commits_recovery), len(expected_recovery))
+
+        for x in range(len(commits_recovery)):
+            expected_uuid = uuid(editable_path, expected_recovery[x][0])
+            commit = commits_recovery[x]
+            self.assertEqual(commit['uuid'], expected_uuid)
+            self.assertEqual(commit['data']['commit'], expected_recovery[x][0])
+
+        # Create some new commits
+        cmd = ['git', 'checkout', '-b', 'mybranch']
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
+                                cwd=editable_path, env={'LANG': 'C'})
+
+        with open(new_file, 'w') as f:
+            f.write("Testing sync method")
+
+        cmd = ['git', 'add', new_file]
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
+                                cwd=editable_path, env={'LANG': 'C'})
+
+        cmd = ['git', '-c', 'user.name="mock"',
+               '-c', 'user.email="mock@example.com"',
+               'commit', '-m', 'Testing sync']
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
+                                cwd=editable_path, env={'LANG': 'C'})
+
+        cmd = ['git', 'rm', new_file]
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
+                                cwd=editable_path, env={'LANG': 'C'})
+
+        cmd = ['git', '-c', 'user.name="mock"',
+               '-c', 'user.email="mock@example.com"',
+               'commit', '-m', 'Removing testing file for sync']
+        subprocess.check_output(cmd, stderr=subprocess.STDOUT,
+                                cwd=editable_path, env={'LANG': 'C'})
+
+        # Two new commits should have been fetched
+        commits = [commit for commit in git.fetch(latest_items=True)]
+        self.assertEqual(len(commits), 2)
+
+        # Check if we can recover from the last packfile
+        from_commit = commits[0]['data']['commit']
+        commits_recovery = [commit for commit in git.fetch(recovery_commit=from_commit)]
+        self.assertEqual(len(commits_recovery), 2)
+        self.assertEqual(commits_recovery[0]['uuid'], commits[0]['uuid'])
+        self.assertEqual(commits_recovery[1]['uuid'], commits[1]['uuid'])
+
+        # Cleanup
+        shutil.rmtree(editable_path)
+        shutil.rmtree(new_path)
+
     def test_git_parser(self):
         """Test if the static method parses a git log file"""
 
@@ -873,6 +962,18 @@ def test_setup_cmd_parser(self):
         self.assertEqual(parsed_args.uri, 'http://example.com/')
         self.assertFalse(parsed_args.ssl_verify)
 
+        args = ['http://example.com/',
+                '--git-path', '/tmp/gitpath',
+                '--recovery', 'foocommit']
+
+        parsed_args = parser.parse(*args)
+        self.assertEqual(parsed_args.git_path, '/tmp/gitpath')
+        self.assertEqual(parsed_args.uri, 'http://example.com/')
+        self.assertEqual(parsed_args.recovery_commit, 'foocommit')
+        self.assertFalse(parsed_args.no_update)
+        self.assertFalse(parsed_args.latest_items)
+        self.assertTrue(parsed_args.ssl_verify)
+
     def test_mutual_exclusive_update(self):
         """Test whether an exception is thrown when no-update and latest-items flags are set"""
 
@@ -885,6 +986,18 @@ def test_mutual_exclusive_update(self):
         with self.assertRaises(SystemExit):
             _ = parser.parse(*args)
 
+    def test_mutual_exclusive_recovery(self):
+        """Test whether an exception is thrown when recovery and latest-items flags are set"""
+
+        parser = GitCommand.setup_cmd_parser()
+        args = ['http://example.com/',
+                '--git-path', '/tmp/gitpath',
+                '--recovery', 'foocommit',
+                '--latest-items']
+
+        with self.assertRaises(SystemExit):
+            _ = parser.parse(*args)
+
 
 class TestGitParser(TestCaseGit):
     """Git parser tests"""