Skip to content

Commit

Permalink
proof of concept: split history cache generation into chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladimir Kotal committed Apr 16, 2021
1 parent 5ccae3f commit 61a50bc
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ private void doFileHistory(String filename, List<HistoryEntry> historyEntries,

storeFile(hist, file, repository, !renamed);

// TODO: readjust for per partes history indexing
statRepoHist.report(LOGGER, Level.FINER,
String.format("Done storing history cache for '%s'", filename),
"filehistorycache.history.store");
Expand Down Expand Up @@ -437,14 +438,6 @@ public void store(History history, Repository repository)
return;
}

LOGGER.log(Level.FINE,
"Storing history for repository {0}",
new Object[] {repository.getDirectoryName()});

// Firstly store the history for the top-level directory.
doFileHistory(repository.getDirectoryName(), history.getHistoryEntries(),
repository, env.getSourceRootFile(), null, false);

HashMap<String, List<HistoryEntry>> map = new HashMap<>();
HashMap<String, Boolean> acceptanceCache = new HashMap<>();

Expand Down Expand Up @@ -513,7 +506,8 @@ public void store(History history, Repository repository)
fileHistoryCount++;
}

LOGGER.log(Level.FINE, "Stored history for {0} files", fileHistoryCount);
LOGGER.log(Level.FINE, "Stored history for {0} files in repository ''{1}''",
new Object[]{fileHistoryCount, repository.getDirectoryName()});

if (!handleRenamedFiles) {
finishStore(repository, latestRev);
Expand Down Expand Up @@ -735,7 +729,8 @@ private String getRepositoryCachedRevPath(Repository repository) {
* @param repository repository
* @param rev latest revision which has been just indexed
*/
private void storeLatestCachedRevision(Repository repository, String rev) {
@Override
public void storeLatestCachedRevision(Repository repository, String rev) {
Writer writer = null;

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.ArrayList;
Expand Down Expand Up @@ -86,6 +87,7 @@
import org.opengrok.indexer.util.Executor;
import org.opengrok.indexer.util.ForbiddenSymlinkException;
import org.opengrok.indexer.util.LazilyInstantiate;
import org.opengrok.indexer.util.Statistics;
import org.opengrok.indexer.util.Version;

import static org.opengrok.indexer.history.HistoryEntry.TAGS_SEPARATOR;
Expand All @@ -94,7 +96,7 @@
* Access to a Git repository.
*
*/
public class GitRepository extends Repository {
public class GitRepository extends RepositoryPerPartesHistory {

private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class);

Expand Down Expand Up @@ -524,6 +526,51 @@ History getHistory(File file) throws HistoryException {

@Override
History getHistory(File file, String sinceRevision) throws HistoryException {
return getHistory(file, sinceRevision, null);
}

// TODO: add test
/**
 * Get the list of changeset IDs that split the history of this repository into chunks.
 * <p>
 * The commits reachable from HEAD are walked (minus those reachable from
 * {@code sinceRevision}, if specified) and the ID of every {@code maxCount}-th
 * visited commit is recorded. The recorded IDs are then reversed and
 * a {@code null} element is appended as the terminator.
 * </p>
 *
 * @param sinceRevision revision marked as uninteresting for the walk,
 *                      or {@code null} to walk everything reachable from HEAD
 * @return list of full (non-abbreviated) commit IDs, always ending with a {@code null} element
 * @throws HistoryException if the repository access fails with {@code IOException}
 */
public List<String> getBoundaryChangesetIDs(String sinceRevision) throws HistoryException {
    List<String> result = new ArrayList<>();
    final int maxCount = 1024; // TODO

    Statistics stat = new Statistics();
    try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName());
         RevWalk walk = new RevWalk(repository)) {

        if (sinceRevision != null) {
            walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
        }
        walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));

        int cnt = 0;
        for (RevCommit commit : walk) {
            // The very first visited commit is never a boundary; after that every maxCount-th one is.
            if (cnt != 0 && cnt % maxCount == 0) {
                // Do not abbreviate the Id as this could cause AmbiguousObjectException in getHistory().
                result.add(commit.getId().name());
            }
            cnt++;
        }
    } catch (IOException e) {
        throw new HistoryException(e);
    }

    // The changesets need to go from oldest to newest.
    Collections.reverse(result);

    // Add null to finish the last step in Repository#createCache().
    result.add(null);

    // String.format() does not use MessageFormat quoting rules so a single quote is written as is.
    // Note that the reported count includes the terminating null element.
    stat.report(LOGGER, Level.FINE,
            String.format("done getting boundary changesets for '%s' (%d entries)",
                    getDirectoryName(), result.size()));

    return result;
}

public History getHistory(File file, String sinceRevision, String tillRevision) throws HistoryException {
final List<HistoryEntry> entries = new ArrayList<>();
final List<String> renamedFiles = new ArrayList<>();

Expand All @@ -533,7 +580,12 @@ History getHistory(File file, String sinceRevision) throws HistoryException {
if (sinceRevision != null) {
walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
}
walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));

if (tillRevision != null) {
walk.markStart(walk.lookupCommit(repository.resolve(tillRevision)));
} else {
walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
}

String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file);
if (!getDirectoryNameRelative().equals(relativePath)) {
Expand Down Expand Up @@ -761,6 +813,14 @@ String determineBranch(CommandTimeoutType cmdType) throws IOException {
}
}

// TODO: add test for this
/**
 * Determine the ID of the commit that HEAD currently refers to.
 *
 * @return commit ID abbreviated to {@code GIT_ABBREV_LEN} characters
 * @throws IOException if the repository cannot be accessed
 */
public String determineCurrentVersionId() throws IOException {
    final String currentVersionId;

    try (org.eclipse.jgit.lib.Repository jgitRepository = getJGitRepository(getDirectoryName())) {
        final Ref headRef = jgitRepository.exactRef(Constants.HEAD);
        currentVersionId = getCommit(jgitRepository, headRef)
                .getId()
                .abbreviate(GIT_ABBREV_LEN)
                .name();
    }

    return currentVersionId;
}

@Override
public String determineCurrentVersion(CommandTimeoutType cmdType) throws IOException {
try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ boolean hasCacheForDirectory(File directory, Repository repository)
String getLatestCachedRevision(Repository repository)
throws HistoryException;

/**
 * Store the latest cached revision for given repository.
 *
 * @param repository repository
 * @param version latest revision which has been just indexed
 */
void storeLatestCachedRevision(Repository repository, String version);

/**
* Get the last modified times for all files and subdirectories in the
* specified directory.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.opengrok.indexer.util.Executor;

import org.jetbrains.annotations.NotNull;
import org.opengrok.indexer.util.Statistics;

/**
* An interface for an external repository.
Expand Down Expand Up @@ -141,8 +142,7 @@ public String getRepoCommand() {
* @return partial history for file
* @throws HistoryException on error accessing the history
*/
History getHistory(File file, String sinceRevision)
throws HistoryException {
History getHistory(File file, String sinceRevision) throws HistoryException {

// If we want an incremental history update and get here, warn that
// it may be slow.
Expand Down Expand Up @@ -361,17 +361,16 @@ protected String getRevisionForAnnotate(String history_revision) {
*
* @throws HistoryException on error
*/
final void createCache(HistoryCache cache, String sinceRevision)
throws HistoryException {
final void createCache(HistoryCache cache, String sinceRevision) throws HistoryException {

if (!isWorking()) {
return;
}

// If we don't have a directory parser, we can't create the cache
// this way. Just give up and return.
if (!hasHistoryForDirectories()) {
LOGGER.log(
Level.INFO,
LOGGER.log(Level.INFO,
"Skipping creation of history cache for {0}, since retrieval "
+ "of history for directories is not implemented for this "
+ "repository type.", getDirectoryName());
Expand All @@ -381,31 +380,45 @@ final void createCache(HistoryCache cache, String sinceRevision)
File directory = new File(getDirectoryName());

History history;
try {
if (!(this instanceof RepositoryPerPartesHistory)) {
history = getHistory(directory, sinceRevision);
} catch (HistoryException he) {
if (sinceRevision == null) {
// Failed to get full history, so fail.
throw he;
finishCreateCache(cache, history);
return;
}

// To avoid storing complete History in memory, split the work into multiple chunks.
RepositoryPerPartesHistory repo = (RepositoryPerPartesHistory) this;
List<String> boundaryChangesets = repo.getBoundaryChangesetIDs(sinceRevision);
int cnt = 0;
for (String tillRevision: boundaryChangesets) {
Statistics stat = new Statistics();
history = repo.getHistory(directory, sinceRevision, tillRevision);
if (history.getHistoryEntries().size() == 0) {
// TODO
break;
}
// Failed to get partial history. This may have been caused
// by changes in the revision numbers since the last update
// (bug #14724) so we'll try to regenerate the cache from
// scratch instead.
LOGGER.log(Level.WARNING,
"Failed to get partial history. Attempting to "
+ "recreate the history cache from scratch.", he);
history = null;

finishCreateCache(cache, history);
sinceRevision = tillRevision;

stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''",
++cnt, boundaryChangesets.size(), this.getDirectoryName()));
}

if (sinceRevision != null && history == null) {
// Failed to get partial history, now get full history instead.
history = getHistory(directory);
// Got full history successfully. Clear the history cache so that
// we can recreate it from scratch.
cache.clear(this);
/*
* Need to reset the latest cachedRevision as the last finishCreateCache() above
* wrote the changeset ID of the last part.
* TODO: probably not necessary now ?
*/
try {
// TODO: does not work well if finishStore() failed
cache.storeLatestCachedRevision(this, repo.determineCurrentVersionId());
} catch (IOException e) {
throw new HistoryException(e);
}
}

private void finishCreateCache(HistoryCache cache, History history) throws HistoryException {
// We need to refresh list of tags for incremental reindex.
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
if (env.isTagsEnabled() && this.hasFileBasedTags()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* See LICENSE.txt included in this distribution for the specific
* language governing permissions and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at LICENSE.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
*/
package org.opengrok.indexer.history;

import java.io.File;
import java.io.IOException;
import java.util.List;

/**
 * Base class for repositories that are able to provide their history in parts
 * ("per partes"), i.e. for a range of changesets rather than all at once.
 * {@code Repository#createCache()} checks for this type in order to build
 * the history cache chunk by chunk.
 */
public abstract class RepositoryPerPartesHistory extends Repository {
    private static final long serialVersionUID = -3433255821312805064L;

    /**
     * Get the identification of the current version of the repository.
     *
     * @return changeset ID
     * @throws IOException on error accessing the repository
     */
    abstract String determineCurrentVersionId() throws IOException;

    /**
     * Get the changeset IDs that delimit the individual history chunks.
     *
     * @param sinceRevision revision from which the chunking should start, may be {@code null}
     * @return list of changeset IDs used as the {@code tillRevision} argument
     *         of successive {@link #getHistory(File, String, String)} calls
     * @throws HistoryException on error accessing the history
     */
    abstract List<String> getBoundaryChangesetIDs(String sinceRevision) throws HistoryException;

    /**
     * Get the part of the history delimited by a pair of revisions.
     *
     * @param directory directory to get the history for
     * @param sinceRevision revision at which the requested part starts, may be {@code null}
     * @param tillRevision revision at which the requested part ends, may be {@code null}
     * @return partial history
     * @throws HistoryException on error accessing the history
     */
    abstract History getHistory(File directory, String sinceRevision, String tillRevision) throws HistoryException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.GIT;
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.MERCURIAL;
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.SCCS;
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.SUBVERSION;
Expand Down Expand Up @@ -247,6 +248,24 @@ public void testStoreAndGetIncrementalTags() throws Exception {
retrievedUpdatedHistoryMainC.getHistoryEntries(), false);
}

/**
 * Create the history cache for the Git test repository from scratch
 * (i.e. with {@code null} as the starting revision).
 * There are no assertions yet.
 * TODO: move this to RepositoryTest ?
 */
@Test
@EnabledOnOs({OS.LINUX, OS.MAC, OS.SOLARIS, OS.AIX, OS.OTHER})
@EnabledForRepository(GIT)
public void testIncrementalStore() throws Exception {
    final File gitRoot = new File(repositories.getSourceRoot(), "git");
    final Repository gitRepository = RepositoryFactory.getRepository(gitRoot);

    // Retrieved before the cache is created; not compared to anything so far.
    final History expectedHistory = gitRepository.getHistory(gitRoot);

    gitRepository.createCache(cache, null);

    // TODO
}

/**
* Basic tests for the {@code store()} and {@code get()} methods.
*/
Expand Down

0 comments on commit 61a50bc

Please sign in to comment.