Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
Load deps in batches (#216)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrismwendt authored Nov 10, 2021
1 parent 3c45b7a commit d30db6f
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 112 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Stats:
Packages traversed: 40
```

If lsif-go is using too much memory, try setting `--dep-batch-size=100` to load only 100 dependencies into memory at a time (~1GB overhead). Lowering the batch size reduces the memory overhead further, but it increases the runtime disproportionately: loading a batch has a fixed cost of ~500ms, whereas each additional package loaded within a batch adds only ~10ms.

Use `lsif-go --help` for more information.

## Updating your index
Expand Down
7 changes: 3 additions & 4 deletions cmd/lsif-go/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ var (
verbosity int
noOutput bool
noAnimation bool
skipDeps bool
depBatchSize int
)

func init() {
Expand All @@ -45,13 +45,12 @@ func init() {
app.Flag("repository-remote", "Specifies the canonical name of the repository remote.").Default(defaultRepositoryRemote.Value()).StringVar(&repositoryRemote)
app.Flag("module-version", "Specifies the version of the module defined by module-root.").Default(defaultModuleVersion.Value()).StringVar(&moduleVersion)

// Feature options
app.Flag("skip-deps", "Do not load depedencies - reduces memory usage but omits interface implementation data from deps.").Default("true").BoolVar(&skipDeps)

// Verbosity options
app.Flag("quiet", "Do not output to stdout or stderr.").Short('q').Default("false").BoolVar(&noOutput)
app.Flag("verbose", "Output debug logs.").Short('v').CounterVar(&verbosity)
app.Flag("no-animation", "Do not animate output.").Default("false").BoolVar(&noAnimation)

app.Flag("dep-batch-size", "How many dependencies to load at once to limit memory usage (e.g. 100). 0 means load all at once.").Default("0").IntVar(&depBatchSize)
}

func parseArgs(args []string) (err error) {
Expand Down
4 changes: 2 additions & 2 deletions cmd/lsif-go/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"github.com/sourcegraph/sourcegraph/lib/codeintel/lsif/protocol/writer"
)

func writeIndex(repositoryRoot, repositoryRemote, projectRoot, moduleName, moduleVersion string, dependencies map[string]gomod.GoModule, projectDependencies []string, outFile string, outputOptions output.Options, skipDeps bool) error {
func writeIndex(repositoryRoot, repositoryRemote, projectRoot, moduleName, moduleVersion string, dependencies map[string]gomod.GoModule, projectDependencies []string, outFile string, outputOptions output.Options, depBatchSize int) error {
start := time.Now()

out, err := os.Create(outFile)
Expand Down Expand Up @@ -44,7 +44,7 @@ func writeIndex(repositoryRoot, repositoryRemote, projectRoot, moduleName, modul
writer.NewJSONWriter(out),
packageDataCache,
outputOptions,
skipDeps,
depBatchSize,
)

if err := indexer.Index(); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion cmd/lsif-go/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func mainErr() (err error) {
projectDependencies,
outFile,
outputOptions,
skipDeps,
depBatchSize,
); err != nil {
return fmt.Errorf("failed to index: %v", err)
}
Expand Down
142 changes: 99 additions & 43 deletions internal/indexer/implementation.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package indexer

import (
"go/ast"
"go/types"
"runtime"
"strings"

"github.com/sourcegraph/lsif-go/internal/output"
Expand All @@ -11,16 +11,23 @@ import (
)

type implDef struct {
defInfo *DefinitionInfo
ident *ast.Ident
methods []*types.Selection
methodsByName map[string]*types.Selection
pkg *packages.Package
typeName *types.TypeName
defInfo *DefinitionInfo
identIsExported bool
methods []string
methodsByName map[string]methodInfo
monikerPackage string
monikerIdentifier string
typeNameIsExported bool
typeNameIsAlias bool
}

type methodInfo struct {
definition *DefinitionInfo
monikerIdentifier string
}

func (def implDef) Exported() bool {
return def.typeName.Exported() || def.ident.IsExported()
return def.typeNameIsExported || def.identIsExported
}

type implEdge struct {
Expand Down Expand Up @@ -79,7 +86,7 @@ func (rel implRelation) interfaceIxToNodeIx(idx int) int {
return rel.ifaceOffset + idx
}

func (rel *implRelation) linkInterfaceToReceivers(idx int, interfaceMethods []*types.Selection, methodToReceivers map[string]*intsets.Sparse) {
func (rel *implRelation) linkInterfaceToReceivers(idx int, interfaceMethods []string, methodToReceivers map[string]*intsets.Sparse) {
// Empty interface - skip it.
if len(interfaceMethods) == 0 {
return
Expand All @@ -100,7 +107,7 @@ func (rel *implRelation) linkInterfaceToReceivers(idx int, interfaceMethods []*t

// If it doesn't match on the first method, then we can immediately quit.
// Concrete types must _always_ implement all the methods
if initialReceivers, ok := methodToReceivers[canonicalize(interfaceMethods[0])]; !ok {
if initialReceivers, ok := methodToReceivers[interfaceMethods[0]]; !ok {
return
} else {
candidateTypes.Copy(initialReceivers)
Expand All @@ -109,7 +116,7 @@ func (rel *implRelation) linkInterfaceToReceivers(idx int, interfaceMethods []*t
// Loop over the rest of the methods and find all the types that intersect
// every method of the interface.
for _, method := range interfaceMethods[1:] {
receivers, ok := methodToReceivers[canonicalize(method)]
receivers, ok := methodToReceivers[method]
if !ok {
return
}
Expand All @@ -130,7 +137,9 @@ func (rel *implRelation) linkInterfaceToReceivers(idx int, interfaceMethods []*t
//
// NOTE: if indexImplementations becomes multi-threaded then we would need to update
// Indexer.ensureImplementationMoniker to ensure that it uses appropriate locking.
func (i *Indexer) indexImplementations() {
func (i *Indexer) indexImplementations() error {
var implErr error

output.WithProgress("Indexing implementations", func() {
// When considering the connections we want to draw between the following four categories:
// - LocalInterfaces: Interfaces created in the current project
Expand All @@ -157,7 +166,11 @@ func (i *Indexer) indexImplementations() {

// =========================
// Local Implementations
localInterfaces, localConcreteTypes := i.extractInterfacesAndConcreteTypes(i.packages)
localInterfaces, localConcreteTypes, err := i.extractInterfacesAndConcreteTypes([]string{"./..."})
if err != nil {
implErr = err
return
}

// LocalConcreteTypes -> LocalInterfaces
localRelation := buildImplementationRelation(localConcreteTypes, localInterfaces)
Expand All @@ -169,7 +182,11 @@ func (i *Indexer) indexImplementations() {

// =========================
// Remote Implementations
remoteInterfaces, remoteConcreteTypes := i.extractInterfacesAndConcreteTypes(i.depPackages)
remoteInterfaces, remoteConcreteTypes, err := i.extractInterfacesAndConcreteTypes(i.projectDependencies)
if err != nil {
implErr = err
return
}

// LocalConcreteTypes -> RemoteInterfaces (exported only)
localTypesToRemoteInterfaces := buildImplementationRelation(localConcreteTypes, filterToExported(remoteInterfaces))
Expand All @@ -180,6 +197,8 @@ func (i *Indexer) indexImplementations() {
localInterfacesToRemoteTypes.forEachImplementation(i.emitRemoteImplementation)

}, i.outputOptions)

return implErr
}

// emitLocalImplementation correlates implementations for both structs/interfaces (referred to as typeDefs) and methods.
Expand Down Expand Up @@ -209,18 +228,17 @@ func (i *Indexer) emitLocalImplementation(from implDef, tos []implDef) {

fromMethodDef := i.forEachMethodImplementation(tos, fromName, fromMethod, func(to implDef, _ *DefinitionInfo) {
toMethod := to.methodsByName[fromName]
toMethodDef := i.getDefinitionInfo(toMethod.Obj(), nil)

// This method is from an embedded type defined in some dependency.
if toMethodDef == nil {
if toMethod.definition == nil {
return
}

toDocument := toMethodDef.DocumentID
toDocument := toMethod.definition.DocumentID
if _, ok := methodDocToInvs[toDocument]; !ok {
methodDocToInvs[toDocument] = []uint64{}
}
methodDocToInvs[toDocument] = append(methodDocToInvs[toDocument], toMethodDef.RangeID)
methodDocToInvs[toDocument] = append(methodDocToInvs[toDocument], toMethod.definition.RangeID)
})

if fromMethodDef == nil {
Expand Down Expand Up @@ -248,13 +266,13 @@ func (i *Indexer) emitRemoteImplementation(from implDef, tos []implDef) {
if from.defInfo == nil {
continue
}
i.emitImplementationMoniker(from.defInfo.ResultSetID, to.pkg, to.typeName)
i.emitImplementationMoniker(from.defInfo.ResultSetID, to.monikerPackage, to.monikerIdentifier)
}

for fromName, fromMethod := range from.methodsByName {
i.forEachMethodImplementation(tos, fromName, fromMethod, func(to implDef, fromDef *DefinitionInfo) {
toMethod := to.methodsByName[fromName]
i.emitImplementationMoniker(fromDef.ResultSetID, to.pkg, toMethod.Obj())
i.emitImplementationMoniker(fromDef.ResultSetID, to.monikerPackage, toMethod.monikerIdentifier)
})
}
}
Expand All @@ -267,13 +285,11 @@ func (i *Indexer) emitRemoteImplementation(from implDef, tos []implDef) {
func (i *Indexer) forEachMethodImplementation(
tos []implDef,
fromName string,
fromMethod *types.Selection,
fromMethod methodInfo,
callback func(to implDef, fromDef *DefinitionInfo),
) *DefinitionInfo {
fromMethodDef := i.getDefinitionInfo(fromMethod.Obj(), nil)

// This method is from an embedded type defined in some dependency.
if fromMethodDef == nil {
if fromMethod.definition == nil {
return nil
}

Expand All @@ -282,27 +298,27 @@ func (i *Indexer) forEachMethodImplementation(
// methods to be considered an implementation.
for _, to := range tos {
if _, ok := to.methodsByName[fromName]; !ok {
return fromMethodDef
return fromMethod.definition
}
}

for _, to := range tos {
// Skip aliases because their methods are redundant with
// the underlying concrete type's methods.
if to.typeName.IsAlias() {
if to.typeNameIsAlias {
continue
}

callback(to, fromMethodDef)
callback(to, fromMethod.definition)
}

return fromMethodDef
return fromMethod.definition
}

// extractInterfacesAndConcreteTypes constructs a list of interfaces and
// concrete types from the list of given packages.
func (i *Indexer) extractInterfacesAndConcreteTypes(pkgs []*packages.Package) (interfaces []implDef, concreteTypes []implDef) {
for _, pkg := range pkgs {
func (i *Indexer) extractInterfacesAndConcreteTypes(pkgNames []string) (interfaces []implDef, concreteTypes []implDef, err error) {
visit := func(pkg *packages.Package) {
for ident, obj := range pkg.TypesInfo.Defs {
if obj == nil {
continue
Expand All @@ -321,24 +337,36 @@ func (i *Indexer) extractInterfacesAndConcreteTypes(pkgs []*packages.Package) (i

methods := listMethods(obj.Type().(*types.Named))

canonicalizedMethods := []string{}
for _, m := range methods {
canonicalizedMethods = append(canonicalizedMethods, canonicalize(m))
}

// ignore interfaces that are empty. they are too
// plentiful and don't provide useful intelligence.
if len(methods) == 0 {
continue
}

methodsByName := map[string]*types.Selection{}
methodsByName := map[string]methodInfo{}
for _, m := range methods {
methodsByName[m.Obj().Name()] = m
methodsByName[m.Obj().Name()] = methodInfo{
definition: i.getDefinitionInfo(m.Obj(), nil),
monikerIdentifier: joinMonikerParts(makeMonikerPackage(m.Obj()), makeMonikerIdentifier(i.packageDataCache, pkg, m.Obj())),
}
}

monikerPackage := makeMonikerPackage(obj)

d := implDef{
pkg: pkg,
typeName: typeName,
ident: ident,
defInfo: i.getDefinitionInfo(typeName, ident),
methods: methods,
methodsByName: methodsByName,
monikerPackage: monikerPackage,
monikerIdentifier: joinMonikerParts(monikerPackage, makeMonikerIdentifier(i.packageDataCache, pkg, obj)),
typeNameIsExported: typeName.Exported(),
typeNameIsAlias: typeName.IsAlias(),
identIsExported: ident.IsExported(),
defInfo: i.getDefinitionInfo(typeName, ident),
methods: canonicalizedMethods,
methodsByName: methodsByName,
}
if types.IsInterface(obj.Type()) {
interfaces = append(interfaces, d)
Expand All @@ -348,7 +376,36 @@ func (i *Indexer) extractInterfacesAndConcreteTypes(pkgs []*packages.Package) (i
}
}

return interfaces, concreteTypes
batch := func(pkgBatch []string) error {
pkgs, err := i.loadPackage(true, pkgBatch...)
if err != nil {
return err
}

for _, pkg := range pkgs {
visit(pkg)
}
return nil
}

pkgBatch := []string{}
for ix, pkgName := range pkgNames {
pkgBatch = append(pkgBatch, pkgName)

if i.depBatchSize != 0 && ix%i.depBatchSize == 0 {
err := batch(pkgBatch)
runtime.GC() // Prevent a garbage pile
if err != nil {
return nil, nil, err
}
pkgBatch = pkgBatch[:0]
}
}
if err := batch(pkgBatch); err != nil {
return nil, nil, err
}

return interfaces, concreteTypes, nil
}

// buildImplementationRelation builds a map from concrete types to all the interfaces that they implement.
Expand All @@ -363,11 +420,10 @@ func buildImplementationRelation(concreteTypes, interfaces []implDef) implRelati
methodToReceivers := map[string]*intsets.Sparse{}
for idx, t := range concreteTypes {
for _, method := range t.methods {
key := canonicalize(method)
if _, ok := methodToReceivers[key]; !ok {
methodToReceivers[key] = &intsets.Sparse{}
if _, ok := methodToReceivers[method]; !ok {
methodToReceivers[method] = &intsets.Sparse{}
}
methodToReceivers[key].Insert(idx)
methodToReceivers[method].Insert(idx)
}
}

Expand Down
Loading

0 comments on commit d30db6f

Please sign in to comment.