Skip to content

Commit

Permalink
perf: optimise solver graph evaluation loop (#6728)
Browse files Browse the repository at this point in the history
This fix makes the `loop`  method in the solver much more efficient for
large graphs (lots of actions and dependencies).

While graph evaluation in the solver isn't usually the performance
bottleneck, the performance impact can be noticeable in larger projects.

Here, we use a generator to get leaf nodes ready for processing (whereas
previously, we'd traverse the whole pending graph on each loop iteration
and load it node-by-node into a new dependency graph data structure,
which was costly when iterating over a large graph in a tight loop).

We also used a simple boolean flag (dirty) in the solver to track
whether any tasks had been requested or completed (successfully or with
an error) since the last loop. This was used to avoid evaluating the
graph when we know it's not going to result in any changes to the
in-progress or pending graphs, or in any new nodes being scheduled for
processing.

There are further optimizations we can make to the solver (see the
comments added in this commit), but they would require careful
implementation and testing to avoid possible regressions, so we're
leaving them be for now.

In a test project that was specifically designed to get the solver to
struggle (lots of actions with lots of dependencies), fully resolving
the graph before this change took 3 minutes on my laptop, whereas it's
down to 55 seconds after these changes. I suspect it could be brought
down to 20-30 seconds with the further optimizations outlined in
the added code comments, but this might be enough for the time being.

Co-authored-by: Steffen Neubauer <[email protected]>
  • Loading branch information
thsig and stefreak authored Dec 18, 2024
1 parent f014cb6 commit bd6c6ba
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 592 deletions.
111 changes: 75 additions & 36 deletions core/src/graph/solver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ import type { Log } from "../logger/log-entry.js"
import type { GardenError, GardenErrorParams } from "../exceptions.js"
import { GraphError, toGardenError } from "../exceptions.js"
import { uuidv4 } from "../util/random.js"
import { DependencyGraph } from "./common.js"
import { Profile } from "../util/profiling.js"
import { TypedEventEmitter } from "../util/events.js"
import { groupBy, keyBy } from "lodash-es"
import { keyBy } from "lodash-es"
import type { GraphResult, TaskEventBase } from "./results.js"
import { GraphResults, resultToString } from "./results.js"
import { gardenEnv } from "../constants.js"
Expand Down Expand Up @@ -53,6 +52,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
// Tasks currently running
private readonly inProgress: WrappedNodes

private dirty: boolean
private inLoop: boolean

private log: Log
Expand All @@ -68,19 +68,20 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {

this.log = garden.log.createLog({ name: "graph-solver" })
this.inLoop = false
this.dirty = false
this.requestedTasks = {}
this.nodes = {}
this.pendingNodes = {}
this.inProgress = {}
this.lock = new AsyncLock()

this.on("start", () => {
this.log.silly(`GraphSolver: start`)
this.log.silly(() => `GraphSolver: start`)
this.emit("loop", {})
})

this.on("loop", () => {
this.log.silly(`GraphSolver: loop`)
this.log.silly(() => `GraphSolver: loop`)
this.loop()
})
}
Expand Down Expand Up @@ -214,31 +215,34 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
})
}

private getPendingGraph() {
private *getPendingLeaves(): Generator<TaskNode, undefined, undefined> {
const nodes = Object.values(this.pendingNodes)
const graph = new DependencyGraph<TaskNode>()
const visitedKeys = new Set<string>()

const addNode = (node: TaskNode) => {
function* addNode(node: TaskNode) {
const key = node.getKey()

if (node.isComplete()) {
if (node.isComplete() || visitedKeys.has(key)) {
return
}
graph.addNode(key, node)
visitedKeys.add(key)

// TODO: We could optimize further by making this method a generator too.
const deps = node.getRemainingDependencies()
if (deps.length === 0) {
// Leaf node found
yield node
return
}

for (const dep of deps) {
addNode(dep)
graph.addDependency(key, dep.getKey())
yield* addNode(dep)
}
}

for (const node of nodes) {
addNode(node)
yield* addNode(node)
}

return graph
}

start() {
Expand Down Expand Up @@ -286,6 +290,12 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
if (this.inLoop) {
return
}
if (!this.dirty) {
// The graph becomes dirty when a task is requested or completed: This means that either a new task node
// may have been added, or that a new result has been set on a node (which will affect the next graph
// evaluation).
return
}
this.inLoop = true

try {
Expand All @@ -299,36 +309,56 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}
}

const graph = this.getPendingGraph()
const leafGenerator = this.getPendingLeaves()
let leafCount = 0

if (graph.size() === 0) {
return
}
const inProgressNodes = Object.values(this.inProgress)

const leaves = graph.overallOrder(true)
const pending = leaves.map((key) => this.nodes[key])
// Enforce concurrency limits per task type and concurrency group key
const pendingConcurrencyGroupCapacitites: { [key: string]: number } = {}

const inProgressNodes = Object.values(this.inProgress)
const inProgressByGroup = groupBy(inProgressNodes, "type")

// Enforce concurrency limits per task type
const grouped = groupBy(pending, (n) => n.concurrencyGroupKey)
const limitedByGroup = Object.values(grouped).flatMap((nodes) => {
// Note: We can be sure there is at least one node in the array
const groupLimit = nodes[0].concurrencyLimit
const inProgress = inProgressByGroup[nodes[0].type] || []
return nodes.slice(0, groupLimit - inProgress.length)
})
this.dirty = false

// We could do this with a `groupBy`, but this is more efficient (and the loop method is run frequently).
for (const node of inProgressNodes) {
const groupKey = node.concurrencyGroupKey
if (!pendingConcurrencyGroupCapacitites[groupKey]) {
pendingConcurrencyGroupCapacitites[groupKey] = 0
}
pendingConcurrencyGroupCapacitites[groupKey]++
}
const leavesLimitedByGroup: TaskNode[] = []
for (const node of leafGenerator) {
if (leafCount >= this.hardConcurrencyLimit - inProgressNodes.length) {
// Enforce hard global limit. Note that we never get more leaves than this from `leafGenerator`, which can
// save on compute in big graphs.
break
}
leafCount++
const groupKey = node.concurrencyGroupKey
// Note: All nodes within a given concurrency group should have the same limit.
const groupLimit = node.concurrencyLimit
if (!pendingConcurrencyGroupCapacitites[groupKey]) {
pendingConcurrencyGroupCapacitites[groupKey] = 0
}
if (pendingConcurrencyGroupCapacitites[groupKey] >= groupLimit) {
// We've already reached the concurrency limit for this group, so we won't schedule this node now.
continue
}
// There's capacity available for this group, so we schedule the node
leavesLimitedByGroup.push(node)
pendingConcurrencyGroupCapacitites[groupKey]++
}
if (leafCount === 0) {
return
}

if (limitedByGroup.length === 0) {
if (leavesLimitedByGroup.length === 0) {
this.emit("loop", {})
return
}

// Enforce hard global limit
const nodesToProcess = limitedByGroup
.slice(0, this.hardConcurrencyLimit - inProgressNodes.length)
.filter((node) => !this.inProgress[node.getKey()])
const nodesToProcess = leavesLimitedByGroup.filter((node) => !this.inProgress[node.getKey()])

if (nodesToProcess.length === 0) {
this.emit("loop", {})
Expand Down Expand Up @@ -370,6 +400,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
* Processes a single task to completion, handling errors and providing its result to in-progress task batches.
*/
private async processNode(node: TaskNode, startedAt: Date) {
this.dirty = true
// Check for missing dependencies by calculating the input version so we can handle the exception
// as a user error before getting deeper into the control flow (where it would result in an internal
// error with a noisy stack trace).
Expand All @@ -381,6 +412,8 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}

this.log.silly(() => `Processing node ${taskStyle(node.getKey())}`)
// TODO-performance: Record that a result or an error has become available for this node, use for looping
// in evaluateRequests.

try {
const processResult = await node.execute()
Expand All @@ -394,6 +427,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}

private requestTask(params: TaskRequestParams) {
this.dirty = true
const request = new RequestTaskNode(params)
if (!this.requestedTasks[params.batchId]) {
this.requestedTasks[params.batchId] = {}
Expand All @@ -403,6 +437,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}

private evaluateRequests() {
// TODO-performance: Only iterate over requests with new results since last loop
for (const [_batchId, requests] of Object.entries(this.requestedTasks)) {
for (const request of Object.values(requests)) {
if (request.isComplete()) {
Expand Down Expand Up @@ -430,6 +465,8 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
this.log.silly(() => `Request ${request.getKey()} has ready status and force=false, no need to process.`)
this.completeTask({ ...status, node: request })
} else {
// TODO-performance: Add processing nodes for requests only once, during the solve call before looping.
// We create exactly one request node for each requested task, so this is known up front.
const processNode = this.getNode({ type: "process", task, statusOnly: request.statusOnly })
const result = this.getPendingResult(processNode)

Expand All @@ -452,6 +489,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}

private ensurePendingNode(node: TaskNode, dependant: TaskNode) {
this.dirty = true
const key = node.getKey()
const existing = this.pendingNodes[key]

Expand All @@ -465,6 +503,7 @@ export class GraphSolver extends TypedEventEmitter<SolverEvents> {
}

private completeTask(params: CompleteTaskParams & { node: TaskNode }) {
this.dirty = true
const node = params.node
const result = node.complete(params)
delete this.inProgress[node.getKey()]
Expand Down
Loading

0 comments on commit bd6c6ba

Please sign in to comment.