diff --git a/src/controllers.ts b/src/controllers.ts index 4a988d2..2f10c36 100644 --- a/src/controllers.ts +++ b/src/controllers.ts @@ -2,14 +2,17 @@ import Router from "koa-router" import { CustomContext, CustomState } from "./types" import multer from "@koa/multer" import gateKeeper, { - INSTANCES, + CPU_CORES_IN_SYSTEM, getBusyInstances, freeInstance, + getReservedMemory, + TOTAL_SYSTEM_MEMORY_GB, } from "./middleware/gatekeeper" import { BadRequestError } from "./util/error" import handleSubmission, { RunResult } from "./sandbox" import Axios from "axios" import { SupportedMimeTypes } from "./util/file_extractor" +import extractResourceLimitsFromRequest from "./util/extractResourceLimitsFromRequest" const upload = multer({ dest: "uploads/" }) export const ALLOWED_ALTERNATIVE_DOCKER_IMAGES = ["nygrenh/sandbox-next"] @@ -19,7 +22,12 @@ const api = new Router() .get("/status.json", async (ctx) => { ctx.body = { busy_instances: getBusyInstances(), - total_instances: INSTANCES, + // This is intentionally the same as busy_instances. reserved_cpu_cores is the more descriptive name, but busy_instances is kept for backwards compatibility. + reserved_cpu_cores: getBusyInstances(), + + total_instances: CPU_CORES_IN_SYSTEM, + reserved_memory: getReservedMemory(), + total_memory: TOTAL_SYSTEM_MEMORY_GB, } }) @@ -29,11 +37,13 @@ const api = new Router() // concurrent tasks in a middleware because we want to do it before receiving // the uploaded file. + const resourceLimits = extractResourceLimitsFromRequest(ctx.request.body) + if ( ctx.file.mimetype !== "application/x-tar" && ctx.file.mimetype !== "application/zstd" ) { - freeInstance() + freeInstance(resourceLimits) throw new BadRequestError( `Uploaded file type is not supported! Mimetype was: ${ctx.file.mimetype}}. 
Supported types are application/x-tar and application/zstd.`, ) @@ -47,7 +57,7 @@ const api = new Router() ALLOWED_ALTERNATIVE_DOCKER_IMAGES.indexOf(dockerImage) !== -1 ) ) { - freeInstance() + freeInstance(resourceLimits) throw new BadRequestError("Docker image was not whitelisted.") } @@ -66,12 +76,13 @@ const api = new Router() dockerImage, ctx.log.child({ async: true }), ctx.file.mimetype as SupportedMimeTypes, + resourceLimits, ) } catch (reason1) { ctx.log.error("Handling submission failed.", { reason: reason1 }) return } finally { - freeInstance() + freeInstance(resourceLimits) } ctx.log.info(`Notifying ${ctx.request.body.notify}...`, { @@ -89,7 +100,7 @@ const api = new Router() exit_code: output.exit_code, }) } catch (reason2) { - ctx.log.error("Notifying failed", { error: reason2.message }) + ctx.log.error("Notifying failed", { error: (reason2 as Error).message }) } }) diff --git a/src/middleware/gatekeeper.ts b/src/middleware/gatekeeper.ts index 7c0ec9e..1a348fd 100644 --- a/src/middleware/gatekeeper.ts +++ b/src/middleware/gatekeeper.ts @@ -1,20 +1,32 @@ import { CustomContext } from "../types" -import { cpus } from "os" +import { cpus, totalmem } from "os" import { SandboxBusyError } from "../util/error" +import extractResourceLimitsFromRequest, { + ResourceLimits, +} from "../util/extractResourceLimitsFromRequest" -export const INSTANCES = cpus().length -let busyInstances = 0 +export const CPU_CORES_IN_SYSTEM = cpus().length +export const TOTAL_SYSTEM_MEMORY_GB = totalmem() / 1024 ** 3 + +let reservedCPUCores = 0 +let reservedMemory = 0 export function getBusyInstances(): number { - return busyInstances + return reservedCPUCores +} + +export function getReservedMemory(): number { + return reservedMemory } -export function freeInstance(): void { - busyInstances-- +export function freeInstance(limits: ResourceLimits): void { + reservedCPUCores -= limits.cpus + reservedMemory -= limits.memoryGB } -function reserveInstance() { - busyInstances++ +function 
reserveInstance(limits: ResourceLimits): void { + reservedCPUCores += limits.cpus + reservedMemory += limits.memoryGB } // Enforces the server is not processing too many submissions at once. @@ -22,10 +34,17 @@ const gateKeeper = async ( ctx: CustomContext, next: () => Promise, ): Promise => { - if (busyInstances >= INSTANCES) { + const limits = extractResourceLimitsFromRequest(ctx.request.body) + console.info( + `Sandbox sumbission requesting ${limits.memoryGB}GB of memory and ${limits.cpus} CPUs`, + ) + if (reservedCPUCores + limits.cpus > CPU_CORES_IN_SYSTEM) { + throw new SandboxBusyError() + } + if (reservedMemory + limits.memoryGB > TOTAL_SYSTEM_MEMORY_GB) { throw new SandboxBusyError() } - reserveInstance() + reserveInstance(limits) await next() } diff --git a/src/sandbox.ts b/src/sandbox.ts index 0b271f3..2347439 100644 --- a/src/sandbox.ts +++ b/src/sandbox.ts @@ -4,6 +4,7 @@ import winston from "winston" import { exec as origExec } from "child_process" import { readFile as origReadFile, unlink as origUnlink } from "fs" import extract, { SupportedMimeTypes } from "./util/file_extractor" +import { ResourceLimits } from "./util/extractResourceLimitsFromRequest" const exec = promisify(origExec) const readFile = promisify(origReadFile) const unlink = promisify(origUnlink) @@ -29,6 +30,7 @@ const handleSubmission = async ( dockerImage: string | undefined, log: winston.Logger, mimetype: SupportedMimeTypes, + resourceLimits: ResourceLimits, ): Promise => { log.info("Handling submission") const outputPath = join("work", id) @@ -38,7 +40,13 @@ const handleSubmission = async ( await exec(`chmod -R 777 ${outputPath}`) try { await exec(`chmod -R 777 ${outputPath}`) - const results = await runTests(outputPath, id, dockerImage, log) + const results = await runTests( + outputPath, + id, + dockerImage, + log, + resourceLimits, + ) return results } catch (e) { log.error(`Error while running: ${e}`) @@ -62,6 +70,7 @@ async function runTests( submission_id: string, 
dockerImage: string | undefined, log: winston.Logger, + resourceLimits: ResourceLimits, ): Promise { const id = `sandbox-submission-${submission_id}` let status = "failed" @@ -79,11 +88,19 @@ async function runTests( const image = dockerImage || "nygrenh/sandbox-next" let command if (SUPERDEBUG) { - command = `docker create --name '${id}' --memory 2G --kernel-memory=50M --pids-limit=200 --ulimit nproc=10000:10000 --cpus 1 --mount type=bind,source=${resolve( + command = `docker create --name '${id}' --memory '${ + resourceLimits.memoryGB + }G' --kernel-memory=50M --pids-limit=200 --ulimit nproc=10000:10000 --cpus '${ + resourceLimits.cpus + }' --mount type=bind,source=${resolve( path, )},target=/app -it '${image}' /bin/sleep infinity ` } else { - command = `docker create --name '${id}' --network none --memory 2G --kernel-memory=50M --pids-limit=200 --ulimit nproc=10000:10000 --cpus 1 --cap-drop SETPCAP --cap-drop SETFCAP --cap-drop AUDIT_WRITE --cap-drop SETGID --cap-drop SETUID --cap-drop NET_BIND_SERVICE --cap-drop SYS_CHROOT --cap-drop NET_RAW --mount type=bind,source=${resolve( + command = `docker create --name '${id}' --network none --memory '${ + resourceLimits.memoryGB + }G' --kernel-memory=50M --pids-limit=200 --ulimit nproc=10000:10000 --cpus '${ + resourceLimits.cpus + }' --cap-drop SETPCAP --cap-drop SETFCAP --cap-drop AUDIT_WRITE --cap-drop SETGID --cap-drop SETUID --cap-drop NET_BIND_SERVICE --cap-drop SYS_CHROOT --cap-drop NET_RAW --mount type=bind,source=${resolve( path, )},target=/app -it '${image}' /app/init` } @@ -114,7 +131,7 @@ async function runTests( } catch (e) { const executionEndTime = new Date().getTime() const durationMs = executionEndTime - executionStartTime - log.error("Running tests failed", { error: e.message }) + log.error("Running tests failed", { error: (e as Error).message }) // If the process died within the last 5 seconds before timeout, it was // likely a timeout. 
if (durationMs > timeout_ms - 5000) { diff --git a/src/util/extractResourceLimitsFromRequest.ts b/src/util/extractResourceLimitsFromRequest.ts new file mode 100644 index 0000000..8d33c6c --- /dev/null +++ b/src/util/extractResourceLimitsFromRequest.ts @@ -0,0 +1,23 @@ +export interface ResourceLimits { + memoryGB: number + cpus: number +} + +const MAX_MEMORY_REQUEST_GB = 4 +const MAX_CPUS_REQUEST = 2 + +/** Extracts and validates CPU and memory requests. Requests that exceed the maximum are clamped down to it. */ +export default function extractResourceLimitsFromRequest( + // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/explicit-module-boundary-types + requestBody: any, +): ResourceLimits { + let memoryGB = Number(requestBody.memory_limit_gb ?? 1) + let cpus = Number(requestBody.cpu_limit ?? 1) + if (memoryGB > MAX_MEMORY_REQUEST_GB) { + memoryGB = MAX_MEMORY_REQUEST_GB + } + if (cpus > MAX_CPUS_REQUEST) { + cpus = MAX_CPUS_REQUEST + } + return { memoryGB, cpus } +} diff --git a/tests/submissions.test.ts b/tests/submissions.test.ts index ba00fbf..2514e1c 100644 --- a/tests/submissions.test.ts +++ b/tests/submissions.test.ts @@ -50,14 +50,47 @@ test("POST /tasks.json works", async () => { "docker_image", "eu.gcr.io/moocfi-public/tmc-sandbox-tmc-langs-rust", ) - .field("token", "SUPER_SECERET") + .field("token", "SUPER_SECRET") .field("notify", notifyAddress) .set("Accept", "application/json") .expect("Content-Type", /json/) .expect(200) }, ) - expect(notifyResult.token).toBe("SUPER_SECERET") + expect(notifyResult.token).toBe("SUPER_SECRET") + expect(notifyResult.exit_code).toBe("0") + expect(notifyResult.status).toBe("finished") + expect(notifyResult.vm_log.length).toBeGreaterThan(5) + const testOutput = JSON.parse(notifyResult.test_output) + expect(testOutput.status).toBe("PASSED") + expect(testOutput.testResults.length).toBe(1) +}) + +test("POST /tasks.json with higher resource limits works", async () => { + jest.setTimeout(60000) + 
const notifyResult: NotifyResult = await new Promise( + async (resolve, _reject) => { + const notifyAddress = createResultServer((res) => { + resolve(res) + }) + + await request(server) + .post("/tasks.json") + .attach("file", "tests/data/submission.tar") + .field( + "docker_image", + "eu.gcr.io/moocfi-public/tmc-sandbox-tmc-langs-rust", + ) + .field("memory_limit_gb", "3") + .field("cpu_limit", "2") + .field("token", "SUPER_SECRET") + .field("notify", notifyAddress) + .set("Accept", "application/json") + .expect("Content-Type", /json/) + .expect(200) + }, + ) + expect(notifyResult.token).toBe("SUPER_SECRET") expect(notifyResult.exit_code).toBe("0") expect(notifyResult.status).toBe("finished") expect(notifyResult.vm_log.length).toBeGreaterThan(5) @@ -83,7 +116,7 @@ test("POST /tasks.json works with .tar.zst files", async () => { "docker_image", "eu.gcr.io/moocfi-public/tmc-sandbox-tmc-langs-rust", ) - .field("token", "SUPER_SECERET") + .field("token", "SUPER_SECRET") .field("notify", notifyAddress) .set("Accept", "application/json") .expect("Content-Type", /json/) @@ -91,7 +124,7 @@ test("POST /tasks.json works with .tar.zst files", async () => { }, ) - expect(notifyResult.token).toBe("SUPER_SECERET") + expect(notifyResult.token).toBe("SUPER_SECRET") expect(notifyResult.exit_code).toBe("0") expect(notifyResult.status).toBe("finished") expect(notifyResult.vm_log.length).toBeGreaterThan(5) @@ -117,7 +150,7 @@ testSkipOnCi("POST /tasks.json does not crash with fork bombs", async () => { "docker_image", "eu.gcr.io/moocfi-public/tmc-sandbox-tmc-langs-rust", ) - .field("token", "SUPER_SECERET") + .field("token", "SUPER_SECRET") .field("notify", notifyAddress) .set("Accept", "application/json") .expect("Content-Type", /json/) @@ -125,7 +158,7 @@ testSkipOnCi("POST /tasks.json does not crash with fork bombs", async () => { }, ) - expect(notifyResult.token).toBe("SUPER_SECERET") + expect(notifyResult.token).toBe("SUPER_SECRET") // hard to predict what happens in this case 
const case1 = @@ -155,14 +188,14 @@ test("POST /tasks.json works when submission uses too much memory", async () => "docker_image", "eu.gcr.io/moocfi-public/tmc-sandbox-tmc-langs-rust", ) - .field("token", "SUPER_SECERET") + .field("token", "SUPER_SECRET") .field("notify", notifyAddress) .set("Accept", "application/json") .expect("Content-Type", /json/) .expect(200) }, ) - expect(notifyResult.token).toBe("SUPER_SECERET") + expect(notifyResult.token).toBe("SUPER_SECRET") expect(notifyResult.status).toBe("out-of-memory") }) @@ -178,14 +211,14 @@ test("POST /tasks.json works with java", async () => { .post("/tasks.json") .attach("file", "tests/data/java.tar") .field("docker_image", "eu.gcr.io/moocfi-public/tmc-sandbox-java") - .field("token", "SUPER_SECERET") + .field("token", "SUPER_SECRET") .field("notify", notifyAddress) .set("Accept", "application/json") .expect("Content-Type", /json/) .expect(200) }, ) - expect(notifyResult.token).toBe("SUPER_SECERET") + expect(notifyResult.token).toBe("SUPER_SECRET") expect(notifyResult.exit_code).toBe("0") expect(notifyResult.status).toBe("finished") expect(notifyResult.vm_log.length).toBeGreaterThan(5)