Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add inferencing routes #1097

Merged
merged 11 commits into from
Mar 4, 2024
9 changes: 9 additions & 0 deletions backend/config/default.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ module.exports = {
registry: {
host: 'localhost:8080',
},

inference: {
enabled: true,
connection: {
host: 'example.com',
},

gpus: {},
},
},

connectors: {
Expand Down
11 changes: 11 additions & 0 deletions backend/src/connectors/audit/Base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Request } from 'express'

import { AccessRequestDoc } from '../../models/AccessRequest.js'
import { FileInterface, FileInterfaceDoc } from '../../models/File.js'
import { InferenceDoc } from '../../models/Inference.js'
import { ModelCardInterface, ModelDoc, ModelInterface } from '../../models/Model.js'
import { ReleaseDoc } from '../../models/Release.js'
import { ReviewInterface } from '../../models/Review.js'
Expand Down Expand Up @@ -88,6 +89,11 @@ export const AuditInfo = {
UpdateSchema: { typeId: 'UpdateSchema', description: 'Schema Updated', auditKind: AuditKind.Update },

ViewModelImages: { typeId: 'ViewModelImages', description: 'Model Images Viewed', auditKind: AuditKind.View },

CreateInference: { typeId: 'CreateInference', description: 'Inference Service Created', auditKind: AuditKind.Create },
UpdateInference: { typeId: 'UpdateInference', description: 'Inference Service Updated', auditKind: AuditKind.Update },
ViewInference: { typeId: 'ViewInference', description: 'Inference Service Viewed', auditKind: AuditKind.View },
ViewInferences: { typeId: 'ViewInferences', description: 'Inferences Viewed', auditKind: AuditKind.View },
} as const
/**
 * Union of the descriptor objects stored in `AuditInfo` (each entry's
 * `{ typeId, description, auditKind }` value).
 *
 * Note: despite the name, this indexes `AuditInfo` by its keys and therefore
 * yields the type of the map's values, not the key names themselves.
 */
export type AuditInfoKeys = (typeof AuditInfo)[keyof typeof AuditInfo]

Expand Down Expand Up @@ -131,6 +137,11 @@ export abstract class BaseAuditConnector {
/** Audit hook for the deletion of a schema, identified by its id. */
abstract onDeleteSchema(req: Request, schemaId: string)
/** Audit hook for an update to a schema document. */
abstract onUpdateSchema(req: Request, schema: SchemaDoc)

/** Audit hook for the creation of an inference service. */
abstract onCreateInference(req: Request, inference: InferenceDoc)
/** Audit hook for an update to an inference service. */
abstract onUpdateInference(req: Request, inference: InferenceDoc)
/** Audit hook for viewing a single inference service. */
abstract onViewInference(req: Request, inference: InferenceDoc)
/** Audit hook for viewing a list of inference services. */
abstract onViewInferences(req: Request, inferences: InferenceDoc[])

abstract onViewModelImages(
req: Request,
modelId: string,
Expand Down
5 changes: 5 additions & 0 deletions backend/src/connectors/audit/__mocks__/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ const audit = {
onDeleteSchema: vi.fn(),
onSearchSchemas: vi.fn(),

onCreateInference: vi.fn(),
onViewInference: vi.fn(),
onUpdateInference: vi.fn(),
onViewInferences: vi.fn(),

onViewModelImages: vi.fn(),

onError: vi.fn(),
Expand Down
5 changes: 5 additions & 0 deletions backend/src/connectors/audit/silly.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Request } from 'express'

import { AccessRequestDoc } from '../../models/AccessRequest.js'
import { FileInterface, FileInterfaceDoc } from '../../models/File.js'
import { InferenceDoc } from '../../models/Inference.js'
import { ModelCardInterface, ModelDoc, ModelInterface } from '../../models/Model.js'
import { ReleaseDoc } from '../../models/Release.js'
import { ReviewInterface } from '../../models/Review.js'
Expand Down Expand Up @@ -49,5 +50,9 @@ export class SillyAuditConnector extends BaseAuditConnector {
// Every hook below has an empty body, so this connector records nothing for
// these events. Parameters are underscore-prefixed because they are unused.
onUpdateSchema(_req: Request, _schema: SchemaDoc) {}
onViewSchema(_req: Request, _schema: SchemaInterface) {}
onViewModelImages(_req: Request, _modelId: string, _images: { repository: string; name: string; tags: string[] }[]) {}
// Inference hooks: only the list variant receives an array, so only it uses a plural name.
onViewInferences(_req: Request, _inferences: InferenceDoc[]) {}
onViewInference(_req: Request, _inference: InferenceDoc) {}
onUpdateInference(_req: Request, _inference: InferenceDoc) {}
onCreateInference(_req: Request, _inference: InferenceDoc) {}
onError(_req: Request, _error: BailoError) {}
}
35 changes: 35 additions & 0 deletions backend/src/connectors/audit/stdout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Request } from 'express'

import { AccessRequestDoc } from '../../models/AccessRequest.js'
import { FileInterface, FileInterfaceDoc } from '../../models/File.js'
import { InferenceDoc } from '../../models/Inference.js'
import { ModelCardInterface, ModelDoc, ModelInterface } from '../../models/Model.js'
import { ReleaseDoc } from '../../models/Release.js'
import { ReviewInterface } from '../../models/Review.js'
Expand Down Expand Up @@ -274,4 +275,38 @@ export class StdoutAuditConnector extends BaseAuditConnector {
})
req.log.info(event, req.audit.description)
}

/**
 * Emits an info-level audit log entry for a single inference service being viewed.
 *
 * The event carries the model id plus the image name and tag of the service.
 *
 * NOTE(review): this event uses the keys `imageName`/`imageTag`, whereas the
 * other inference hooks in this class use `image`/`tag` — confirm whether the
 * difference is intentional before relying on either shape.
 */
onViewInference(req: Request, inference: InferenceDoc) {
  this.checkEventType(AuditInfo.ViewInference, req)

  const { modelId, image: imageName, tag: imageTag } = inference
  const event = this.generateEvent(req, { modelId, imageName, imageTag })

  req.log.info(event, req.audit.description)
}

/**
 * Emits an info-level audit log entry for a list of inference services being viewed.
 *
 * Each service is reduced to `{ modelId, image, tag }` and the list is attached
 * to the event under `results`.
 */
onViewInferences(req: Request, inferences: InferenceDoc[]) {
  this.checkEventType(AuditInfo.ViewInferences, req)

  const results = inferences.map(({ modelId, image, tag }) => ({ modelId, image, tag }))
  const event = this.generateEvent(req, { results })

  req.log.info(event, req.audit.description)
}

/**
 * Emits an info-level audit log entry for a newly created inference service,
 * identified in the event by `modelId`, `image` and `tag`.
 */
onCreateInference(req: Request, inference: InferenceDoc) {
  this.checkEventType(AuditInfo.CreateInference, req)

  const { modelId, image, tag } = inference
  const event = this.generateEvent(req, { modelId, image, tag })

  req.log.info(event, req.audit.description)
}

/**
 * Emits an info-level audit log entry for an updated inference service,
 * identified in the event by `modelId`, `image` and `tag`.
 */
onUpdateInference(req: Request, inference: InferenceDoc) {
  this.checkEventType(AuditInfo.UpdateInference, req)

  const { modelId, image, tag } = inference
  const event = this.generateEvent(req, { modelId, image, tag })

  req.log.info(event, req.audit.description)
}
}
62 changes: 62 additions & 0 deletions backend/src/models/Inference.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { Document, model, Schema } from 'mongoose'

/** Settings stored under `settings` on an inference service record. */
export interface InferenceSetting {
// Processor selector. The Mongoose schema in this file special-cases the value 'cpu'.
processorType: string
// Optional in the type, but the schema marks it required when processorType is 'cpu'.
// NOTE(review): units are not stated anywhere in this file — confirm with callers.
memory?: number
// NOTE(review): presumably the port the service listens on — confirm.
port: number
}

/**
 * Shape of an inference service record.
 *
 * The combination of `modelId`, `image` and `tag` is covered by a unique index
 * declared on the schema in this file.
 */
export interface InferenceInterface {
modelId: string
image: string
tag: string

// Typed as mandatory here; the schema treats it as optional with a default of ''.
description: string

settings: InferenceSetting

createdBy: string
// The schema enables the `timestamps` option, which covers these two fields.
createdAt: Date
updatedAt: Date
}

/** An inference record intersected with the Mongoose `Document` type for that record. */
export type InferenceDoc = InferenceInterface & Document<any, any, InferenceInterface>

/** `required` rule for `settings.memory`: mandatory only when the processor type is 'cpu'. */
function isMemoryRequired(this: InferenceInterface): boolean {
  return this.settings.processorType === 'cpu'
}

/**
 * Validator for `settings.memory`.
 *
 * Passes only when the processor type is 'cpu' AND the value is truthy; in
 * every other case it throws. Note that a falsy value such as 0 combined with
 * 'cpu' is rejected with the same message as a non-cpu processor type.
 */
function validateMemory(this: InferenceInterface, val: any): boolean {
  const isCpu = this.settings.processorType === 'cpu'
  if (!isCpu || !val) {
    throw new Error(`Cannot specify memory allocation without choosing cpu as the processor type`)
  }
  return true
}

/** Mongoose schema for inference service records, stored in the `v2_model_inferences` collection. */
const InferenceSchema = new Schema<InferenceInterface>(
  {
    // Identity of the service: which model, and which image/tag it runs.
    modelId: { type: String, required: true },
    image: { type: String, required: true },
    tag: { type: String, required: true },

    description: { type: String, required: false, default: '' },

    settings: {
      processorType: { type: String, required: true },
      memory: { type: Number, required: isMemoryRequired, validate: validateMemory },
      port: { type: Number, required: true },
    },

    createdBy: { type: String, required: true },
  },
  { timestamps: true, collection: 'v2_model_inferences' },
)

// Enforce at most one inference record per (modelId, image, tag) combination.
InferenceSchema.index({ modelId: 1, image: 1, tag: 1 }, { unique: true })

// Mongoose model registered under the name 'v2_Model_Inference'.
const InferenceModel = model<InferenceInterface>('v2_Model_Inference', InferenceSchema)

export default InferenceModel
9 changes: 9 additions & 0 deletions backend/src/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import { postStartMultipartUpload } from './routes/v2/model/file/postStartMultip
import { getModel } from './routes/v2/model/getModel.js'
import { getModelsSearch } from './routes/v2/model/getModelsSearch.js'
import { getImages } from './routes/v2/model/images/getImages.js'
import { getInference } from './routes/v2/model/inferencing/getInferenceService.js'
import { getInferences } from './routes/v2/model/inferencing/getInferenceServices.js'
import { postInference } from './routes/v2/model/inferencing/postInferenceService.js'
import { putInference } from './routes/v2/model/inferencing/putInferenceService.js'
import { getModelCard } from './routes/v2/model/modelcard/getModelCard.js'
import { getModelCardRevisions } from './routes/v2/model/modelcard/getModelCardRevisions.js'
import { postFromSchema } from './routes/v2/model/modelcard/postFromSchema.js'
Expand Down Expand Up @@ -140,6 +144,11 @@ server.post('/api/v2/model/:modelId/files/upload/multipart/start', ...postStartM
server.post('/api/v2/model/:modelId/files/upload/multipart/finish', ...postFinishMultipartUpload)
server.delete('/api/v2/model/:modelId/file/:fileId', ...deleteFile)

// Inference service routes. Each imported handler is an array (middleware
// followed by the route function), hence the spread into the handler list.
server.get('/api/v2/model/:modelId/inferences', ...getInferences)
server.get('/api/v2/model/:modelId/inference/:image/:tag', ...getInference)
server.post('/api/v2/model/:modelId/inference', ...postInference)
server.put('/api/v2/model/:modelId/inference/:image/:tag', ...putInference)

// *server.get('/api/v2/model/:modelId/release/:semver/file/:fileCode/list', ...getModelFileList)
// *server.get('/api/v2/model/:modelId/release/:semver/file/:fileCode/raw', ...getModelFileRaw)

Expand Down
58 changes: 58 additions & 0 deletions backend/src/routes/v2/model/inferencing/getInferenceService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { AuditInfo } from '../../../../connectors/audit/Base.js'
import audit from '../../../../connectors/audit/index.js'
import { InferenceInterface } from '../../../../models/Inference.js'
import { getInferenceByImage } from '../../../../services/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/specification.js'
import { parse } from '../../../../utils/validate.js'

/** Request schema for fetching one inference service: three string route parameters. */
export const getInferenceSchema = z.object({
params: z.object({
modelId: z.string(),
image: z.string(),
tag: z.string(),
}),
})

// Describes the GET single-inference route to `registerPath`: method, path,
// request schema and the shape of the 200 response (`{ inference }`).
// NOTE(review): presumably this feeds the generated API specification — confirm
// against services/specification.
registerPath({
method: 'get',
path: '/api/v2/model/{modelId}/inference/{image}/{tag}',
tags: ['inference'],
description: 'Get details for an inferencing service within the cluster.',
schema: getInferenceSchema,
responses: {
200: {
description: 'Details for a specific inferencing instance.',
content: {
'application/json': {
schema: z.object({ inference: inferenceInterfaceSchema }),
},
},
},
},
})

/** JSON body returned by the `getInference` handler. */
interface GetInferenceService {
inference: InferenceInterface
}

/**
 * Handler chain for `GET /api/v2/model/:modelId/inference/:image/:tag`.
 *
 * Tags the request with the `ViewInference` audit type, extracts the route
 * parameters via `parse`, looks the service up for the requesting user, emits
 * the audit event and responds with `{ inference }`.
 */
export const getInference = [
  bodyParser.json(),
  async (req: Request, res: Response<GetInferenceService>) => {
    req.audit = AuditInfo.ViewInference

    const { params } = parse(req, getInferenceSchema)
    const inference = await getInferenceByImage(req.user, params.modelId, params.image, params.tag)

    // The audit event is awaited before the response is sent.
    await audit.onViewInference(req, inference)
    return res.json({ inference })
  },
]
56 changes: 56 additions & 0 deletions backend/src/routes/v2/model/inferencing/getInferenceServices.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { AuditInfo } from '../../../../connectors/audit/Base.js'
import audit from '../../../../connectors/audit/index.js'
import { InferenceInterface } from '../../../../models/Inference.js'
import { getInferencesByModel } from '../../../../services/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/specification.js'
import { parse } from '../../../../utils/validate.js'

/** Request schema for listing a model's inference services: a single string `modelId` route parameter. */
export const getInferencesSchema = z.object({
params: z.object({
modelId: z.string(),
}),
})

// Describes the GET inference-list route to `registerPath`: method, path,
// request schema and the shape of the 200 response (`{ inferences: [...] }`).
// NOTE(review): presumably this feeds the generated API specification — confirm
// against services/specification.
registerPath({
method: 'get',
path: '/api/v2/model/{modelId}/inferences',
tags: ['inference'],
description: 'Get all of the inferencing services associated with a model.',
schema: getInferencesSchema,
responses: {
200: {
description: 'An array of inferencing services.',
content: {
'application/json': {
schema: z.object({
inferences: z.array(inferenceInterfaceSchema),
}),
},
},
},
},
})

/**
 * JSON body returned by the `getInferences` handler.
 *
 * NOTE(review): the interface name is singular although the payload is a list.
 */
interface GetInferenceService {
inferences: Array<InferenceInterface>
}

/**
 * Handler chain for `GET /api/v2/model/:modelId/inferences`.
 *
 * Tags the request with the `ViewInferences` audit type, extracts `modelId`
 * via `parse`, loads that model's inference services for the requesting user,
 * emits the audit event and responds with `{ inferences }`.
 */
export const getInferences = [
  bodyParser.json(),
  async (req: Request, res: Response<GetInferenceService>) => {
    req.audit = AuditInfo.ViewInferences

    const {
      params: { modelId },
    } = parse(req, getInferencesSchema)
    const inferences = await getInferencesByModel(req.user, modelId)

    // The audit event is awaited before the response is sent.
    await audit.onViewInferences(req, inferences)
    return res.json({ inferences })
  },
]
67 changes: 67 additions & 0 deletions backend/src/routes/v2/model/inferencing/postInferenceService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { AuditInfo } from '../../../../connectors/audit/Base.js'
import audit from '../../../../connectors/audit/index.js'
import { InferenceInterface } from '../../../../models/Inference.js'
import { createInference } from '../../../../services/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/specification.js'
import { parse } from '../../../../utils/validate.js'

/**
 * Request schema for creating an inference service: a `modelId` route
 * parameter plus a JSON body describing the image, tag, description and settings.
 */
export const postInferenceSchema = z.object({
params: z.object({
modelId: z.string(),
}),
body: z.object({
image: z.string(),
tag: z.string(),
description: z.string(),
settings: z.object({
processorType: z.string(),
// Optional at this layer; the Mongoose model applies its own rules to this field.
memory: z.number().optional(),
port: z.number(),
}),
}),
})

// Describes the POST create-inference route to `registerPath`: method, path,
// request schema and the shape of the 200 response.
// The handler for this route responds with `{ inference }`, so the documented
// response wraps the inference schema in an object with that key (matching the
// GET single-inference route) rather than documenting the bare inference.
registerPath({
  method: 'post',
  path: '/api/v2/model/{modelId}/inference',
  tags: ['inference'],
  description: 'Create a inferencing service within Bailo',
  schema: postInferenceSchema,
  responses: {
    200: {
      description: 'The created inferencing service.',
      content: {
        'application/json': {
          schema: z.object({ inference: inferenceInterfaceSchema }),
        },
      },
    },
  },
})

/** JSON body returned by the `postInference` handler. */
interface PostInferenceService {
inference: InferenceInterface
}

/**
 * Handler chain for `POST /api/v2/model/:modelId/inference`.
 *
 * Tags the request with the `CreateInference` audit type, extracts the route
 * parameters and body via `parse`, creates the service on behalf of the
 * requesting user, emits the audit event and responds with `{ inference }`.
 */
export const postInference = [
  bodyParser.json(),
  async (req: Request, res: Response<PostInferenceService>) => {
    req.audit = AuditInfo.CreateInference

    const { params, body } = parse(req, postInferenceSchema)
    const inference = await createInference(req.user, params.modelId, body)

    // The audit event is awaited before the response is sent.
    await audit.onCreateInference(req, inference)
    return res.json({ inference })
  },
]
Loading
Loading