Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add inferencing routes #1097

Merged
merged 11 commits into from
Mar 4, 2024
11 changes: 11 additions & 0 deletions backend/config/default.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,15 @@ module.exports = {
registry: 'registry',
},
},

  inference: {
    // Enable / disable the inferencing service
    enabled: true,

    // Connection details for the inferencing service deployment.
    connection: {
      host: 'example.com',
    },

    // GPU processor types offered to users; these entries are spread into
    // ProcessorType (backend/src/models/v2/Inference.ts) alongside 'cpu'.
    // An empty object means CPU-only. NOTE(review): the exact key/value
    // shape expected here is not shown in this file — confirm against the
    // Inference model's usage.
    gpus: {},
  },
}
71 changes: 71 additions & 0 deletions backend/src/models/v2/Inference.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { Document, model, Schema } from 'mongoose'

import config from '../../utils/v2/config.js'

// Processor types available for an inference service: 'cpu' is always
// offered, plus any GPU types configured under config.inference.gpus.
export const ProcessorType = { CPU: 'cpu', ...config.inference.gpus }

// Union of the *values* of ProcessorType (e.g. 'cpu' | ...), despite the
// "Keys" name: `keyof typeof` yields the keys, and the outer indexed access
// resolves them to their value types.
export type ProcessorTypeKeys = (typeof ProcessorType)[keyof typeof ProcessorType]

/** Deployment settings for an inference service. */
export interface InferenceSetting {
  /** Processor the service runs on — one of the ProcessorType values. */
  processorType: ProcessorTypeKeys
  /**
   * Memory allocation; required for CPU deployments and rejected otherwise
   * (enforced by InferenceSchema). NOTE(review): units are not stated in
   * this file — confirm (e.g. MB vs GB) against the deployment code.
   */
  memory?: number
  /** Port the inference service listens on. */
  port: number
}

/**
 * Plain-object representation of an inference service: a container
 * image/tag pair attached to a model, plus its runtime settings. Use this
 * shape for plain object representations, e.g. for sending to the client.
 */
export interface InferenceInterface {
  /** ID of the model this inference service belongs to. */
  modelId: string
  /** Container image the service runs. */
  image: string
  /** Tag of the container image. */
  tag: string

  /** Free-text description; the schema defaults this to ''. */
  description: string

  /** Processor / memory / port configuration. */
  settings: InferenceSetting

  /** User that created the service. */
  createdBy: string
  // createdAt/updatedAt are maintained by mongoose via `timestamps: true`.
  createdAt: Date
  updatedAt: Date
}

/** Mongoose document type for an inference service. */
export type InferenceDoc = InferenceInterface & Document<any, any, InferenceInterface>

// Mongoose schema backing InferenceInterface. Documents live in the
// 'v2_model_inferences' collection; createdAt/updatedAt are managed
// automatically via `timestamps: true`.
const InferenceSchema = new Schema<InferenceInterface>(
  {
    modelId: { type: String, required: true },
    image: { type: String, required: true },
    tag: { type: String, required: true },

    description: { type: String, required: false, default: '' },

    settings: {
      processorType: { type: String, required: true },
      memory: {
        type: Number,
        // A memory allocation must be provided for CPU deployments.
        required: function (this: InferenceInterface): boolean {
          return this.settings.processorType === ProcessorType.CPU
        },
        // Conversely, a memory allocation may only be supplied for CPU
        // deployments. Uses the { validator, message } form so the
        // validator returns a boolean instead of mixing return-true with
        // throw — the original threw this (misleading) message even for a
        // CPU deployment whose memory value was falsy (e.g. 0).
        validate: {
          validator: function (this: InferenceInterface, val: unknown): boolean {
            return val == null || this.settings.processorType === ProcessorType.CPU
          },
          message: `Cannot specify memory allocation without choosing cpu as the processor type`,
        },
      },
      port: { type: Number, required: true },
    },

    createdBy: { type: String, required: true },
  },
  {
    timestamps: true,
    collection: 'v2_model_inferences',
  },
)

// An inference service is uniquely identified by the model it belongs to
// together with the image/tag pair it runs.
InferenceSchema.index({ modelId: 1, image: 1, tag: 1 }, { unique: true })

const InferenceModel = model<InferenceInterface>('v2_Model_Inference', InferenceSchema)

export default InferenceModel
9 changes: 9 additions & 0 deletions backend/src/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ import { postStartMultipartUpload } from './routes/v2/model/file/postStartMultip
import { getModel } from './routes/v2/model/getModel.js'
import { getModelsSearch } from './routes/v2/model/getModelsSearch.js'
import { getImages } from './routes/v2/model/images/getImages.js'
import { getInferences } from './routes/v2/model/inferencing/getInferenceService.js'
import { postInference } from './routes/v2/model/inferencing/postInferenceService.js'
import { putInference } from './routes/v2/model/inferencing/putInferenceService.js'
import { getModelCard } from './routes/v2/model/modelcard/getModelCard.js'
import { getModelCardRevisions } from './routes/v2/model/modelcard/getModelCardRevisions.js'
import { postFromSchema } from './routes/v2/model/modelcard/postFromSchema.js'
Expand Down Expand Up @@ -284,6 +287,12 @@ server.post('/api/v2/model/:modelId/files/upload/multipart/start', ...postStartM
server.post('/api/v2/model/:modelId/files/upload/multipart/finish', ...postFinishMultipartUpload)
server.delete('/api/v2/model/:modelId/file/:fileId', ...deleteFile)

// Inferencing routes are only mounted when the inferencing service is
// enabled in configuration (config.inference.enabled).
if (config.inference.enabled) {
  server.get('/api/v2/model/:modelId/inferences', ...getInferences)
  server.post('/api/v2/model/:modelId/inference', ...postInference)
  server.put('/api/v2/model/:modelId/inference', ...putInference)
}

// *server.get('/api/v2/model/:modelId/release/:semver/file/:fileCode/list', ...getModelFileList)
// *server.get('/api/v2/model/:modelId/release/:semver/file/:fileCode/raw', ...getModelFileRaw)

Expand Down
52 changes: 52 additions & 0 deletions backend/src/routes/v2/model/inferencing/getInferenceService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { InferenceInterface } from '../../../../models/v2/Inference.js'
import { getInferencesByModel } from '../../../../services/v2/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/v2/specification.js'
import { parse } from '../../../../utils/v2/validate.js'

// Request validation for GET /api/v2/model/{modelId}/inferences.
export const getInferenceSchema = z.object({
  params: z.object({
    modelId: z.string({
      required_error: 'Must specify model id as param',
    }),
  }),
})

// Register this endpoint in the generated OpenAPI specification.
registerPath({
  method: 'get',
  path: '/api/v2/model/{modelId}/inferences',
  tags: ['inference'],
  description: 'Get all of the inferencing services associated with a model.',
  schema: getInferenceSchema,
  responses: {
    200: {
      description: 'An array of inferencing services.',
      content: {
        'application/json': {
          schema: z.object({
            inferences: z.array(inferenceInterfaceSchema),
          }),
        },
      },
    },
  },
})

/** Response body shape for GET /api/v2/model/{modelId}/inferences. */
interface GetInferenceService {
  inferences: Array<InferenceInterface>
}

/**
 * Express handler chain for listing every inferencing service attached to
 * a model. Validates the request against getInferenceSchema, then
 * delegates to the inference service layer.
 */
export const getInferences = [
  bodyParser.json(),
  async (req: Request, res: Response<GetInferenceService>) => {
    const {
      params: { modelId },
    } = parse(req, getInferenceSchema)

    const inferences = await getInferencesByModel(req.user, modelId)

    return res.json({ inferences })
  },
]
63 changes: 63 additions & 0 deletions backend/src/routes/v2/model/inferencing/postInferenceService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { InferenceInterface, ProcessorType } from '../../../../models/v2/Inference.js'
import { createInference } from '../../../../services/v2/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/v2/specification.js'
import { parse } from '../../../../utils/v2/validate.js'

export const postInferenceSchema = z.object({
params: z.object({
modelId: z.string({
required_error: 'Must specify model id as param',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove custom Zod error

}),
}),
body: z.object({
image: z.string(),
tag: z.string(),
description: z.string(),
settings: z.object({
processorType: z.nativeEnum(ProcessorType),
memory: z.number().optional(),
port: z.number(),
}),
}),
})

// Register this endpoint in the generated OpenAPI specification.
registerPath({
  method: 'post',
  path: '/api/v2/model/{modelId}/inference',
  tags: ['inference'],
  // Fixed grammar: 'a inferencing' -> 'an inferencing'.
  description: 'Create an inferencing service within Bailo',
  schema: postInferenceSchema,
  responses: {
    200: {
      description: 'The created inferencing service.',
      content: {
        'application/json': {
          schema: inferenceInterfaceSchema,
        },
      },
    },
  },
})

/** Response body shape for POST /api/v2/model/{modelId}/inference. */
interface PostInferenceService {
  inference: InferenceInterface
}

export const postInference = [
bodyParser.json(),
async (req: Request, res: Response<PostInferenceService>) => {
const {
params: { modelId },
body,
} = parse(req, postInferenceSchema)

const inference = await createInference(req.user, modelId, body)
return res.json({
inference,
})
},
]
63 changes: 63 additions & 0 deletions backend/src/routes/v2/model/inferencing/putInferenceService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'
import { z } from 'zod'

import { InferenceInterface, ProcessorType } from '../../../../models/v2/Inference.js'
import { updateInference } from '../../../../services/v2/inference.js'
import { inferenceInterfaceSchema, registerPath } from '../../../../services/v2/specification.js'
import { parse } from '../../../../utils/v2/validate.js'

// Request validation for PUT /api/v2/model/{modelId}/inference.
// The target inference is identified by modelId plus the image/tag pair in
// the body (the schema's unique index), so no separate inference id param
// is taken.
export const putInferenceSchema = z.object({
  params: z.object({
    modelId: z.string({
      required_error: 'Must specify model id as param',
    }),
  }),
  body: z.object({
    image: z.string(),
    tag: z.string(),
    description: z.string(),
    settings: z.object({
      processorType: z.nativeEnum(ProcessorType),
      memory: z.number().optional(),
      port: z.number(),
    }),
  }),
})

// Register this endpoint in the generated OpenAPI specification.
registerPath({
  method: 'put',
  path: '/api/v2/model/{modelId}/inference',
  tags: ['inference'],
  // Fixed grammar ('a inferencing' -> 'an inferencing').
  description: 'Update an inferencing service within Bailo',
  schema: putInferenceSchema,
  responses: {
    200: {
      // Fixed copy-paste from the POST route: a PUT returns the *updated*
      // service, not a newly created one.
      description: 'The updated inferencing service.',
      content: {
        'application/json': {
          schema: inferenceInterfaceSchema,
        },
      },
    },
  },
})

/** Response body shape for PUT /api/v2/model/{modelId}/inference. */
interface PutInferenceService {
  inference: InferenceInterface
}

export const putInference = [
bodyParser.json(),
async (req: Request, res: Response<PutInferenceService>) => {
const {
params: { modelId },
body,
} = parse(req, putInferenceSchema)

const inference = await updateInference(req.user, modelId, body)
return res.json({
inference,
})
},
]
5 changes: 5 additions & 0 deletions backend/src/routes/v2/specification.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ export const getSpecification = [
name: 'user',
description: 'A user represents an individual who has accessed this service.',
},
{
name: 'inference',
description:
'An inference service is used to run models within Bailo. Each contains settings for a specific configuration',
},
],
}),
)
Expand Down
Loading
Loading