server.cfg
{
    "host": "0.0.0.0",
    "port": 8000,
    "models": [
        {
            "model": "/home/test/llm-models/chatglm3-ggml.bin",
            "model_alias": "chatglm-3",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/chatglm4-ggml.bin",
            "model_alias": "glm-4",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/bge-large-zh-v1.5-q4_k_m.gguf",
            "model_alias": "bge-large-zh-v1.5",
            "chat_format": "bert",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "embedding": true,
            "n_batch": 512,
            "verbose": false
        },
        {
            "model": "/home/test/.cache/huggingface/hub/models--BAAI--bge-m3/snapshots/babcf60cae0a1f438d7ade582983d4ba462303c2/onnx/",
            "model_alias": "bge-m3",
            "chat_format": "bge-onnx",
            "embedding": true,
            "n_gpu_layers": 0,
            "n_ctx": 8192,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/chatglm3-ggml-q8.bin",
            "model_alias": "chatglm-3-q8",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/gorilla-openfunctions-v2-q4_K_M.gguf",
            "hf_pretrained_model_name_or_path": "gorilla-llm/gorilla-openfunctions-v2",
            "model_alias": "openfunctions",
            "chat_format": "openfunctions",
            "n_gpu_layers": 0,
            "n_ctx": 4096,
            "embedding": false,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/functionary-small-v2.4.Q4_0.gguf",
            "model_alias": "functionary",
            "chat_format": "functionary-v2",
            "hf_pretrained_model_name_or_path": "meetkai/functionary-small-v2.4",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "embedding": false,
            "n_ctx": 8192,
            "use_mmap": true
        },
        {
            "model": "/home/test/llm-models/functionary-small-v2.5.Q4_0.gguf",
            "model_alias": "functionary-v2.5",
            "chat_format": "functionary-v2",
            "hf_pretrained_model_name_or_path": "meetkai/functionary-small-v2.5",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "embedding": false,
            "n_ctx": 8192,
            "use_mmap": true
        },
        {
            "model": "/home/test/llm-models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
            "hf_pretrained_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
            "model_alias": "llama-3-8b",
            "chat_format": "llama-3",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 8192,
            "embedding": true
        },
        {
            "model": "/home/test/llm-models/ggml-model-q4_k.gguf",
            "model_alias": "llava",
            "chat_format": "llava-1-5",
            "clip_model_path": "/home/test/llm-models/mmproj-model-f16.gguf",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_ctx": 4096,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
            "model_alias": "mistral-7b",
            "chat_format": "mistral-instruct",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/mixtral-8x7b-instruct-v0.1.Q3_K_M.gguf",
            "model_alias": "mixtral-8x7b-instruct",
            "chat_format": "mistral-instruct",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/sqlcoder-7b-2.Q4_K_M.gguf",
            "model_alias": "sqlcoder",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 16384,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/qwen2-0_5b-instruct-q4_k_m.gguf",
            "model_alias": "qwen",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/Qwen2-1.5B-Instruct.Q4_K_M.gguf",
            "model_alias": "qwen2-1.5b",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/qwen2-7b-instruct-q5_k_m.gguf",
            "model_alias": "qwen2-7b",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/Baichuan2-13B-Chat-Q4_K_M.gguf",
            "model_alias": "baichuan-2",
            "chat_format": "baichuan-2",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 1024
        }
    ]
}
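
Usage sketch (assumes this file is loaded by llama-cpp-python's OpenAI-compatible server, e.g. `python -m llama_cpp.server --config_file server.cfg`; the host, port, and model_alias values come from the config above, while the api_key, prompt, and embedding input below are illustrative placeholders):

# Minimal client-side sketch against the server configured above.
# Assumption: the server is reachable at the host/port from this config
# and exposes each "model_alias" as an OpenAI model name under /v1.
from openai import OpenAI

# Dummy api_key: the local server does not require one unless an api_key
# is configured on the server side.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-no-key-needed")

# Chat completion against one of the chat aliases (e.g. "qwen2-7b").
chat = client.chat.completions.create(
    model="qwen2-7b",
    messages=[{"role": "user", "content": "Hello!"}],  # placeholder prompt
)
print(chat.choices[0].message.content)

# Embeddings against an embedding-enabled alias (e.g. "bge-large-zh-v1.5").
emb = client.embeddings.create(
    model="bge-large-zh-v1.5",
    input=["an example sentence to embed"],  # placeholder input
)
print(len(emb.data[0].embedding))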