-
Notifications
You must be signed in to change notification settings - Fork 1
/
.project-metadata.yaml
79 lines (72 loc) · 2.6 KB
/
.project-metadata.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Project identity and versioning metadata (all top-level scalar keys).
name: "RAG Studio"
description: "Build a RAG application to ask questions about your documents"
author: "Cloudera"
# Kept quoted so the value stays a string instead of being typed as a date.
date: "2024-09-10"
# NOTE(review): unquoted 1.0 is parsed as a float, not a string; left as-is
# because the consumer's expected type is not visible here -- confirm.
specification_version: 1.0
prototype_version: 1.0
# Project-level environment variables. Each entry maps a variable name to its
# default value, a human-readable description, and a `required` flag.
# Every variable here is optional (`required: false`). Defaults are quoted so
# they are always strings, including the boolean-looking "false".
environment_variables:
  AWS_DEFAULT_REGION:
    default: "us-west-2"
    description: "AWS Region where Bedrock is configured and the S3 bucket is located. Required if Bedrock or S3 is used."
    required: false
  S3_RAG_DOCUMENT_BUCKET:
    default: ""
    description: "The S3 bucket where uploaded documents are stored. Only set if S3 is used for file storage."
    required: false
  S3_RAG_BUCKET_PREFIX:
    default: "rag-studio"
    description: "A prefix added to all S3 paths used by Rag Studio. Only recommended if S3 is used for file storage."
    required: false
  # Credentials default to empty strings; no secret values are stored here.
  AWS_ACCESS_KEY_ID:
    default: ""
    description: "AWS Access Key ID. Required if Bedrock or S3 is used."
    required: false
  AWS_SECRET_ACCESS_KEY:
    default: ""
    description: "AWS Secret Access Key. Required if Bedrock or S3 is used."
    required: false
  USE_ENHANCED_PDF_PROCESSING:
    default: "false"
    description: "Use enhanced PDF processing for better text extraction. This option makes PDF parsing take significantly longer. A GPU is highly recommended to speed up the process."
    required: false
  CAII_DOMAIN:
    default: ""
    description: "The domain of the CAII service. Setting this will enable CAII as the sole source for both inference and embedding models."
    required: false
# Runtime(s) for the project: a single entry described by editor, kernel,
# and edition.
runtimes:
  - editor: JupyterLab
    # Plain scalar containing a space, so it is read as the string
    # "Python 3.10" (not as a number).
    kernel: Python 3.10
    edition: Standard
# Task list, in file order: install dependencies in a session, create the
# refresh job, run that job, then start the application.
tasks:
  # Session that runs the dependency install script.
  - type: run_session
    name: Download/Install Project Dependencies
    script: scripts/01_install_base.py
    short_summary: Install Project Dependencies
    kernel: python3
    cpu: 2
    memory: 4

  # Defines the job labelled `refresh_project`; the run_job task below
  # refers to it by the same entity_label.
  - type: create_job
    name: Update/build RAG Studio
    entity_label: refresh_project
    script: scripts/refresh_project.py
    # NOTE(review): YAML reads this as the string "None", not null --
    # confirm that is what the consumer expects.
    arguments: None
    cpu: 2
    memory: 4
    short_summary: Create job to allow refreshing the project from source and rebuilding.
    # NOTE(review): this task uses `environment`, while start_application
    # below uses `environment_variables`; verify both key names against the
    # consumer's schema.
    environment:
      TASK_TYPE: CREATE/RUN_JOB

  # Runs the job created above, matched by entity_label.
  - type: run_job
    entity_label: refresh_project
    short_summary: Run job to refresh the project from source and rebuilding.

  # Starts the application from the startup script.
  - type: start_application
    name: RagStudio
    subdomain: ragstudio
    bypass_authentication: false
    static_subdomain: false
    script: scripts/startup_app.py
    short_summary: Create and start RagStudio.
    long_summary: Create and start RagStudio.
    cpu: 2
    memory: 4
    environment_variables:
      TASK_TYPE: START_APPLICATION