Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ThejasNU/Fix init command #75

Merged
merged 4 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion libs/agentc/agentc/auditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ def _initialize_auditor(self) -> typing.Self:
if self.auditor_output is None and self.conn_string is None:
error_message = textwrap.dedent("""
Could not initialize a local or remote auditor!
If this is a new project, please run the command `agentc index` before instantiating an auditor.
If this is a new project, please run the command `agentc init activity` before instantiating an auditor.
Execute `agentc init --help` for more information.
If you are intending to use a remote-only auditor, please ensure that all of the relevant variables
(i.e., conn_string, username, password, and bucket) are set.
""")
Expand Down
5 changes: 5 additions & 0 deletions libs/agentc_cli/agentc_cli/cmds/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .util import init_db_catalog
from .util import init_local_activity
from .util import init_local_catalog
from .util import init_local_embedding_model
from agentc_core.util.models import CouchbaseConnect
from agentc_core.util.models import Keyspace
from agentc_core.util.publish import get_connection
Expand All @@ -25,6 +26,10 @@ def cmd_init(
initialize_db = "db" in catalog_type
initialize_catalog = "catalog" in type_metadata
initialize_auditor = "auditor" in type_metadata
initialize_model = "model" in type_metadata

if initialize_model:
init_local_embedding_model()

if initialize_local:
if initialize_catalog:
Expand Down
36 changes: 32 additions & 4 deletions libs/agentc_cli/agentc_cli/cmds/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import typing

from ..models.context import Context
from agentc_core.analytics.create import create_analytics_udfs
from agentc_core.analytics.create import create_analytics_views
from agentc_core.analytics.create import create_query_udfs
from agentc_core.catalog import CatalogChain
from agentc_core.catalog import CatalogDB
from agentc_core.catalog import CatalogMem
Expand All @@ -23,8 +24,10 @@
from agentc_core.defaults import DEFAULT_AUDIT_SCOPE
from agentc_core.defaults import DEFAULT_CATALOG_COLLECTION_NAME
from agentc_core.defaults import DEFAULT_CATALOG_NAME
from agentc_core.defaults import DEFAULT_EMBEDDING_MODEL
from agentc_core.defaults import DEFAULT_MAX_ERRS
from agentc_core.defaults import DEFAULT_META_COLLECTION_NAME
from agentc_core.defaults import DEFAULT_MODEL_CACHE_FOLDER
from agentc_core.defaults import DEFAULT_SCAN_DIRECTORY_OPTS
from agentc_core.learned.embedding import EmbeddingModel
from agentc_core.util.ddl import create_gsi_indexes
Expand Down Expand Up @@ -266,10 +269,35 @@ def init_db_auditor(ctx: Context, cluster: Cluster, keyspace_details: Keyspace):
else:
click.secho("Scope and collection for the auditor have been successfully created!\n", fg="green")

click.secho("Now creating the analytics UDFs for the auditor.", fg="yellow")
click.secho("Now creating query UDFs for the auditor.", fg="yellow")
try:
create_analytics_udfs(cluster, keyspace_details.bucket)
click.secho("All analytics UDFs for the auditor have been successfully created!\n", fg="green")
create_query_udfs(cluster, keyspace_details.bucket)
click.secho("All query UDFs for the auditor have been successfully created!\n", fg="green")
except CouchbaseException as e:
click.secho("Query UDFs could not be created.", fg="red")
logger.warning("Query UDFs could not be created: %s", e)

click.secho("Now creating the analytics views for the auditor.", fg="yellow")
try:
create_analytics_views(cluster, keyspace_details.bucket)
click.secho("All analytics views for the auditor have been successfully created!\n", fg="green")
except CouchbaseException as e:
click.secho("Analytics views could not be created.", fg="red")
logger.warning("Analytics views could not be created: %s", e)


def init_local_embedding_model():
    """Load the sentence-transformers embedding model, fetching it into the model cache if needed.

    The model name is read from the ``AGENT_CATALOG_EMBEDDING_MODEL_NAME`` environment variable and falls
    back to ``DEFAULT_EMBEDDING_MODEL``. ``DEFAULT_MODEL_CACHE_FOLDER`` is passed as the cache folder and
    ``local_files_only`` is disabled, so files that are not cached yet may be fetched.

    Raises:
        RuntimeError: If the model could not be instantiated for any reason. The message carries the text
            of the underlying error.
    """
    # import only in this function to avoid large import times
    import sentence_transformers

    # Resolve the name once: the load call and the error message then always agree, and the f-string
    # below does not have to nest double quotes (which is a syntax error before Python 3.12).
    model_name = os.getenv("AGENT_CATALOG_EMBEDDING_MODEL_NAME", DEFAULT_EMBEDDING_MODEL)

    try:
        sentence_transformers.SentenceTransformer(
            model_name,
            tokenizer_kwargs={"clean_up_tokenization_spaces": True},
            cache_folder=DEFAULT_MODEL_CACHE_FOLDER,
            local_files_only=False,
        )
    except Exception as e:
        # The underlying error text is embedded in the message, so the chained traceback is suppressed.
        raise RuntimeError(f"Unable to download model {model_name}!!\n{e}") from None
13 changes: 9 additions & 4 deletions libs/agentc_cli/agentc_cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def click_main(ctx, catalog, activity, verbose, interactive):
)
@click.argument(
"type_metadata",
type=click.Choice(["catalog", "auditor", "all"], case_sensitive=False),
type=click.Choice(["model", "catalog", "auditor", "all"], case_sensitive=False),
)
@click.option(
"--bucket",
Expand All @@ -141,18 +141,23 @@ def click_main(ctx, catalog, activity, verbose, interactive):
show_default=False,
)
def init(ctx, catalog_type, type_metadata, bucket):
"""Initialize the necessary files/collections for local/database catalog."""
"""Initialize the necessary files/collections for local/database catalog or download sentence-transformer model required for embedding."""
ctx_obj: Context = ctx.obj

if not catalog_type:
catalog_type = ["local", "db"]

type_metadata = ["catalog", "auditor"] if type_metadata == "all" else [type_metadata]
type_metadata = ["model", "catalog", "auditor"] if type_metadata == "all" else [type_metadata]

connection_details_env = None
keyspace_details = None

if "db" in catalog_type:
if "model" in type_metadata:
raise ValueError(
"Model initialization can be used only with local keyword, db is not supported!!\nPlease execute separately 'agentc init local model' to download the model."
)

# Load all Couchbase connection related data from env
connection_details_env = CouchbaseConnect(
connection_url=os.getenv("AGENT_CATALOG_CONN_STRING"),
Expand Down Expand Up @@ -529,7 +534,7 @@ def find(
@click.option(
"-em",
"--embedding-model-name",
default=DEFAULT_EMBEDDING_MODEL,
default=os.getenv("AGENT_CATALOG_EMBEDDING_MODEL_NAME", DEFAULT_EMBEDDING_MODEL),
help="Name of the embedding model used when indexing source files into the local catalog.",
show_default=True,
)
Expand Down
23 changes: 20 additions & 3 deletions libs/agentc_cli/tests/test_click.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_index(tmp_path):
pathlib.Path(tool_folder / tool.parent.name).mkdir(exist_ok=True)
shutil.copy(tool, tool_folder / tool.parent.name / (uuid.uuid4().hex + tool.suffix))
shutil.copy(resources_folder / "_good_spec.json", tool_folder / "_good_spec.json")
runner.invoke(click_main, ["init", "local", "model"])
invocation = runner.invoke(click_main, ["index", str(tool_folder.absolute()), "--no-prompts"])

# We should see 11 files scanned and 12 tools indexed.
Expand Down Expand Up @@ -489,11 +490,25 @@ def test_init_local(tmp_path):

runner.invoke(click_main, ["init", "local", "catalog"])
files_present = os.listdir()
assert ".agent-catalog" in files_present and ".agent-activity" not in files_present
assert (
".agent-catalog" in files_present
and ".agent-activity" not in files_present
and ".model-cache" not in files_present
)

runner.invoke(click_main, ["init", "local", "auditor"])
files_present = os.listdir()
assert ".agent-catalog" in files_present and ".agent-activity" in files_present
assert (
".agent-catalog" in files_present
and ".agent-activity" in files_present
and ".model-cache" not in files_present
)

runner.invoke(click_main, ["init", "local", "model"])
files_present = os.listdir()
assert (
".agent-catalog" in files_present and ".agent-activity" in files_present and ".model-cache" in files_present
)


@pytest.mark.smoke
Expand All @@ -505,4 +520,6 @@ def test_init_local_all(tmp_path):

runner.invoke(click_main, ["init", "local", "all"])
files_present = os.listdir()
assert ".agent-catalog" in files_present and ".agent-activity" in files_present
assert (
".agent-catalog" in files_present and ".agent-activity" in files_present and ".model-cache" in files_present
)
20 changes: 19 additions & 1 deletion libs/agentc_core/agentc_core/analytics/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# pass


def create_analytics_udfs(cluster: couchbase.cluster.Cluster, bucket: str) -> None:
def create_analytics_views(cluster: couchbase.cluster.Cluster, bucket: str) -> None:
logger.debug("Creating analytics log scope.")
ddl_result = cluster.analytics_query(f"""
CREATE ANALYTICS SCOPE `{bucket}`.`{DEFAULT_AUDIT_SCOPE}`
Expand Down Expand Up @@ -63,3 +63,21 @@ def create_analytics_udfs(cluster: couchbase.cluster.Cluster, bucket: str) -> No
ddl_result = cluster.analytics_query(ddl_string)
for _ in ddl_result.rows():
pass


def create_query_udfs(cluster: couchbase.cluster.Cluster, bucket: str) -> None:
    """Issue every statement template found in the sibling ``udfs`` folder against the cluster.

    Each file is read as text, the placeholders ``[BUCKET_NAME]``, ``[SCOPE_NAME]`` and
    ``[LOG_COLLECTION_NAME]`` are substituted, and the resulting statement is sent via ``cluster.query``.

    Args:
        cluster: Cluster on which the statements are executed.
        bucket: Bucket name substituted for the ``[BUCKET_NAME]`` placeholder.
    """
    udfs_folder = pathlib.Path(__file__).parent / "udfs"

    # Sorted by path so the statements are issued in a deterministic order. Only regular files are
    # considered: a sub-directory inside the folder cannot be read as a statement and would raise.
    udf_files = sorted(entry for entry in udfs_folder.iterdir() if entry.is_file())

    for udf_file in udf_files:
        # Explicit encoding so the result does not depend on the platform's locale default.
        udf_string = (
            udf_file.read_text(encoding="utf-8")
            .replace("[BUCKET_NAME]", bucket)
            .replace("[SCOPE_NAME]", DEFAULT_AUDIT_SCOPE)
            .replace("[LOG_COLLECTION_NAME]", DEFAULT_AUDIT_COLLECTION)
        )
        logger.debug("Issuing the following statement: %s", udf_string)

        # NOTE(review): the rows are iterated and discarded, presumably to drive the request to
        # completion -- confirm against the SDK's query semantics.
        for _ in cluster.query(udf_string).rows():
            pass
10 changes: 5 additions & 5 deletions libs/agentc_core/agentc_core/analytics/udfs/all_sessions.sqlpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ WITH
PARTITION BY
_id
ORDER BY
rsli.timestamp ASC
STR_TO_MILLIS(rsli.timestamp) ASC
) AS rn
)
SELECT
Expand All @@ -29,7 +29,7 @@ WITH
SELECT VALUE
gi.rsl.timestamp
ORDER BY
gi.rsl.timestamp ASC
STR_TO_MILLIS(gi.rsl.timestamp) ASC
LIMIT 1
)[0] AS start_t,
vid,
Expand All @@ -50,19 +50,19 @@ LETTING
SELECT VALUE
gii
ORDER BY
gii.rsl.timestamp ASC
STR_TO_MILLIS(gii.rsl.timestamp) ASC
) AS gi
SELECT
gi.rsl.content AS content,
gi.rsl.timestamp AS timestamp,
gi.rsl.kind AS kind,
ROW_NUMBER() OVER(ORDER BY gi.rsl.timestamp ASC) AS msg_num
ROW_NUMBER() OVER(ORDER BY STR_TO_MILLIS(gi.rsl.timestamp) ASC) AS msg_num
),
vid = {
"identifier": ver.identifier,
"timestamp": ver.timestamp
}
ORDER BY
start_t DESC
STR_TO_MILLIS(start_t) DESC
)
};
17 changes: 10 additions & 7 deletions libs/agentc_core/agentc_core/analytics/udfs/exchanges.sqlpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
-- Note: all_sessions.sqlpp should be run before this script.
CREATE OR REPLACE FUNCTION `[BUCKET_NAME]`.`[SCOPE_NAME]`.QueryLatestMessages() AS
CREATE OR REPLACE FUNCTION `[BUCKET_NAME]`.`[SCOPE_NAME]`.QueryLatestMessages() {
(
WITH LM AS
(
WITH msgs AS (
Expand All @@ -10,21 +11,21 @@ WITH msgs AS (
s.timestamp,
s.grouping,
s.session,
ROW_NUMBER() OVER (PARTITION BY s.grouping, s.session ORDER BY s.timestamp) AS row_num
ROW_NUMBER() OVER (PARTITION BY s.grouping, s.session ORDER BY STR_TO_MILLIS(s.timestamp)) AS row_num
FROM `[BUCKET_NAME]`.`[SCOPE_NAME]`.`[LOG_COLLECTION_NAME]` AS s
WHERE s.grouping IS NOT NULL
),
first_human AS (
SELECT msgsi.row_num
FROM msgs AS msgsi
WHERE msgsi.kind = "human"
ORDER BY msgsi.timestamp ASC
ORDER BY STR_TO_MILLIS(msgsi.timestamp) ASC
LIMIT 1
),
last_message AS (
SELECT msgsi.row_num
FROM msgs AS msgsi
ORDER BY msgsi.timestamp DESC
ORDER BY STR_TO_MILLIS(msgsi.timestamp) DESC
LIMIT 1
)
SELECT
Expand All @@ -33,14 +34,14 @@ SELECT
WHERE msgsi.kind = "human"
AND msgsi.grouping = g.grouping
AND msgsi.session = g.session
ORDER BY msgsi.timestamp ASC
ORDER BY STR_TO_MILLIS(msgsi.timestamp) ASC
LIMIT 1) AS question,

(SELECT RAW COALESCE(msgsi.content, msgsi.tool_calls)
FROM msgs AS msgsi
WHERE msgsi.grouping = g.grouping
AND msgsi.session = g.session
ORDER BY msgsi.timestamp DESC
ORDER BY STR_TO_MILLIS(msgsi.timestamp) DESC
LIMIT 1) AS answer,

(SELECT RAW COALESCE(msgsi.content, msgsi.tool_calls)
Expand All @@ -57,4 +58,6 @@ FROM (SELECT DISTINCT grouping, session FROM msgs) AS g
SELECT
(FROM LM e SELECT VALUE e.question ORDER BY e.row_num) AS question,
(FROM LM e SELECT VALUE e.answer ORDER BY e.row_num) AS answer,
(FROM LM e SELECT VALUE e.contexts ORDER BY e.row_num) AS contexts;
(FROM LM e SELECT VALUE e.contexts ORDER BY e.row_num) AS contexts
)
};
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CREATE OR REPLACE FUNCTION
SELECT VALUE
s.sid
ORDER BY
s.start_t DESC
STR_TO_MILLIS(s.start_t) DESC
LIMIT 1
)[0]
};
8 changes: 5 additions & 3 deletions libs/agentc_core/agentc_core/analytics/udfs/tool_calls.sqlpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
-- Note: all_sessions.sqlpp should be run before this script.
CREATE OR REPLACE FUNCTION `[BUCKET_NAME]`.`[SCOPE_NAME]`.QueryToolCalls()
CREATE OR REPLACE FUNCTION `[BUCKET_NAME]`.`[SCOPE_NAME]`.QueryToolCalls(){
(
SELECT
s1.sid AS sid,
s1.vid AS vid,
Expand Down Expand Up @@ -30,5 +31,6 @@ LETTING
gi.m2.content.dump.kwargs.status AS tool_status
)
ORDER BY
vid.timestamp DESC
;
STR_TO_MILLIS(vid.timestamp) DESC
)
};
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ CREATE OR REPLACE FUNCTION
SELECT VALUE
gi.msg
ORDER BY
gi.msg.timestamp ASC
STR_TO_MILLIS(gi.msg.timestamp) ASC
)
SELECT
s.sid AS sid,
Expand Down
17 changes: 11 additions & 6 deletions libs/agentc_core/agentc_core/learned/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,17 @@ def _encode(_text: str) -> list[float]:
else:
import sentence_transformers

sentence_transformers_model = sentence_transformers.SentenceTransformer(
self.embedding_model_name,
tokenizer_kwargs={"clean_up_tokenization_spaces": True},
cache_folder=DEFAULT_MODEL_CACHE_FOLDER,
local_files_only=False,
)
try:
sentence_transformers_model = sentence_transformers.SentenceTransformer(
self.embedding_model_name,
tokenizer_kwargs={"clean_up_tokenization_spaces": True},
cache_folder=DEFAULT_MODEL_CACHE_FOLDER,
local_files_only=True,
)
except OSError:
raise ValueError(
f"Unable to find local embedding model {self.embedding_model_name}!!!\nPlease execute 'agentc init local model' to download the model."
) from None

def _encode(_text: str) -> list[float]:
return sentence_transformers_model.encode(_text, convert_to_tensor=False).tolist()
Expand Down
Loading