From fbf3ff75c8c1a5246e40805c6afc4968232b7634 Mon Sep 17 00:00:00 2001 From: Rohan Moniz <60864468+rm03@users.noreply.github.com> Date: Mon, 20 Nov 2023 02:27:35 -0500 Subject: [PATCH] use uuid instead of slug --- dbutils/mongo.go | 15 --------------- dbutils/utils.py | 9 ++++----- server.py | 13 +++++++------ 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/dbutils/mongo.go b/dbutils/mongo.go index 9f39f16..5cbfb7b 100644 --- a/dbutils/mongo.go +++ b/dbutils/mongo.go @@ -8,7 +8,6 @@ import ( "log" "net/http" "os" - "strconv" "strings" "time" @@ -54,13 +53,6 @@ func worker(id int, jobs <-chan string, db *mongo.Database) { page.Articles[i].CTime = cTime.Unix() page.Articles[i].MTime = mTime.Unix() page.Articles[i].PTime = pTime.Unix() - cMonth := "" - if cTime.Month() < 10 { - cMonth = fmt.Sprintf("0%d", int(cTime.Month())) - } else { - cMonth = strconv.Itoa(int(cTime.Month())) - } - page.Articles[i].Slug = fmt.Sprintf("/%d/%s/%s", cTime.Year(), cMonth, page.Articles[i].Slug) articlesInterface = append(articlesInterface, page.Articles[i]) } if _, err = coll.InsertMany(context.TODO(), articlesInterface); err != nil { @@ -113,13 +105,6 @@ func UpdateArticles(db *mongo.Database) { page.Articles[i].CTime = cTime.Unix() page.Articles[i].MTime = mTime.Unix() page.Articles[i].PTime = pTime.Unix() - cMonth := "" - if cTime.Month() < 10 { - cMonth = fmt.Sprintf("0%d", int(cTime.Month())) - } else { - cMonth = strconv.Itoa(int(cTime.Month())) - } - page.Articles[i].Slug = fmt.Sprintf("/%d/%s/%s", cTime.Year(), cMonth, page.Articles[i].Slug) articlesInterface = append(articlesInterface, page.Articles[i]) } else { p = false diff --git a/dbutils/utils.py b/dbutils/utils.py index d95e2d4..1c7195d 100644 --- a/dbutils/utils.py +++ b/dbutils/utils.py @@ -12,7 +12,7 @@ def initialize_schema(): redis_client = redis.Redis().from_url(os.environ.get("REDIS_URI")) schema = ( - TextField("slug"), + TextField("uuid"), TextField("headline"), TextField("thumbnail_url"), NumericField("timestamp"), @@ -47,7 +47,7 @@ def refresh_recent_articles(cron=False): cursor = mongo_client.Cluster.articles.find( {}, projection={ - "slug": 1, + "uuid": 1, "headline": 1, "dominantmedia": 1, "content": 1, @@ -62,11 +62,10 @@ def refresh_recent_articles(cron=False): ) ] byte_embedding = np.array(embedding, dtype=np.float32).tobytes() - key = doc["slug"] pipe.hset( - name=f"article:{key}", + name=f"article:{doc['uuid']}", mapping={ - "slug": key, + "uuid": doc["uuid"], "headline": doc["headline"], "embedding": byte_embedding, "timestamp": datetime.strptime( diff --git a/server.py b/server.py index fe2fd82..c3c8ff5 100644 --- a/server.py +++ b/server.py @@ -30,7 +30,7 @@ async def startup(): @app.get("/recommend") @cache(expire=60 * 60) -def recommend(slug: str = ""): +def recommend(uuid: str = ""): dt = datetime.now() - timedelta( days=90 ) # only show articles published in the last 90 days @@ -39,15 +39,16 @@ def recommend(slug: str = ""): f"(@timestamp:[({dt.timestamp()} inf])=>[KNN {top_k + 1} @embedding $vec as score]" ) .sort_by("score") - .return_fields("slug", "headline", "thumbnail_url") + .return_fields("uuid", "headline", "thumbnail_url") .dialect(2) ) - vec = redis_client.hget(f"article:{slug}", "embedding") + key = f"article:{uuid}" + vec = redis_client.hget(key, "embedding") if vec: query_params = {"vec": vec} results = redis_client.ft("articles").search(query, query_params).docs - return results[1:] if results[0]["slug"] == slug else results[:-1] - doc = mongo_client.Cluster.articles.find_one({"slug": slug}) + return results[1:] if results[0]["id"] == key else results[:-1] + doc = mongo_client.Cluster.articles.find_one({"uuid": uuid}) if doc: embedding = [ float(val) @@ -58,7 +59,7 @@ def recommend(slug: str = ""): byte_embedding = np.array(embedding, dtype=np.float32).tobytes() query_params = {"vec": byte_embedding} results = redis_client.ft("articles").search(query, query_params).docs - return results[1:] if results[0]["slug"] == slug else results[:-1] + return results[1:] if results[0]["id"] == key else results[:-1] return None