Make use of the text index

yindaheng98 · May 16, 2024 · c2fc27e · c2fc27e
1 parent 6b14ba2
commit c2fc27e
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -165,6 +165,7 @@ python -m dblp_crawler -k video -k edge -p 27d5dc70280c8628f181a7f8881912025f808
 Without indexes, Neo4j queries will be very slow, so before you start, you should create the following indexes:
 
 ```cql
+CREATE TEXT INDEX publication_title_hash_text_index FOR (p:Publication) ON (p.title_hash);
 CREATE INDEX publication_title_hash_index FOR (p:Publication) ON (p.title_hash);
 CREATE INDEX publication_dblp_key_index FOR (p:Publication) ON (p.dblp_key);
 CREATE INDEX publication_paper_id_index FOR (p:Publication) ON (p.paperId);

diff --git a/citation_crawler/init/neo4j.py b/citation_crawler/init/neo4j.py
@@ -47,7 +47,7 @@ def match_papers_keywords(tx, year, *arg_keywords):
             if not k:
                 continue
             ki += 1
-            k_and.append(f"toLower(p.title) CONTAINS $keyword{ki}")
+            k_and.append(f"p.title_hash CONTAINS $keyword{ki}")
             v_and[f"keyword{ki}"] = k
         k_or.append(f"({' and '.join(k_and)})")
         v_or = {**v_or, **v_and}

diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
 
 setup(
     name='citation_crawler',
-    version='2.10',
+    version='2.10.1',
     author='yindaheng98',
     author_email='[email protected]',
     url='https://github.com/yindaheng98/citation-crawler',