feat(docs): support readthedocs #369

Merged: 10 commits, Aug 27, 2024
4 changes: 3 additions & 1 deletion .github/scripts/doc_link_checker.py
@@ -58,7 +58,7 @@ def analyze_doc(home, path):
        ref = ref[ref.find('#'):]
    fullpath = os.path.join(home, ref)
    if not os.path.exists(fullpath):
-       raise ValueError(fullpath)
+       # raise ValueError(fullpath)
        problem_list.append(ref)
    else:
        continue
@@ -79,6 +79,8 @@ def traverse(target):
        if filename.endswith('.md'):
            path = os.path.join(home, filename)
            if os.path.islink(path) is False:
+               if 'copy_' in path:
+                   continue
                analyze_doc(home, path)


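In effect, the link checker now records every broken relative link instead of aborting at the first one, and skips the `copy_` duplicates generated for the Read the Docs tree. A minimal sketch of that behaviour follows; the link regex, return values, and anchor handling are simplifications, not the script's exact code.

```python
import os
import re

# Hypothetical minimal re-implementation; the real logic lives in
# .github/scripts/doc_link_checker.py and also handles anchors and code blocks.
LINK = re.compile(r'\]\(([^)]+)\)')

def analyze_doc(home, path):
    """Return the broken relative links found in one markdown file."""
    problem_list = []
    with open(path, encoding='utf8') as f:
        for line in f:
            for ref in LINK.findall(line):
                if ref.startswith('http') or ref.startswith('#'):
                    continue
                fullpath = os.path.join(home, ref)
                if not os.path.exists(fullpath):
                    # was: raise ValueError(fullpath); now collected and reported
                    problem_list.append(ref)
    return problem_list

def traverse(target):
    problems = []
    for home, _dirs, files in os.walk(target):
        for filename in files:
            if not filename.endswith('.md'):
                continue
            path = os.path.join(home, filename)
            if os.path.islink(path):
                continue
            if 'copy_' in path:  # duplicates created for the readthedocs tree
                continue
            problems += analyze_doc(home, path)
    return problems

if __name__ == '__main__':
    for ref in traverse('docs'):
        print('broken link:', ref)
```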
39 changes: 21 additions & 18 deletions README.md
@@ -8,8 +8,11 @@ English | [简体中文](README_zh.md)
<a href="resource/figures/wechat.jpg" target="_blank">
<img alt="Wechat" src="https://img.shields.io/badge/wechat-robot%20inside-brightgreen?logo=wechat&logoColor=white" />
</a>
<a href="https://pypi.org/project/huixiangdou" target="_blank">
<img alt="PyPI" src="https://img.shields.io/badge/PyPI-install-blue?logo=pypi&logoColor=white" />
<!-- <a href="https://huixiangdou.readthedocs.io/en/latest/" target="_blank">
<img alt="Readthedocs" src="https://img.shields.io/badge/readthedocs-chat%20with%20AI-brightgreen?logo=readthedocs&logoColor=white" />
</a> -->
<a href="https://huixiangdou.readthedocs.io/zh-cn/latest/" target="_blank">
<img alt="Readthedocs" src="https://img.shields.io/badge/readthedocs-black?logo=readthedocs&logoColor=white" />
</a>
<a href="https://youtu.be/ylXrT-Tei-Y" target="_blank">
<img alt="YouTube" src="https://img.shields.io/badge/YouTube-black?logo=youtube&logoColor=red" />
@@ -35,7 +38,7 @@ HuixiangDou is a **professional knowledge assistant** based on LLM.
Advantages:

1. Design three-stage pipelines of preprocess, rejection and response
- * `chat_in_group` copes with **group chat** scenario, answer user questions without message flooding, see [2401.08772](https://arxiv.org/abs/2401.08772), [2405.02817](https://arxiv.org/abs/2405.02817), [Hybrid Retrieval](./docs/knowledge_graph_en.md) and [Precision Report](./evaluation/)
+ * `chat_in_group` copes with **group chat** scenario, answer user questions without message flooding, see [2401.08772](https://arxiv.org/abs/2401.08772), [2405.02817](https://arxiv.org/abs/2405.02817), [Hybrid Retrieval](./docs/en/doc_knowledge_graph.md) and [Precision Report](./evaluation/)
* `chat_with_repo` for **real-time streaming** chat
2. No training required, with CPU-only, 2G, 10G, 20G and 80G configuration
3. Offers a complete suite of Web, Android, and pipeline source code, industrial-grade and commercially viable
@@ -50,9 +53,9 @@ Our Web version has been released to [OpenXLab](https://openxlab.org.cn/apps/det

- \[2024/08\] `chat_with_repo` [pipeline](./huixiangdou/service/parallel_pipeline.py) 👍
- \[2024/07\] Image and text retrieval & Removal of `langchain` 👍
- - \[2024/07\] [Hybrid Knowledge Graph and Dense Retrieval](./docs/knowledge_graph_en.md) improve 1.7% F1 score 🎯
+ - \[2024/07\] [Hybrid Knowledge Graph and Dense Retrieval](./docs/en/doc_knowledge_graph.md) improve 1.7% F1 score 🎯
- \[2024/06\] [Evaluation of chunksize, splitter, and text2vec model](./evaluation) 🎯
- - \[2024/05\] [wkteam WeChat access](./docs/add_wechat_commercial_zh.md), parsing image & URL, support coreference resolution
+ - \[2024/05\] [wkteam WeChat access](./docs/zh/doc_add_wechat_commercial.md), parsing image & URL, support coreference resolution
- \[2024/05\] [SFT LLM on NLP task, F1 increased by 29%](./sft/) 🎯
<table>
<tr>
Expand All @@ -63,9 +66,9 @@ Our Web version has been released to [OpenXLab](https://openxlab.org.cn/apps/det
<td><a href="https://arxiv.org/abs/2405.02817">arXiv</a></td>
</tr>
</table>
- - \[2024/04\] [RAG Annotation SFT Q&A Data and Examples](./docs/rag_annotate_sft_data_zh.md)
+ - \[2024/04\] [RAG Annotation SFT Q&A Data and Examples](./docs/zh/doc_rag_annotate_sft_data.md)
- \[2024/04\] Release [Web Front and Back End Service Source Code](./web) 👍
- - \[2024/03\] New [Personal WeChat Integration](./docs/add_wechat_accessibility_zh.md) and [**Prebuilt APK**](https://github.com/InternLM/HuixiangDou/releases/download/v0.1.0rc1/huixiangdou-20240508.apk) !
+ - \[2024/03\] New [Personal WeChat Integration](./docs/zh/doc_add_wechat_accessibility.md) and [**Prebuilt APK**](https://github.com/InternLM/HuixiangDou/releases/download/v0.1.0rc1/huixiangdou-20240508.apk) !
- \[2024/02\] \[Experimental Feature\] [WeChat Group](https://github.com/InternLM/HuixiangDou/blob/main/resource/figures/wechat.jpg) Integration of multimodal to achieve OCR

# 📖 Support Status
@@ -117,7 +120,7 @@ Our Web version has been released to [OpenXLab](https://openxlab.org.cn/apps/det

<td>

- - [Knowledge Graph](./docs/knowledge_graph_en.md)
+ - [Knowledge Graph](./docs/en/doc_knowledge_graph.md)
- [Internet Search](./huixiangdou/service/web_search.py)
- [SourceGraph](https://sourcegraph.com)
- Image and text (only markdown)
@@ -126,7 +129,7 @@ Our Web version has been released to [OpenXLab](https://openxlab.org.cn/apps/det

<td>

- - WeChat([android](./docs/add_wechat_accessibility_zh.md)/[wkteam](./docs/add_wechat_commercial_zh.md))
+ - WeChat([android](./docs/zh/doc_add_wechat_accessibility.md)/[wkteam](./docs/zh/doc_add_wechat_commercial.md))
- Lark
- [OpenXLab Web](https://openxlab.org.cn/apps/detail/tpoisonooo/huixiangdou-web)
- [Gradio Demo](./huixiangdou/gradio.py)
@@ -244,10 +247,10 @@ Please update the `repodir` documents, [good_questions](./resource/good_question

## III. Integration into Feishu, WeChat group

- - [**One-way** sending to Feishu group](./docs/send_only_lark_group_zh.md)
- - [**Two-way** Feishu group receiving and sending, recalling](./docs/add_lark_group_zh.md)
- - [Personal WeChat Android access](./docs/add_wechat_accessibility_zh.md)
- - [Personal WeChat wkteam access](./docs/add_wechat_commercial_zh.md)
+ - [**One-way** sending to Feishu group](./docs/zh/doc_send_only_lark_group.md)
+ - [**Two-way** Feishu group receiving and sending, recalling](./docs/zh/doc_add_lark_group.md)
+ - [Personal WeChat Android access](./docs/zh/doc_add_wechat_accessibility.md)
+ - [Personal WeChat wkteam access](./docs/zh/doc_add_wechat_commercial.md)

## IV. Deploy web front and back end

@@ -328,7 +331,7 @@ Note:

- You need to manually download [Visualized_m3.pth](https://huggingface.co/BAAI/bge-visualized/blob/main/Visualized_m3.pth) to the [bge-m3](https://huggingface.co/BAAI/bge-m3) directory
- Install FlagEmbedding on main branch, we have made [bugfix](https://github.com/FlagOpen/FlagEmbedding/commit/3f84da0796d5badc3ad519870612f1f18ff0d1d3). [Here](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/visual/eva_clip/bpe_simple_vocab_16e6.txt.gz) you can download `bpe_simple_vocab_16e6.txt.gz`
- - Install [requirments-multimodal.txt](./requirements-multimodal.txt)
+ - Install [requirements/multimodal.txt](./requirements/multimodal.txt)

Run gradio to test, see the image and text retrieval result [here](https://github.com/InternLM/HuixiangDou/pull/326).

@@ -348,11 +351,11 @@ The "HuiXiangDou" in the WeChat experience group has enabled all features:

Please read the following topics:

- - [Hybrid knowledge graph and dense retrieval](./docs/knowledge_graph_en.md)
- - [Refer to config-advanced.ini configuration to improve effects](./docs/full_dev_en.md)
+ - [Hybrid knowledge graph and dense retrieval](./docs/en/doc_knowledge_graph.md)
+ - [Refer to config-advanced.ini configuration to improve effects](./docs/en/doc_full_dev.md)
- [Group chat scenario anaphora resolution training](./sft)
- - [Use wkteam WeChat access, integrate images, public account parsing, and anaphora resolution](./docs/add_wechat_commercial_zh.md)
- - [Use rag.py to annotate SFT training data](./docs/rag_annotate_sft_data_zh.md)
+ - [Use wkteam WeChat access, integrate images, public account parsing, and anaphora resolution](./docs/zh/doc_add_wechat_commercial.md)
+ - [Use rag.py to annotate SFT training data](./docs/zh/doc_rag_annotate_sft_data.md)

## **Android Tools**

39 changes: 21 additions & 18 deletions README_zh.md
@@ -7,8 +7,11 @@
<a href="resource/figures/wechat.jpg" target="_blank">
<img alt="Wechat" src="https://img.shields.io/badge/wechat-robot%20inside-brightgreen?logo=wechat&logoColor=white" />
</a>
<a href="https://pypi.org/project/huixiangdou" target="_blank">
<img alt="PyPI" src="https://img.shields.io/badge/PyPI-install-blue?logo=pypi&logoColor=white" />
<!-- <a href="https://huixiangdou.readthedocs.io/zh-cn/latest/" target="_blank">
<img alt="Readthedocs" src="https://img.shields.io/badge/readthedocs-chat%20with%20AI-brightgreen?logo=readthedocs&logoColor=white" />
</a> -->
<a href="https://huixiangdou.readthedocs.io/zh-cn/latest/" target="_blank">
<img alt="Readthedocs" src="https://img.shields.io/badge/readthedocs-black?logo=readthedocs&logoColor=white" />
</a>
<a href="https://youtu.be/ylXrT-Tei-Y" target="_blank">
<img alt="YouTube" src="https://img.shields.io/badge/YouTube-black?logo=youtube&logoColor=red" />
@@ -32,7 +35,7 @@
茴香豆是一个基于 LLM 的专业知识助手,优势:

1. 设计预处理、拒答、响应三阶段 pipeline:
- * `chat_in_group` 群聊场景,解答问题时不会消息泛滥。见 [2401.08772](https://arxiv.org/abs/2401.08772),[2405.02817](https://arxiv.org/abs/2405.02817),[混合检索](./docs/knowledge_graph_zh.md)和[业务数据精度测试](./evaluation)
+ * `chat_in_group` 群聊场景,解答问题时不会消息泛滥。见 [2401.08772](https://arxiv.org/abs/2401.08772),[2405.02817](https://arxiv.org/abs/2405.02817),[混合检索](./docs/zh/doc_knowledge_graph.md)和[业务数据精度测试](./evaluation)
* `chat_with_repo` 实时聊天场景,响应更快
2. 无需训练适用各行业,提供 CPU-only、2G、10G、20G、80G 规格配置
3. 提供一整套前后端 web、android、算法源码,工业级开源可商用
@@ -49,9 +52,9 @@ Web 版视频教程见 [BiliBili](https://www.bilibili.com/video/BV1S2421N7mn)

- \[2024/08\] `chat_with_repo` [pipeline](./huixiangdou/service/parallel_pipeline.py)
- \[2024/07\] 图文检索 & 移除 `langchain` 👍
- - \[2024/07\] [混合知识图谱和稠密检索,F1 提升 1.7%](./docs/knowledge_graph_zh.md) 🎯
+ - \[2024/07\] [混合知识图谱和稠密检索,F1 提升 1.7%](./docs/zh/doc_knowledge_graph.md) 🎯
- \[2024/06\] [评估 chunksize,splitter 和 text2vec 模型](./evaluation) 🎯
- - \[2024/05\] [wkteam 微信接入](./docs/add_wechat_commercial_zh.md),整合图片&公众号解析、集成指代消歧
+ - \[2024/05\] [wkteam 微信接入](./docs/zh/doc_add_wechat_commercial.md),整合图片&公众号解析、集成指代消歧
- \[2024/05\] [SFT LLM 处理 NLP 任务,F1 提升 29%](./sft/) 🎯
<table>
<tr>
Expand All @@ -62,9 +65,9 @@ Web 版视频教程见 [BiliBili](https://www.bilibili.com/video/BV1S2421N7mn)
<td><a href="https://arxiv.org/abs/2405.02817">arXiv</a></td>
</tr>
</table>
- - \[2024/04\] 实现 [RAG 标注 SFT 问答数据和样例](./docs/rag_annotate_sft_data_zh.md)
+ - \[2024/04\] 实现 [RAG 标注 SFT 问答数据和样例](./docs/zh/doc_rag_annotate_sft_data.md)
- \[2024/04\] 发布 [web 前后端服务源码](./web) 👍
- - \[2024/03\] 新的[个人微信集成方法](./docs/add_wechat_accessibility_zh.md)和[**预编译 apk**](https://github.com/InternLM/HuixiangDou/releases/download/v0.1.0rc1/huixiangdou-20240508.apk) !
+ - \[2024/03\] 新的[个人微信集成方法](./docs/zh/doc_add_wechat_accessibility.md)和[**预编译 apk**](https://github.com/InternLM/HuixiangDou/releases/download/v0.1.0rc1/huixiangdou-20240508.apk) !
- \[2024/02\] \[实验功能\] [微信群](https://github.com/InternLM/HuixiangDou/blob/main/resource/figures/wechat.jpg) 集成多模态以实现 OCR

# 📖 支持情况
@@ -116,7 +119,7 @@ Web 版视频教程见 [BiliBili](https://www.bilibili.com/video/BV1S2421N7mn)

<td>

- - [知识图谱](./docs/knowledge_graph_zh.md)
+ - [知识图谱](./docs/zh/doc_knowledge_graph.md)
- [联网搜索](./huixiangdou/service/web_search.py)
- [SourceGraph](https://sourcegraph.com)
- 图文混合(仅 markdown)
@@ -125,7 +128,7 @@ Web 版视频教程见 [BiliBili](https://www.bilibili.com/video/BV1S2421N7mn)

<td>

- - 微信([android](./docs/add_wechat_accessibility_zh.md)/[wkteam](./docs/add_wechat_commercial_zh.md))
+ - 微信([android](./docs/zh/doc_add_wechat_accessibility.md)/[wkteam](./docs/zh/doc_add_wechat_commercial.md))
- 飞书
- [OpenXLab Web](https://openxlab.org.cn/apps/detail/tpoisonooo/huixiangdou-web)
- [Gradio Demo](./huixiangdou/gradio.py)
@@ -243,10 +246,10 @@ curl -X POST http://127.0.0.1:23333/huixiangdou_inference -H "Content-Type: app

## 三、集成到飞书、微信群

- - [**单向**发送到飞书群](./docs/send_only_lark_group_zh.md)
- - [**双向**飞书群收发、撤回](./docs/add_lark_group_zh.md)
- - [个微 android 接入](./docs/add_wechat_accessibility_zh.md)
- - [个微 wkteam 接入](./docs/add_wechat_commercial_zh.md)
+ - [**单向**发送到飞书群](./docs/zh/doc_send_only_lark_group.md)
+ - [**双向**飞书群收发、撤回](./docs/zh/doc_add_lark_group.md)
+ - [个微 android 接入](./docs/zh/doc_add_wechat_accessibility.md)
+ - [个微 wkteam 接入](./docs/zh/doc_add_wechat_commercial.md)

## 四、WEB 前后端部署,零编程集成飞书微信

Expand Down Expand Up @@ -326,7 +329,7 @@ reranker_model_path = "BAAI/bge-reranker-v2-minicpm-layerwise"

- 先下载 [bge-m3](https://huggingface.co/BAAI/bge-m3),然后把 [Visualized_m3.pth](https://huggingface.co/BAAI/bge-visualized/blob/main/Visualized_m3.pth) 放进 `bge-m3` 目录
- FlagEmbedding 需要安装 master 最新版,我们做了 [bugfix](https://github.com/FlagOpen/FlagEmbedding/commit/3f84da0796d5badc3ad519870612f1f18ff0d1d3);[这里](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/visual/eva_clip/bpe_simple_vocab_16e6.txt.gz)可以下载 BGE 打包漏掉的 `bpe_simple_vocab_16e6.txt.gz`
- - 安装 [requirments-multimodal.txt](./requirements-multimodal.txt)
+ - 安装 [requirements/multimodal.txt](./requirements/multimodal.txt)

运行 gradio 测试,图文检索效果见[这里](https://github.com/InternLM/HuixiangDou/pull/326).

@@ -346,11 +349,11 @@ python3 tests/test_query_gradio.py

请阅读以下话题:

- - [混合**知识图谱**和稠密检索提升精度](./docs/knowledge_graph_zh.md)
- - [参照 config-advanced.ini 配置提升效果](./docs/full_dev_zh.md)
+ - [混合**知识图谱**和稠密检索提升精度](./docs/zh/doc_knowledge_graph.md)
+ - [参照 config-advanced.ini 配置提升效果](./docs/zh/doc_full_dev.md)
- [群聊场景指代消歧训练](./sft)
- - [使用 wkteam 微信接入,整合图片、公众号解析和指代消歧](./docs/add_wechat_commercial_zh.md)
- - [使用 rag.py 标注 SFT 训练数据](./docs/rag_annotate_sft_data_zh.md)
+ - [使用 wkteam 微信接入,整合图片、公众号解析和指代消歧](./docs/zh/doc_add_wechat_commercial.md)
+ - [使用 rag.py 标注 SFT 训练数据](./docs/zh/doc_rag_annotate_sft_data.md)

## **移动端**

2 changes: 1 addition & 1 deletion config.ini
@@ -209,4 +209,4 @@ introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"
# github.com/tencent/ncnn contributors
[frontend.wechat_wkteam.18356748488]
name = "卷卷群"
introduction = "ncnn contributors group"
introduction = "ncnn contributors group"
17 changes: 17 additions & 0 deletions docs/en/.readthedocs.yaml
@@ -0,0 +1,17 @@
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.8"

formats:
  - epub

sphinx:
  configuration: docs/en/conf.py

python:
  install:
    - requirements: requirements/docs.txt
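Read the Docs consumes this file directly. As a local sanity check, a small sketch (assuming PyYAML is installed and the snippet runs from the repository root) can parse the config and confirm the paths it references exist:

```python
import os
import yaml  # pip install pyyaml

with open('docs/en/.readthedocs.yaml', encoding='utf8') as f:
    cfg = yaml.safe_load(f)

assert cfg['version'] == 2
referenced = [cfg['sphinx']['configuration'],               # docs/en/conf.py
              cfg['python']['install'][0]['requirements']]  # requirements/docs.txt
for rel in referenced:
    assert os.path.exists(rel), f'missing path referenced by RTD config: {rel}'
print('readthedocs config OK')
```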
20 changes: 20 additions & 0 deletions docs/en/Makefile
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
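This is the stock sphinx-quickstart Makefile: every target is forwarded to `sphinx-build -M`, so `make html` run inside `docs/en` writes the site under `docs/en/_build`. The same build can be reproduced without `make`, as in this sketch (assuming Sphinx and the packages from `requirements/docs.txt` are installed):

```python
# Rough equivalent of `cd docs/en && make html`, handy on systems without make.
from sphinx.cmd.build import build_main

exit_code = build_main(['-b', 'html', 'docs/en', 'docs/en/_build/html'])
raise SystemExit(exit_code)
```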
62 changes: 62 additions & 0 deletions docs/en/_static/css/readthedocs.css
@@ -0,0 +1,62 @@
.header-logo {
background-image: url("../image/logo.svg");
background-size: 444px 93px;
height: 93px;
width: 444px;
}

@media screen and (min-width: 1100px) {
.header-logo {
top: -25px;
}
}

pre {
white-space: pre;
}

@media screen and (min-width: 2000px) {
.pytorch-content-left {
width: 1200px;
margin-left: 30px;
}
article.pytorch-article {
max-width: 1200px;
}
.pytorch-breadcrumbs-wrapper {
width: 1200px;
}
.pytorch-right-menu.scrolling-fixed {
position: fixed;
top: 45px;
left: 1580px;
}
}


article.pytorch-article section code {
padding: .2em .4em;
background-color: #f3f4f7;
border-radius: 5px;
}

/* Disable the change in tables */
article.pytorch-article section table code {
padding: unset;
background-color: unset;
border-radius: unset;
}

table.autosummary td {
width: 50%
}

img.align-center {
display: block;
margin-left: auto;
margin-right: auto;
}

article.pytorch-article p.rubric {
font-weight: bold;
}