From 747ee24320c8fedf819a98770c5320d55270e0f1 Mon Sep 17 00:00:00 2001 From: benhaotang Date: Tue, 4 Feb 2025 14:12:43 +0100 Subject: [PATCH 1/2] better handle search query list --- open_deep_researcher.ipynb | 79 ++++++++++++++++++------------- open_deep_researcher_gradio.ipynb | 59 +++++++++++++---------- 2 files changed, 80 insertions(+), 58 deletions(-) diff --git a/open_deep_researcher.ipynb b/open_deep_researcher.ipynb index 841e4c4..4117944 100644 --- a/open_deep_researcher.ipynb +++ b/open_deep_researcher.ipynb @@ -1,26 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyOe5BsaH0aplNCjknkFtnjg", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -28,11 +12,7 @@ }, { "cell_type": "code", - "source": [ - "!pip install nest_asyncio\n", - "import nest_asyncio\n", - "nest_asyncio.apply()" - ], + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -40,15 +20,19 @@ "id": "y7cTpP9rDZW-", "outputId": "5a443ad2-7a8d-4fef-f315-12108c28f1a2" }, - "execution_count": null, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Requirement already satisfied: nest_asyncio in /usr/local/lib/python3.11/dist-packages (1.6.0)\n" ] } + ], + "source": [ + "!pip install nest_asyncio\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" ] }, { @@ -62,6 +46,8 @@ "import asyncio\n", "import aiohttp\n", "import json\n", + "import re\n", + "import ast\n", "\n", "# =======================\n", "# Configuration Constants\n", @@ -267,14 +253,23 @@ " cleaned = response.strip()\n", " if cleaned == \"\":\n", " return \"\"\n", + " # First, try to directly evaluate the cleaned string\n", " try:\n", - " new_queries = eval(cleaned)\n", + " new_queries = ast.literal_eval(cleaned)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", - " else:\n", - " print(\"LLM did not return a list for new search queries. Response:\", response)\n", - " return []\n", " except Exception as e:\n", + " # Direct evaluation failed; try to extract the list part from the string.\n", + " match = re.search(r'(\\[.*\\])', cleaned, re.DOTALL)\n", + " if match:\n", + " list_str = match.group(1)\n", + " try:\n", + " new_queries = ast.literal_eval(list_str)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", + " except Exception as e_inner:\n", + " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", + " return []\n", " print(\"Error parsing new search queries:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", @@ -405,12 +400,28 @@ }, { "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "id": "46Q5XpapDJZT" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyOe5BsaH0aplNCjknkFtnjg", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/open_deep_researcher_gradio.ipynb b/open_deep_researcher_gradio.ipynb index 9d287bb..ab371ab 100644 --- a/open_deep_researcher_gradio.ipynb +++ b/open_deep_researcher_gradio.ipynb @@ -1,26 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyM6XDBP8oqAaLL0GMT0mBj+", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -43,6 +27,8 @@ "import aiohttp\n", "import gradio as gr\n", "import json\n", + "import re\n", + "import ast\n", "\n", "# ---------------------------\n", "# Configuration Constants\n", @@ -208,14 +194,23 @@ " cleaned = response.strip()\n", " if cleaned == \"\":\n", " return \"\"\n", + " # First, try to directly evaluate the cleaned string\n", " try:\n", - " new_queries = eval(cleaned)\n", + " new_queries = ast.literal_eval(cleaned)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", - " else:\n", - " print(\"LLM did not return a list for new search queries. Response:\", response)\n", - " return []\n", " except Exception as e:\n", + " # Direct evaluation failed; try to extract the list part from the string.\n", + " match = re.search(r'(\\[.*\\])', cleaned, re.DOTALL)\n", + " if match:\n", + " list_str = match.group(1)\n", + " try:\n", + " new_queries = ast.literal_eval(list_str)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", + " except Exception as e_inner:\n", + " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", + " return []\n", " print(\"Error parsing new search queries:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", @@ -344,5 +339,21 @@ "iface.launch()" ] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyM6XDBP8oqAaLL0GMT0mBj+", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From f34f7eb9f455df39918906fdd1008f4586245b20 Mon Sep 17 00:00:00 2001 From: benhaotang Date: Wed, 5 Feb 2025 02:39:36 +0100 Subject: [PATCH 2/2] fix vscode formatting issue --- open_deep_researcher.ipynb | 103 +++++++++++++++++------------- open_deep_researcher_gradio.ipynb | 82 ++++++++++++++---------- 2 files changed, 104 insertions(+), 81 deletions(-) diff --git a/open_deep_researcher.ipynb b/open_deep_researcher.ipynb index 4117944..258ea7e 100644 --- a/open_deep_researcher.ipynb +++ b/open_deep_researcher.ipynb @@ -1,10 +1,26 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOe5BsaH0aplNCjknkFtnjg", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, "cells": [ { "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "view-in-github" + "id": "view-in-github", + "colab_type": "text" }, "source": [ "\"Open" @@ -12,7 +28,11 @@ }, { "cell_type": "code", - "execution_count": null, + "source": [ + "!pip install nest_asyncio\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -20,19 +40,15 @@ "id": "y7cTpP9rDZW-", "outputId": "5a443ad2-7a8d-4fef-f315-12108c28f1a2" }, + "execution_count": null, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Requirement already satisfied: nest_asyncio in /usr/local/lib/python3.11/dist-packages (1.6.0)\n" ] } - ], - "source": [ - "!pip install nest_asyncio\n", - "import nest_asyncio\n", - "nest_asyncio.apply()" ] }, { @@ -46,8 +62,8 @@ "import asyncio\n", "import aiohttp\n", "import json\n", - "import re\n", - "import ast\n", + "import re", + "import ast", "\n", "# =======================\n", "# Configuration Constants\n", @@ -117,16 +133,26 @@ " ]\n", " response = await call_openrouter_async(session, messages)\n", " if response:\n", + " cleaned = response.strip()\n", + " # Remove triple backticks and language specifier if present\n", + " cleaned = re.sub(r\"```(?:\\w+)?\\n(.*?)\\n```\", r\"\\1\", cleaned, flags=re.DOTALL).strip()\n", " try:\n", - " # Expect exactly a Python list (e.g., \"['query1', 'query2']\")\n", - " search_queries = eval(response)\n", - " if isinstance(search_queries, list):\n", - " return search_queries\n", - " else:\n", - " print(\"LLM did not return a list. Response:\", response)\n", - " return []\n", + " new_queries = eval(cleaned)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", " except Exception as e:\n", - " print(\"Error parsing search queries:\", e, \"\\nResponse:\", response)\n", + " # Direct evaluation failed; try to extract the list part from the string\n", + " match = re.search(r\"(\\[.*\\])\", cleaned, re.DOTALL)\n", + " if match:\n", + " list_str = match.group(1)\n", + " try:\n", + " new_queries = eval(list_str)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", + " except Exception as e_inner:\n", + " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", + " return []\n", + " print(\"Error parsing new search queries or no search queries at all:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", "\n", @@ -253,24 +279,25 @@ " cleaned = response.strip()\n", " if cleaned == \"\":\n", " return \"\"\n", - " # First, try to directly evaluate the cleaned string\n", + " # Remove triple backticks and language specifier if present\n", + " cleaned = re.sub(r\"```(?:\\w+)?\\n(.*?)\\n```\", r\"\\1\", cleaned, flags=re.DOTALL).strip()\n", " try:\n", - " new_queries = ast.literal_eval(cleaned)\n", + " new_queries = eval(cleaned)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", " except Exception as e:\n", - " # Direct evaluation failed; try to extract the list part from the string.\n", - " match = re.search(r'(\\[.*\\])', cleaned, re.DOTALL)\n", + " # Direct evaluation failed; try to extract the list part from the string\n", + " match = re.search(r\"(\\[.*\\])\", cleaned, re.DOTALL)\n", " if match:\n", " list_str = match.group(1)\n", " try:\n", - " new_queries = ast.literal_eval(list_str)\n", + " new_queries = eval(list_str)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", " except Exception as e_inner:\n", " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", " return []\n", - " print(\"Error parsing new search queries:\", e, \"\\nResponse:\", response)\n", + " print(\"Error parsing new search queries or no search queries at all:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", "\n", @@ -400,28 +427,12 @@ }, { "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "id": "46Q5XpapDJZT" }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "colab": { - "authorship_tag": "ABX9TyOe5BsaH0aplNCjknkFtnjg", - "include_colab_link": true, - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" + "execution_count": null, + "outputs": [] } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/open_deep_researcher_gradio.ipynb b/open_deep_researcher_gradio.ipynb index ab371ab..9c8be57 100644 --- a/open_deep_researcher_gradio.ipynb +++ b/open_deep_researcher_gradio.ipynb @@ -1,10 +1,26 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyM6XDBP8oqAaLL0GMT0mBj+", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, "cells": [ { "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "view-in-github" + "id": "view-in-github", + "colab_type": "text" }, "source": [ "\"Open" @@ -27,8 +43,8 @@ "import aiohttp\n", "import gradio as gr\n", "import json\n", - "import re\n", - "import ast\n", + "import re", + "import ast", "\n", "# ---------------------------\n", "# Configuration Constants\n", @@ -86,15 +102,26 @@ " ]\n", " response = await call_openrouter_async(session, messages)\n", " if response:\n", + " cleaned = response.strip()\n", + " # Remove triple backticks and language specifier if present\n", + " cleaned = re.sub(r\"```(?:\\w+)?\\n(.*?)\\n```\", r\"\\1\", cleaned, flags=re.DOTALL).strip()\n", " try:\n", - " search_queries = eval(response)\n", - " if isinstance(search_queries, list):\n", - " return search_queries\n", - " else:\n", - " print(\"LLM did not return a list. Response:\", response)\n", - " return []\n", + " new_queries = eval(cleaned)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", " except Exception as e:\n", - " print(\"Error parsing search queries:\", e, \"\\nResponse:\", response)\n", + " # Direct evaluation failed; try to extract the list part from the string\n", + " match = re.search(r\"(\\[.*\\])\", cleaned, re.DOTALL)\n", + " if match:\n", + " list_str = match.group(1)\n", + " try:\n", + " new_queries = eval(list_str)\n", + " if isinstance(new_queries, list):\n", + " return new_queries\n", + " except Exception as e_inner:\n", + " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", + " return []\n", + " print(\"Error parsing new search queries or no search queries at all:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", "\n", @@ -194,24 +221,25 @@ " cleaned = response.strip()\n", " if cleaned == \"\":\n", " return \"\"\n", - " # First, try to directly evaluate the cleaned string\n", + " # Remove triple backticks and language specifier if present\n", + " cleaned = re.sub(r\"```(?:\\w+)?\\n(.*?)\\n```\", r\"\\1\", cleaned, flags=re.DOTALL).strip()\n", " try:\n", - " new_queries = ast.literal_eval(cleaned)\n", + " new_queries = eval(cleaned)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", " except Exception as e:\n", - " # Direct evaluation failed; try to extract the list part from the string.\n", - " match = re.search(r'(\\[.*\\])', cleaned, re.DOTALL)\n", + " # Direct evaluation failed; try to extract the list part from the string\n", + " match = re.search(r\"(\\[.*\\])\", cleaned, re.DOTALL)\n", " if match:\n", " list_str = match.group(1)\n", " try:\n", - " new_queries = ast.literal_eval(list_str)\n", + " new_queries = eval(list_str)\n", " if isinstance(new_queries, list):\n", " return new_queries\n", " except Exception as e_inner:\n", " print(\"Error parsing extracted list:\", e_inner, \"\\nExtracted text:\", list_str)\n", " return []\n", - " print(\"Error parsing new search queries:\", e, \"\\nResponse:\", response)\n", + " print(\"Error parsing new search queries or no search queries at all:\", e, \"\\nResponse:\", response)\n", " return []\n", " return []\n", "\n", @@ -339,21 +367,5 @@ "iface.launch()" ] } - ], - "metadata": { - "colab": { - "authorship_tag": "ABX9TyM6XDBP8oqAaLL0GMT0mBj+", - "include_colab_link": true, - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file