diff --git a/examples/gemini/python/docs-agent/CONTRIBUTING.md b/examples/gemini/python/docs-agent/CONTRIBUTING.md deleted file mode 100644 index 8956a61b8..000000000 --- a/examples/gemini/python/docs-agent/CONTRIBUTING.md +++ /dev/null @@ -1,32 +0,0 @@ -# How to Contribute - -We would love to accept your patches and contributions to this project. - -## Before you begin - -### Sign our Contributor License Agreement - -Contributions to this project must be accompanied by a -[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). -You (or your employer) retain the copyright to your contribution; this simply -gives us permission to use and redistribute your contributions as part of the -project. - -If you or your current employer have already signed the Google CLA (even if it -was for a different project), you probably don't need to do it again. - -Visit to see your current agreements or to -sign a new one. - -### Review our Community Guidelines - -This project follows [Google's Open Source Community -Guidelines](https://opensource.google/conduct/). - -## Contribution process - -### Code Reviews - -All submissions, including submissions by project members, require review. We -use [GitHub pull requests](https://docs.github.com/articles/about-pull-requests) -for this purpose. diff --git a/examples/gemini/python/docs-agent/LICENSE b/examples/gemini/python/docs-agent/LICENSE deleted file mode 100644 index 7a4a3ea24..000000000 --- a/examples/gemini/python/docs-agent/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/README.md b/examples/gemini/python/docs-agent/README.md deleted file mode 100644 index d3d368eb9..000000000 --- a/examples/gemini/python/docs-agent/README.md +++ /dev/null @@ -1,432 +0,0 @@ -# Docs Agent - -The Docs Agent project explores applications and use cases that involve using a large -corpus of documentation as a knowledge source for AI language models. - -Docs Agent provides a set of easy-to-use self-service tools designed to give you and -your team access to Google's [Gemini API][genai-doc-site] for learning, experimentation, -and project deployment. - -## Overview - -Docs Agent apps use a technique known as Retrieval Augmented Generation (RAG), which -allows you to bring your own documents as knowledge sources to AI language models. -This approach helps the AI language models to generate relevant and accurate responses -that are grounded in the information that you provide and control. 
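As a rough illustration of this retrieve-then-augment flow (a minimal sketch, not Docs Agent's actual implementation), the example below assumes a Chroma collection that has already been populated with documentation chunks, the `chromadb` and `google-generativeai` Python packages, a `GOOGLE_API_KEY` environment variable, and a hypothetical store path and collection name that Docs Agent would normally read from `config.yaml`:

```python
# Illustrative retrieve-then-augment (RAG) flow; not Docs Agent's actual code.
import os

import chromadb
import google.generativeai as genai

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Hypothetical store path and collection name (Docs Agent reads these from config.yaml).
client = chromadb.PersistentClient(path="vector_stores/chroma")
collection = client.get_collection("docs_collection")

question = "How do I add assets to a Flutter app?"

# 1. Embed the user question and retrieve the most relevant text chunks.
query_embedding = genai.embed_content(
    model="models/text-embedding-004",
    content=question,
    task_type="retrieval_query",
)["embedding"]
results = collection.query(query_embeddings=[query_embedding], n_results=5)
context = "\n\n".join(results["documents"][0])

# 2. Add the retrieved chunks as context to the prompt and ask the model.
prompt = (
    "Answer the question using only the context below.\n\n"
    f"Context:\n{context}\n\nQuestion: {question}"
)
model = genai.GenerativeModel("gemini-pro")
print(model.generate_content(prompt).text)
```

In the actual app, the retrieved chunks also carry URL metadata, which is how Docs Agent can return the URLs of the source documents alongside an answer.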
- -![Docs Agent architecture](docs/images/docs-agent-architecture-01.png) - -**Figure 1**. Docs Agent uses a vector database to retrieve context for augmenting prompts. - -Docs Agent apps are designed to be easily set up and configured in a Linux environment. -If you want to set up and launch the Docs Agent chat app on your host machine, check out -the [Set up Docs Agent][set-up-docs-agent] section below. - -### Summary of features - -The following list summarizes the tasks and features supported by Docs Agent: - -- **Process Markdown**: Split Markdown files into small plain text chunks. (See - [Docs Agent chunking process][chunking-process].) -- **Generate embeddings**: Use an embedding model to process text chunks into embeddings - and store them in a vector database. -- **Perform semantic search**: Compare embeddings in a vector database to retrieve - chunks that are most relevant to user questions. -- **Add context to a user question**: Add chunks returned from a semantic search as - [context][prompt-structure] to a prompt. -- **Fact-check responses**: This [experimental feature][fact-check-section] composes - a follow-up prompt and asks the language model to “fact-check” its own previous response. -- **Generate related questions**: In addition to answering a question, Docs Agent can - [suggest related questions][related-questions-section] based on the context of the - question. -- **Return URLs of source documents**: URLs are stored as chunks' metadata. This enables - Docs Agent to return the URLs of the source documents. -- **Collect feedback from users**: Docs Agent's web app has buttons that allow users - to [like responses][like-generated-responses] or [submit rewrites][submit-a-rewrite]. -- **Convert Google Docs, PDF, and Gmail into Markdown files**: This feature uses - [Apps Script][apps-script-readme] to convert Google Docs, PDF, and Gmail into - Markdown files, which then can be used as input datasets for Docs Agent. -- **Run benchmark test**: Docs Agent can [run benchmark test][benchmark-test] to measure - and compare the quality of text chunks, embeddings, and AI-generated responses. -- **Use the Semantic Retrieval API and AQA model**: Docs Agent can use Gemini's - [Semantic Retrieval API][semantic-api] to upload source documents to online corpora - and use the [AQA model][aqa-model] for answering questions. -- **Manage online corpora using the Docs Agent CLI**: The [Docs Agent CLI][cli-reference] - lets you create, update and delete online corpora using the Semantic Retrieval AI. -- **Prevent duplicate chunks and delete obsolete chunks in databases**: Docs Agent - uses [metadata in chunks][chunking-process] to prevent uploading duplicate chunks - and delete obsolete chunks that are no longer present in the source. -- **Run the Docs Agent CLI from anywhere in a terminal**: - [Set up the Docs Agent CLI][cli-readme] to make requests to the Gemini models - from anywhere in a terminal. -- **Support the Gemini 1.5 models**: Docs Agent works with the Gemini 1.5 models, - `gemini-1.5-pro-latest` and `text-embedding-004`. The new ["1.5"][new-15-mode] web app - mode uses all three Gemini models to their strength: AQA (`aqa`), Gemini 1.0 Pro - (`gemini-pro`), and Gemini 1.5 Pro (`gemini-1.5-pro-latest`). -- **Complete a task using the Docs Agent CLI**: The `agent runtask` command allows you - to run pre-defined chains of prompts, which are referred to as tasks. These tasks - simplify complex interactions by defining a series of steps that the Docs Agent will - execute. 
The tasks are defined in .yaml files stored in the [`tasks`][tasks-dir] - directory of your Docs Agent project. To run a task in this directory, for example: - - ```sh - agent runtask --task DraftReleaseNotes - ``` - -For more information on Docs Agent's architecture and features, -see the [Docs Agent concepts][docs-agent-concepts] page. - -![Docs Agent chat app](docs/images/docs-agent-chat-app-screenshot-01.png) - -**Figure 2**. A screenshot of the Docs Agent chat app launched using Flutter docs. - -## Set up Docs Agent - -**Note**: For instructions on the Docs Agent CLI setup, see the -[`README.md`][cli-readme] file in the `docs_agent/interfaces` directory. - -This section provides instructions on how to set up and launch the Docs Agent -chatbot web app on a Linux host machine. - -### 1. Prerequisites - -Setting up Docs Agent requires the following prerequisite items: - -- A Linux host machine - -- A [Google Cloud][google-cloud] project with the setup below: - - - An API key enabled with the Generative Language API (that is, - the [Gemini API][genai-doc-site]) - - - (**Optional**) [Authenticated OAuth client credentials][oauth-client] - stored on the host machine - -### 2 Update your host machine's environment - -Update your host machine's environment to prepare for the Docs Agent setup: - -1. Update the Linux package repositories on the host machine: - - ``` - sudo apt update - ``` - -2. Install the following dependencies: - - ``` - sudo apt install git pipx python3-venv - ``` - -3. Install `poetry`: - - ``` - pipx install poetry - ``` - -4. To add `$HOME/.local/bin` to your `PATH` variable, run the following - command: - - ``` - pipx ensurepath - ``` - -5. To set the Google API key as a environment variable, add the following - line to your `$HOME/.bashrc` file: - - ``` - export GOOGLE_API_KEY= - ``` - - Replace `` with the API key to the - [Gemini API][genai-doc-site]. - -6. Update your environment: - - ``` - source ~/.bashrc - ``` - -### 3. (Optional) Authorize credentials for Docs Agent - -**This step is needed only if you plan to use [Gemini's AQA model][aqa-model-concept].** - -Authorize Google Cloud credentials on your host machine: - -1. Download the `client_secret.json` file from your - [Google Cloud project][authorize-credentials]. - -2. Copy the `client_secret.json` file to your host machine. - -3. Install the Google Cloud SDK on your host machine: - - ``` - sudo apt install google-cloud-sdk - ``` - -4. To authenticate credentials, run the following command in the directory of - the host machine where the `client_secret.json` file is located: - - ``` - gcloud auth application-default login --client-id-file=client_secret.json --scopes='https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/generative-language.retriever' - ``` - - This command opens a browser and asks to log in using your Google account. - -5. Follow the instructions on the browser and click **Allow** to authenticate. - - This saves the authenticated credentials for Docs Agent - (`application_default_credentials.json`) in the `$HOME/.config/gcloud/` - directory of your host machine. - -### 4. Clone the Docs Agent project - -**Note**: This guide assumes that you're creating a new project directory -from your `$HOME` directory. - -Clone the Docs Agent project and install dependencies: - -1. Clone the following repo: - - ``` - git clone https://github.com/google/generative-ai-docs.git - ``` - -2. 
Go to the Docs Agent project directory: - - ``` - cd generative-ai-docs/examples/gemini/python/docs-agent - ``` - -3. Install dependencies using `poetry`: - - ``` - poetry install - ``` - -4. Enter the `poetry` shell environment: - - ``` - poetry shell - ``` - - **Important**: From this point, all `agent` command lines below need to - run in this `poetry shell` environment. - -### 5. Edit the Docs Agent configuration file - -This guide uses the [open source Flutter documents][flutter-docs-src] as an example dataset, -which are the source Markdown files for the [Flutter website][flutter-docs-site]. - -To complete this setup walkthrough, run the command below to download the open source -Flutter documents somewhere on your host machine (for instance, in your `$HOME` directory): - -``` -git clone --recurse-submodules https://github.com/flutter/website.git -``` - -Update settings in the Docs Agent project to use your custom dataset: - -1. Go to the Docs Agent project home directory, for example: - - ``` - cd $HOME/generative-ai-docs/examples/gemini/python/docs-agent - ``` - -2. Open the [`config.yaml`][config-yaml] file using a text editor, for example: - - ``` - nano config.yaml - ``` - -3. Edit the file to update the `product_name` field, for example: - - ``` - product_name: "Flutter" - ``` - - This product name is displayed on the Docs Agent chat app UI. - -4. Under the `inputs` field, define the following entries to specify the directories - that contain your source Markdown files. - - - `path`: The directory where the source Markdown files are stored. - - `url_prefix`: The prefix used to create URLs for the source Markdown files. - - **Important**: If URLs do not exist for your Markdown files, you still need to - provide a placeholder string in the `url_prefix` field. - - The example below shows the entries for the Flutter documents downloaded in the - `$HOME/website` directory: - - ``` - inputs: - - path: "/usr/local/home/user01/website/src/content" - url_prefix: "https://docs.flutter.dev" - ``` - - You can also provide multiple input directories (`path` and `url_prefix` sets) under - the `inputs` field, for example: - - ``` - inputs: - - path: "/usr/local/home/user01/website/src/content/ui" - url_prefix: "https://docs.flutter.dev/ui" - - path: "/usr/local/home/user01/website/src/content/tools" - url_prefix: "https://docs.flutter.dev/tools" - ``` - -5. If you want to use the `gemini-pro` model with a local vector database setup - (`chroma`), use the following settings: - - ``` - models: - - language_model: "models/gemini-pro" - ... - db_type: "chroma" - ``` - - (**Optional**) Or if you want to use the Gemini AQA model and populate - a corpus online via the [Semantic Retrieval API][semantic-api], use the - following settings (and update the `corpus_name` field): - - ``` - models: - - language_model: "models/aqa" - ... - db_type: "google_semantic_retriever" - db_configs: - ... - - db_type: "google_semantic_retriever" - corpus_name: "corpora/flutter-dev" - ``` - -6. Save the `config.yaml` file and exit the text editor. - - -### 6. Populate a new vector database - -The Docs Agent CLI can help you chunk documents, generate embeddings, extract metadata, -and populate a vector database from Markdown files and more. - -**Note**: The `agent` commands below need to run within the `poetry shell` environment. - -To populate a new vector database: - -1. Go to the Docs Agent project home directory, for example: - - ``` - cd $HOME/generative-ai-docs/examples/gemini/python/docs-agent - ``` - -2.
Process Markdown files into small text chunks: - - ``` - agent chunk - ``` - - The command takes documents under the `inputs` fields (specified in your - `config.yaml` file), splits the documents into small text chunk files, and - stores them in the `output_path` directory. - -3. Create and populate a new vector database: - - ``` - agent populate - ``` - - This command takes the plain text files in the `output_path` directory - and creates a new Chroma collection in the `vector_stores/` directory. - -### 7. Launch the Docs Agent chat app - -Docs Agent's Flask-based chat app lets users interact with the Docs Agent service through -a web browser. - -**Note**: The `agent chatbot` command needs to run within the `poetry shell` environment. - -To start the Docs Agent chat app: - -1. Go to the Docs Agent project home directory, for example: - - ``` - cd $HOME/generative-ai-docs/examples/gemini/python/docs-agent - ``` - -2. Launch the Docs Agent chat app: - - ``` - agent chatbot - ``` - - The Docs Agent chat app runs on port 5000 by default. If you have an application - already running on port 5000 on your host machine, you can use the `--port` flag to - specify a different port (for example, `agent chatbot --port 5050`). - - **Note**: If this `agent chatbot` command fails to run, check the `HOSTNAME` environment - variable on your host machine (for example, `echo $HOSTNAME`). If this variable is unset, - try setting it to `localhost` by running `export HOSTNAME=localhost`. - - Once the app starts running, this command prints output similar to the following: - - ``` - $ agent chatbot - Launching the chatbot UI. - * Serving Flask app 'docs_agent.interfaces.chatbot' - * Debug mode: on - INFO:werkzeug:WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. - * Running on http://example.com:5000 - INFO:werkzeug:Press CTRL+C to quit - INFO:werkzeug: * Restarting with stat - Launching the chatbot UI. - WARNING:werkzeug: * Debugger is active! - INFO:werkzeug: * Debugger PIN: 391-260-142 - ``` - - Notice the line that shows the URL of this server (`http://example.com:5000` - in the example above). - -3. Open the URL above in a browser. - - Now, users can start asking questions related to your product. - -**The Docs Agent chat app is all set!** - -## Contributors - -Nick Van der Auwermeulen (`@nickvander`), Rundong Du (`@rundong08`), -Meggin Kearney (`@Meggin`), and Kyo Lee (`@kyolee415`).
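For the indexing side of the setup above (the `agent chunk` and `agent populate` steps), a minimal sketch of one possible implementation follows; it is illustrative only, not Docs Agent's actual code. It assumes the `chromadb` and `google-generativeai` packages, a `GOOGLE_API_KEY` environment variable, and hypothetical input paths, collection name, and chunking rules that Docs Agent would normally take from `config.yaml`:

```python
# Illustrative indexing flow (chunk -> embed -> populate); not Docs Agent's actual code.
import os
import pathlib

import chromadb
import google.generativeai as genai

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Hypothetical input settings; Docs Agent reads `path` and `url_prefix` from config.yaml.
docs_path = pathlib.Path.home() / "website" / "src" / "content"
url_prefix = "https://docs.flutter.dev"

client = chromadb.PersistentClient(path="vector_stores/chroma")
collection = client.get_or_create_collection("docs_collection")

for md_file in docs_path.rglob("*.md"):
    text = md_file.read_text(encoding="utf-8")
    # Naive chunking on level-2 headings; Docs Agent's real chunker is more careful.
    chunks = [chunk.strip() for chunk in text.split("\n## ") if chunk.strip()]
    for i, chunk in enumerate(chunks):
        embedding = genai.embed_content(
            model="models/text-embedding-004",
            content=chunk,
            task_type="retrieval_document",
        )["embedding"]
        collection.add(
            ids=[f"{md_file.relative_to(docs_path)}#{i}"],
            documents=[chunk],
            embeddings=[embedding],
            # The source URL is stored as chunk metadata so answers can cite it later.
            metadatas=[{"url": f"{url_prefix}/{md_file.relative_to(docs_path)}"}],
        )
```

Storing the source URL as chunk metadata is what lets the chat app return the URLs of the source documents, and chunk metadata is also how Docs Agent avoids uploading duplicate chunks when the database is repopulated.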
- - - -[contribute-to-docs-agent]: #contribute-to-docs-agent -[set-up-docs-agent]: #set-up-docs-agent -[preprocess-dir]: ./docs_agent/preprocess/ -[populate-vector-database]: ./docs_agent/preprocess/populate_vector_database.py -[fact-check-section]: ./docs/concepts.md#using-a-language-model-to-fact_check-its-own-response -[related-questions-section]: ./docs/concepts.md#using-a-language-model-to-suggest-related-questions -[submit-a-rewrite]: ./docs/concepts.md#enabling-users-to-submit-a-rewrite-of-a-generated-response -[like-generated-responses]: ./docs/concepts.md#enabling-users-to-like-generated-responses -[populate-db-steps]: #populate-a-new-vector-database-from-markdown-files -[start-the-app-steps]: #start-the-docs-agent-chat-app -[genai-doc-site]: https://ai.google.dev/docs/gemini_api_overview -[chroma-docs]: https://docs.trychroma.com/ -[flutter-docs-src]: https://github.com/flutter/website/tree/main/src -[flutter-docs-site]: https://docs.flutter.dev/ -[apps-script-readme]: ./apps_script/README.md -[scripts-readme]: ./docs_agent/preprocess/README.md -[config-yaml]: config.yaml -[benchmark-test]: ./docs_agent/benchmarks/README.md -[semantic-api]: https://ai.google.dev/docs/semantic_retriever -[aqa-model]: https://ai.google.dev/models/gemini#model_variations -[authorize-credentials]: https://ai.google.dev/docs/oauth_quickstart#authorize-credentials -[aqa-model-concept]: ./docs/concepts.md#using-the-semantic-retrieval-api-and-aqa-model -[prompt-structure]: ./docs/concepts.md#structure-of-a-prompt-to-a-language-model -[docs-agent-concepts]: ./docs/concepts.md -[google-cloud]: https://console.cloud.google.com/ -[oauth-client]: https://ai.google.dev/docs/oauth_quickstart#set-cloud -[cli-readme]: docs_agent/interfaces/README.md -[cli-reference]: docs/cli-reference.md -[chunking-process]: docs/chunking-process.md -[new-15-mode]: docs/config-reference.md#app_mode -[tasks-dir]: tasks/ diff --git a/examples/gemini/python/docs-agent/apps_script/README.md b/examples/gemini/python/docs-agent/apps_script/README.md deleted file mode 100644 index d333f36a8..000000000 --- a/examples/gemini/python/docs-agent/apps_script/README.md +++ /dev/null @@ -1,165 +0,0 @@ -# Convert Google Docs, PDF, and Gmail to Markdown files - -The collection of scripts in this `apps_script` directory allows you to convert -the contents of Google Drive folders and Gmail to Markdown files that are -compatible with Docs Agent. - -The steps are: - -1. [Prepare a Google Drive folder](#1_prepare-a-google-drive-folder). -2. [Mount Google Drive on your host machine](#2_mount-google-drive-on-your-host-machine). -3. [Create an Apps Script project](#3_create-an-apps-script-project). -4. [Edit and run main.gs on Apps Script](#4_edit-and-run-main_gs-on-apps-script). -5. [Update config.yaml to include the mounted directory](#5_update-config_yaml-to-include-the-mounted-directory). - -## 1. Prepare a Google Drive folder - -First, create a new folder in Google Drive and add your Google Docs (which will be -used as source documents for Docs Agent) to the folder. - -Do the following: - -1. Browse to https://drive.google.com/. -1. Click **+ New** on the top left corner. -1. Click **New folder**. -1. Name your new folder (for example, `my source Google Docs`). -1. To enter the newly created folder, double-click the folder. -1. Add (or move) your source Google Docs to this new folder. - -## 2.
Mount Google Drive on your host machine - -Mount your Google Drive to your host machine, so that it becomes easy to access the -folders in Google Drive from your host machine (later in step 5). - -There are a variety of methods and tools available online that enable this setup -(for example, see [`google-drive-ocamlfuse`][google-drive-ocamlfuse] for Linux machines). - -## 3. Create an Apps Script project - -Create a new Apps Script project and copy all the `.gs` scripts in this -`apps_script` directory to your new Apps Script project. - -Do the following: - -1. Browse to https://script.google.com/. -1. Click **New Project**. -1. At the top of the page, click **Untitled Project** and enter a meaningful - title (for example, `gDocs to Docs Agent`). -1. Click the **+** icon next to **Files**. -1. Click **Script**. -1. Name the new script to be one of the `.gs` files in this `apps_script` directory - (for example, `drive_to_markdown`). -1. Copy the content of the `.gs` file to the new script on your Apps Script project. -1. To save, click the "Save project" icon in the toolbar. -1. Repeat the steps until all the `.gs` files are copied to your Apps Script project. -1. Click the **+** icon next to **Services**. -1. Scroll down and click **Drive API**. -1. Select **v2**. -1. Click **Add**. - -You are now ready to edit the parameters on the `main.gs` file to select a folder -in Google Drive and export emails from Gmail. - -![Apps Script project](../docs/images/apps-script-screenshot-01.png) - -**Figure 1**. A screenshot of an example Apps Script project. - -## 4. Edit and run main.gs on Apps Script - -Edit the `main.gs` file on your Apps Script project to select which functions -(features) you want to run. - -Do the following: - -1. Browse to your project on https://script.google.com/. - -1. Open the `main.gs` file. - -1. In the `main` function, comment out any functions that you don't want to run - (see Figure 1): - - * `convertDriveFolderToMDForDocsAgent(folderInput)`: This function converts - the contents of a Google Drive folder to Markdown files (currently only Google - Docs and PDF). Make sure to specify a valid Google Drive folder in the `folderInput` - variable. Use the name of the folder created in **step 1** above, for example: - - ``` - var folderInput = "my source Google Docs" - function main() { - convertDriveFolderToMDForDocsAgent(folderInput); - //exportEmailsToMarkdown(SEARCH_QUERY, folderOutput); - } - ``` - - * `exportEmailsToMarkdown(SEARCH_QUERY, folderOutput)`: This function converts - the emails returned from a Gmail search query into Markdown files. Make sure to - specify a search query in the `SEARCH_QUERY` variable. You can test this search - query directly in the Gmail search bar. Also, specify an output directory for the - resulting emails. - -1. To save, click the "Save project" icon in the toolbar. - -1. Click the "Run" icon in the toolbar. - - When this script runs successfully, the Execution log panel prints output similar - to the following: - - ``` - 9:55:59 PM Notice Execution completed - ``` - - Also, the script creates a new folder in your Google Drive and stores the converted - Markdown files in this folder. The name of this new folder has `-output` as a postfix. - For example, with the folder name `my source Google Docs`, the name of the new folder - is `my source Google Docs-output`. 
- - With Google Drive mounted on your host machine in step 2, you can now directly access - this folder from the host machine, for example: - - ``` - user@hostname:~/DriveFileStream/My Drive/my source Google Docs-output$ ls - Copy_of_My_Google_Docs_To_Be_Converted.md - ``` - -## 5. Update config.yaml to include the mounted directory - -Once you have your Google Drive mounted on the host machine, you can now -specify one of its folders as an input source directory for Docs Agent. - -Do the following: - -1. In the Docs Agent project, open the [`config.yaml`][config-yaml] file - with a text editor. - -1. Specify your mounted Google Drive folder as an `input` group, for example: - - ``` - input: - - path: "/usr/local/home/user01/DriveFileStream/My Drive/my source Google Docs-output" - url_prefix: "docs.google.com" - ``` - - You **must** specify a value to the `url_prefix` field, such as `docs.google.com`. - Currently this value is used to generate hashes for the content. - -1. (**Optional**) Add an additional Google Drive folder for your exported emails, - for example: - - ``` - input: - - path: "/usr/local/home/user01/DriveFileStream/My Drive/my source Google Docs-output" - url_prefix: "docs.google.com" - - path: "/usr/local/home/user01/DriveFileStream/My Drive/psa-output" - url_prefix: "mail.google.com" - ``` - -1. Save the changes in the `config.yaml` file. - -You're all set with a new documentation source for Docs Agent. You can now follow the -instructions in the project's main [`README`][main-readme] file to launch the Docs Agent app. - - - -[config-yaml]: ../config.yaml -[main-readme]: ../README.md -[google-drive-ocamlfuse]: https://github.com/astrada/google-drive-ocamlfuse diff --git a/examples/gemini/python/docs-agent/apps_script/drive_to_markdown.gs b/examples/gemini/python/docs-agent/apps_script/drive_to_markdown.gs deleted file mode 100644 index b19d33d84..000000000 --- a/examples/gemini/python/docs-agent/apps_script/drive_to_markdown.gs +++ /dev/null @@ -1,240 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -function convertDriveFolderToMDForDocsAgent(folderName, outputFolderName=""){ - gdoc_count = 0; - pdf_count = 0; - new_file_count = 0; - updated_file_count = 0; - unchanged_file_count = 0; - gdoc_count, pdf_count, new_file_count, updated_file_count, unchanged_file_count = convertDriveFolder(folderName, outputFolderName=outputFolderName) - let conversion_count = pdf_count + gdoc_count - let file_count = new_file_count + updated_file_count + unchanged_file_count - Logger.log("Converted a total of: " + gdoc_count + " Google Doc files."); - Logger.log("Converted a total of: " + pdf_count + " PDF files."); - Logger.log("Converted a grand total of: " + conversion_count + " files."); - Logger.log("New files: " + new_file_count) - Logger.log("Updated a total of: " + updated_file_count + " files.") - Logger.log("Files that haven't changed: " + unchanged_file_count); - Logger.log("Input directory had a total of: " + file_count + " files.") -} - -function convertDriveFolder(folderName, outputFolderName="", indexFile="") { - - //Checks if input folder exists or exits - if(folderExistsInput(folderName)){ - var file_count = 0; - var folders = DriveApp.getFoldersByName(folderName); - if (outputFolderName=="") { - var folderOutput = folderName + "-output"; - var output_file_name = folderName + "-index"; - } - else { - var folderOutput = outputFolderName + "-output"; - var output_file_name = outputFolderName + "-index"; - } - Logger.log("Output directory: "+ folderOutput); - folderExistsOrCreate(folderOutput); - var folderOutputObj = DriveApp.getFoldersByName(folderOutput); - if (folderOutputObj.hasNext()){ - var folderOutputName = folderOutputObj.next(); - } - if (indexFile=="") { - var sheet = checkIndexOutputOrCreate(output_file_name, folderOutputName); - var timeZone = Session.getScriptTimeZone(); - var date = Utilities.formatDate(new Date(), timeZone, "MM-dd-yyyy HH:mm:ss z"); - sheet.appendRow(["Created: ", date]) - sheet.appendRow(["Name","ID", "URL", "Markdown ID", "Markdown Output", "Date Created", "Last Updated", "Type", "Folder", "MD5 hash", "Status"]); - } - else { - var sheet = indexFile - } - // var sheet_id = sheet.getId(); - var foldersnext = folders.next(); - var myfiles = foldersnext.getFiles(); - var new_file_count = 0; - var unchanged_file_count = 0; - var updated_file_count = 0; - var gdoc_count = 0; - var pdf_count = 0; - var start_data_row = 2; - var status = "New content"; - - while (myfiles.hasNext()) { - var myfile = myfiles.next(); - var ftype = myfile.getMimeType(); - // If this is a shorcut, retrieve the target file - if (ftype == "application/vnd.google-apps.shortcut") { - var fid = myfile.getTargetId(); - var myfile = DriveApp.getFileById(fid); - var ftype = myfile.getMimeType(); - } - else{ - var fid = myfile.getId(); - } - if (ftype == "application/vnd.google-apps.folder") { - var folder = DriveApp.getFolderById(fid); - Logger.log("Sub-directory: " + folder); - sub_gdoc_count = 0; - sub_pdf_count = 0; - sub_new_file_count = 0; - sub_updated_file_count = 0; - sub_unchanged_file_count = 0; - sub_gdoc_count, sub_pdf_count, sub_new_file_count, sub_updated_file_count, sub_unchanged_file_count = convertDriveFolder(folder, outputFolderName=foldersnext, indexFile=sheet); - gdoc_count += sub_gdoc_count; - pdf_count += sub_pdf_count; - new_file_count += sub_new_file_count; - updated_file_count += sub_updated_file_count; - unchanged_file_count += sub_unchanged_file_count; - continue; - } - var fname = sanitizeFileName(myfile.getName()); - var fdate = myfile.getLastUpdated(); - 
var furl = myfile.getUrl(); - var fcreate = myfile.getDateCreated(); - - //Function returns an array, assign each array value to seperate variables - var backup_results = returnBackupHash(sheet, "Backup", fid, start_data_row, 1, 9, 3); - if (backup_results != undefined && backup_results[0] != "no_results") { - var backup_fid = backup_results[0]; - var md5_backup = backup_results[1]; - var mdoutput_backup_id = backup_results[2]; - } - if (ftype == "application/vnd.google-apps.document") { - Logger.log("File: " + fname + " is a Google doc."); - let gdoc = DocumentApp.openById(fid); - let gdoc_blob = gdoc.getBody().getText(); - var md5_hash = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5,gdoc_blob, - Utilities.Charset.US_ASCII); - var hash_str = byteToStr(md5_hash); - if (backup_fid == fid && hash_str == md5_backup) { - Logger.log("File is unchanged. Skipping conversion."); - if (mdoutput_backup_id){ - var saved_file = DriveApp.getFileById(mdoutput_backup_id); - var saved_file_id = saved_file.getId(); - } - status = "Unchanged content"; - unchanged_file_count += 1; - var convert_file = false; - } - else if (backup_fid == fid && hash_str != md5_backup){ - status = "Updated content"; - updated_file_count += 1; - var convert_file = true; - } - else { - status = "New content"; - new_file_count += 1; - var convert_file = true; - } - if (convert_file){ - var frontmatter = "---" + "\n"; - frontmatter += "title: \"" + fname + "\"\n"; - frontmatter += "type: \"" + ftype + "\"\n"; - frontmatter += "id: \"" + fid + "\"\n"; - frontmatter += "created: \"" + fcreate + "\"\n"; - frontmatter += "updated: \"" + fdate + "\"\n"; - frontmatter += "URL: \"" + furl + "\"\n"; - frontmatter += "---" + "\n\n"; - var saved_file = convertDocumentToMarkdown(gdoc, folderOutputName, frontmatter); - var saved_file_id = saved_file.getId(); - Logger.log("Finished converting file: " + fname + " to markdown."); - Logger.log("Markdown file: " + saved_file); - status = "New content"; - gdoc_count += 1; - } - file_count += 1; - } - if (ftype == "application/pdf") { - // Converts PDFs - First to a temporary Google Doc and then use convertDocumentToMarkdown to convert to markdown with frontmatter - Logger.log("File: " + fname + " is a PDF."); - let pdfBlob = DriveApp.getFileById(fid).getBlob(); - let pdfblobText = pdfBlob.getDataAsString(); - var md5_hash = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5,pdfblobText, - Utilities.Charset.US_ASCII); - var hash_str = byteToStr(md5_hash); - if (backup_fid == fid && hash_str == md5_backup) { - Logger.log("File is unchanged. 
Skipping conversion."); - if (mdoutput_backup_id){ - var saved_file = DriveApp.getFileById(mdoutput_backup_id); - var saved_file_id = saved_file.getId(); - } - status = "Unchanged content"; - unchanged_file_count += 1; - var convert_file = false; - } - else if (backup_fid == fid && hash_str != md5_backup){ - status = "Updated content"; - updated_file_count += 1; - var convert_file = true; - } - else { - status = "New content"; - new_file_count += 1; - var convert_file = true; - } - if (convert_file){ - let temp_doc_name = pdfBlob.getName() + "-temp"; - let temp_doc = {title: temp_doc_name, mimeType: pdfBlob.getContentType(), parents: [{id: folderOutputName.getId()}]} - let options = {ocr: true}; - let output = Drive.Files.insert(temp_doc, pdfBlob, options); - let output_id = output.getId(); - let gdoc = DocumentApp.openById(output_id); - var frontmatter = "---" + "\n"; - frontmatter += "title: \"" + fname + "\"\n"; - frontmatter += "type: \"" + ftype + "\"\n"; - frontmatter += "id: \"" + fid + "\"\n"; - frontmatter += "created: \"" + fcreate + "\"\n"; - frontmatter += "updated: \"" + fdate + "\"\n"; - frontmatter += "URL: \"" + furl + "\"\n"; - frontmatter += "---" + "\n\n"; - var saved_file = convertDocumentToMarkdown(gdoc, folderOutputName, frontmatter); - var saved_file_id = saved_file.getId(); - Logger.log("Finished converting file: "+ fname + " to markdown."); - Logger.log("Markdown file: " + saved_file); - Logger.log("Clearing temporary gdoc" ); - let output_file = DriveApp.getFileById(output_id); - output_file.setTrashed(true); - status = "New content"; - pdf_count += 1; - } - file_count += 1; - } - let md_chip = createRichText(saved_file); - let original_chip = createRichText(myfile); - let folder_chip = createRichText(foldersnext); - metadata = [ - fname, - fid, - "original_chip", - saved_file_id, - "md_chip", - fcreate, - fdate, - ftype, - "folder_chip", - hash_str, - status, - ]; - sheet.appendRow(metadata); - // Return final row to inserRichText into correct rows - row_number = sheet.getLastRow(); - insertRichText(sheet, original_chip, "C", row_number); - insertRichText(sheet, md_chip, "E", row_number); - insertRichText(sheet, folder_chip, "I", row_number); - } - } - return gdoc_count, pdf_count, new_file_count, updated_file_count, unchanged_file_count -} diff --git a/examples/gemini/python/docs-agent/apps_script/exportmd.gs b/examples/gemini/python/docs-agent/apps_script/exportmd.gs deleted file mode 100644 index 40a4461ff..000000000 --- a/examples/gemini/python/docs-agent/apps_script/exportmd.gs +++ /dev/null @@ -1,1310 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Original script is from: -https://github.com/lmmx/gdocs2md-html/blob/master/exportmd.gs -and commit: 0d86cfa -Parsing from mangini/gdocs2md. -Modified by clearf to add files to the google directory structure. -Modified by lmmx to write Markdown, going back to HTML-incorporation. 
- -Usage: - NB: don't use on top-level doc (in root Drive folder) See comment in setupScript function. - Adding this script to your doc: - - Tools > Script Manager > New - - Select "Blank Project", then paste this code in and save. - Running the script: - - Tools > Script Manager - - Select "convertDocumentToMarkdown" function. - - Click Run button. - - Converted doc will be added to a "Markdown" folder in the source document's directories. - - Images will be added to a subfolder of the "Markdown" folder. -*/ - -function onInstall(e) { - onOpen(e); -} - -function onOpen() { - // Add a menu with some items, some separators, and a sub-menu. - setupScript(); -// In future: -// DocumentApp.getUi().createAddonMenu(); - DocumentApp.getUi().createMenu('Markdown') - .addItem('View as markdown', 'markdownPopup') - .addSubMenu(DocumentApp.getUi().createMenu('Export \u2192 markdown') - .addItem('Export to local file', 'convertSingleDoc') - .addItem('Export entire folder to local file', 'convertFolder') - .addItem('Customise markdown conversion', 'changeDefaults')) - .addSeparator() - .addSubMenu(DocumentApp.getUi().createMenu('Toggle comment visibility') - .addItem('Image source URLs', 'toggleImageSourceStatus') - .addItem('All comments', 'toggleCommentStatus')) - .addItem("Add comment", 'addCommentDummy') - .addToUi(); -} - -function changeDefaults() { - var ui = DocumentApp.getUi(); - var default_settings = '{ use your imagination... }'; - var greeting = ui.alert('This should be set up to display defaults from variables passed to getDocComments etc., e.g. something like:\n\nDefault settings are:' - + '\ncomments - not checking deleted comments.\nDocument - this document (alternatively specify a document ID).' - + '\n\nClick OK to edit these, or cancel.', - ui.ButtonSet.OK_CANCEL); - ui.alert("There's not really need for this yet, so this won't proceed, regardless of what you just pressed."); - return; - - // Future: - if (greeting == ui.Button.CANCEL) { - ui.alert("Alright, never mind!"); - return; - } - // otherwise user clicked OK - // user clicked OK, to proceed with editing these defaults. Ask case by case whether to edit - - var response = ui.prompt('What is x (default y)?', ui.ButtonSet.YES_NO_CANCEL); - - // Example code from docs at https://developers.google.com/apps-script/reference/base/button-set - // Process the user's response. 
- if (response.getSelectedButton() == ui.Button.YES) { - Logger.log('The user\'s name is %s.', response.getResponseText()); - } else if (response.getSelectedButton() == ui.Button.NO) { - Logger.log('The user didn\'t want to provide a name.'); - } else { - Logger.log('The user clicked the close button in the dialog\'s title bar.'); - } -} - -function setupScript() { - var script_properties = PropertiesService.getScriptProperties(); - script_properties.setProperty("user_email", Drive.About.get().user.emailAddress); - - // manual way to do the following: - // script_properties.setProperty("folder_id", "INSERT_FOLDER_ID_HERE"); - // script_properties.setProperty("document_id", "INSERT_FILE_ID_HERE"); - - var doc_id = DocumentApp.getActiveDocument().getId(); - script_properties.setProperty("document_id", doc_id); - var doc_parents = DriveApp.getFileById(doc_id).getParents(); - var folders = doc_parents; - while (folders.hasNext()) { - var folder = folders.next(); - var folder_id = folder.getId(); - } - script_properties.setProperty("folder_id", folder_id); - script_properties.setProperty("image_folder_prefix", ""); // add if modifying image location -} - -function addCommentDummy() { - // Dummy function to be switched during development for addComment - DocumentApp.getUi() - .alert('Cancelling comment entry', - "There's not currently a readable anchor for Google Docs - you need to write your own!" - - + "\n\nThe infrastructure for using such an anchoring schema is sketched out in" - + " the exportmd.gs script's addComment function, for an anchor defined in anchor_props" - - + "\n\nSee github.com/lmmx/devnotes/wiki/Custom-Google-Docs-comment-anchoring-schema", - DocumentApp.getUi().ButtonSet.OK - ); - return; -} - -function addComment() { - - var doc_id = PropertiesService.getScriptProperties().getProperty('document_id'); - var user_email = PropertiesService.getScriptProperties().getProperty('email'); -/* Drive.Comments.insert({content: "hello world", - context: { - type: 'text/html', - value: 'hinges' - } - }, document_id); */ - var revision_list = Drive.Revisions.list(doc_id).items; - var recent_revision_id = revision_list[revision_list.length - 1].id; - var anchor_props = { - revision_id: recent_revision_id, - starting_offset: '', - offset_length: '', - total_chars: '' - } - insertComment(doc_id, 'hinges', 'Hello world!', my_email, anchor_props); -} - -function insertComment(fileId, selected_text, content, user_email, anchor_props) { - - // NB Deal with handling missing args - - /* - anchor_props is an object with 4 properties: - - revision_id, - - starting_offset, - - offset_length, - - total_chars - */ - - var context = Drive.newCommentContext(); - context.value = selected_text; - context.type = 'text/html'; - var comment = Drive.newComment(); - comment.kind = 'drive#comment'; - var author = Drive.newUser(); - author.kind = 'drive#user'; - author.displayName = user_email; - author.isAuthenticatedUser = true; - comment.author = author; - comment.content = type; - comment.context = context; - comment.status = 'open'; - comment.anchor = "{'r':" - + anchor_props.revision_id - + ",'a':[{'txt':{'o':" - + anchor_props.starting_offset - + ",'l':" - + anchor_props.offset_length - + ",'ml':" - + anchor_props.total_chars - + "}}]}"; - comment.fileId = fileId; - Drive.Comments.insert(comment, fileId); -} - -function decodeScriptSwitches(optional_storage_name) { - var property_name = (typeof(optional_storage_name) == 'string') ? 
optional_storage_name : 'switch_settings'; - var script_properties = PropertiesService.getScriptProperties(); - return script_properties - .getProperty(property_name) - .replace(/{|}/g,'') // Get the statements out of brackets... - .replace(',', ';'); // ...swap the separator for a semi-colon... - // ...evaluate the stored object string as statements upon string return and voila, switches interpreted -} - - -function getDocComments(comment_list_settings) { - var possible_settings = ['images', 'include_deleted']; - - // switches are processed and set on a script-wide property called "comment_switches" - var property_name = 'comment_switches'; - switchHandler(comment_list_settings, possible_settings, property_name); - - var script_properties = PropertiesService.getScriptProperties(); - var comment_switches = decodeScriptSwitches(property_name); - eval(comment_switches); - - var document_id = script_properties.getProperty("document_id"); - var comments_list = Drive.Comments.list(document_id, - {includeDeleted: include_deleted, - maxResults: 100 }); // 0 to 100, default 20 - // See https://developers.google.com/drive/v2/reference/comments/list for all options - var comment_array = []; - var image_sources = []; - // To collect all comments' image URLs to match against inlineImage class elements LINK_URL attribute - - for (var i = 0; i < comments_list.items.length; i++) { - var comment = comments_list.items[i]; - var comment_text = comment.content; - var comment_status = comment.status; - /* - images is a generic parameter passed in as a switch to - return image URL-containing comments only. - - If the parameter is provided, it's no longer undefined. - */ - var img_url_regex = /(https?:\/\/.+?\.(png|gif|jpe?g))/; - var has_img_url = img_url_regex.test(comment_text); - - if (images && !has_img_url) continue; // no image URL, don't store comment - if (has_img_url) image_sources.push(RegExp.$1); - comment_array.push(comment); - } - script_properties.setProperty('image_source_URLs', image_sources) - return comment_array; -} - -function isValidAttrib(attribute) { // Sanity check function, called per element in array - - // Possible list of attributes to check against (leaving out unchanging ones like kind) - possible_attrs = [ - 'selfLink', - 'commentId', - 'createdDate', - 'modifiedDate', - 'author', - 'htmlContent', - 'content', - 'deleted', - 'status', - 'context', - 'anchor', - 'fileId', - 'fileTitle', - 'replies', - 'author' - ]; - - // Check if attribute(s) provided can be used to match/filter comments: - - if (typeof(attribute) == 'string' || typeof(attribute) == 'object') { - // Either a string/object (1-tuple) - - // Generated with Javascript, gist: https://gist.github.com/lmmx/451b301e1d78ed2c10b4 - - // Return false from the function if any of the attributes specified are not in the above list - - // If an object, the name is the key, otherwise it's just the string - if (attribute.constructor === Object) { - var att_keys = []; - for (var att_key in attribute) { - if (attribute.hasOwnProperty(att_key)) { - att_keys.push(att_key); - } - } - for (var n=0; n < att_keys.length; n++) { - var attribute_name = att_keys[n]; - var is_valid_attrib = (possible_attrs.indexOf(attribute_name) > -1); - - // The attribute needs to be one of the possible attributes listed above, match its given value(s), - // else returning false will throw an error from onAttribError when within getCommentAttributes - return is_valid_attrib; - } - } else if (typeof(attribute) == 'string') { - var attribute_name = 
attribute; - var is_valid_attrib = (possible_attrs.indexOf(attribute_name) > -1); - return is_valid_attrib; - // Otherwise is a valid (string) attribute - } else if (attribute.constructor === Array) { - return false; // Again, if within getCommentAttributes this will cause an error - shouldn't pass an array - } else { - // Wouldn't expect this to happen, so give a custom error message - Logger.log('Unknown type (assumed impossible) passed to isValidAttrib: ', attribute, attribute.constructor); - throw new TypeError('Unknown passed to isValidAttrib - this should be receiving 1-tuples only, see logs for details.'); - } - } else return false; // Neither string/object / array of strings &/or objects - not a valid attribute -} - -function getCommentAttributes(attributes, comment_list_settings) { - - // A filter function built on Comments.list, for a given list of attributes - // Objects' values are ignored here, only their property titles are used to filter comments. - - - /* - - attributes: array of attributes to filter/match on - - comment_list_settings: (optional) object with properties corresponding to switches in getDocComments - - This function outputs an array of the same length as the comment list, containing - values for all fields matched/filtered on. - */ - - - /* - * All possible comment attributes are listed at: - * https://developers.google.com/drive/v2/reference/comments#properties - */ - - // Firstly, describe the type in a message to be thrown in case of TypeError: - - var attrib_def_message = "'attributes' should be a string (the attribute to get for each comment), " - + "an object (a key-value pair for attribute and desired value), " - + "or an array of objects (each with key-value pairs)"; - - function onAttribError(message) { - Logger.log(message); - throw new TypeError(message); - } - - // If (optional) comment_list_settings isn't set, make a getDocComments call with switches left blank. - if (typeof(comment_list_settings) == 'undefined') var comment_list_settings = {}; - if (typeof(attributes) == 'undefined') onAttribError(attrib_def_message); // no variables specified - - if (isValidAttrib(attributes)) { // This will be true if there's only one attribute, not provided in an array - - /* - Make a 1-tuple (array of 1) from either an object or a string, - i.e. a single attribute, with or without a defined value respectively. - */ - - var attributes = Array(attributes); - - } else if (attributes.constructor === Array) { - - // Check each item in the array is a valid attribute specification - for (var l = 0; l < attributes.length; l++) { - if (! 
isValidAttrib(attributes[l]) ) { - onAttribError('Error in attribute ' - + (l+1) + ' of ' + attributes.length - + '\n\n' + + attrib_def_message); - } - } - - } else { // Neither attribute nor array of attributes - throw new TypeError(attrib_def_message); - } - - // Attributes now holds an array of string and/or objects specifying a comment match and/or filter query - - var comment_list = getDocComments(comment_list_settings); - var comment_attrib_lists = []; - for (var i in comment_list) { - var comment = comment_list[i]; - var comment_attrib_list = []; - for (var j in attributes) { - var comment_attribute = comment_list[i][attributes[j]]; - comment_attrib_list.push(comment_attribute); - } - comment_attrib_lists.push(comment_attrib_list); - } - // The array comment_attrib_lists is now full of the requested attributes, - // of length equal to that of attributes - return comment_attrib_lists; -} - -// Example function to use getCommentAttributes: - -function filterComments(attributes, comment_list_settings) { - var comment_attributes = getCommentAttributes(attributes, comment_list_settings); - var m = attribs.indexOf('commentId') // no need to keep track of commentID array position - comm_attribs.map(function(attrib_pair) { - if (attrib_pair[1]); - }) -} - -function toggleCommentStatus(comment_switches){ - // Technically just image URL-containing comments, not sources just yet - var attribs = ['commentId', 'status']; - var comm_attribs = getCommentAttributes(attribs, comment_switches); - var rearrangement = []; - comm_attribs.map( - function(attrib_pair) { // for every comment return with the images_only / images: true comments.list setting, - switch (attrib_pair[1]){ // check the status of each - case 'open': - rearrangement.push([attrib_pair[0],'resolved']); - break; - case 'resolved': - rearrangement.push([attrib_pair[0],'open']); - break; - } - } - ); - var script_properties = PropertiesService.getScriptProperties(); - var doc_id = script_properties.getProperty("document_id"); - rearrangement.map( - function(new_attrib_pair) { // for every comment ID with flipped status - Drive.Comments.patch('{"status": "' - + new_attrib_pair[1] - + '"}', doc_id, new_attrib_pair[0]) - } - ); - return; -} - -function toggleImageSourceStatus(){ - toggleCommentStatus({images: true}); -} - -function flipResolved() { - // Flip the status of resolved comments to open, and open comments to resolved (respectful = true) - // I.e. 
make resolved URL-containing comments visible, without losing track of normal comments' status - - // To force all comments' statuses to switch between resolved and open en masse set respectful to false - - var switch_settings = {}; - switch_settings.respectful = true; - switch_settings.images_only = false; // If true, only switch status of comments with an image URL - switch_settings.switch_deleted_comments = false; // If true, also switch status of deleted comments - - var comments_list = getDocComments( - { images: switch_settings.images_only, - include_deleted: switch_settings.switch_deleted_comments }); - - // Note: these parameters are unnecessary if both false (in their absence assumed false) - // but included for ease of later reuse - - if (switch_settings.respectful) { - // flip between - } else { - // flip all based on status of first in list - } -} - -function markdownPopup() { - var css_style = ''; - - // The above was written with js since doesn't work: - // https://gist.github.com/lmmx/ec084fc351528395f2bb - - var mdstring = stringMiddleMan(); - - var htmlstring = - '' - + css_style - + '
'; - - var html5 = HtmlService.createHtmlOutput(htmlstring) - .setSandboxMode(HtmlService.SandboxMode.IFRAME) - .setWidth(800) - .setHeight(500); - - DocumentApp.getUi() - .showModalDialog(html5, 'Markdown output'); -} - -function stringMiddleMan() { - var returned_string; - convertSingleDoc({"return_string": true}); // for some reason needs the scope to be already set... - // could probably rework to use mdstring rather than returned_string, cut out middle man function - return this.returned_string; -} - -function convertSingleDoc(optional_switches) { - var script_properties = PropertiesService.getScriptProperties(); - // renew comments list on every export - var doc_comments = getDocComments(); - var image_urls = getDocComments({images: true}); // NB assumed false - any value will do - script_properties.setProperty("comments", doc_comments); - script_properties.setProperty("image_srcs", image_urls); - var folder_id = script_properties.getProperty("folder_id"); - var document_id = script_properties.getProperty("document_id"); - var source_folder = DriveApp.getFolderById(folder_id); - var markdown_folders = source_folder.getFoldersByName("Markdown"); - - var markdown_folder; - if (markdown_folders.hasNext()) { - markdown_folder = markdown_folders.next(); - } else { - // Create a Markdown folder if it doesn't exist. - markdown_folder = source_folder.createFolder("Markdown") - } - - convertDocumentToMarkdown(DocumentApp.openById(document_id), markdown_folder, optional_switches); -} - -function convertFolder() { - var script_properties = PropertiesService.getScriptProperties(); - var folder_id = script_properties.getProperty("folder_id"); - var source_folder = DriveApp.getFolderById(folder_id); - var markdown_folders = source_folder.getFoldersByName("Markdown"); - - - var markdown_folder; - if (markdown_folders.hasNext()) { - markdown_folder = markdown_folders.next(); - } else { - // Create a Markdown folder if it doesn't exist. - markdown_folder = source_folder.createFolder("Markdown"); - } - - // Only try to convert google docs files. - var gdoc_files = source_folder.getFilesByType("application/vnd.google-apps.document"); - - // For every file in this directory - while(gdoc_files.hasNext()) { - var gdoc_file = gdoc_files.next() - - var filename = gdoc_file.getName(); - var md_files = markdown_folder.getFilesByName(filename + ".md"); - var update_file = false; - - if (md_files.hasNext()) { - var md_file = md_files.next(); - - if (md_files.hasNext()){ // There are multiple markdown files; delete and rerun - update_file = true; - } else if (md_file.getLastUpdated() < gdoc_file.getLastUpdated()) { - update_file = true; - } - } else { - // There is no folder and the conversion needs to be rerun - update_file = true; - } - - if (update_file) { - convertDocumentToMarkdown(DocumentApp.openById(gdoc_file.getId()), markdown_folder); - } - } -} - -function switchHandler(input_switches, potential_switches, optional_storage_name) { - - // Firstly, if no input switches were set, make an empty input object - if (typeof(input_switches) == 'undefined') input_switches = {}; - - // Use optional storage name if it's defined (must be a string), else use default variable name "switch_settings" - var property_name = (typeof(optional_storage_name) == 'string') ? 
optional_storage_name : 'switch_settings'; - - // Make a blank object to be populated and stored as the script-wide property named after property_name - var switch_settings = {}; - - for (var i in potential_switches) { - var potential_switch = potential_switches[i]; - - // If each switch has been set (in input_switches), evaluate it, else assume it's switched off (false): - - if (input_switches.propertyIsEnumerable(potential_switch)) { - - // Evaluates a string representing a statement which sets switch_settings properties from input_switches - // e.g. "switch_settings.images = true" when input_switches = {images: true} - - eval('switch_settings.' + potential_switch + " = " + input_switches[potential_switch]); - - } else { - - // Alternatively, the evaluated statement sets anything absent from the input_switches object as false - // e.g. "switch_settings.images = false" when input_switches = {} and potential_switches = ['images'] - - eval('switch_settings.' + potential_switch + " = false"); - } - } - - PropertiesService.getScriptProperties().setProperty(property_name, switch_settings); - - /* - Looks bad but more sensible than repeatedly checking if arg undefined. - - Sets every variable named in the potential_switches array to false if - it wasn't passed into the input_switches object, otherwise evaluates. - - Any arguments not passed in are false, but so are any explicitly passed in as false: - all parameters are therefore Boolean until otherwise specified. - */ - -} - -function convertDocumentToMarkdown(document, destination_folder, frontmatter_input, optional_switches) { - // if returning a string, force_save_images will make the script continue - experimental - var possible_switches = ['return_string', 'force_save_images']; - var property_name = 'conversion_switches'; - switchHandler(optional_switches, possible_switches, property_name); - - // TODO switch off image storage if force_save_images is true - not necessary for normal behaviour - var script_properties = PropertiesService.getScriptProperties(); - var comment_switches = decodeScriptSwitches(property_name); - eval(comment_switches); - - var image_prefix = script_properties.getProperty("image_folder_prefix"); - var numChildren = document.getActiveSection().getNumChildren(); - if (frontmatter_input != "") { - var text = frontmatter_input; - } - else { - var text = "" - } - var md_filename = sanitizeFileName(document.getName()) + ".md"; - var image_foldername = document.getName()+"_images"; - var inSrc = false; - var inClass = false; - var globalImageCounter = 0; - var globalListCounters = {}; - // edbacher: added a variable for indent in src
 block. Let style sheet do margin.
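-  // srcIndent is prepended to each escaped line inside a source block (kept empty so the style sheet handles the margin).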
-  var srcIndent = "";
-
-  var postHasImages = false;
-
-  var files = [];
-
-  // Walk through all the child elements of the doc.
-  for (var i = 0; i < numChildren; i++) {
-    var child = document.getActiveSection().getChild(i);
-    var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters, image_prefix + image_foldername);
-    globalImageCounter += (result && result.images) ? result.images.length : 0;
-    if (result!==null) {
-      if (result.sourceGlossary==="start" && !inSrc) {
-        inSrc=true;
-        text+="
\n";
-      } else if (result.sourceGlossary==="end" && inSrc) {
-        inSrc=false;
-        text+="
\n\n"; - } else if (result.sourceFigCap==="start" && !inSrc) { - inSrc=true; - text+="
\n";
-      } else if (result.sourceFigCap==="end" && inSrc) {
-        inSrc=false;
-        text+="
\n\n"; - } else if (result.source==="start" && !inSrc) { - inSrc=true; - text+="
\n";
-      } else if (result.source==="end" && inSrc) {
-        inSrc=false;
-        text+="
\n\n"; - } else if (result.inClass==="start" && !inClass) { - inClass=true; - text+="
\n";
-      } else if (result.inClass==="end" && inClass) {
-        inClass=false;
-        text+="
\n\n"; - } else if (inClass) { - text+=result.text+"\n\n"; - } else if (inSrc) { - text+=(srcIndent+escapeHTML(result.text)+"\n"); - } else if (result.text && result.text.length>0) { - text+=result.text+"\n\n"; - } - - if (result.images && result.images.length>0) { - for (var j=0; j/g, '>'); -} - -function standardQMarks(text) { - return text.replace(/\u2018|\u8216|\u2019|\u8217/g,"'").replace(/\u201c|\u8220|\u201d|\u8221/g, '"') -} - -// Process each child element (not just paragraphs). -function processParagraph(index, element, inSrc, imageCounter, listCounters, image_path) { - // First, check for things that require no processing. - if (element.getType() === DocumentApp.ElementType.UNSUPPORTED) { - return null; - } - if (element.getNumChildren()==0) { - return null; - } - // Skip on TOC. - if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) { - return {"text": "[[TOC]]"}; - } - - // Set up for real results. - var result = {}; - var pOut = ""; - var textElements = []; - var imagePrefix = "image_"; - - // Handle Table elements. Pretty simple-minded now, but works for simple tables. - // Note that Markdown does not process within block-level HTML, so it probably - // doesn't make sense to add markup within tables. - if (element.getType() === DocumentApp.ElementType.TABLE) { - textElements.push("\n"); - var nCols = element.getChild(0).getNumCells(); - for (var i = 0; i < element.getNumChildren(); i++) { - textElements.push(" \n"); - // process this row - for (var j = 0; j < nCols; j++) { - textElements.push(" \n"); - } - textElements.push(" \n"); - } - textElements.push("
" + element.getChild(i).getChild(j).getText() + "
\n"); - } - - // Need to handle this element type, return null for now - if (element.getType() === DocumentApp.ElementType.CODE_SNIPPET) { - return null - } - - // Process various types (ElementType). - for (var i = 0; i < element.getNumChildren(); i++) { - var t = element.getChild(i).getType(); - - if (t === DocumentApp.ElementType.TABLE_ROW) { - // do nothing: already handled TABLE_ROW - } else if (t === DocumentApp.ElementType.TEXT) { - var txt = element.getChild(i); - pOut += txt.getText(); - textElements.push(txt); - } else if (t === DocumentApp.ElementType.INLINE_IMAGE) { - var imglink = element.getChild(i).getLinkUrl(); - result.images = result.images || []; - var blob = element.getChild(i).getBlob() - var contentType = blob.getContentType(); - var extension = ""; - if (/\/png$/.test(contentType)) { - extension = ".png"; - } else if (/\/gif$/.test(contentType)) { - extension = ".gif"; - } else if (/\/jpe?g$/.test(contentType)) { - extension = ".jpg"; - } else { - throw "Unsupported image type: "+contentType; - } - - var name = imagePrefix + imageCounter + extension; - blob.setName(name); - - imageCounter++; - if (!return_string || force_save_images) { - textElements.push('![](' + image_path + '/' + name + ')'); - } else { - textElements.push('![](' + imglink + ')'); - } - //result.images.push( { - // "bytes": blob.getBytes(), - // "type": contentType, - // "name": name}); - - result.images.push({ "blob" : blob } ) - - // Need to fix this case TODO - } else if (t === DocumentApp.ElementType.INLINE_DRAWING) { - - imageCounter++; - if (!return_string || force_save_images) { - textElements.push('![](' + "drawing" + '/' + " name" + ')'); - } else { - textElements.push('![](' + "drawing" + ')'); - } - //result.images.push( { - // "bytes": blob.getBytes(), - // "type": contentType, - // "name": name}); - - // result.images.push({ "blob" : blob } ) - - } - else if (t === DocumentApp.ElementType.PAGE_BREAK) { - // ignore - } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) { - textElements.push('* * *\n'); - } else if (t === DocumentApp.ElementType.FOOTNOTE) { - textElements.push(' ('+element.getChild(i).getFootnoteContents().getText()+')'); - // Fixes for new elements - } else if (t === DocumentApp.ElementType.EQUATION) { - textElements.push(element.getChild(i).getText()); - } else if (t === DocumentApp.ElementType.DATE) { - textElements.push(' ('+element.getChild(i)+')'); - } else if (t === DocumentApp.ElementType.RICH_LINK) { - textElements.push(' ('+element.getChild(i).getUrl()+')'); - } else if (t === DocumentApp.ElementType.PERSON) { - textElements.push(element.getChild(i).getName() + ', '); - } else if (t === DocumentApp.ElementType.UNSUPPORTED) { - textElements.push(' '); - } else { - Logger.log("Paragraph "+index+" of type "+element.getType()+" has an unsupported child: " - +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index); - } - } - - if (textElements.length==0) { - // Isn't result empty now? 
- return result; - } - -// Fix for unrecognized command getIndentFirstLine - var ind_f = 0; - var ind_s = 0; - var ind_e = 0; - if (t === DocumentApp.ElementType.PARAGRAPH) { - - var ind_f = element.getIndentFirstLine(); - var ind_s = element.getIndentStart(); - var ind_e = element.getIndentEnd(); - } - var i_fse = [ind_f,ind_s,ind_e]; - var indents = {}; - for (indt=0;indt 0) indents[indname] = eval(indname); - // lazy test, null (no indent) is not greater than zero, but becomes set if indent 'undone' - } - var inIndent = (Object.keys(indents).length > 0); - - // evb: Add glossary and figure caption too. (And abbreviations: gloss and fig-cap.) - // process source code block: - if (/^\s*---\s+gloss\s*$/.test(pOut) || /^\s*---\s+source glossary\s*$/.test(pOut)) { - result.sourceGlossary = "start"; - } else if (/^\s*---\s+fig-cap\s*$/.test(pOut) || /^\s*---\s+source fig-cap\s*$/.test(pOut)) { - result.sourceFigCap = "start"; - } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) { - result.source = "start"; - } else if (/^\s*---\s+class\s+([^ ]+)\s*$/.test(pOut)) { - result.inClass = "start"; - result.className = RegExp.$1.replace(/\./g,' '); - } else if (/^\s*---\s*$/.test(pOut)) { - result.source = "end"; - result.sourceGlossary = "end"; - result.sourceFigCap = "end"; - result.inClass = "end"; - } else if (/^\s*---\s+jsperf\s*([^ ]+)\s*$/.test(pOut)) { - result.text = ''; - } else { - - prefix = findPrefix(inSrc, element, listCounters); - - var pOut = ""; - for (var i=0; i): - if (gt === DocumentApp.GlyphType.BULLET - || gt === DocumentApp.GlyphType.HOLLOW_BULLET - || gt === DocumentApp.GlyphType.SQUARE_BULLET) { - prefix += "* "; - } else { - // Ordered list (
    ): - var key = listItem.getListId() + '.' + listItem.getNestingLevel(); - var counter = listCounters[key] || 0; - counter++; - listCounters[key] = counter; - prefix += counter+". "; - } - } - } - return prefix; -} - -function processTextElement(inSrc, txt) { - if (typeof(txt) === 'string') { - return txt; - } - - var pOut = txt.getText(); - if (! txt.getTextAttributeIndices) { - return pOut; - } - -// Logger.log("Initial String: " + pOut) - - // CRC introducing reformatted_txt to let us apply rational formatting that we can actually parse - var reformatted_txt = txt.copy(); - reformatted_txt.deleteText(0,pOut.length-1); - reformatted_txt = reformatted_txt.setText(pOut); - - var attrs = txt.getTextAttributeIndices(); - var lastOff = pOut.length; - // We will run through this loop multiple times for the things we care about. - // Font - // URL - // Then for alignment - // Then for bold - // Then for italic. - - // FONTs - var lastOff = pOut.length; // loop goes backwards, so this holds - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var font=txt.getFontFamily(off) - if (font) { - while (i>=1 && txt.getFontFamily(attrs[i-1])==font) { - // detect fonts that are in multiple pieces because of errors on formatting: - i-=1; - off=attrs[i]; - } - reformatted_txt.setFontFamily(off, lastOff-1, font); - } - lastOff=off; - } - - // URL - // XXX TODO actually convert to URL text here. - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var url=txt.getLinkUrl(off); - if (url) { - while (i>=1 && txt.getLinkUrl(attrs[i-1]) == url) { - // detect urls that are in multiple pieces because of errors on formatting: - i-=1; - off=attrs[i]; - } - reformatted_txt.setLinkUrl(off, lastOff-1, url); - } - lastOff=off; - } - - // alignment - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var alignment=txt.getTextAlignment(off); - if (alignment) { // - while (i>=1 && txt.getTextAlignment(attrs[i-1]) == alignment) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setTextAlignment(off, lastOff-1, alignment); - } - lastOff=off; - } - - // strike - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var strike=txt.isStrikethrough(off); - if (strike) { - while (i>=1 && txt.isStrikethrough(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setStrikethrough(off, lastOff-1, strike); - } - lastOff=off; - } - - // bold - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var bold=txt.isBold(off); - if (bold) { - while (i>=1 && txt.isBold(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setBold(off, lastOff-1, bold); - } - lastOff=off; - } - - // italics - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var italic=txt.isItalic(off); - if (italic) { - while (i>=1 && txt.isItalic(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setItalic(off, lastOff-1, italic); - } - lastOff=off; - } - - - var mOut=""; // Modified out string - var harmonized_attrs = reformatted_txt.getTextAttributeIndices(); - reformatted_txt.getTextAttributeIndices(); // @lmmx: is this a typo...? - pOut = reformatted_txt.getText(); - - - // Markdown is farily picky about how it will let you intersperse spaces around words and strong/italics chars. 
This regex (hopefully) clears this up - // Match any number of \*, followed by spaces/word boundaries against anything that is not the \*, followed by boundaries, spaces and * again. - // Test case at http://jsfiddle.net/ovqLv0s9/2/ - - var reAlignStars = /(\*+)(\s*\b)([^\*]+)(\b\s*)(\*+)/g; - - var lastOff=pOut.length; - for (var i=harmonized_attrs.length-1; i>=0; i--) { - var off=harmonized_attrs[i]; - - var raw_text = pOut.substring(off, lastOff) - - var d1 = ""; // @lmmx: build up a modifier prefix - var d2 = ""; // @lmmx: ...and suffix - - var end_font; - - var mark_bold = false; - var mark_italic = false; - var mark_code = false; - var mark_sup = false; - var mark_sub = false; - var mark_strike = false; - - // The end of the text block is a special case. - if (lastOff == pOut.length) { - end_font = reformatted_txt.getFontFamily(lastOff - 1) - if (end_font) { - if (!inSrc && end_font===end_font.COURIER_NEW) { - mark_code = true; - } - } - if (reformatted_txt.isBold(lastOff -1)) { - mark_bold = true; - } - if (reformatted_txt.isItalic(lastOff - 1)) { - // edbacher: changed this to handle bold italic properly. - mark_italic = true; - } - if (reformatted_txt.isStrikethrough(lastOff - 1)) { - mark_strike = true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } else { - end_font = reformatted_txt.getFontFamily(lastOff -1 ) - if (end_font) { - if (!inSrc && end_font===end_font.COURIER_NEW && reformatted_txt.getFontFamily(lastOff) != end_font) { - mark_code=true; - } - } - if (reformatted_txt.isBold(lastOff - 1) && !reformatted_txt.isBold(lastOff) ) { - mark_bold=true; - } - if (reformatted_txt.isStrikethrough(lastOff - 1) && !reformatted_txt.isStrikethrough(lastOff)) { - mark_strike=true; - } - if (reformatted_txt.isItalic(lastOff - 1) && !reformatted_txt.isItalic(lastOff)) { - mark_italic=true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUPERSCRIPT) { - if (reformatted_txt.getTextAlignment(lastOff)!==DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUBSCRIPT) { - if (reformatted_txt.getTextAlignment(lastOff)!==DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } - } - - if (mark_code) { - d2 = '`'; // shouldn't these go last? or will it interfere w/ reAlignStars? 
- } - if (mark_bold) { - d2 = "**" + d2; - } - if (mark_italic) { - d2 = "*" + d2; - } - if (mark_strike) { - d2 = "" + d2; - } - if (mark_sup) { - d2 = '' + d2; - } - if (mark_sub) { - d2 = '' + d2; - } - - mark_bold = mark_italic = mark_code = mark_sup = mark_sub = mark_strike = false; - - var font=reformatted_txt.getFontFamily(off); - if (off == 0) { - if (font) { - if (!inSrc && font===font.COURIER_NEW) { - mark_code = true; - } - } - if (reformatted_txt.isBold(off)) { - mark_bold = true; - } - if (reformatted_txt.isItalic(off)) { - mark_italic = true; - } - if (reformatted_txt.isStrikethrough(off)) { - mark_strike = true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } else { - if (font) { - if (!inSrc && font===font.COURIER_NEW && reformatted_txt.getFontFamily(off - 1) != font) { - mark_code=true; - } - } - if (reformatted_txt.isBold(off) && !reformatted_txt.isBold(off -1) ) { - mark_bold=true; - } - if (reformatted_txt.isItalic(off) && !reformatted_txt.isItalic(off - 1)) { - mark_italic=true; - } - if (reformatted_txt.isStrikethrough(off) && !reformatted_txt.isStrikethrough(off - 1)) { - mark_strike=true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUPERSCRIPT) { - if (reformatted_txt.getTextAlignment(off - 1)!==DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUBSCRIPT) { - if (reformatted_txt.getTextAlignment(off - 1)!==DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } - } - - - if (mark_code) { - d1 = '`'; - } - - if (mark_bold) { - d1 = d1 + "**"; - } - - if (mark_italic) { - d1 = d1 + "*"; - } - - if (mark_sup) { - d1 = d1 + ''; - } - - if (mark_sub) { - d1 = d1 + ''; - } - - if (mark_strike) { - d1 = d1 + ''; - } - - var url=reformatted_txt.getLinkUrl(off); - if (url) { - mOut = d1 + '['+ raw_text +']('+url+')' + d2 + mOut; - } else { - var new_text = d1 + raw_text + d2; - new_text = new_text.replace(reAlignStars, "$2$1$3$5$4"); - mOut = new_text + mOut; - } - - lastOff=off; -// Logger.log("Modified String: " + mOut) - } - - mOut = pOut.substring(0, off) + mOut; - return mOut; -} \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/apps_script/gmail_to_markdown.gs b/examples/gemini/python/docs-agent/apps_script/gmail_to_markdown.gs deleted file mode 100644 index 3263ef100..000000000 --- a/examples/gemini/python/docs-agent/apps_script/gmail_to_markdown.gs +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -function exportEmailsToMarkdown(search, folderName) { - //Checks if input folder exists or exits - if(folderExistsOrCreate(folderName)){ - var output_file_name = folderName + "-index"; - var folderOutputObj = DriveApp.getFoldersByName(folderName); - if (folderOutputObj.hasNext()){ - var folderOutputName = folderOutputObj.next(); - } - var sheet = checkIndexOutputOrCreate(output_file_name, folderOutputName); - console.log(`Searching for: "${search}"`); - var start = 0; - var max = 500; - var threads = GmailApp.search(search, start, max); - var threadMax = threads.length; - if (threads!=null){ - console.log(threadMax + " threads found."); - } else { - console.warn("No threads found with the search criteria"); - return; - } - let timeZone = Session.getScriptTimeZone(); - let created_date = Utilities.formatDate(new Date(), timeZone, "MM-dd-yyyy HH:mm:ss z"); - sheet.appendRow(["Created: ", created_date]) - sheet.appendRow(["Date", "From", "Subject", "To", "Markdown ID", "Markdown URL", "Full date", "MD5 hash", "Status"]); - var start_data_row = 2; - var status = "New content"; - var newEmails = 0; - var unchangedEmails = 0; - for (var threadCount in threads) { - var msgs = threads[threadCount].getMessages(); - Logger.log("Processing thread " + threadCount + " of " + threadMax); - for (var msgCount in msgs) { - var msg = msgs[msgCount]; - var subject = msg.getSubject().replace(/"/g, "\\\"");; - // Removes replies and forwards - Can mostly be noise. - if(!subject.toLowerCase().includes("re:") && - !subject.toLowerCase().includes("fwd:") && - !subject.toLowerCase().includes("forwarded message")){ - // Values to get and store messages - var date = msg.getDate(); - let from_author = msg.getFrom().replace(/"/g, "\\\""); - var hash_content = from_author + date + subject; - let sanitized_subject = sanitizeString(subject); - let date_format = Utilities.formatDate(date, "PST", "MM-dd-yyyy"); - let to = msg.getTo(); - let to_array = to.split(", "); - for (i in to_array) { - to_array[i] = "\"" + to_array[i].replace(/^" "/, "").replace(/"/g, "\\\"") + "\""; - } - let md5_hash = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5,hash_content, - Utilities.Charset.US_ASCII); - let hash_str = byteToStr(md5_hash); - //Function returns an array, assign each array value to seperate variables. For emails, only need to retrieve - // backup markdown ids - var backup_results = returnBackupHash(sheet, "Backup", hash_str, start_data_row, 7, 4, 5); - if (backup_results != undefined && backup_results[0] != "no_results") { - Logger.log("Email is already in markdown format. 
Skipping conversion."); - var status = "Unchanged content"; - var markdown_id = backup_results[1]; - if (markdown_id){ - var md_file = DriveApp.getFileById(markdown_id); - } - unchangedEmails += 1; - } - else { - var status = "New content"; - let message = msg.getPlainBody(); - let filename = sanitizeFileName(date_format + subject + ".md"); - // Initialize blank text since file will get updated with URL - let email_md = ""; - // Add count here for emails - newEmails += 1; - Logger.log("Email number: " + newEmails + "| Saving email to: " + filename); - var body = "# " + subject + "\n"; - // Cleans the reply part of emails - body += regexToCleanCharsMD(sanitizeBody(message.replace(/^>/g,""))) + "\n"; - var destinationFolder = DriveApp.getFoldersByName(folderOutputName).next(); - // Initialize blank file to retrieve URL which is then added to the frontmatter - var destinationFile = destinationFolder.createFile(filename, email_md , MimeType.PLAIN_TEXT); - // Create metadata for the object - var markdown_id = destinationFile.getId(); - var md_file = DriveApp.getFileById(markdown_id); - let md_url = md_file.getUrl(); - let frontmatter = "---" + "\n"; - frontmatter += "title: \"" + sanitized_subject + "\"\n"; - frontmatter += "type: \"" + "email" + "\"\n"; - frontmatter += "URL: \"" + md_url + "\"\n"; - frontmatter += "created: \"" + date + "\"\n"; - frontmatter += "from: \"" + from_author + "\"\n"; - frontmatter += "to: \[" + to_array + "\]\n"; - frontmatter += "---" + "\n\n"; - email_md = frontmatter + body; - var encoded = Utilities.base64Encode(email_md); - var byteDataArray = Utilities.base64Decode(encoded); - var textAsBlob = Utilities.newBlob(byteDataArray); - Drive.Files.update(null,markdown_id, textAsBlob); - } - let md_chip = createRichText(md_file); - metadata = [ - date_format, - from_author, - sanitized_subject, - to, - markdown_id, - "md_chip", - date, - hash_str, - status, - ]; - sheet.appendRow(metadata); - var emailTotal = newEmails + unchangedEmails; - let row_number = emailTotal + start_data_row; - insertRichText(sheet, md_chip, "F", row_number); - } - } - } - Logger.log("Saved a total of " + newEmails + " new emails."); - Logger.log("There is a total of " + unchangedEmails + " unchanged emails."); - Logger.log("Grand total of " + emailTotal + " emails."); - } -} \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/apps_script/helper_functions.gs b/examples/gemini/python/docs-agent/apps_script/helper_functions.gs deleted file mode 100644 index 3fbf96866..000000000 --- a/examples/gemini/python/docs-agent/apps_script/helper_functions.gs +++ /dev/null @@ -1,214 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Checks to see if a folder already exists in the drive -function folderExists(folderName) { - const folderIterator = DriveApp.getRootFolder().getFoldersByName(folderName); - if(folderIterator.hasNext()) { - return true; - } - else { - return false; - } -} - -// Checks to see if a folder already exists in the specified root folder -function folderExistsInRoot(folderName, rootFolder) { - const folderIterator = rootFolder.getFoldersByName(folderName); - if(folderIterator.hasNext()) { - return true; - } - else { - return false; - } -} - -// Checks to see if a folder already exists in the drive and exits if it doesn't. Useful for input directories -function folderExistsInput(folderName){ - if (folderExists(folderName)) { - Logger.log("Folder exists: "+ folderName); - return true; - } - else { - Logger.log("Folder does not exist: "+ folderName + ". Please make sure the directory exists."); - return false; - } -} - -// Checks to see if folder exists or creates it. Useful for output directories -function folderExistsOrCreate(folderName){ - if(folderExists(folderName)) { - Logger.log("Folder exists: "+ folderName); - return true; - } - else { - Logger.log("Folder does not exist: "+ folderName + ". Creating the directory."); - DriveApp.createFolder(folderName); - return true; - } -} - -// Checks to see if folder exists or creates it. Useful for output directories -function folderExistsOrCreateSubdir(folderName, rootFolder){ - if(folderExistsInRoot(folderName, rootFolder)) { - Logger.log("Folder exists: "+ folderName); - return true; - } - else { - Logger.log("Folder does not exist: "+ folderName + ". Creating the directory."); - rootFolder.createFolder(folderName); - return true; - } -} - -// Checks to see if a file exists in a folder -function checkFileExists(fileName,folderName){ - let folder = DriveApp.getFoldersByName(folderName); - if(!folder.hasNext()){ - } - else{ - var file = folder.next().getFilesByName(fileName); - if(!file.hasNext()){ - return true; - } - else{ - return false; - } - } -} - -// Function to check if an index output sheet exists or creates it. 
Returns the file object -// Specify the file output name and outputdirectory -function checkIndexOutputOrCreate(fileName, folderOutput, indexFileID="") { - var timeZone = Session.getScriptTimeZone(); - var date = Utilities.formatDate(new Date(), timeZone, "MM-dd-yyyy hh:mm:ss"); - let file = {title: fileName, mimeType: MimeType.GOOGLE_SHEETS, parents: [{id: folderOutput.getId()}]} - let params = "title='" + fileName + "' and parents in '" + folderOutput.getId() + "'"; - let file_search = DriveApp.searchFiles(params); - if (file_search.hasNext()) { - if (indexFileID=="") { - var fileId = file_search.next().getId(); - } - else { - var fileId = indexFileID; - } - var sheet = SpreadsheetApp.openById(fileId); - Logger.log("File index: " + fileName + " exists."); - var sheet_index = sheet.getSheetByName("Index"); - // Checks to see if this is a sub directory - if (sheet.getSheetByName("Backup")) { - var sheet_backup = sheet.getSheetByName("Backup"); - sheet.deleteSheet(sheet_backup); - } - var sheet_backup = sheet.insertSheet("Backup", 1); - var sheet_backup_open = sheet.getSheetByName("Backup"); - sheet_index.getDataRange().copyTo(sheet_backup_open.getRange(1,1)); - if (sheet_index != null){ - sheet.deleteSheet(sheet_index); - } - sheet.insertSheet("Index", 0); - sheet_index = sheet.getSheetByName("Index"); - sheet_index.addDeveloperMetadata("Date", date); - } - else { - Logger.log("File index: " + fileName + " does not exist."); - let output = Drive.Files.insert(file).id; - var sheet = SpreadsheetApp.openById(output); - var sheet_1 = sheet.getSheetByName("Sheet1"); - sheet.insertSheet("Index", 0); - var sheet_index = sheet.getSheetByName("Index") - sheet_index.addDeveloperMetadata("Date", date); - sheet.deleteSheet(sheet_1); - } - return sheet; -} - -// Function to convert byte array into a string -function byteToStr(byteInput){ - let signatureStr = ''; - for (i = 0; i < byteInput.length; i++) { - let byte = byteInput[i]; - if (byte < 0) - byte += 256; - let byteStr = byte.toString(16); - if (byteStr.length == 1) byteStr = '0' + byteStr; - signatureStr += byteStr; - } -return signatureStr; -} - -// Function to remove special characters for file names -function sanitizeFileName(fileName){ - let clean_filename = fileName.replace(/\[/g, "_").replace(/\]/g, "_").replace(/\(/g, "_").replace(/\)/g, "_").replace(/^_/g, "").replace(/,/g, "_").replace(/ /g, "_").replace(/:/g, "").replace(/`/g, "").replace(/\'/g, "").replace(/&/g, "and").replace(//g, "").replace(/’/g, ""); -return clean_filename; -} - -// Function to remove special characters for file names -function sanitizeString(string){ - let clean_string = string.replace(/\[/g, "").replace(/\]/g, "").replace(/\(/g, "").replace(/\)/g, "").replace(/^_/g, "").replace(/,/g, " ").replace(/:/g, "").replace(/`/g, "").replace(/\'/g, "").replace(/&/g, "and").replace(//g, ""); -return clean_string; -} - -function sanitizeBody(string){ - let clean_body = string.replace(/’/g, "'").replace(/^M/g, ""); -return clean_body; -} - -function regexToCleanCharsMD(string){ - let clean_string = string.replace(/(\*+)(\s*\b)([^\*]+)(\b\s*)(\*+)/g, "$2$1$3$5$4"); -return clean_string; -} - -// Function to check if a backup sheet exists and return a hash if the file exists -// Specify the sheet name where the backup is saved, default is "Backup" -// From your backup sheet specify the column that contains the MD5 hash -// and the columns for which you return values -function returnBackupHash(sheet, sheet_name, fid, start_data_row, pos_id, pos_1_col, pos_2_col){ - if 
(sheet.getSheetByName(sheet_name)){ - let backup_sheet = sheet.getSheetByName(sheet_name); - if(backup_sheet.getLastRow()> start_data_row){ - let backup_values = backup_sheet.getDataRange().getValues(); - for (let row_count = start_data_row; row_count < backup_sheet.getLastRow(); row_count++) { - let row_id = backup_values[row_count][pos_id]; - let pos_1_value = backup_values[row_count][pos_1_col]; - //Retrieve id of existing markdown conversion - let pos_2_value = backup_values[row_count][pos_2_col]; - if (row_id == fid){ - var results = [row_id, pos_1_value, pos_2_value]; - break; - } - else { - var results = ["no_results"]; - } - } - return results; - } - } -} - -// Creates a richText item with item. -function createRichText (item){ - let title = item.getName(); - let url = item.getUrl(); - let richText = SpreadsheetApp.newRichTextValue().setText(title).setLinkUrl(url).build(); - return richText; -} - -// Insert a richText item in a specific cell -function insertRichText (sheetItem, item, column, row){ - let range = sheetItem.getRange(column + row); - range.setRichTextValue(item); -} diff --git a/examples/gemini/python/docs-agent/apps_script/main.gs b/examples/gemini/python/docs-agent/apps_script/main.gs deleted file mode 100644 index 2fe88de33..000000000 --- a/examples/gemini/python/docs-agent/apps_script/main.gs +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Defines the gmail search query for saving emails to markdown -var SEARCH_QUERY = 'subject: psa to:my-mailing-list@example.com'; -// Defines the directory to output the emails in markdown format -var folderOutput = "PSA-output" -// Defines the directory that has your docs content -var folderInput = "input-folder" - -function main() { - convertDriveFolderToMDForDocsAgent(folderInput); - exportEmailsToMarkdown(SEARCH_QUERY, folderOutput); -} \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/config.yaml b/examples/gemini/python/docs-agent/config.yaml deleted file mode 100644 index 8d7fdc9df..000000000 --- a/examples/gemini/python/docs-agent/config.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -configs: - - product_name: "Fuchsia" - models: - - language_model: "models/aqa" - embedding_model: "models/embedding-001" - api_endpoint: "generativelanguage.googleapis.com" - embedding_api_call_limit: 1400 - embedding_api_call_period: 60 - docs_agent_config: "normal" - markdown_splitter: "token_splitter" - log_level: "NORMAL" - db_type: "google_semantic_retriever" - db_configs: - - db_type: "chroma" - vector_db_dir: "vector_stores/chroma" - collection_name: "docs_collection" - - db_type: "google_semantic_retriever" - corpus_name: "corpora/fuchsia-dev" - output_path: "data/plain_docs" - inputs: - - path: "/usr/local/home/user01/website/src" - url_prefix: "https://docs.flutter.dev/" - conditions: - - condition_text: "You are a helpful chatbot answering questions from users. - Read the context below first and answer the user's question at the end. - In your answer, provide a summary in three or five sentences. (BUT DO NOT USE - ANY INFORMATION YOU KNOW ABOUT THE WORLD.)" - fact_check_question: "Can you compare the text below to the information - provided in this prompt above and write a short message that warns the readers - about which part of the text they should consider fact-checking? (Please keep - your response concise, focus on only one important item, but DO NOT USE BOLD - TEXT IN YOUR RESPONSE.)" - model_error_message: "Gemini is not able to answer this question at the moment. - Rephrase the question and try asking again." diff --git a/examples/gemini/python/docs-agent/docs/chunking-process.md b/examples/gemini/python/docs-agent/docs/chunking-process.md deleted file mode 100644 index e4c054b92..000000000 --- a/examples/gemini/python/docs-agent/docs/chunking-process.md +++ /dev/null @@ -1,68 +0,0 @@ -# Docs Agent chunking process - -This page describes Docs Agent's chunking process and potential optimizations. - -Currently, Docs Agent utilizes Markdown headings (`#`, `##`, and `###`) to -split documents into smaller, manageable chunks. However, the Docs Agent team -is actively developing more advanced strategies to improve the quality and -relevance of these chunks for retrieval. - -## Chunking technique - -In Retrieval Augmented Generation ([RAG][rag]) based systems, ensuring each -chunk contains the right information and context is crucial for accurate -retrieval. The goal of an effective chunking process is to ensure that each -chunk encapsulates a focused topic, which enhances the accuracy of retrieval -and ultimately leads to better answers. At the same time, the Docs Agent team -acknowledges the importance of a flexible approach that allows for -customization based on specific datasets and use cases. - -Key characteristics in Docs Agent’s chunking process include: - -- **Docs Agent splits documents based on Markdown headings.** However, - this approach has limitations, especially when dealing with large sections. -- **Docs Agent chunks are smaller than 5000 bytes (characters).** This size - limit is set by the embedding model used in generating embeddings. -- **Docs Agent enhances chunks with additional metadata.** The metadata helps - Docs Agent to execute operations efficiently, such as preventing duplicate - chunks in databases and deleting obsolete chunks that are no longer - present in the source. -- **Docs Agent retrieves the top 5 chunks and displays the top chunk's URL.** - However, this is adjustable in Docs Agent’s configuration (see the `widget` - and `experimental` app modes). 
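The heading-based splitting described above can be sketched in a few lines of Python. The sketch below is only an illustration of the general approach, not Docs Agent's actual splitter (which lives in the project's preprocessing scripts): the function name, the metadata fields, and the paragraph-level fallback for oversized sections are assumptions made for this example.

```python
import re

CHUNK_BYTE_LIMIT = 5000  # mirrors the size limit imposed by the embedding model


def split_markdown_by_headings(markdown_text, source_url, filename):
    """Split a Markdown string on #, ##, and ### headings into metadata-tagged chunks."""
    chunks = []
    # Split immediately before any line that starts with one to three '#' characters.
    sections = re.split(r"(?m)^(?=#{1,3}\s)", markdown_text)
    for section in sections:
        section = section.strip()
        if not section:
            continue
        # Keep each chunk under the byte limit by breaking oversized sections
        # on paragraph boundaries (a single oversized paragraph is kept whole).
        piece = ""
        for paragraph in section.split("\n\n"):
            candidate = (piece + "\n\n" + paragraph).strip()
            if piece and len(candidate.encode("utf-8")) > CHUNK_BYTE_LIMIT:
                chunks.append({"text": piece, "url": source_url, "filename": filename})
                piece = paragraph
            else:
                piece = candidate
        if piece:
            chunks.append({"text": piece, "url": source_url, "filename": filename})
    return chunks
```

Measuring the limit against the UTF-8 encoding, rather than the character count, matches the 5000-byte ceiling described above; the attached URL and filename stand in for the metadata that Docs Agent uses to deduplicate and prune chunks.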
- -The Docs Agent team continues to explore various optimizations to enhance -the functionality and effectiveness of the chunking process. These efforts -include refining the chunking algorithm itself and developing advanced -post-processing techniques, for instance, reconstructing chunks into their original -documents after retrieval. - -Additionally, the team has been exploring methods for co-optimizing content -structure and chunking strategies, an effort that aims to maximize retrieval -effectiveness by ensuring that the structure of the source document itself -complements the chunking process. - -## Chunk retrieval - -Docs Agent employs two distinct approaches for storing and retrieving chunks: - -- **The local database approach uses a [Chroma][chroma] vector database.** - This approach grants greater control over the chunking and retrieval - process. This option is recommended for development and experimental - setups. -- **The online corpus approach uses Gemini’s - [Semantic Retrieval API][semantic-retrieval].** This approach provides - the advantages of centrally hosted online databases, ensuring - accessibility for all users throughout the organization. This approach - has some drawbacks: control is reduced because the API may dictate - how chunks are selected and limit where customization can be applied. - -Choosing between these approaches depends on the specific needs of the user’s -deployment and comes down to balancing control and transparency against -possible gains in performance, broader reach, and ease of use. - - - -[rag]: concepts.md -[chroma]: https://docs.trychroma.com/ -[semantic-retrieval]: https://ai.google.dev/gemini-api/docs/semantic_retrieval diff --git a/examples/gemini/python/docs-agent/docs/cli-reference.md b/examples/gemini/python/docs-agent/docs/cli-reference.md deleted file mode 100644 index 5967b65b9..000000000 --- a/examples/gemini/python/docs-agent/docs/cli-reference.md +++ /dev/null @@ -1,337 +0,0 @@ -# Docs Agent CLI reference - -This page lists the Docs Agent commands and describes their usage, -with examples. - -The Docs Agent CLI helps developers manage the Docs Agent project and -interact with language models. It can handle various tasks such as -processing documents, populating vector databases, launching the chatbot, -running benchmark tests, sending prompts to language models, and more. - -**Important**: All `agent` commands need to run in the `poetry shell` -environment.
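For instance, assuming the project's dependencies have already been installed with Poetry, a typical end-to-end run might look like the following (each `agent` command is described in the sections below):

```sh
# Enter the Poetry-managed virtual environment first.
poetry shell

# Then run the Docs Agent commands, for example:
agent chunk      # split source files into small plain text chunks
agent populate   # generate embeddings and fill the vector database
agent chatbot    # launch the chatbot web app
```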
- -## Processing documents - -### Chunk Markdown files into small text chunks - -The command below splits Markdown files (and other source files) into small -chunks of plain text files: - -```sh -agent chunk -``` - -### Populate a vector database using text chunks - -The command below populates a vector database using plain text files (created -by running the `agent chunk` command): - -```sh -agent populate -``` - -### Populate a vector database and delete stale text chunks - -The command below deletes stale entries in the existing vector database -before populating it with the new text chunks: - -```sh -agent populate --enable_delete_chunks -``` - -### Show the Docs Agent configuration - -The command below prints all the fields and values in the current -[`config.yaml`][config-yaml] file: - -```sh -agent show-config -``` - -### Clean up the Docs Agent development environment - -The command below deletes development databases specified in the -`config.yaml` file: - -```sh -agent cleanup-dev -``` - -### Write logs to a CSV file - -The command below writes the summaries of all captured debugging information -(in the `logs/debugs` directory) to a `.csv` file: - -```sh -agent write-logs-to-csv -``` - -## Launching the chatbot web app - -### Launch the Docs Agent web app - -The command below launches Docs Agent's Flask-based chatbot web application: - -```sh -agent chatbot -``` - -### Launch the Docs Agent web app using a different port - -The command below launches the Docs Agent web app to run on port 5005: - -```sh -agent chatbot --port 5005 -``` - -### Launch the Docs Agent web app as a widget - -The command below launches the Docs Agent web app to use -a widget-friendly template: - -```sh -agent chatbot --app_mode widget -``` - -### Launch the Docs Agent web app in full mode - -The command below launches the Docs Agent web app to use -a special template that uses three Gemini models (AQA, Gemini 1.5, -and Gemini 1.0): - -```sh -agent chatbot --app_mode full -``` - -### Launch the Docs Agent web app with a log view enabled - -The command below launches the Docs Agent web app while enabling -a log view page (which is accessible at `/logs`): - -```sh -agent chatbot --enable_show_logs -``` - -## Running a benchmark test - -### Run the Docs Agent benchmark test - -The command below runs a benchmark test using the questions and answers listed -in the [`benchmarks.yaml`][benchmarks-yaml] file: - -```sh -agent benchmark -``` - -## Interacting with language models - -### Ask a question - -The command below reads a question from the arguments, asks the Gemini model, -and prints its response: - -```sh -agent tellme <QUESTION> -``` - -Replace `QUESTION` with a question written in plain English, for example: - -```sh -agent tellme does flutter support material design 3? -``` - -**Note**: This `agent tellme` command is used to set up the `gemini` command -in the [Set up Docs Agent CLI][set-up-docs-agent-cli] guide. - -### Ask a question to a specific product - -The command below enables you to ask a question to a specific product in your -Docs Agent setup: - -```sh -agent tellme <QUESTION> --product <PRODUCT> -``` - -The example below asks the question to the `Flutter` product in your -Docs Agent setup: - -```sh -agent tellme which modules are available? --product=Flutter -``` - -You may also specify multiple products, for example: - -```sh -agent tellme which modules are available? --product=Flutter --product=Angular --product=Android -``` - -### Ask for advice - -The command below reads a request and a filename from the arguments, -asks the Gemini model, and prints its response: - -```sh -agent helpme <REQUEST> --file <PATH_TO_FILE> -``` - -Replace `REQUEST` with a prompt and `PATH_TO_FILE` with a file's -absolute or relative path, for example: - -```sh -agent helpme write comments for this C++ file? --file ../my-project/test.cc -``` - -### Ask for advice using RAG - -The command below uses a local or online vector database (specified in -the `config.yaml` file) to retrieve relevant context for the request: - -```sh -agent helpme <REQUEST> --file <PATH_TO_FILE> --rag -``` - -### Ask for advice in a session - -The command below starts a new session (`--new`), which tracks responses, -before running the `agent helpme` command: - -```sh -agent helpme <REQUEST> --file <PATH_TO_FILE> --new -``` - -For example: - -```sh -agent helpme write a draft of all features found in this README file? --file ./README.md --new -``` - -After starting a session, use the `--cont` flag to include the previous -responses as context for the request: - -```sh -agent helpme <REQUEST> --cont -``` - -For example: - -```sh -agent helpme write a concept doc that delves into more details of these features? --cont -``` - -### Print the context in the current session - -The command below prints the questions, files, and responses that -are being used as context in the current session: - -```sh -agent show-session -``` - -### Ask the model to apply the request to each file in a directory - -The command below applies the request to each file found in the -specified directory: - -```sh -agent helpme <REQUEST> --perfile <PATH_TO_DIRECTORY> -``` - -For example: - -```sh -agent helpme explain what this file does? --perfile ~/my-project --new -``` - -### Ask the model to include all files in a directory as context - -The command below includes all files found in the specified directory -as context for the request: - -```sh -agent helpme <REQUEST> --allfiles <PATH_TO_DIRECTORY> -``` - -For example: - -```sh -agent helpme write a concept doc covering all features in this project? --allfiles ~/my-project --new -``` - -### Ask the model to run a pre-defined chain of prompts - -The command below runs a task (a sequence of prompts) defined in -a `.yaml` file stored in the [`tasks`][tasks-dir] directory: - -```sh -agent runtask --task <TASK> -``` - -For example: - -```sh -agent runtask --task DraftReleaseNotes -``` - -To see the list of all tasks available in your project, run -`agent runtask` without any arguments.
- -## Managing online corpora - -### List all existing online corpora - -The command below prints the list of all existing online corpora created -using the [Semantic Retrieval API][semantic-api]: - -```sh -agent list-corpora -``` - -### Share an online corpus with a user - -The command below enables `user01@gmail.com` to read text chunks stored in -`corpora/example01`: - -```sh -agent share-corpus --name corpora/example01 --user user01@gmail.com --role READER -``` - -The command below enables `user01@gmail.com` to read and write to -`corpora/example01`: - -```sh -agent share-corpus --name corpora/example01 --user user01@gmail.com --role WRITER -``` - -### Share an online corpus with everyone - -The command below enables `EVERYONE` to read text chunks stored in -`corpora/example01`: - -```sh -agent open-corpus --name corpora/example01 -``` - -### Remove a user permission from an online corpus - -The command below removes an existing user permission set in `corpora/example01`: - -```sh -agent remove-corpus-permission --name corpora/example01/permissions/123456789123456789 -``` - -### Delete an online corpus - -The command below deletes an online corpus: - -```sh -agent delete-corpus --name corpora/example01 -``` - - - -[config-yaml]: ../config.yaml -[benchmarks-yaml]: ../docs_agent/benchmarks/benchmarks.yaml -[set-up-docs-agent-cli]: ../docs_agent/interfaces/README.md -[semantic-api]: https://ai.google.dev/docs/semantic_retriever -[tasks-dir]: ../tasks diff --git a/examples/gemini/python/docs-agent/docs/concepts.md b/examples/gemini/python/docs-agent/docs/concepts.md deleted file mode 100644 index c8cfb53ac..000000000 --- a/examples/gemini/python/docs-agent/docs/concepts.md +++ /dev/null @@ -1,376 +0,0 @@ -# Docs Agent concepts - -**Note**: If you want to set up and launch the Docs Agent chat app on your host machine, -see the [Set up Docs Agent][set-up-docs-agent] section in the README. - -This page describes the architecture and features of Docs Agent. - -## Overview - -The Docs Agent chat app is designed to be easily set up and configured in a Linux environment -and requires that you have access to Google’s [Gemini API][genai-doc-site]. - -Docs Agent uses a technique known as Retrieval Augmented Generation (RAG), which allows -you to bring your own documents as knowledge sources to AI language models. This approach -helps the AI language models generate relevant and accurate responses that are grounded -in the information that you provide and control. - -![Docs Agent architecture](./images/docs-agent-architecture-01.png) - -**Figure 1**. Docs Agent uses a vector database to retrieve context for augmenting prompts. - -## Main features - -The key features of the Docs Agent chat app are: - -- Add contextual information to user questions to augment prompts for AI language models. -- Process documents into embeddings and store them in a vector database for semantic retrieval. - -![Docs Agent flow](./images/docs-agent-architecture-02.png) - -**Figure 2**. A user question is augmented by the Docs Agent server and passed to an LLM. - -For the moment, the Docs Agent project focuses on providing Python scripts that make it -easy to process Markdown files into embeddings. However, there is no hard requirement that -source documents exist in Markdown format. What’s important is that the processed content -is available as embeddings in the vector database.
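As a rough illustration of this flow, the snippet below embeds a single plain-text chunk with the Gemini API and stores it in a local Chroma collection. It is a minimal sketch rather than the project's actual populate script: the collection name and storage path follow the example `config.yaml`, the metadata fields are assumptions, and rate limiting (see `embedding_api_call_limit` in the configuration) and error handling are omitted.

```python
import chromadb
import google.generativeai as genai

genai.configure(api_key="YOUR_API_KEY")  # assumes a Gemini API key is available


def add_chunk_to_vector_db(chunk_text, source_url, filename):
    """Embed one plain-text chunk and store it in a local Chroma collection."""
    embedding = genai.embed_content(
        model="models/embedding-001",
        content=chunk_text,
        task_type="retrieval_document",
    )["embedding"]
    client = chromadb.PersistentClient(path="vector_stores/chroma")
    collection = client.get_or_create_collection(name="docs_collection")
    collection.add(
        ids=[filename],
        embeddings=[embedding],
        documents=[chunk_text],
        metadatas=[{"url": source_url, "filename": filename}],
    )
```

At query time, the same collection can be searched with `collection.query(query_embeddings=[...], n_results=5)` to retrieve the most relevant chunks, which is the semantic search step described in the next section.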
- -### Structure of a prompt to a language model - -To enable an LLM to answer questions that are not part of the public knowledge (which the LLM -is likely trained on), the Docs Agent project applies a mixture of prompt engineering and -embeddings techniques. That is, we process a set of documents (which contain domain specific -knowledge) into embeddings and store them in a vector database. This vector database allows -the Docs Agent server to perform semantic search on stored embeddings to find the most relevant -content from the source documents given user questions. - -Once the most relevant content is returned, the Docs Agent server uses the prompt structure -shown in Figure 3 to augment the user question with a preset **condition** and a list of -**context**. (When the Docs Agent server starts, the condition value is read from the -[`config.yaml`][config-yaml] file.) Then the Docs Agent server sends this prompt to a -language model using the Gemini API and receives a response generated by the model. - -![Docs Agent prompt strcture](./images/docs-agent-prompt-structure-01.png) - -**Figure 3**. Prompt structure for augmenting a user question with related context -(Context source: [eventhorizontelescope.org][context-source-01]) - -### Processing of Markdown files into embeddings - -To process information into embeddings using the Python scripts in the project, the -information needs to be stored in Markdown format. Once you have a set of Markdown files -stored in a directory on your host machine, you can run the -[`files_to_plain_text.py`][files-to-plain-text] script to process those Markdown -files into small plain text files – the script splits the content by the top three Markdown -headers (`#`, `##`, and `###`). - -Once Markdown files are processed into small plain text files, you can run the -[`populate_vector_database.py`][populate-vector-database] script to generate embeddings -for each text file and store those embeddings into a [Chroma][chroma-docs] vector database -running on the host machine. - -The embeddings in this vector database enable the Docs Agent server to perform semantic search -and retrieve context related to user questions for augmenting prompts. - -For more information on the processing of Markdown files, see the [`README`][scripts-readme] -file in the `scripts` directory. - -![Document to embeddings](./images/docs-agent-embeddings-01.png) - -**Figure 4**. A document is split into small semantic chunks, which are then used to generate -embeddings. - -![Markdown to embeddings](./images/docs-agent-embeddings-02.png) - -**Figure 5**. A Markdown page is split by headers and processed into embeddings. - -## Summary of tasks and features - -The following list summarizes the tasks and features of the Docs Agent chat app: - -- **Process Markdown**: Split Markdown files into small plain text files. (See the - Python scripts in the [`preprocess`][preprocess-dir] directory.) -- **Generate embeddings**: Use an embedding model to process small plain text files - into embeddings, and store them in a vector database. (See the - [`populate_vector_database.py`][populate-vector-database] script.) -- **Perform semantic search**: Compare embeddings in the vector database to retrieve - most relevant content given user questions. -- **Add context to a user question**: Add a list of text chunks returned from - a semantic search as context in a prompt. 
-- **(Experimental) “Fact-check” responses**: This experimental feature composes - a follow-up prompt and asks the language model to “fact-check” its own previous response. - (See the [Using a language model to fact-check its own response][fact-check-section] - section.) -- **Generate related questions**: In addition to displaying a response to the user - question, the web UI displays 5 questions generated by the language model based on - the context of the user question. (See the - [Using a language model to suggest related questions][related-questions-section] - section.) -- **Return URLs of documentation sources**: Docs Agent's vector database stores URLs - as metadata next to embeddings. Whenever the vector database is used to retrieve - text chunks for context, the database can also return the URLs of the sources used - to generate the embeddings. -- **Collect feedback from users**: Docs Agent's chatbot web UI includes buttons that - allow users to [like generated responses][like-generated-responses] or - [submit rewrites][submit-a-rewrite]. -- **Convert Google Docs, PDF, and Gmail into Markdown files**: This feature uses - Apps Script to convert Google Docs, PDF, and Gmail into Markdown files, which then - can be used as input datasets for Docs Agent. (See the - [`apps_script`][apps-script-readme] directory.) -- **Run benchmark test to monitor the quality of AI-generated responses**: Using - Docs Agent, you can run [benchmark test][benchmark-test] to measure and compare - the quality of text chunks, embeddings, and AI-generated responses. -- **Use the Semantic Retrieval API and AQA model**: You can use Gemini's - [Semantic Retrieval API][semantic-api] to upload source documents to an online - corpus and use the [AQA model][aqa-model] that is specifically created for answering - questions using an online corpus. - -## Flow of events - -The following events take place in the Docs Agent chat app: - -1. The [`files_to_plain_text.py`][files-to-plain-text] script converts input - Markdown documents into small plain text files, split by Markdown headings - (`#`, `##`, and `###`). -2. The [`populate_vector_database.py`][populate-vector-database] script generates - embeddings from the small plain text files and populates a vector database. -3. When the [`agent chatbot`] command is run, it starts the Docs Agent server and - vector database, which loads generated embeddings and metadata (URLs and filenames) - stored in the `vector_store` directory. -4. When the user asks a question, the Docs Agent server uses the vector database to - perform semantic search on embeddings, which represent content in the source - documents. -5. Using this semantic search capability, the Docs Agent server finds a list of - text chunks that are most relevant to the user question. -6. The Docs Agent server adds this list of text chunks as context (plus a condition - for responses) to the user question and constructs them into a prompt. -7. The system sends the prompt to a language model via the Gemini API. -8. The language model generates a response and the Docs Agent server renders it on - the chat UI. - -Additional events for [“fact-checking” a generated response][fact-check-section]: - -9. The Docs Agent server prepares another prompt that compares the generated response - (in step 8) to the context (in step 6) and asks the language model to look for - a discrepancy in the response. -10. 
The language model generates a response that points out one major discrepancy - (if it exists) between its previous response and the context. -11. The Docs Agent server renders this response on the chat UI as a call-out note. -12. The Docs Agent server passes this second response to the vector database to - perform semantic search. -13. The vector database returns a list of relevant content (that is closely related - to the second response). -14. The Docs Agent server renders the top URL of this list on the chat UI and - suggests that the user checks out this URL for fact-checking. - -Additional events for -[suggesting 5 questions related to the user question][related-questions-section]: - -15. The Docs Agent server prepares another prompt that asks the language model to - generate 5 questions based on the context (in step 6). -16. The language model generates a response that contains a list of questions related - to the context. -17. The Docs Agent server renders the questions on the chat UI. - -## Supplementary features - -This section describes additional features implemented on the Docs Agent chat app for -enhancing the usability of the Q&A experience powered by generative AI. - -![Docs Agent UI](./images/docs-agent-ui-screenshot-01.png) - -**Figure 6**. A screenshot of the Docs Agent chat UI showing the sections generated by -three distinct prompts. - -### Using a language model to fact-check its own response - -In addition to using the prompt structure above (shown in Figure 3), we‘re currently -experimenting with the following prompt setup for “fact-checking” responses generated -by the language model: - -- Condition: - - ``` - You are a helpful chatbot answering questions from users. Read the following context - first and answer the question at the end: - ``` - -- Context: - - ``` - - ``` - -- Additional condition (for fact-checking): - - ``` - Can you compare the text below to the information provided in this prompt above - and write a short message that warns the readers about which part of the text they - should consider fact-checking? (Please keep your response concise and focus on only - one important item.)" - ``` - -- Previously generated response - - ``` - Text: - ``` - -This "fact-checking" prompt returns a response similar to the following example: - -``` -The text states that Flutter chose to use Dart because it is a fast, productive, object-oriented -language that is well-suited for building user interfaces. However, the context provided in the -prompt states that Flutter chose Dart because it is a fast, productive language that is well-suited -for Flutter's problem domain: creating visual user experiences. Therefore, readers should consider -fact-checking the claim that Dart is well-suited for building user interfaces. -``` - -After the second response, notice that the Docs Agent chat UI also suggests a URL to visit for -fact-checking (see Figure 6), which looks similar to the following example: - -``` -To verify this information, please check out: - -https://docs.flutter.dev/resources/faq -``` - -To identify this URL, the Docs Agent server takes the second response (which is the paragraph that -begins with “The text states that ...” in the example above) and uses it to query the vector -database. Once the vector database returns a list of the most relevant content to this response, -the UI only displays the top URL to the user. 
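-
-For reference, the following is a minimal sketch of this two-pass flow using the helper
-methods defined in `docs_agent/agents/docs_agent.py` (the example question and the
-surrounding glue code are illustrative, not the exact server code):
-
-```python
-from docs_agent.utilities import config
-from docs_agent.agents.docs_agent import DocsAgent
-
-# Load the first product defined in config.yaml and initialize Docs Agent.
-product = config.ReadConfig().returnProducts().products[0]
-docs_agent = DocsAgent(config=product)
-
-question = "Why does Flutter use Dart?"
-
-# First pass: retrieve context from the vector database and generate the main response.
-search_result, final_context = docs_agent.query_vector_store_to_build(question=question)
-response, prompt = docs_agent.ask_content_model_with_context_prompt(
-    context=final_context, question=question
-)
-
-# Second pass: ask the model to "fact-check" its previous response against the same context.
-fact_check = docs_agent.ask_content_model_to_fact_check(
-    context=final_context, prev_response=str(response)
-)
-
-# The second response is then used as a query itself; the URL metadata of the top
-# match is what the chat UI suggests for fact-checking.
-related_chunks = docs_agent.query_vector_store(question=str(fact_check))
-```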
- -Keep in mind that this "fact-checking" prompt setup is currently considered **experimental** -because we‘ve seen cases where a language model would end up adding incorrect information into its -second response as well. However, we saw that adding this second response (which brings attention -to the language model’s possible hallucinations) seems to improve the usability of the system since it -serves as a reminder to the users that the language model‘s response is far from being perfect, which -helps encourage the users to take more steps to validate generated responses for themselves. - -### Using a language model to suggest related questions - -The project‘s latest web UI includes the “Related questions” section, which displays five -questions that are related to the user question (see Figure 6). These five questions are also -generated by a language model (via the Gemini API). Using the list of contents returned from the vector -database as context, the system prepares another prompt asking the language model to generate five -questions from the included context. - -The following is the exact structure of this prompt: - -- Condition: - - ``` - Read the context below and answer the question at the end: - ``` - -- Context: - - ``` - - ``` - -- Question: - - ``` - What are 5 questions developers might ask after reading the context? - ``` - -### Enabling users to submit a rewrite of a generated response - -The project‘s latest web UI includes the **Rewrite this response** button at the bottom of -the panel (see Figure 6). When this button is clicked, a widget opens up, expanding the -main UI panel, and reveals a textarea containing the generated response to the user's question. -The user is then allowed to edit this response in the textarea and click the **Submit** button -to submit the updated response to the system. - -The system stores the submitted response as a Markdown file in the project's local `rewrites` -directory. The user may re-click the **Submit** button to update the submitted rewrite multiple -times. - -### Enabling users to like generated responses - -The project's latest web UI includes the **Like this response** button at the bottom of the panel -(see Figure 6). When this button is clicked, the server logs the event of "like" for the response. -However, clicking the **Liked** button again will reset the button. Then the server logs this reset -event of "like" for the response. - -The user may click this like button multiple times to toggle the state of the like button. But when -examining the logs, only the final state of the like button will be considered for the response. - -### Using Google Docs, PDF, or Gmail as input sources - -The project includes Apps Script files that allow you to convert various sources of content -(including Google Docs and PDF) from your Google Drive and Gmail into Markdown files. You can then -use these Markdown files as additional input sources for Docs Agent. For more information, see the -[`README`][apps-script-readme] file in the `apps_script` directory. - -![Docs Agent pre-processing flow](./images/docs-agent-pre-processing-01.png) - -**Figure 7**. Docs Agent's pre-processing flow for various doc types. - -### Using the Semantic Retrieval API and AQA model - -Docs Agent provides options to use Gemini's [Semantic Retrieval API][semantic-api] for storing text -chunks in Google Cloud's online storage (and using this online storage for context retrieval), -in combination with using the [AQA model][aqa-model] for question-answering. 
-
-To use the Semantic Retrieval API, update the `config.yaml` file to the following settings:
-
-```
-models:
-  - language_model: "models/aqa"
-
-...
-
-db_type: "google_semantic_retriever"
-```
-
-The setup above uses both the Semantic Retrieval API and the AQA model.
-
-**Note**: At the moment, when `db_type` is set to `google_semantic_retriever`, running the
-`populate_vector_database.py` script will also create and populate a local vector database using
-Chroma as well as create and populate an online corpus using the Semantic Retrieval API.
-
-However, if you want to use only the AQA model without using an online corpus, update the
-`config.yaml` file to the following settings instead:
-
-```
-models:
-  - language_model: "models/aqa"
-
-...
-
-db_type: "chroma"
-```
-
-The setup above uses the AQA model with your local Chroma vector database. For more information,
-see the [More Options: AQA Using Inline Passages][inline-passages] section on the
-_Semantic Retriever Quickstart_ page.
-
-**Note**: To use the Semantic Retrieval API, you need to complete the OAuth setup for your Google
-Cloud project from your host machine. For detailed instructions, see the
-[Authentication with OAuth quickstart][oauth-quickstart] page.
-
-
-
-[set-up-docs-agent]: ../README.md#set-up-docs-agent
-[files-to-plain-text]: ../docs_agent/preprocess/files_to_plain_text.py
-[populate-vector-database]: ../docs_agent/preprocess/populate_vector_database.py
-[context-source-01]: http://eventhorizontelescope.org
-[fact-check-section]: #using-a-language-model-to-fact-check-its-own-response
-[related-questions-section]: #using-a-language-model-to-suggest-related-questions
-[submit-a-rewrite]: #enabling-users-to-submit-a-rewrite-of-a-generated-response
-[like-generated-responses]: #enabling-users-to-like-generated-responses
-[populate-db-steps]: #populate-a-new-vector-database-from-markdown-files
-[genai-doc-site]: https://ai.google.dev/docs/gemini_api_overview
-[chroma-docs]: https://docs.trychroma.com/
-[apps-script-readme]: ../apps_script/README.md
-[scripts-readme]: ../docs_agent/preprocess/README.md
-[config-yaml]: ../config.yaml
-[benchmark-test]: ../docs_agent/benchmarks/README.md
-[semantic-api]: https://ai.google.dev/docs/semantic_retriever
-[aqa-model]: https://ai.google.dev/models/gemini#model_variations
-[oauth-quickstart]: https://ai.google.dev/docs/oauth_quickstart
-[inline-passages]: https://ai.google.dev/docs/semantic_retriever#more_options_aqa_using_inline_passages
-[authorize-credentials]: https://ai.google.dev/docs/oauth_quickstart#authorize-credentials
-[preprocess-dir]: ../docs_agent/preprocess/
diff --git a/examples/gemini/python/docs-agent/docs/config-reference.md b/examples/gemini/python/docs-agent/docs/config-reference.md
deleted file mode 100644
index 2cdc30629..000000000
--- a/examples/gemini/python/docs-agent/docs/config-reference.md
+++ /dev/null
@@ -1,152 +0,0 @@
-# Docs Agent configuration reference
-
-This page provides a list of additional options that can be specified
-in the Docs Agent configuration file ([`config.yaml`][config-yaml]).
-
-## Web application options
-
-### app_port
-
-This field sets the port on which the Docs Agent web app runs.
-
-```
-app_port: 5001
-```
-
-By default, the web app is set to use port 5000.
-
-### app_mode
-
-This field controls the user interface mode of the web app.
-
-The options are:
-
-* `widget`: This mode launches a compact widget-style interface, suitable
-  for being embedded within a webpage.
-
-  ```
-  app_mode: "widget"
-  ```
-
-* `full`: This special mode is designed to be used with Gemini 1.5 models.
-
-  ```
-  app_mode: "full"
-  ```
-
-When this field is not specified, the web app is set to use the standard mode.
-
-## User feedback options
-
-### feedback_mode
-
-This field sets the type of feedback mechanism that is available to users for providing
-feedback on the quality or relevance of responses.
-
-The options are:
-
-* `feedback`: This is the default setting.
-
-  ```
-  feedback_mode: "feedback"
-  ```
-
-* `rewrite`: This option provides the "Rewrite this response" button to allow
-  users to suggest alternative responses.
-
-  ```
-  feedback_mode: "rewrite"
-  ```
-
-* `like_and_dislike`: This option provides simple "Like" and "Dislike" buttons.
-
-  ```
-  feedback_mode: "like_and_dislike"
-  ```
-
-## Logging options
-
-### log_level
-
-This field controls the level of detail captured in the logs generated by Docs
-Agent.
-
-Setting it to `VERBOSE` provides more comprehensive logging information:
-
-```
-log_level: "VERBOSE"
-```
-
-This field is set to `NORMAL` by default.
-
-### enable_show_logs
-
-Setting this field to `"True"` allows logs to be displayed in a web browser
-(accessible at `/logs`):
-
-```
-enable_show_logs: "True"
-```
-
-### enable_logs_to_markdown
-
-Setting this field to `"True"` saves the generated answers as Markdown pages
-on the host machine:
-
-```
-enable_logs_to_markdown: "True"
-```
-
-### enable_logs_for_debugging
-
-Setting this field to `"True"` generates detailed logs for debugging purposes:
-
-```
-enable_logs_for_debugging: "True"
-```
-
-## Database management options
-
-### enable_delete_chunks
-
-Setting this field to `"True"` enables the ability to delete outdated, stale
-text chunks from the vector databases:
-
-```
-enable_delete_chunks: "True"
-```
-
-## Secondary database configuration
-
-Docs Agent allows for the use of a secondary database alongside the primary one
-for providing additional context from a different source.
-
-### secondary_db_type
-
-This field specifies the type of secondary database to be used:
-
-```
-secondary_db_type: "google_semantic_retrieval"
-```
-
-or
-
-```
-secondary_db_type: "chroma"
-```
-
-When `chroma` is specified, the collection in the `vector_stores/chroma`
-directory is used as the secondary database.
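-
-For example, a configuration that pairs a primary Chroma database with an online corpus
-as the secondary source could combine these fields as follows (a sketch only;
-`secondary_corpus_name` is described in the next section):
-
-```
-db_type: "chroma"
-
-secondary_db_type: "google_semantic_retrieval"
-secondary_corpus_name: "corpora/my-example-corpus"
-```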
- -### secondary_corpus_name - -This field defines the name of the corpus for the secondary database, -for example: - -``` -secondary_corpus_name: "corpora/my-example-corpus" -``` - - - -[config-yaml]: ../config.yaml diff --git a/examples/gemini/python/docs-agent/docs/images/apps-script-screenshot-01.png b/examples/gemini/python/docs-agent/docs/images/apps-script-screenshot-01.png deleted file mode 100644 index e49478e79..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/apps-script-screenshot-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-01.png deleted file mode 100644 index 21912a876..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-02.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-02.png deleted file mode 100644 index 9eff8da5f..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-architecture-02.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-benchmarks-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-benchmarks-01.png deleted file mode 100644 index 2907ac307..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-benchmarks-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-chat-app-screenshot-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-chat-app-screenshot-01.png deleted file mode 100644 index 6efb51c9a..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-chat-app-screenshot-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-01.png deleted file mode 100644 index 32ac4dc5a..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-02.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-02.png deleted file mode 100644 index 7ff222efd..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-embeddings-02.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-pre-processing-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-pre-processing-01.png deleted file mode 100644 index 98b34c02d..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-pre-processing-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-prompt-structure-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-prompt-structure-01.png deleted file mode 100644 index 64119ac07..000000000 Binary files a/examples/gemini/python/docs-agent/docs/images/docs-agent-prompt-structure-01.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs/images/docs-agent-ui-screenshot-01.png b/examples/gemini/python/docs-agent/docs/images/docs-agent-ui-screenshot-01.png deleted file mode 100644 index d680cae39..000000000 Binary files 
a/examples/gemini/python/docs-agent/docs/images/docs-agent-ui-screenshot-01.png and /dev/null differ
diff --git a/examples/gemini/python/docs-agent/docs/whats-new.md b/examples/gemini/python/docs-agent/docs/whats-new.md
deleted file mode 100644
index 7794324ac..000000000
--- a/examples/gemini/python/docs-agent/docs/whats-new.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# What's new in Docs Agent
-
-## April 2024
-
-* **Focus: Feature enhancements and usability improvements**
-* Expanded CLI functionality with options for managing online corpora and interacting with files.
-* Fixed bugs and refactored code for improved stability and maintainability.
-* Added a new chat app template specifically designed for the **Gemini 1.5 model**.
-* Updated GenAI SDK version to `0.5.0`.
-* Introduced a splitter for handling Fuchsia’s FIDL protocol files in the preprocessing stage.
-
-## March 2024
-
-* **Milestone: Introduction of the Docs Agent CLI**
-* Added the `tellme` command for direct interaction with Gemini from a Linux terminal.
-* Expanded CLI options for corpora management, including creation, deletion, and permission control.
-* Enhanced the chat app UI with a "loading" animation and probability-based response pre-screening.
-* Enabled displaying more URLs retrieved from the AQA model in the widget mode.
-* Added support for including URLs as metadata when uploading chunks to online corpora.
-
-## February 2024
-
-* **Focus: Refining AQA model integration**
-* Improved UI rendering of AQA model responses, especially for code segments.
-* Fixed bugs to handle unexpected AQA model responses.
-* Generated related questions by using retrieved context instead of a user question.
-* Started logging `answerable_probability` for AQA model responses.
-
-## January 2024
-
-* **Milestone: Docs Agent uses the AQA model and Semantic Retrieval API**
-* Started Logs Agent experiments.
-* Benchmark score up ~2.5% with enhancements to embeddings.
-
-## December 2023
-
-* **Milestone: Docs Agent uses the Gemini model.**
-* Prototyping benchmarking: documentation unit tests.
-* Steady traffic since launch, 861 weekly views, December 14.
-
-## November 2023
-
-* Experimented with context reconstruction.
-* Docs Agent now parses code blocks.
-* Added a new condition using a mixture of best practices to improve answers.
-* Added chunking by tokenization.
-
-## October 2023
-
-* **Milestone: Docs Agent supports Google Docs, PDFs, and emails.**
-* Drafted Docs Agent security strategy.
-* Drafted Docs Agent + Talking Character design doc.
-* Top of the charts for generative AI samples: 1216 weekly views.
-* Build for AI series: 16000 watches.
-
-## September 2023
-
-* First open-source feature request: support Google Docs.
-* **Milestone: Docs Agent published!**
-* Recorded Build for AI series.
-* Implemented hashing to check existing entries and only generate embeddings for
-  new or updated content.
-
-## August 2023
-
-* Docs Agent demo running with Flutter docs.
-* Docs Agent gets necessary approvals for open-sourcing.
-* Special mention: Docs Agent gets its name.
-* Added support to read frontmatter, starting with titles.
-
-## July 2023
-
-* Light month, as many of us took vacations :).
-* Created `opensource` branch on internal repo for open-source pushes.
-* Reviewed video script for Build for AI series.
-* Security: meeting on using open-source content and security issues.
-
-## June 2023
-
-* Drafted Docs Agent README.
-* Created internal repo to set up infrastructure for open-source pushes.
-* First internal customer tried Docs Agent. -* Compiled list of Todos to open-source Docs Agent. - -## May 2023 - -* Switched from chunking content based on 3000-char limit to chunking by - headings. -* Cleaned up Markdown processing issues. -* Privacy: clarified in UI how we are using data. -* Attempted to create a chat bot for Google chat. -* Added database admin console. -* Partially implemented rewrite option. -* Added related questions. - -## April 2023 - -* Created new UI for chat app: Flask app. -* Added 'fact-checking' section. -* **Milestone: started the Docs Agent open-source project.** diff --git a/examples/gemini/python/docs-agent/docs_agent/agents/__init__.py b/examples/gemini/python/docs-agent/docs_agent/agents/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/gemini/python/docs-agent/docs_agent/agents/docs_agent.py b/examples/gemini/python/docs-agent/docs_agent/agents/docs_agent.py deleted file mode 100644 index 992a46c6f..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/agents/docs_agent.py +++ /dev/null @@ -1,575 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Docs Agent""" - -import typing - -from absl import logging -import google.api_core -import google.ai.generativelanguage as glm -from chromadb.utils import embedding_functions - -from docs_agent.storage.chroma import ChromaEnhanced - -from docs_agent.models.google_genai import Gemini - -from docs_agent.utilities.config import ProductConfig, Models -from docs_agent.preprocess.splitters import markdown_splitter - -from docs_agent.preprocess.splitters.markdown_splitter import Section as Section -from docs_agent.postprocess.docs_retriever import SectionDistance as SectionDistance -from docs_agent.postprocess.docs_retriever import ( - SectionProbability as SectionProbability, -) - - -class DocsAgent: - """DocsAgent class""" - - # Temporary parameter of init_chroma - def __init__( - self, - config: ProductConfig, - init_chroma: bool = True, - init_semantic: bool = True, - ): - # Models settings - self.config = config - self.language_model = str(self.config.models.language_model) - self.embedding_model = str(self.config.models.embedding_model) - self.api_endpoint = str(self.config.models.api_endpoint) - - # Initialize the default Gemini model. 
- if self.language_model.startswith("models/gemini"): - self.gemini = Gemini( - models_config=config.models, conditions=config.conditions - ) - self.context_model = self.language_model - - # Use the new chroma db for all queries - # Should make a function for this or clean this behavior - if init_chroma: - for item in self.config.db_configs: - if "chroma" in item.db_type: - self.vector_db_dir = item.vector_db_dir - self.collection_name = item.collection_name - self.chroma = ChromaEnhanced(self.vector_db_dir) - logging.info( - "Using the local vector database created at %s", self.vector_db_dir - ) - self.collection = self.chroma.get_collection( - self.collection_name, - embedding_model=self.embedding_model, - embedding_function=embedding_function_gemini_retrieval( - self.config.models.api_key, self.embedding_model - ), - ) - - # AQA model settings - if init_semantic: - # Except in "full" and "pro" modes, the semantic retriever option requires - # the AQA model. If not, exit the program. - if ( - self.config.app_mode != "full" - and self.config.app_mode != "widget-pro" - and self.config.db_type == "google_semantic_retriever" - ): - if self.language_model != "models/aqa": - logging.error( - "The db_type `google_semnatic_retriever` option" - + " requires the AQA model (`models/aqa`)." - ) - exit(1) - # If the AQA model is selected or the web app is on "full" and "pro" modes. - if ( - self.language_model == "models/aqa" - or self.config.app_mode == "full" - or self.config.app_mode == "widget-pro" - ): - # AQA model setup - self.generative_service_client = glm.GenerativeServiceClient() - self.retriever_service_client = glm.RetrieverServiceClient() - self.permission_service_client = glm.PermissionServiceClient() - # Start a Gemini model for other tasks - self.context_model = "models/gemini-pro" - gemini_model_config = Models( - language_model=self.context_model, - embedding_model=self.embedding_model, - api_endpoint=self.api_endpoint, - ) - self.gemini = Gemini( - models_config=gemini_model_config, conditions=config.conditions - ) - # If semantic retriever is selected as the main database. - if self.config.db_type == "google_semantic_retriever": - for item in self.config.db_configs: - if "google_semantic_retriever" in item.db_type: - self.corpus_name = item.corpus_name - if item.corpus_display: - self.corpus_display = item.corpus_display - else: - self.corpus_display = ( - self.config.product_name + " documentation" - ) - self.aqa_response_buffer = "" - - # Always initialize the Gemini 1.0 pro model for other tasks. - gemini_pro_model_config = Models( - language_model="models/gemini-pro", - embedding_model=self.embedding_model, - api_endpoint=self.api_endpoint, - ) - self.gemini_pro = Gemini( - models_config=gemini_pro_model_config, conditions=config.conditions - ) - - if self.config.app_mode == "full" or self.config.app_mode == "widget-pro": - # Initialize the Gemini 1.5 model for generating main responses. - gemini_15_model_config = Models( - language_model=self.language_model, - embedding_model=self.embedding_model, - api_endpoint=self.api_endpoint, - ) - self.gemini_15 = Gemini( - models_config=gemini_15_model_config, conditions=config.conditions - ) - else: - self.gemini_15 = self.gemini_pro - - # Use this method for talking to a Gemini content model - def ask_content_model_with_context(self, context, question): - new_prompt = context + "\n\nQuestion: " + question - # Print the prompt for debugging if the log level is VERBOSE. 
- if self.config.log_level == "VERBOSE": - self.print_the_prompt(new_prompt) - try: - response = self.gemini.generate_content(new_prompt) - except google.api_core.exceptions.InvalidArgument: - return self.config.conditions.model_error_message - # for chunk in response: - # if str(chunk.candidates[0].content) == "": - # return self.config.conditions.model_error_message - return response - - # Use this method for talking to Gemini's AQA model using inline passages - # answer_style can be VERBOSE, ABSTRACTIVE, or EXTRACTIVE - def ask_aqa_model_using_local_vector_store( - self, - question, - results_num: int = 5, - answer_style: str = "VERBOSE", - ): - user_query_content = glm.Content(parts=[glm.Part(text=question)]) - verbose_prompt = "Question: " + question + "\n" - # Retrieves from chroma, using up to 30k tokens - max gemini model tokens - chroma_search_result, final_context = self.query_vector_store_to_build( - question=question, - token_limit=30000, - results_num=results_num, - max_sources=results_num, - ) - # Create the grounding inline passages - grounding_passages = glm.GroundingPassages() - i = 0 - aqa_search_result = [] - for item in chroma_search_result: - returned_context = item.section.content - new_passage = glm.Content(parts=[glm.Part(text=returned_context)]) - index_id = str("{:03d}".format(i + 1)) - i += 1 - grounding_passages.passages.append( - glm.GroundingPassage(content=new_passage, id=index_id) - ) - verbose_prompt += "\nID: " + index_id + "\n" + returned_context + "\n" - req = glm.GenerateAnswerRequest( - model="models/aqa", - contents=[user_query_content], - inline_passages=grounding_passages, - answer_style=answer_style, - ) - aqa_response = self.generative_service_client.generate_answer(req) - self.aqa_response_buffer = aqa_response - for item in chroma_search_result: - # Builds an object with sections + probability - aqa_search_result.append( - SectionProbability( - section=item.section, - probability=aqa_response.answerable_probability, - ) - ) - if self.config.log_level == "VERBOSE": - self.print_the_prompt(verbose_prompt) - elif self.config.log_level == "DEBUG": - self.print_the_prompt(verbose_prompt) - print(aqa_response) - try: - return aqa_response.answer.content.parts[0].text, aqa_search_result - except: - self.aqa_response_buffer = "" - return self.config.conditions.model_error_message, aqa_search_result - - # Get the save response of the AQA model - def get_saved_aqa_response_json(self): - return self.aqa_response_buffer - - # Retrieve the metadata dictionary from an AQA response grounding attribution entry - def get_aqa_response_metadata(self, aqa_response_item): - try: - chunk_resource_name = ( - aqa_response_item.source_id.semantic_retriever_chunk.chunk - ) - get_chunk_response = self.retriever_service_client.get_chunk( - name=chunk_resource_name - ) - metadata = get_chunk_response.custom_metadata - final_metadata = {} - for m in metadata: - if m.string_value: - value = m.string_value - elif m.numeric_value: - value = m.numeric_value - else: - value = "" - final_metadata[m.key] = value - except: - final_metadata = {} - return final_metadata - - # Use this method for talking to Gemini's AQA model using a corpus - # Answer style can be "VERBOSE" or ABSTRACTIVE, EXTRACTIVE - def ask_aqa_model_using_corpora( - self, question, corpus_name: str = "None", answer_style: str = "VERBOSE" - ): - search_result = [] - if corpus_name == "None": - corpus_name = self.corpus_name - # Prepare parameters for the AQA model - user_question_content = glm.Content( - 
parts=[glm.Part(text=question)], role="user" - ) - # Settings to retrieve grounding content from semantic retriever - retriever_config = glm.SemanticRetrieverConfig( - source=corpus_name, query=user_question_content - ) - - # Ask the AQA model. - req = glm.GenerateAnswerRequest( - model="models/aqa", - contents=[user_question_content], - semantic_retriever=retriever_config, - answer_style=answer_style, - ) - - try: - aqa_response = self.generative_service_client.generate_answer(req) - self.aqa_response_buffer = aqa_response - except: - self.aqa_response_buffer = "" - return self.config.conditions.model_error_message, search_result - - if self.config.log_level == "VERBOSE": - verbose_prompt = "[question]\n" + question + "\n" - verbose_prompt += ( - "\n[answerable_probability]\n" - + str(aqa_response.answerable_probability) - + "\n" - ) - for attribution in aqa_response.answer.grounding_attributions: - verbose_prompt += "\n[grounding_attributions]\n" + str( - attribution.content.parts[0].text - ) - self.print_the_prompt(verbose_prompt) - elif self.config.log_level == "DEBUG": - print(aqa_response) - try: - for item in aqa_response.answer.grounding_attributions: - metadata = self.get_aqa_response_metadata(item) - for part in item.content.parts: - metadata["content"] = part.text - section = markdown_splitter.DictionarytoSection(metadata) - search_result.append( - SectionProbability( - section=section, probability=aqa_response.answerable_probability - ) - ) - # Return the aqa_response object but also the actual text response - return aqa_response.answer.content.parts[0].text, search_result - except: - return self.config.conditions.model_error_message, search_result - - def ask_aqa_model(self, question): - response = "" - if self.config.db_type == "google_semantic_retriever": - response = self.ask_aqa_model_using_corpora(question) - else: - response = self.ask_aqa_model_using_local_vector_store(question) - return response - - # Retrieve and return chunks that are most relevant to the input question. - def retrieve_chunks_from_corpus(self, question, corpus_name: str = "None"): - if corpus_name == "None": - corpus_name = self.corpus_name - user_query = question - results_count = 5 - # Quick fix: This was needed to allow the method to be called - # even when the model is not set to `models/aqa`. 
- retriever_service_client = glm.RetrieverServiceClient() - # Make the request - request = glm.QueryCorpusRequest( - name=corpus_name, query=user_query, results_count=results_count - ) - query_corpus_response = retriever_service_client.query_corpus(request) - return query_corpus_response - - # Use this method for asking a Gemini content model for fact-checking - def ask_content_model_to_fact_check(self, context, prev_response): - question = self.config.conditions.fact_check_question + "\n\nText: " - question += prev_response - return self.ask_content_model_with_context(context, question) - - # Query the local Chroma vector database using the user question - def query_vector_store(self, question, num_returns: int = 5): - return self.collection.query(question, num_returns) - - # Add specific instruction as a prefix to the context - def add_instruction_to_context(self, context): - new_context = "" - new_context += self.config.conditions.condition_text + "\n\n" + context - return new_context - - # Add custom instruction as a prefix to the context - def add_custom_instruction_to_context(self, condition, context): - new_context = "" - new_context += condition + "\n\n" + context - return new_context - - # Return true if the aqa model used in this Docs Agent setup - def check_if_aqa_is_used(self): - if self.config.models.language_model == "models/aqa": - return True - else: - return False - - # Return the chroma collection name - def return_chroma_collection(self): - try: - return self.collection_name - except: - return None - - # Return the vector db name - def return_vector_db_dir(self): - try: - return self.vector_db_dir - except: - return None - - # Print the prompt on the terminal for debugging - def print_the_prompt(self, prompt): - print("#########################################") - print("# PROMPT #") - print("#########################################") - print(prompt) - print("#########################################") - print("# END OF PROMPT #") - print("#########################################") - print("\n") - - # Query the local Chroma vector database. Starts with the number of results - # from results - # Results_num is the initial result set based on distance to the question - # Max_sources is the number of those results_num to use to build a final - # context page - def query_vector_store_to_build( - self, - question: str, - token_limit: float = 30000, - results_num: int = 10, - max_sources: int = 4, - ): - # Looks for contexts related to a question that is limited to an int - # Returns a list - contexts_query = self.collection.query(question, results_num) - # This returns a list of results - build_context = contexts_query.returnDBObjList() - # Use the token limit and distances to assign a token limit for each - # page. 
For time being split evenly into top max_sources - token_limit_temp = token_limit / max_sources - token_limit_per_source = [] - i = 0 - for i in range(max_sources): - token_limit_per_source.append(token_limit_temp) - same_document = "" - same_metadata = "" - # Each item is a chunk result along with all of it's metadata - # We can use metadata to identify if one of these chunks comes from the - # same page, potentially indicating a better match, so more token allocation - # You can see these objects contents with .content, .document, .distance, .metadata - plain_content = "" - search_result = [] - same_pages = [] - # For each result make a SectionDistance object that includes the - # Section along with it's distance from the question - for item in build_context: - # Check if this page was previously added as a source, to avoid - # duplicate count. These signals should be used to give a page higher token limits - # Make a page based on the section_id (this is where the search - # found a match) - section = SectionDistance( - section=Section( - id=item.metadata.get("section_id", None), - name_id=item.metadata.get("name_id", None), - page_title=item.metadata.get("page_title", None), - section_title=item.metadata.get("section_title", None), - level=item.metadata.get("level", None), - previous_id=item.metadata.get("previous_id", None), - parent_tree=item.metadata.get("parent_tree", None), - token_count=item.metadata.get("token_estimate", None), - content=item.document, - md_hash=item.metadata.get("md_hash", None), - url=item.metadata.get("url", None), - origin_uuid=item.metadata.get("origin_uuid", None), - ), - distance=item.distance, - ) - search_result.append(section) - # From this you can run queries to find all chunks from the same page - # since they all share the same origin_uuid which is a hash of the - # original source file name - # Limits the number of results to go through - final_page_content = [] - final_page_token = [] - plain_token = 0 - sources = [] - final_pages = [] - # Quick fix: Ensure max_sources is not larger than the array size of search_result. 
- this_range = len(search_result) - if this_range > max_sources: - this_range = max_sources - for i in range(this_range): - # The current section that is being built - # eval turns str representation of array into an array - curr_section_id = search_result[i].section.name_id - curr_parent_tree = eval(search_result[i].section.parent_tree) - # Assigned token limit for this position in the list - page_token_limit = token_limit_per_source[i] - # Returns a FullPage which is just a list of Section - same_page = self.collection.getPageOriginUUIDList( - origin_uuid=search_result[i].section.origin_uuid - ) - same_pages.append(same_page) - # Use all sections in experimental, only self when "normal" - if self.config.docs_agent_config == "experimental": - test_page = same_page.buildSections( - section_id=search_result[i].section.id, - selfSection=True, - children=True, - parent=True, - siblings=True, - token_limit=token_limit_per_source[i], - ) - else: - test_page = same_page.buildSections( - section_id=search_result[i].section.id, - selfSection=True, - children=False, - parent=False, - siblings=False, - token_limit=token_limit_per_source[i], - ) - final_pages.append(test_page) - # Each item here is a FullPage corresponding to the source - final_context = "" - for item in final_pages: - for source in item.section_list: - final_context += source.content + "\n\n" - final_context = final_context.strip() - # Result contains the search result of Section of the initial hits - # final_pages could be returned to get the full Section for displaying - # context with metadata - return search_result, final_context - - # Use this method for talking to a Gemini content model - # Optionally provide a prompt, if not use the one from config.yaml - # If prompt is "fact_checker" it will use the fact_check_question from - # config.yaml for the prompt - def ask_content_model_with_context_prompt( - self, - context: str, - question: str, - prompt: typing.Optional[str] = None, - model: typing.Optional[str] = None, - ): - if prompt == None: - prompt = self.config.conditions.condition_text - elif prompt == "fact_checker": - prompt = self.config.conditions.fact_check_question - new_prompt = f"{prompt}\n\nContext:\n{context}\nQuestion:\n{question}" - # Print the prompt for debugging if the log level is VERBOSE. - if self.config.log_level == "VERBOSE": - self.print_the_prompt(new_prompt) - try: - response = "" - if model == "gemini-pro": - response = self.gemini_pro.generate_content(contents=new_prompt) - elif model == "gemini-1.5": - response = self.gemini_15.generate_content(contents=new_prompt) - else: - response = self.gemini.generate_content(contents=new_prompt) - except: - return self.config.conditions.model_error_message, new_prompt - # for chunk in response: - # if str(chunk.candidates[0].content) == "": - # return self.config.conditions.model_error_message, new_prompt - return response, new_prompt - - # Use this method for talking to a Gemini content model - # Provide a prompt, followed by the content of the file - # This isn't in use yet, but can be used to give an LLM a full or partial file - def ask_content_model_to_use_file(self, prompt: str, file: str): - new_prompt = prompt + file - # Print the prompt for debugging if the log level is VERBOSE. 
- if self.config.log_level == "VERBOSE": - self.print_the_prompt(new_prompt) - try: - response = self.gemini.generate_content(contents=new_prompt) - except google.api_core.exceptions.InvalidArgument: - return self.config.conditions.model_error_message - # for chunk in response: - # if str(chunk.candidates[0].content) == "": - # return self.config.conditions.model_error_message - return response - - # Use this method for asking a Gemini content model for fact-checking. - # This uses ask_content_model_with_context_prompt w - def ask_content_model_to_fact_check_prompt(self, context: str, prev_response: str): - question = self.config.conditions.fact_check_question + "\n\nText: " - question += prev_response - return self.ask_content_model_with_context_prompt( - context=context, question=question, prompt="" - ) - - # Generate an embedding given text input - def generate_embedding(self, text, task_type: str = "SEMANTIC_SIMILARITY"): - return self.gemini.embed(text, task_type)[0] - - -# Function to give an embedding function for gemini using an API key -def embedding_function_gemini_retrieval(api_key, embedding_model: str): - return embedding_functions.GoogleGenerativeAiEmbeddingFunction( - api_key=api_key, model_name=embedding_model, task_type="RETRIEVAL_QUERY" - ) diff --git a/examples/gemini/python/docs-agent/docs_agent/benchmarks/README.md b/examples/gemini/python/docs-agent/docs_agent/benchmarks/README.md deleted file mode 100644 index 07cb577f0..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/benchmarks/README.md +++ /dev/null @@ -1,164 +0,0 @@ -# Benchmark test for monitoring the quality of embeddings and AI responses - -This page explains how to run benchmark test to measure and track -the quality of embeddings, context chunks, and AI-generated responses. - -Docs Agent’s benchmark test currently uses 10 questions and their -target answers curated by technical writers (see -[`benchmarks.yaml`][benchmarks-yaml]). The benchmark test asks these -10 questions to an AI language model to generate responses. The test then -computes the dot product of the embeddings (vectors) of these AI-generated -responses and the target answer to measure their similarity values -(see Figure 1). - -![Docs Agent benchmark test](../../docs/images/docs-agent-benchmarks-01.png) - -**Figure 1**. The dot product of vectors is computed to measure their similarity. - -**Note**: The input questions and answers in the -[`benchmarks.yaml`][benchmarks-yaml] file checked in the Docs Agent project are -based on the [FAQ][flutter-faq] page on the Flutter documentation site, whose -source Markdown files are available in this [Flutter repository][flutter-git]). - -## Set up and run the benchmark test - -To set up and run benchmark test using Docs Agent, the steps are: - -1. [Prepare questions and target answers for your source docs](#1_prepare-questions-and-target-answers-for-your-source-docs). -2. [Set up Docs Agent](#2_set-up-docs-agent). -3. [Run the benchmark test](#3_run-the-benchmark-test). - -### 1. Prepare questions and target answers for your source docs - -List questions and target answers for your source docs in the `benchmarks.yaml` -file. - -An example of a question and target answer pair: - -```none - - question: "Does Flutter support Material Design?" - target_answer: "Yes! The Flutter and Material teams collaborate closely, and Material is fully supported. For more information, check out the Material 2 and Material 3 widgets in the widget catalog." 
-``` - -Based on the information documented in your source docs, come up -with a list of questions (`question`) and their expected answers -(`target_answer`). It’s important that these answers are found in -the source docs and are produced by humans, not AI models. - -For instance, the example [`benchmarks.yaml`][benchmarks-yaml] file includes -10 questions and 10 target answers that are based on the source documents in -the [Flutter repository][flutter-git]. So if you plan on running benchmark -test using this `benchmarks.yaml` file, you need to configure your -Docs Agent setup so that it uses the documents in the Flutter repository -as a knowledge source, for example: - -```yaml -inputs: - - path: "/usr/local/home/user01/website/src" - url_prefix: "https://docs.flutter.dev" -``` - -(Source: [`config.yaml`][config-yaml]) - -### 2. Set up Docs Agent - -Complete the processing of your source docs into Docs Agent’s vector -database (by running the `agent chunk` and `agent populate` commands). - -**Note**: This benchmark testing uses the same `config.yaml` file as the -chatbot app (that is, `condition_text`, `vector_db_dir`, and `log_level` -variables and so on). For instance, set `log_level` to `NORMAL` -if you do not wish to see the details of prompts to the AI model while -the benchmark test is running. - -### 3. Run the benchmark test - -To start benchmark test, run the following command from your Docs Agent -project home directory: - -```sh -agent benchmark -``` - -This command computes the similarity value for each question entry -in the `benchmarks.yaml` file and writes the test results -to the [`results.out`][results-out] file. If there already -exists a `results.out` file, its content will be overwritten. - -An example of test results: - -```none -Similarity (-1, 1) Question -================== ======== -0.9693597667161213 What is inside the Flutter SDK? -0.8810758779307981 Does Flutter work with any editors or IDEs? -0.8760932771858571 Does Flutter come with a framework? -0.8924252745816632 Does Flutter come with widgets? -0.8637181105900334 Does Flutter support Material Design? -0.9340505894484676 Does Flutter come with a testing framework? -0.9192416276439515 Does Flutter come with debugging tools? -0.7491969164696617 Does Flutter come with a dependency injection framework? -0.7895399136265219 What technology is Flutter built with? -0.7802681514431923 What language is Flutter written in? -``` - -**Note**: The similarity scores shown in the example above are -computed using only a small set of documents processed from the -Flutter respository. These scores may vary depending on which -documents are added into Docs Agent's knowledge source. - -## How does this benchmark test work? - -When Docs Agent's benchmark test is run, the following events -take place: - -1. Read a `question` and `target_answer` entry from the - [`benchmarks.yaml`][benchmarks-yaml] file. -2. Generate an embedding using `target_answer` (Embedding 1). -3. Ask `question` to the AI model using the RAG technique. -4. Generate an embedding using the AI-generated response - (Embedding 2). -5. Compute the similarity between Embedding 1 and Embedding 2. -6. Repeat the steps until all question entries are read. -7. Print the test results to the [`results.out`][results-out] file. - -## How is the similarity value computed? - -To measure the similarity, each benchmark test calculates the -dot product of the embedding (vector) generated from the target -answer and the embedding generated from the AI response. 
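-
-In Python, this calculation looks roughly like the following (a minimal sketch with made-up
-three-dimensional vectors; Docs Agent's own implementation also normalizes the two vectors,
-which makes the value a cosine similarity):
-
-```python
-import numpy as np
-
-# Embeddings of the target answer and of the AI-generated response
-# (illustrative values; real embeddings have hundreds of dimensions).
-target_embedding = np.asarray([0.12, -0.03, 0.88])
-response_embedding = np.asarray([0.10, -0.01, 0.91])
-
-# Normalized dot product, which falls in the range (-1, 1).
-similarity = np.dot(target_embedding, response_embedding) / (
-    np.linalg.norm(target_embedding) * np.linalg.norm(response_embedding)
-)
-print(similarity)
-```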
- -An example of a benchmark test result: - -```none -Question: -Does Flutter come with debugging tools? - -Target answer: -Yes, Flutter comes with Flutter DevTools (also called Dart DevTools). For more information, see Debugging with Flutter and the Flutter DevTools docs. - -AI Response: -Yes, Flutter has debugging tools. You can debug your app in a few ways: - - • Using DevTools, a suite of debugging and profiling tools that run in a browser and include the Flutter inspector. - • Using Android Studio's (or IntelliJ's) built-in debugging features, such as the ability to set breakpoints. - • Using the Flutter inspector, directly available in Android Studio and IntelliJ. - -Similarity: -0.9192416276439515 -``` - -This value estimates the similarity between the human-produced -and machine-generated answers. The closer the value is to 1, -the more similar they are. (For more information , see the -[Embedding guide][embedding-generation] page on the Gemini API -documentation site.) - - - -[benchmarks-yaml]: benchmarks.yaml -[config-yaml]: ../../config.yaml -[flutter-faq]: https://docs.flutter.dev/resources/faq -[flutter-git]: https://github.com/flutter/website/tree/main/src -[results-out]: results.out -[embedding-generation]: https://ai.google.dev/docs/embeddings_guide diff --git a/examples/gemini/python/docs-agent/docs_agent/benchmarks/__init__.py b/examples/gemini/python/docs-agent/docs_agent/benchmarks/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/gemini/python/docs-agent/docs_agent/benchmarks/benchmarks.yaml b/examples/gemini/python/docs-agent/docs_agent/benchmarks/benchmarks.yaml deleted file mode 100644 index a5a6e4c7e..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/benchmarks/benchmarks.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -### Configuration for Docs Agent benchmark tests ### - -# Source docs: https://github.com/flutter/website/tree/main/src (flutter.dev) -# -# These questions and target answers are captured from https://docs.flutter.dev/resources/faq. -# -# For this benchmark testing, use a config.yaml setup similar to the following: -# -# inputs: -# - path: "/usr/local/home/user01/website/src/ui" -# url_prefix: "https://docs.flutter.dev/ui" -# - path: "/usr/local/home/user01/website/src/tools" -# url_prefix: "https://docs.flutter.dev/tools" - - -benchmarks: - - question: "What is inside the Flutter SDK?" 
- target_answer: "Flutter includes: * Heavily optimized, mobile-first 2D rendering engine with excellent support for text * Modern react-style framework * Rich set of widgets implementing Material Design and iOS-style * APIs for unit and integration tests * Interop and plugin APIs to connect to the system and 3rd-party SDKs * Headless test runner for running tests on Windows, Linux, and Mac * Flutter DevTools (also called Dart DevTools) for testing, debugging, and profiling your app * Command-line tools for creating, building, testing, and compiling your apps" - - question: "Does Flutter work with any editors or IDEs?" - target_answer: "We provide plugins for VS Code, Android Studio, and IntelliJ IDEA. See editor configuration for setup details, and VS Code and Android Studio/IntelliJ for tips on how to use the plugins. Alternatively, you can use the flutter command from a terminal, along with one of the many editors that support editing Dart." - - question: "Does Flutter come with a framework?" - target_answer: "Yes! Flutter ships with a modern react-style framework. Flutter’s framework is designed to be layered and customizable (and optional). Developers can choose to use only parts of the framework, or even replace upper layers of the framework entirely." - - question: "Does Flutter come with widgets?" - target_answer: "Yes! Flutter ships with a set of high-quality Material Design and Cupertino (iOS-style) widgets, layouts, and themes. Of course, these widgets are only a starting point. Flutter is designed to make it easy to create your own widgets, or customize the existing widgets." - - question: "Does Flutter support Material Design?" - target_answer: "Yes! The Flutter and Material teams collaborate closely, and Material is fully supported. For more information, check out the Material 2 and Material 3 widgets in the widget catalog." - - question: "Does Flutter come with a testing framework?" - target_answer: "Yes, Flutter provides APIs for writing unit and integration tests. Learn more about testing with Flutter. We use our own testing capabilities to test our SDK, and we measure our test coverage on every commit." - - question: "Does Flutter come with debugging tools?" - target_answer: "Yes, Flutter comes with Flutter DevTools (also called Dart DevTools). For more information, see Debugging with Flutter and the Flutter DevTools docs." - - question: "Does Flutter come with a dependency injection framework?" - target_answer: "We don’t ship with an opinionated solution, but there are a variety of packages that offer dependency injection and service location, such as injectable, get_it, kiwi, and riverpod." - - question: "What technology is Flutter built with?" - target_answer: "Flutter is built with C, C++, Dart, Skia (a 2D rendering engine), and Impeller (the default rendering engine on iOS). See this architecture diagram for a better picture of the main components. For a more detailed description of the layered architecture of Flutter, read the architectural overview." - - question: "What language is Flutter written in?" - target_answer: "Dart, a fast-growing modern language optimized for client apps. The underlying graphics framework and the Dart virtual machine are implemented in C/C++." 
diff --git a/examples/gemini/python/docs-agent/docs_agent/benchmarks/results.out b/examples/gemini/python/docs-agent/docs_agent/benchmarks/results.out deleted file mode 100644 index 7418b92cc..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/benchmarks/results.out +++ /dev/null @@ -1,12 +0,0 @@ -Similarity (-1, 1) Question -================== ======== -0.9693597667161213 What is inside the Flutter SDK? -0.8810758779307981 Does Flutter work with any editors or IDEs? -0.8760932771858571 Does Flutter come with a framework? -0.8924252745816632 Does Flutter come with widgets? -0.8637181105900334 Does Flutter support Material Design? -0.9340505894484676 Does Flutter come with a testing framework? -0.9192416276439515 Does Flutter come with debugging tools? -0.7491969164696617 Does Flutter come with a dependency injection framework? -0.7895399136265219 What technology is Flutter built with? -0.7802681514431923 What language is Flutter written in? diff --git a/examples/gemini/python/docs-agent/docs_agent/benchmarks/run_benchmark_tests.py b/examples/gemini/python/docs-agent/docs_agent/benchmarks/run_benchmark_tests.py deleted file mode 100644 index fe15f9714..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/benchmarks/run_benchmark_tests.py +++ /dev/null @@ -1,223 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Run benchmark tests to measure the quality of embeddings, context chunks, and AI responses""" - -import os -import sys -import yaml - -import numpy as np - -from rich.console import Console -from rich.markdown import Markdown -from rich.panel import Panel - -from docs_agent.storage.chroma import Format -from docs_agent.agents.docs_agent import DocsAgent -from docs_agent.utilities import config -from docs_agent.utilities.config import ProductConfig - - -# A function that asks the questin to the AI model using the RAG technique. 
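# Depending on the configured language model, the question is answered either by
# a Gemini model (with context built from the Chroma vector store) or by the AQA
# model (with context from an online corpus or the local vector store); only the
# response text is returned to the caller.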
-def ask_model(question: str, docs_agent: DocsAgent): - results_num = 5 - if "gemini" in docs_agent.config.models.language_model: - # print("Asking a Gemini model") - (search_result, final_context) = docs_agent.query_vector_store_to_build( - question=question, - token_limit=30000, - results_num=results_num, - max_sources=results_num, - ) - response, full_prompt = docs_agent.ask_content_model_with_context_prompt( - context=final_context, question=question - ) - elif "aqa" in docs_agent.config.models.language_model: - # print("Asking the AQA model") - if docs_agent.config.db_type == "google_semantic_retriever": - (response, search_result) = docs_agent.ask_aqa_model_using_corpora( - question=question - ) - elif docs_agent.config.db_type == "chroma": - ( - response, - search_result, - ) = docs_agent.ask_aqa_model_using_local_vector_store( - question=question, results_num=results_num - ) - else: - (response, search_result) = docs_agent.ask_aqa_model_using_corpora( - question=question - ) - return response - - -# A customized print function -def vprint(text: str, VERBOSE: bool = False): - if VERBOSE: - print(text) - - -# A function that computes cosine similarity between two vectors -def compute_cosine_similarity(v1, v2): - a = np.asarray(v1) - b = np.asarray(v2) - dot = np.dot(a, b) - a_norm = np.linalg.norm(a, 2) - b_norm = np.linalg.norm(b, 2) - cosine = dot / (a_norm * b_norm) - return cosine - - -# Read the `benchmarks.yaml` file in the `benchmarks` directory of the project. -def read_benchmarks_yaml(): - BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - BENCHMARKS_YAML = os.path.join(BASE_DIR, "benchmarks/benchmarks.yaml") - try: - with open(BENCHMARKS_YAML, "r", encoding="utf-8") as b_yaml: - read_values = yaml.safe_load(b_yaml) - except FileNotFoundError: - print("The " + BENCHMARKS_YAML + " file is missing.") - sys.exit(1) - return read_values - - -def run_benchmarks(): - # VERBOSE = False - BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - # Initialize Rich console - my_console = Console(width=160) - - # Read the configuration file (`config.yaml`) - config_file = config.ReadConfig().returnProducts() - # TODO: This benchmark test only selects the first product - # in the product list in the config file at the moment. - product = config_file.products[0] - print(f"===========================================") - print(f"Benchmark test target product: {product.product_name}") - print(f"===========================================") - - # Initialize Docs Agent - if product.db_type == "google_semantic_retriever": - docs_agent = DocsAgent(config=product, init_chroma=False) - else: - docs_agent = DocsAgent(config=product) - - # Read the `benchmarks.yaml` file. - benchmark_values = read_benchmarks_yaml() - - questions = [] - results = [] - index = 0 - print() - for benchmark in benchmark_values["benchmarks"]: - embedding_01 = "" - embedding_02 = "" - response = "" - similarity = "" - - # Step 1. Read a `question` and `target_answer` pair. - question = benchmark["question"] - target_answer = benchmark["target_answer"] - questions.append(question) - print("================") - print("Benchmark " + str(index)) - print("================") - print("Question: " + question) - print("Target answer: " + target_answer) - print() - - # Step 2. Generate an embedding using `target_answer` - Embedding 1. 
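        # Steps 2 through 5 below produce two embeddings (one for the target
        # answer, one for the model's response) and compare them with
        # compute_cosine_similarity(); the per-question scores are what get
        # written to benchmarks/results.out.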
- vprint("################") - vprint("# Embedding 1 #") - vprint("################") - vprint("Input text:") - vprint(target_answer) - embedding_01 = docs_agent.generate_embedding(target_answer) - vprint("") - vprint("Embedding:") - vprint(str(embedding_01)) - - # Step 3. Ask `question` to the AI model. - response = ask_model(question, docs_agent) - vprint("################") - vprint("# Response #") - vprint("################") - vprint(response) - - # Step 4. Generate an embedding using the response - Embedding 2. - vprint("################") - vprint("# Embedding 2 #") - vprint("################") - vprint("Input text:") - vprint(response) - embedding_02 = docs_agent.generate_embedding(response) - vprint("") - vprint("Embedding:") - vprint(str(embedding_02)) - - # Step 5. Compute the similarity between Embedding 1 and Embedding 2. - vprint("################") - vprint("# Similarity #") - vprint("################") - similarity = compute_cosine_similarity(embedding_01, embedding_02) - vprint(similarity) - vprint("") - results.append(similarity) - - # Step 6. Print the summary of this run. - print("################") - print("# Result #") - print("################") - print("Question:") - my_console.print(Panel.fit(Markdown(question))) - print() - print("Target answer:") - my_console.print(Panel.fit(Markdown(target_answer))) - print() - print("AI Response:") - my_console.print(Panel.fit(Markdown(response))) - print() - print("Similarity:") - print(similarity) - print() - - index += 1 - - # Print the benchmark test results. - print("################################") - print("# Benchmark tests summary #") - print("################################") - print() - print("Similarity (-1, 1)" + " " + "Question") - print("==================" + " " + "========") - for i, q in enumerate(questions): - print(str("{:.16f}".format(results[i])) + " " + q) - print() - - # Store the benchmark test results into benchmarks/results.out. - BENCHMARKS_OUT = os.path.join(BASE_DIR, "benchmarks/results.out") - with open(BENCHMARKS_OUT, "w", encoding="utf-8") as outfile: - outfile.write("Similarity (-1, 1)" + " " + "Question\n") - outfile.write("==================" + " " + "========\n") - for i, q in enumerate(questions): - outfile.write(str("{:.16f}".format(results[i])) + " " + q + "\n") - print("Created " + BENCHMARKS_OUT + " to store the results of the benchmark tests.") - - -if __name__ == "__main__": - run_benchmarks() diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/README.md b/examples/gemini/python/docs-agent/docs_agent/interfaces/README.md deleted file mode 100644 index 7a3d83b13..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/README.md +++ /dev/null @@ -1,320 +0,0 @@ -# Set up Docs Agent CLI - -This guide provides instructions on setting up Docs Agent's command-line -interface (CLI) that allows you to ask questions from anywhere in a terminal. - -Using Docs Agent, you can configure your host machine's environment to make -the `gemini` command run from anywhere in your terminal. The `gemini` command -(which is an `alias` to Docs Agent's `agent tellme` command) reads a question -from the arguments, asks the [Gemini AQA][gemini-aqa] model, and prints its -response in the terminal. - -The example below shows that a user can run the `gemini` command directly -from a terminal: - -``` -user@user01:~$ gemini does Flutter support material design 3? - -As of the Flutter 3.16 release, Material 3 is enabled by default. 
- -To verify this information, see: - - • https://docs.flutter.dev/ui/design/material/index#more-information - -user@user01:~$ -``` - -In this setup guide, Docs Agent's AQA model is configured to use an example -online corpus. However, using the tools available in the Docs Agent project, -you can [create and populate a new corpus][populate-corpus] with your own -documents and adjust your Docs Agent configuration to use that corpus -instead – you can also [share the corpus][share-corpus] with other members -in your team. - -## 1. Prerequisites - -Setting up Docs Agent requires the following prerequisite items: - -- A Linux host machine - -- A [Google Cloud][google-cloud] project with the setup below: - - - An API key enabled with the Generative Language API (that is, - the [Gemini API][genai-doc-site]) - - - (**Optional**) [Authenticated OAuth client credentials][oauth-client] - stored on the host machine - -## 2 Update your host machine's environment - -1. Update the Linux package repositories on the host machine: - - ``` - sudo apt update - ``` - -2. Install the following dependencies: - - ``` - sudo apt install git pipx python3-venv - ``` - -3. Install `poetry`: - - ``` - pipx install poetry - ``` - -4. To add `$HOME/.local/bin` to your `PATH` variable, run the following - command: - - ``` - pipx ensurepath - ``` - -5. To set the Google API key as a environment variable, add the following - line to your `$HOME/.bashrc` file: - - ``` - export GOOGLE_API_KEY= - ``` - - Replace `` with the API key to the - [Gemini API][genai-doc-site]. - -6. Update your environment: - - ``` - source ~/.bashrc - ``` - -## 3. Authorize credentials for Docs Agent - -**Note**: This step may not be necessary if you already have OAuth client -credentials (via `gcloud`) stored on your host machine. - -**Note**: This step is **only necessary** if you plan on using the -`agent tellme` command to interact with your online corpora on Google Cloud. - -1. Download the `client_secret.json` file from your - [Google Cloud project][authorize-credentials]. - -2. Copy the `client_secret.json` file to your host machine. - -3. Install the Google Cloud SDK on your host machine: - - ``` - sudo apt install google-cloud-sdk - ``` - -4. To authenticate credentials, run the following command in the directory of - the host machine where the `client_secret.json` file is located: - - ``` - gcloud auth application-default login --client-id-file=client_secret.json --scopes='https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/generative-language.retriever' - ``` - - This command opens a browser and asks to log in using your Google account. - -5. Follow the instructions on the browser and click **Allow** to authenticate. - - This saves the authenticated credentials for Docs Agent - (`application_default_credentials.json`) in the `$HOME/.config/gcloud/` - directory of your host machine. - -## 4. Clone the Docs Agent project - -**Note**: This guide assumes that you're creating a new project directory -from your `$HOME` directory. - -1. Clone the following repo: - - ``` - git clone https://github.com/google/generative-ai-docs.git - ``` - -2. Go to the Docs Agent project directory: - - ``` - cd generative-ai-docs/examples/gemini/python/docs-agent - ``` - -3. Install dependencies using `poetry`: - - ``` - poetry install - ``` - -At this point, you can start using the `agent helpme`, `agent tellme`, -and `agent runtask` commands to interact with the Gemini models from -your terminal. 
For more information on these commands, see the -[Interacting with language models][cli-reference-helpme] section in -the CLI reference page. - -Proceed to the next section if you want to set up an alias for the -`agent tellme` command. - -## 5. Set up an alias to the gemini command - -**Note**: If your Docs Agent project is not cloned in the `$HOME` directory, -you need to edit the `scripts/tellme.sh` script in your `docs-agent` project directory. - -Update your shell environment so that the `gemini` command can be run -from anywhere in the terminal: - -1. (**Optional**) Open the `scripts/tellme.sh` file using a text editor, - for example: - - ``` - nano scripts/tellme.sh - ``` - - If necessary, adjust the path (`$HOME/docs-agent`) to match your - `docs-agent` project directory on the host machine: - - ``` - # IF NECESSARY, ADJUST THIS PATH TO YOUR `docs-agent` DIRECTORY. - docs_agent_dir="$HOME/docs-agent" - ``` - - Save the file and close the text editor. - -2. Add the following `alias` line to your `$HOME/.bashrc` file: - - ``` - alias gemini='$HOME/docs-agent/scripts/tellme.sh' - ``` - - Similarly, if necessary, you need to adjust the path - (`$HOME/docs-agent`) to match your the `docs-agent` project directory - on the host machine. - -3. Update your environment: - - ``` - source ~/.bashrc - ``` - -4. Now you can run the `gemini` command from anywhere in your terminal: - - ``` - gemini - ``` - - For example: - - ``` - user@user01:~/temp$ gemini does flutter support material design 3? - ``` - -## Appendices - -### Set up your terminal to run the helpme command - -**Note**: This is an experimental setup. - -This new feature allows you to freely navigate a codebase setup in your -terminal and asks Gemini to perform various tasks while automatically -referencing the output you see in your terminal. - -Similar to the `agent tellme` command, the `agent helpme` command allows you to -ask a question to Gemini directly from your terminal. However, unlike -the `tellme` command, the `helpme` command uses the Gemini Pro model -and doesn't depend on an online corpus to retrieve relevant context. -Instead, this `helpme` setup can read directly from the output of your terminal -(that is, the last 150 lines at the moment) and automatically adds it as context -to your prompt. - -These tasks include, but not limited to: - -- Rewrite `README` file to be instructional and linear. -- Rewrite `README` file to be more concise and better structured. -- Format `README` to collect reference links at the bottom. -- Write comments for a C++ source file. - -**Note**: Since this setup uses the Gemini Pro model, setting up OAuth on your -host machine is **not required**. - -To set up this `helpme` command in your terminal, do the following: - -1. (**Optional**) Open the `scripts/helpme.sh` file using a text editor, - for example: - - ``` - nano scripts/helpme.sh - ``` - - If necessary, adjust the path (`$HOME/docs-agent`) to match your - `docs-agent` project directory on the host machine: - - ``` - # IF NECESSARY, ADJUST THIS PATH TO YOUR `docs-agent` DIRECTORY. - docs_agent_dir="$HOME/docs-agent" - ``` - - Save the file and close the text editor. - -2. 
Add the following `alias` lines to your `$HOME/.bashrc` file: - - ``` - alias gemini-pro='$HOME/docs-agent/scripts/helpme.sh' - alias start_agent='script -f -o 200MiB -O /tmp/docs_agent_console_input' - alias stop_agent='exit' - ``` - - Similarly, if necessary, you need to adjust the path - (`$HOME/docs-agent`) to match your the `docs-agent` project directory - on the host machine. - -3. Update your environment: - - ``` - source ~/.bashrc - ``` - -4. When you are ready to let Docs Agent to read output from your terminal, - run the following command: - - ``` - start_agent - ``` - - **Note**: To stop this process, run `stop_agent`. - -5. Navigate to a directory in your terminal and use the `cat` command - (or `head` or `tail`) to print the content of a file to your terminal. - - (In fact, you can run any command that prints output to the terminal.) - - For example: - - ``` - user@user01:~/my-example-project$ cat test.cc - - ``` - -6. To use the latest output from your terminal, run the `gemini-pro` command - immediately after the output: - - ``` - gemini-pro - ``` - - For example: - - ``` - user@user01:~/my-example-project$ cat test.cc - - user@user01:~/my-example-project$ gemini-pro could you help me write comments for this C++ file above? - ``` - - - -[gemini-aqa]: https://ai.google.dev/docs/semantic_retriever -[populate-corpus]: ../preprocess/README.md -[share-corpus]: https://ai.google.dev/docs/semantic_retriever#share_the_corpus -[google-cloud]: https://console.cloud.google.com/ -[oauth-client]: https://ai.google.dev/docs/oauth_quickstart#set-cloud -[authorize-credentials]: https://ai.google.dev/docs/oauth_quickstart#authorize-credentials -[genai-doc-site]: https://ai.google.dev/docs/gemini_api_overview -[cli-reference-helpme]: ../../docs/cli-reference.md#interacting-with-language-models diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/__init__.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/__init__.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/__init__.py deleted file mode 100644 index 58c4c46d9..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from flask import Flask -from docs_agent.interfaces.chatbot import chatui -from docs_agent.utilities import config - - -def create_app(product: config.ProductConfig, app_mode: str = "web"): - app = Flask(__name__) - app.register_blueprint(chatui.construct_blueprint(product_config=product, app_mode=app_mode)) - return app diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/chatui.py b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/chatui.py deleted file mode 100644 index 4b713050d..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/chatui.py +++ /dev/null @@ -1,605 +0,0 @@ -# -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Chatbot web service for Docs Agent""" - -from flask import Blueprint, render_template, request, redirect, url_for, json, jsonify -import markdown -import markdown.extensions.fenced_code -import urllib -import os -import typing -from datetime import datetime -from absl import logging -import pytz -import uuid -import re - -from docs_agent.utilities.helpers import ( - parse_related_questions_response_to_html_list, - trim_section_for_page_link, - named_link_html, - md_to_html, -) -from docs_agent.utilities import config -from docs_agent.preprocess.splitters import markdown_splitter -from docs_agent.postprocess.docs_retriever import SectionProbability - -from docs_agent.storage.chroma import Format -from docs_agent.agents.docs_agent import DocsAgent - -from docs_agent.memory.logging import ( - log_question, - log_debug_info_to_file, - log_feedback_to_file, - log_like, - log_dislike, -) - - -# This is used to define the app blueprint using a productConfig -def construct_blueprint( - product_config: config.ProductConfig, app_mode: typing.Optional[str] = None -): - bp = Blueprint("chatui", __name__) - if product_config.db_type == "google_semantic_retriever": - if product_config.secondary_db_type == "chroma": - docs_agent = DocsAgent(config=product_config, init_chroma=True) - else: - # A local Chroma DB is not needed for the Semantic Retreiver only mode. 
- docs_agent = DocsAgent(config=product_config, init_chroma=False) - elif product_config.db_type == "none": - docs_agent = DocsAgent( - config=product_config, init_chroma=False, init_semantic=False - ) - else: - docs_agent = DocsAgent(config=product_config, init_chroma=True) - logging.info( - f"Launching the Flask app for product: {product_config.product_name} with app_mode: {app_mode}" - ) - # Assign templates and redirects - if app_mode == "web": - app_template = "chatui/index.html" - redirect_index = "chatui.index" - elif app_mode == "experimental": - app_template = "chatui-experimental/index.html" - redirect_index = "chatui-experimental.index" - elif app_mode == "widget": - app_template = "chatui-widget/index.html" - redirect_index = "chatui-widget.index" - elif app_mode == "full": - app_template = "chatui-full/index.html" - redirect_index = "chatui-full.index" - elif app_mode == "widget-pro": - app_template = "chatui-widget-pro/index.html" - redirect_index = "chatui-widget-pro.index" - else: - app_template = "chatui/index.html" - redirect_index = "chatui.index" - - @bp.route("/", methods=["GET", "POST"]) - def index(): - server_url = request.url_root.replace("http", "https") - return render_template( - app_template, - product=product_config.product_name, - server_url=server_url, - ) - - @bp.route("/api/ask-docs-agent", methods=["GET", "POST"]) - def api(): - try: - input = request.get_json() - if input["question"]: - ( - full_prompt, - response, - context, - search_result, - ) = ask_model_with_sources(input["question"], agent=docs_agent) - source_array = [] - # for source in search_result: - # source_array.append(source.returnDictionary()) - dictionary = { - "response": response, - "full_prompt": full_prompt, - "sources": source_array, - } - return jsonify(dictionary) - else: - error = "Must have a valid question key in your JSON" - return jsonify({"error": error}), 400 - except: - error = "Must be a valid JSON" - return jsonify({"error": error}), 400 - - @bp.route("/like", methods=["GET", "POST"]) - def like(): - if request.method == "POST": - json_data = json.loads(request.data) - uuid_found = str(json_data.get("uuid")).strip() - is_like = json_data.get("like") - if is_like != None: - log_like(is_like, uuid_found) - is_dislike = json_data.get("dislike") - if is_dislike != None: - log_dislike(is_dislike, uuid_found) - # Check if the server has the `debugs` directory. - debug_dir = "logs/debugs" - if os.path.exists(debug_dir): - log_feedback_to_file(uuid_found, is_like, is_dislike) - return "OK" - else: - return redirect(url_for(redirect_index)) - - @bp.route("/rewrite", methods=["GET", "POST"]) - def rewrite(): - # Create the 'rewrites' directory if it does not exist. - rewrites_dir = "rewrites" - is_exist = os.path.exists(rewrites_dir) - if not is_exist: - os.makedirs(rewrites_dir) - if request.method == "POST": - json_data = json.loads(request.data) - user_id = json_data.get("user_id") - question_captured = json_data.get("question") - original_response = json_data.get("original_response") - rewrite_captured = json_data.get("rewrite") - date_format = "%m%d%Y-%H%M%S" - date = datetime.now(tz=pytz.utc) - date = date.astimezone(pytz.timezone("US/Pacific")) - print( - "[" + date.strftime(date_format) + "] A user has submitted a rewrite." 
- ) - print("Submitted by: " + user_id + "\n") - print("# " + question_captured.strip() + "\n") - print("## Original response\n") - print(original_response.strip() + "\n") - print("## Rewrite\n") - print(rewrite_captured + "\n") - filename = ( - rewrites_dir - + "/" - + question_captured.strip() - .replace(" ", "-") - .replace("?", "") - .replace("'", "") - .lower() - + "-" - + date.strftime(date_format) - + ".md" - ) - with open(filename, "w", encoding="utf-8") as file: - file.write("Submitted by: " + user_id + "\n\n") - file.write("# " + question_captured.strip() + "\n\n") - file.write("## Original response\n\n") - file.write(original_response.strip() + "\n\n") - file.write("## Rewrite\n\n") - file.write(rewrite_captured + "\n") - file.close() - return "OK" - else: - return redirect(url_for(redirect_index)) - - @bp.route("/feedback", methods=["GET", "POST"]) - def feedback(): - # Create the 'feedback' directory if it does not exist. - feedback_dir = "feedback" - is_exist = os.path.exists(feedback_dir) - if not is_exist: - os.makedirs(feedback_dir) - if request.method == "POST": - json_data = json.loads(request.data) - user_id = json_data.get("user_id") - question = json_data.get("question") - response = json_data.get("response") - feedback = json_data.get("feedback") - date_format = "%m%d%Y-%H%M%S" - date = datetime.now(tz=pytz.utc) - date = date.astimezone(pytz.timezone("US/Pacific")) - print("[" + date.strftime(date_format) + "] A user has submitted feedback.") - print("Submitted by: " + user_id + "\n") - print("# " + question.strip() + "\n") - print("## Response\n") - print(response.strip() + "\n") - print("## Feedback\n") - print(feedback + "\n") - filename = ( - feedback_dir - + "/" - + question.strip() - .replace(" ", "-") - .replace("?", "") - .replace("'", "") - .lower() - + "-" - + date.strftime(date_format) - + ".md" - ) - with open(filename, "w", encoding="utf-8") as file: - file.write("Submitted by: " + user_id + "\n\n") - file.write("# " + question.strip() + "\n\n") - file.write("## Response\n\n") - file.write(response.strip() + "\n\n") - file.write("## Feedback\n\n") - file.write(feedback + "\n") - file.close() - return "OK" - else: - return redirect(url_for(redirect_index)) - - # Render a response page when the user asks a question - # using input text box. - @bp.route("/result", methods=["GET", "POST"]) - def result(): - if request.method == "POST": - question = request.form["question"] - return ask_model(question, agent=docs_agent, template=app_template) - else: - return redirect(url_for(redirect_index)) - - # Render a response page when the user clicks a question - # from the related questions list. - @bp.route("/question/", methods=["GET", "POST"]) - def question(ask): - if request.method == "GET": - question = urllib.parse.unquote_plus(ask) - return ask_model(question, agent=docs_agent, template=app_template) - else: - return redirect(url_for(redirect_index)) - - # Render the log view page. - @bp.route("/logs", methods=["GET", "POST"]) - def logs(): - return show_logs(agent=docs_agent) - - # Render the debug view page. - @bp.route("/debugs/", methods=["GET", "POST"]) - def debugs(filename): - if request.method == "GET": - filename = urllib.parse.unquote_plus(filename) - return show_debug_info(agent=docs_agent, filename=filename) - else: - return redirect(url_for(redirect_index)) - - return bp - - -# Go through the `seatch_result` object returned from the AQA model -# and extract context. 
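# Each item in search_result is expected to expose `item.section.content`; the
# content chunks are concatenated, and each chunk is followed by a numbered
# "Reference [#]" line.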
-def extract_context_from_search_result(search_result): - context = "" - context_count = 0 - for item in search_result: - context_count += 1 - # Add a "Reference[#]" line at the end of each context. - context += item.section.content + "\nReference [" + str(context_count) + "]\n\n" - context = context.strip() - return context - - -# Construct a set of prompts using the user question, send the prompts to -# the lanaguage model, receive responses, and present them into a page. -# Use template to specify a custom template for the classic web UI -def ask_model(question, agent, template: str = "chatui/index.html"): - # Returns a built context, a total token count of the context and an array - # of sourceOBJ - full_prompt = "" - final_context = "" - docs_agent = agent - new_question_count = 5 - results_num = 5 - aqa_response_in_html = "" - - # Debugging feature: Do not log this question if it ends with `?do_not_log`. - can_be_logged = True - question_match = re.search(r"^(.*)\?do_not_log$", question) - if question_match: - # Update the question to remove `do_not_log`. - question = question_match[1] + "?" - can_be_logged = False - - # Retrieve context and ask the question. - if ( - docs_agent.config.app_mode == "full" - or docs_agent.config.app_mode == "widget-pro" - or "aqa" in docs_agent.config.models.language_model - ): - # For "full" and "pro" modes, use the AQA model for the first request. - # For the AQA model, check the DB type. - if docs_agent.config.db_type == "chroma": - ( - response, - search_result, - ) = docs_agent.ask_aqa_model_using_local_vector_store( - question=question, results_num=results_num - ) - else: - (response, search_result) = docs_agent.ask_aqa_model_using_corpora( - question=question - ) - # Extract context from this AQA model's response. - final_context = extract_context_from_search_result(search_result) - # Save this AQA model's response. - aqa_response_json = docs_agent.get_saved_aqa_response_json() - # Convert this AQA model's response to HTML for better rendering. - if aqa_response_json: - aqa_response_in_html = json.dumps( - type(aqa_response_json).to_dict(aqa_response_json), indent=2 - ) - else: - # For the `gemini-*` model, alway use the Chroma database. - if docs_agent.config.docs_agent_config == "experimental": - results_num = 10 - new_question_count = 5 - else: - results_num = 5 - new_question_count = 5 - # Note: Error if max_sources > results_num, so leave the same for now. 
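        # With db_type "none", no retrieval is performed and the context stays
        # empty; otherwise the Chroma store is queried with a 30000-token limit
        # (raised to 50000 for gemini-1.5 models) before prompting the content
        # model.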
- if docs_agent.config.db_type == "none": - search_result = [] - final_context = "" - # response = ask_content_model_with_context(context="", question=question) - # Issue if max_sources > results_num, so leave the same for now - else: - this_token_limit = 30000 - if docs_agent.config.models.language_model.startswith("models/gemini-1.5"): - this_token_limit = 50000 - search_result, final_context = docs_agent.query_vector_store_to_build( - question=question, - token_limit=this_token_limit, - results_num=results_num, - max_sources=results_num, - ) - try: - response, full_prompt = docs_agent.ask_content_model_with_context_prompt( - context=final_context, question=question - ) - aqa_response_in_html = "" - except: - logging.error("Failed to ask content model with context prompt.") - - ### Check the AQA model's answerable_probability field - probability = "None" - if docs_agent.check_if_aqa_is_used(): - aqa_response = docs_agent.get_saved_aqa_response_json() - try: - probability = aqa_response.answerable_probability - except: - probability = 0.0 - - # For "full" and "pro" modes, retrieve additional context from - # the secondary knowledge database. - additional_context = "" - if ( - docs_agent.config.app_mode == "full" - or docs_agent.config.app_mode == "widget-pro" - ): - if docs_agent.config.secondary_db_type == "chroma": - ( - additional_search_result, - additional_context, - ) = docs_agent.query_vector_store_to_build( - question=question, - token_limit=30000, - results_num=5, - max_sources=5, - ) - # Extract context from this search result. - additional_context = extract_context_from_search_result( - additional_search_result - ) - elif docs_agent.config.secondary_db_type == "google_semantic_retriever": - ( - additional_response, - additional_search_result, - ) = docs_agent.ask_aqa_model_using_corpora( - question=question, - corpus_name=str(docs_agent.config.secondary_corpus_name), - ) - # Extract context from this search result. - additional_context = extract_context_from_search_result( - additional_search_result - ) - - ### PROMPT: GET RELATED QUESTIONS. - # 1. Use the response from Prompt 1 as context and add a custom condition. - # 2. Prepare a new question asking the model to come up with 5 related questions. - # 3. Ask the language model with the new question. - # 4. Parse the model's response into a list in HTML format. - new_condition = f"Read the context below and answer the question at the end:" - new_question = f"Can you think of {new_question_count} questions whose answers can be found in the context above?" - try: - ( - related_questions_response, - new_prompt_questions, - ) = docs_agent.ask_content_model_with_context_prompt( - context=final_context, - question=new_question, - prompt=new_condition, - model="gemini-pro", - ) - # Clean up the response to a proper html list - related_questions = parse_related_questions_response_to_html_list( - markdown.markdown(related_questions_response) - ) - except: - related_questions = "" - logging.error("Failed to ask content model with context prompt.") - - ### PREPARE OTHER ELEMENTS NEEDED BY UI. - # - Create a uuid for this request. - # - A workaround to get the server's URL to work with the rewrite and like features. - new_uuid = uuid.uuid1() - server_url = request.url_root.replace("http", "https") - - ### The code below is added for "full" and "pro" modes. - # Ask the model to generate the main response. 
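    # In the "full" and "widget-pro" modes, any context found in the secondary
    # knowledge source is prepended to the primary context, and a second, more
    # detailed "summary" response is generated with the gemini-1.5 model; both
    # responses are then logged together.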
- if ( - docs_agent.config.app_mode == "full" - or docs_agent.config.app_mode == "widget-pro" - ) and docs_agent.config.db_type != "none": - if additional_context != "": - extended_context = f"RELEVANT CONTEXT FOUND IN SECONDARY KNOWLEDGE SOURCE:\n\n{additional_context}\n\nRELEVANT CONTEXT FOUND IN PRIMARY KNOWLEDGE SOURCE:\n\n{final_context}\n" - else: - extended_context = f"{final_context}\n" - additional_condition = ( - "DO NOT INCLUDE THE NAMES OF PEOPLE FOUND IN CONVERSATIONS" - ) - new_condition = f"Read the context below and provide a detailed overview to address the question at the end ({additional_condition}):" - ( - summary_response, - summary_prompt, - ) = docs_agent.ask_content_model_with_context_prompt( - context=extended_context, - question=question, - prompt=new_condition, - model="gemini-1.5", - ) - log_lines = f"{response}\n\n{summary_response}" - else: - summary_response = "" - log_lines = f"{response}" - - ### LOG THIS REQUEST. - if can_be_logged: - if docs_agent.config.enable_logs_to_markdown == "True": - log_question( - new_uuid, - question, - log_lines, - probability, - save=True, - logs_to_markdown="True", - ) - else: - log_question(new_uuid, question, log_lines, probability, save=True) - # Log debug information. - - if docs_agent.config.enable_logs_for_debugging == "True": - top_source_url = "" - if len(search_result) > 0: - top_source_url = search_result[0].section.url - source_urls = "" - index = 1 - for result in search_result: - source_urls += "[" + str(index) + "]: " + str(result.section.url) + "\n" - index += 1 - log_debug_info_to_file( - uid=new_uuid, - user_question=question, - response=log_lines, - context=final_context, - top_source_url=top_source_url, - source_urls=source_urls, - probability=probability, - server_url=server_url, - ) - - ### Check the feedback mode in the `config.yaml` file. 
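    # Defaults to "feedback" unless the product config defines a `feedback_mode` value.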
- feedback_mode = "feedback" - if hasattr(docs_agent.config, "feedback_mode"): - feedback_mode = str(docs_agent.config.feedback_mode) - - return render_template( - template, - question=question, - response=response, - related_questions=related_questions, - product=docs_agent.config.product_name, - server_url=server_url, - uuid=new_uuid, - aqa_response_in_html=aqa_response_in_html, - named_link_html=named_link_html, - trim_section_for_page_link=trim_section_for_page_link, - md_to_html=md_to_html, - final_context=final_context, - search_result=search_result, - summary_response=summary_response, - feedback_mode=feedback_mode, - ) - - -# Not fully implemented -# This method is used for the API endpoint, so it returns values that can be -# packaged as JSON -def ask_model_with_sources(question, agent): - docs_agent = agent - full_prompt = "" - search_result, context = docs_agent.query_vector_store_to_build( - question=question, token_limit=30000, results_num=10, max_sources=10 - ) - context_with_instruction = docs_agent.add_instruction_to_context(context) - if "gemini" in docs_agent.get_language_model_name(): - response, full_prompt = docs_agent.ask_content_model_with_context_prompt( - context=context, question=question - ) - else: - response = docs_agent.ask_text_model_with_context( - context_with_instruction, question - ) - - return full_prompt, response, context, search_result - - -# Display a page showing logs -def show_logs(agent, template: str = "admin/logs.html"): - docs_agent = agent - product = docs_agent.config.product_name - log_filename = "logs/chatui_logs.txt" - answerable_log_filename = "logs/answerable_logs.txt" - log_contents = "" - answerable_contents = "" - if docs_agent.config.enable_show_logs == "True": - try: - with open(log_filename, "r", encoding="utf-8") as file: - log_contents = file.read() - except: - log_contents = "Cannot find or open log files." - try: - with open(answerable_log_filename, "r", encoding="utf-8") as file: - answerable_contents = file.read() - except: - answerable_contents = ( - "Cannot find or open a file that contains answerable scores." - ) - return render_template( - template, - product=product, - logs=log_contents, - answerable_logs=answerable_contents, - ) - - -# Display a page showing debug information. -def show_debug_info(agent, filename: str, template: str = "admin/debugs.html"): - docs_agent = agent - product = docs_agent.config.product_name - debug_dir = "logs/debugs" - debug_filename = f"{debug_dir}/{filename}" - debug_info = "" - if docs_agent.config.enable_logs_for_debugging == "True": - try: - if debug_filename.endswith("txt"): - with open(debug_filename, "r", encoding="utf-8") as file: - debug_info = file.read() - except: - debug_info = "Cannot find or open this file." 
- return render_template( - template, - product=product, - debug_info=debug_info, - ) diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/chatbox.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/chatbox.css deleted file mode 120000 index 44ad5badf..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/chatbox.css +++ /dev/null @@ -1 +0,0 @@ -../../../../../third_party/css/chatbox.css \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-full.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-full.css deleted file mode 100644 index 4cdb442e3..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-full.css +++ /dev/null @@ -1,625 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ======= General style for HTML elements ======= */ - -body { - font: 16px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - color: #333; - font-weight: 300; - max-width: 960px; - margin: auto; - background-color: #d9d9d9; - padding-top: 15px; - padding-bottom: 15px; - } - - a { - color: #0a619a; - } - - p { - margin: 0 0 1em; - line-height: 130%; - } - - h1 { - margin: 0 0 0.5em; - font-weight: 500; - font-size: 1.3em; - margin-left: 1.0em; - margin-top: 0.3em; - } - - h2 { - margin: 0; - margin-top: 17px; - margin-bottom: 15px; - font-weight: normal; - font-size: 1.5em; - } - - h3 { - margin: 0; - margin-top: 10px; - margin-bottom: 10px; - } - - h4 { - color: #505050; - margin-top: 3px; - margin-left: 5px; - margin-bottom: 8px; - } - - li { - margin: 0 0 0.3em; - } - - code { - font-family: math; - color: darkgreen; - text-wrap: pretty; - } - - /* ======= Style layout by ID ======= */ - - #callout-box { - margin: auto; - max-width: 800px; - font: 13px arial, sans-serif; - background-color: white; - border-style: solid; - border-width: 1px; - padding: 10px 25px; - box-shadow: 5px 5px 5px grey; - border-radius: 15px; - } - - #important-box { - font-size: 0.9em; - font-family: system-ui; - line-height: 150%; - word-break: break-word; - #padding: 4px; - padding-top: 5px; - padding-bottom: 5px; - padding-left: 10px; - padding-right: 10px; - background-color: #fcb8a1; - border-radius: 5px; - border-width: 2px; - border-style: solid; - } - - #tldr-response-box { - font-size: 0.8em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - margin-bottom: 25px; - border-width: 2px; - border-style: solid; - padding-top: 5px; - padding-bottom: 5px; - padding-left: 10px; - padding-right: 10px; - background-color: #b1d8f1; - border-radius: 5px; - border-width: 2px; - border-style: solid; - } - - #response-box { - font-size: 1.0em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - } - #suggested-questions { - font-family: sans-serif; - word-break: break-word; - } - - 
#context-content{ - background: #d7dbd7; - font-family: sans-serif; - word-break: break-all; - } - - #context-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - margin-top: 0.3px; - } - - #probability-box { - font-size: small; - padding: 4px; - margin-bottom: 10px; - } - - #grounding-box { - font-size: small; - padding: 4px; - word-break: break-all; - } - - #grounding-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - } - - #reference-box { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - margin-bottom: 12px; - line-height: 1.5em; - } - - #reference-box-no-aqa { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - line-height: 1.5em; - } - - #aqa-content{ - background: #9fc7db; - font-family: math; - } - - #aqa-label{ - background: #49a5d2; - } - - #aqa-json { - font-family: system-ui; - font-size: small; - text-wrap: pretty; - word-break: break-all; - margin: 0; - } - - #rewrite-buttons-box { - margin-top: 12px; - } - - #feedback-buttons-box { - margin-top: 12px; - } - - #answerable-span { - font-size: small; - font-family: system-ui; - float: right; - padding: 10px; - } - - /* ======= Style by class ======= */ - - .hidden { - display: none; - } - - .disable { - display: none; - } - - .header-wrapper { - display: flex; - } - - .loading { - font: 15px arial, sans-serif; - width: 100%; - margin-left: 12px; - color: #505050; - padding: 2px; - } - - .notselected { - background-color: #303936e6; - padding-top: 3px; - padding-bottom: 5px; - } - - .notselected:hover { - background-color: #121a17e6; - cursor:pointer; - } - - #like-button.selected { - background-color: #1e6a9c; - padding-top: 7px; - padding-bottom: 7px; - } - - #dislike-button.selected { - background-color: #CF5C3F; - padding-top: 7px; - padding-bottom: 7px; - } - - .selected:hover { - background-color: #0a619a; - cursor:pointer; - } - - .rewrite { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .feedback { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .question, .response, .response-text, .fact-checked-text { - max-width: 700px; - margin-left: 3px; - } - - .full-response { - max-width: 700px; - margin-left: 10px; - } - - .related-questions { - margin-bottom: 20px; - font-size: 0.9em; - line-height: 140%; - } - - /* ======= Style buttons by ID ======= */ - - #rewrite-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #rewrite-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #feedback-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #like-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #dislike-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #submit-result { - color: #027f02d6; - font-family: fantasy; - } - #feedback-submit-button { 
- border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #feedback-submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #edit-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 650px; - padding: 8px; - } - - #feedback-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 580px; - padding: 8px; - } - - #rewrite-question-header { - margin: 0; - margin-bottom: 5px; - } - - #rewrite-response-header { - margin: 0; - margin-top: 10px; - margin-bottom: 5px; - } - - #user-id { - margin: 0; - margin-top: 10px; - margin-bottom: 15px; - } - - #fact-check-url { - margin: 0 0 0.7em; - } - - #source-para { - margin: 0 0 0.7em; - } - - #distance-para { - margin: 0 0 0.7em; - font: 11px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - } - /* ======= Search Box ======= */ - - .search { - border: 2px solid #CF5C3F; - overflow: auto; - max-width: 700px; - margin-top: 15px; - margin-left: 10px; - margin-bottom: 10px; - border-radius: 5px; - } - - .search input[type="text"] { - border: 0; - width: calc(100% - 65px); - padding: 10px; - } - - .search input[type="text"]:focus { - outline: 0; - } - - .search input[type="submit"] { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - float: right; - padding: 10px; - -moz-border-radius-top-right: 5px; - -webkit-border-radius-top-right: 5px; - -moz-border-radius-bottom-right: 5px; - -webkit-border-radius-bottom-right: 5px; - cursor:pointer; - } - - /* ======= Accordion ======= */ - - .accordion { - max-width: 65em; - #margin-bottom: 1em; - } - - .accordion > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion .reference-content { - font-size: 15px; - font-family: serif; - } - - .accordion > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion .handle { - margin: 0; - font-size: 1.0em; - } - - .accordion label { - display: block; - font-weight: normal; - border: 2px solid #000; - #padding: 12px; - background: #4490b8ab; - #border-radius: 15px; - padding: 5px; - #background: #027f023b; - border-radius: 10px; - } - - .accordion label:hover, - .accordion label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: 1.556em; - vertical-align: middle; - } - - .accordion > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion p:last-child { - margin-bottom: 0; - } - - /* ======= Accordion Source ======= */ - - .accordion-source { - max-width: 65em; - margin-bottom: 1em; - } - - .accordion-source > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion-source .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion-source .content{ - font-size: 13px; - } - - .accordion-source > input[type="checkbox"]:checked ~ 
.content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion-source .handle { - margin: 0; - font-size: 1em; - line-height: 1.2em; - } - - .accordion-source label { - display: block; - font-weight: normal; - border: 1px solid #000; - padding: 6px; - background: #4490b8ab; - border-radius: 15px; - } - - .accordion-source label:hover, - .accordion-source label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion-source .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: .556em; - vertical-align: middle; - } - - .accordion-source > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion-source p:last-child { - margin-bottom: 0; - } - -/* Loader animation */ -/* Source: https://css-loaders.com/classic/ */ -.loader { - width: fit-content; - font-family: monospace; - font-size: 14px; - margin-left: 13px; - clip-path: inset(0 3ch 0 0); - animation: animation 1s steps(4) infinite; -} -.loader:before { - content:"Generating a response..." -} -@keyframes animation {to{clip-path: inset(0 -1ch 0 0)}} diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget-pro.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget-pro.css deleted file mode 100644 index b343fb6e1..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget-pro.css +++ /dev/null @@ -1,635 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ======= General style for HTML elements ======= */ - -body { - font: 16px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - color: #333; - font-weight: 300; - max-width: 960px; - margin: auto; - background-color: white; - padding-top: 15px; - padding-bottom: 15px; - } - - a { - color: #0a619a; - } - - p { - margin: 0 0 1em; - line-height: 130%; - } - - h1 { - margin: 0 0 0.5em; - font-weight: 500; - font-size: 1.3em; - margin-top: 0.1em; - margin-left: 1.0em; - margin-bottom: 0.9em; - } - - h2 { - margin: 0; - margin-top: 15px; - margin-bottom: 10px; - font-weight: normal; - font-size: 1.4em; - } - - h3 { - margin: 0; - margin-top: 10px; - margin-bottom: 10px; - } - - h4 { - color: #505050; - margin-top: 3px; - margin-left: 5px; - margin-bottom: 8px; - } - - li { - margin: 0 0 0.3em; - } - - code { - font-family: math; - color: darkgreen; - text-wrap: pretty; - } - - /* ======= Style layout by ID ======= */ - - #iframe-box { - margin: 0px; - max-width: 760px; - font: 15px arial, sans-serif; - background-color: white; - padding-bottom: 0px; - padding-left: 0px; - } - - #callout-box { - margin: auto; - max-width: 800px; - font: 13px arial, sans-serif; - background-color: white; - border-style: solid; - border-width: 1px; - padding: 10px 25px; - box-shadow: 5px 5px 5px grey; - border-radius: 15px; - } - - #important-box { - font-size: 0.9em; - font-family: system-ui; - line-height: 150%; - word-break: break-word; - #padding: 4px; - padding-top: 5px; - padding-bottom: 5px; - padding-left: 10px; - padding-right: 10px; - background-color: #fcb8a1; - border-radius: 5px; - border-width: 2px; - border-style: solid; - } - - #tldr-response-box { - font-size: 0.8em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - margin-bottom: 25px; - border-width: 2px; - border-style: solid; - padding-top: 5px; - padding-bottom: 5px; - padding-left: 10px; - padding-right: 10px; - background-color: #b1d8f1; - border-radius: 5px; - border-width: 2px; - border-style: solid; - } - - #response-box { - font-size: 1.0em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - } - #suggested-questions { - font-family: sans-serif; - word-break: break-word; - } - - #context-content{ - background: #d7dbd7; - font-family: sans-serif; - word-break: break-all; - } - - #context-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - margin-top: 0.3px; - } - - #probability-box { - font-size: small; - padding: 4px; - margin-bottom: 10px; - } - - #grounding-box { - font-size: small; - padding: 4px; - word-break: break-all; - } - - #grounding-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - } - - #reference-box { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - margin-bottom: 12px; - line-height: 1.5em; - } - - #reference-box-no-aqa { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - line-height: 1.5em; - } - - #aqa-content{ - background: #9fc7db; - font-family: math; - } - - #aqa-label{ - background: #49a5d2; - } - - #aqa-json { - font-family: system-ui; - font-size: small; - text-wrap: pretty; - word-break: break-all; - margin: 0; - } - - #rewrite-buttons-box { - margin-top: 12px; - } - - #feedback-buttons-box { - margin-top: 12px; - } - - #answerable-span { - font-size: small; - font-family: system-ui; - float: right; - padding: 10px; - } - - /* ======= Style by class ======= */ - - .hidden { - display: none; - } - - .disable { - display: none; - } - 
- .header-wrapper { - display: flex; - } - - .loading { - font: 15px arial, sans-serif; - width: 100%; - margin-left: 12px; - color: #505050; - padding: 2px; - } - - .notselected { - background-color: #303936e6; - padding-top: 3px; - padding-bottom: 5px; - } - - .notselected:hover { - background-color: #121a17e6; - cursor:pointer; - } - - #like-button.selected { - background-color: #1e6a9c; - padding-top: 7px; - padding-bottom: 7px; - } - - #dislike-button.selected { - background-color: #CF5C3F; - padding-top: 7px; - padding-bottom: 7px; - } - - .selected:hover { - background-color: #0a619a; - cursor:pointer; - } - - .rewrite { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .feedback { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .question, .response, .response-text, .fact-checked-text { - max-width: 700px; - margin-left: 3px; - } - - .full-response { - max-width: 700px; - margin-left: 10px; - } - - .related-questions { - margin-bottom: 20px; - font-size: 0.9em; - line-height: 140%; - } - - /* ======= Style buttons by ID ======= */ - - #rewrite-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #rewrite-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #feedback-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #like-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #dislike-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #submit-result { - color: #027f02d6; - font-family: fantasy; - } - #feedback-submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #feedback-submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #edit-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 650px; - padding: 8px; - } - - #feedback-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 580px; - padding: 8px; - } - - #rewrite-question-header { - margin: 0; - margin-bottom: 5px; - } - - #rewrite-response-header { - margin: 0; - margin-top: 10px; - margin-bottom: 5px; - } - - #user-id { - margin: 0; - margin-top: 10px; - margin-bottom: 15px; - } - - #fact-check-url { - margin: 0 0 0.7em; - } - - #source-para { - margin: 0 0 0.7em; - } - - #distance-para { - margin: 0 0 0.7em; - font: 11px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - } - /* ======= Search Box ======= */ - - .search { - border: 2px solid #CF5C3F; - overflow: auto; - max-width: 700px; - margin-top: 15px; - margin-left: 10px; - margin-bottom: 10px; - border-radius: 5px; 
- } - - .search input[type="text"] { - border: 0; - width: calc(100% - 65px); - padding: 10px; - } - - .search input[type="text"]:focus { - outline: 0; - } - - .search input[type="submit"] { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - float: right; - padding: 10px; - -moz-border-radius-top-right: 5px; - -webkit-border-radius-top-right: 5px; - -moz-border-radius-bottom-right: 5px; - -webkit-border-radius-bottom-right: 5px; - cursor:pointer; - } - - /* ======= Accordion ======= */ - - .accordion { - max-width: 65em; - #margin-bottom: 1em; - } - - .accordion > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion .reference-content { - font-size: 15px; - font-family: serif; - } - - .accordion > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion .handle { - margin: 0; - font-size: 1.0em; - } - - .accordion label { - display: block; - font-weight: normal; - border: 2px solid #000; - #padding: 12px; - background: #4490b8ab; - #border-radius: 15px; - padding: 5px; - #background: #027f023b; - border-radius: 10px; - } - - .accordion label:hover, - .accordion label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: 1.556em; - vertical-align: middle; - } - - .accordion > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion p:last-child { - margin-bottom: 0; - } - - /* ======= Accordion Source ======= */ - - .accordion-source { - max-width: 65em; - margin-bottom: 1em; - } - - .accordion-source > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion-source .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion-source .content{ - font-size: 13px; - } - - .accordion-source > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion-source .handle { - margin: 0; - font-size: 1em; - line-height: 1.2em; - } - - .accordion-source label { - display: block; - font-weight: normal; - border: 1px solid #000; - padding: 6px; - background: #4490b8ab; - border-radius: 15px; - } - - .accordion-source label:hover, - .accordion-source label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion-source .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: .556em; - vertical-align: middle; - } - - .accordion-source > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion-source p:last-child { - margin-bottom: 0; - } - -/* Loader animation */ -/* Source: https://css-loaders.com/classic/ */ -.loader { - width: fit-content; - font-family: monospace; - font-size: 14px; - margin-left: 13px; - clip-path: inset(0 3ch 0 0); - animation: animation 1s steps(4) infinite; -} -.loader:before { - content:"Generating a response..." 
-} -@keyframes animation {to{clip-path: inset(0 -1ch 0 0)}} diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget.css deleted file mode 100644 index 9658ef6a7..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui-widget.css +++ /dev/null @@ -1,624 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ======= General style for HTML elements ======= */ - -body { - font: 16px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - color: #333; - font-weight: 300; - max-width: 960px; - margin: auto; - background-color: white; - padding-top: 15px; - padding-bottom: 15px; - } - - a { - color: #0a619a; - } - - p { - margin: 0 0 1em; - line-height: 130%; - } - - h1 { - margin: 0 0 0.5em; - font-weight: 500; - font-size: 1.3em; - margin-top: 0.1em; - margin-left: 1.0em; - margin-bottom: 0.9em; - } - - h2 { - margin: 0; - margin-top: 15px; - margin-bottom: 10px; - font-weight: normal; - font-size: 1.4em; - } - - h3 { - margin: 0; - margin-top: 10px; - margin-bottom: 10px; - } - - h4 { - color: #505050; - margin-top: 3px; - margin-left: 5px; - margin-bottom: 8px; - } - - li { - margin: 0 0 0.3em; - } - - code { - font-family: math; - color: darkgreen; - text-wrap: pretty; - } - - /* ======= Style layout by ID ======= */ - - #iframe-box { - margin: 0px; - max-width: 760px; - font: 15px arial, sans-serif; - background-color: white; - padding-bottom: 0px; - padding-left: 0px; - } - - #callout-box { - margin: auto; - max-width: 800px; - font: 13px arial, sans-serif; - background-color: white; - border-style: solid; - border-width: 1px; - padding: 10px 25px; - box-shadow: 5px 5px 5px grey; - border-radius: 15px; - } - - #important-box { - font-size: 0.9em; - font-family: system-ui; - word-break: break-word; - line-height: 150%; - word-break: break-word; - padding: 4px; - } - - #response-box { - font-size: 1.0em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - } - - #suggested-questions { - font-family: sans-serif; - word-break: break-word; - } - - #source-pages { - font-family: sans-serif; - word-break: break-all; - } - - #context-content{ - background: #d7dbd7; - font-family: sans-serif; - word-break: break-all; - } - - #context-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - margin-top: 0.3px; - } - - #probability-box { - font-size: small; - padding: 4px; - margin-bottom: 10px; - } - - #grounding-box { - font-size: small; - padding: 4px; - word-break: break-all; - } - - #grounding-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - } - - #reference-box { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - margin-bottom: 12px; - line-height: 1.5em; - } - - #reference-box-no-aqa { - font-size: 0.9em; - font-family: system-ui; - text-wrap: 
pretty; - word-break: break-all; - line-height: 1.5em; - } - - #aqa-content{ - background: #9fc7db; - font-family: math; - } - - #aqa-label{ - background: #49a5d2; - } - - #aqa-json { - font-family: system-ui; - font-size: small; - text-wrap: pretty; - word-break: break-all; - margin: 0; - } - - #rewrite-buttons-box { - margin-top: 12px; - } - - #feedback-buttons-box { - margin-top: 12px; - } - - #answerable-span { - font-size: small; - font-family: system-ui; - float: right; - padding: 10px; - } - - /* ======= Style by class ======= */ - - .hidden { - display: none; - } - - .disable { - display: none; - } - - .header-wrapper { - display: flex; - } - - .loading { - font: 15px arial, sans-serif; - width: 100%; - margin-left: 12px; - color: #505050; - padding: 2px; - } - - .notselected { - background-color: #303936e6; - padding-top: 3px; - padding-bottom: 5px; - } - - .notselected:hover { - background-color: #121a17e6; - cursor:pointer; - } - - #like-button.selected { - background-color: #1e6a9c; - padding-top: 7px; - padding-bottom: 7px; - } - - #dislike-button.selected { - background-color: #CF5C3F; - padding-top: 7px; - padding-bottom: 7px; - } - - .selected:hover { - background-color: #0a619a; - cursor:pointer; - } - - .rewrite { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .feedback { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .question, .response, .response-text, .fact-checked-text { - max-width: 700px; - margin-left: 3px; - } - - .full-response { - max-width: 700px; - margin-left: 10px; - } - - .related-questions { - margin-bottom: 20px; - font-size: 0.9em; - line-height: 140%; - } - - .relevant-sources { - margin-bottom: 20px; - font-size: 0.9em; - line-height: 140%; - } - - /* ======= Style buttons by ID ======= */ - - #rewrite-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #rewrite-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #feedback-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #like-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #dislike-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #feedback-submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #feedback-submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #edit-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 580px; - padding: 8px; - } - - #feedback-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: 
-webkit-fill-available; - height: 300px; - width: 580px; - padding: 8px; - } - - #rewrite-question-header { - margin: 0; - margin-bottom: 5px; - } - - #rewrite-response-header { - margin: 0; - margin-top: 10px; - margin-bottom: 5px; - } - - #user-id { - margin: 0; - margin-top: 10px; - margin-bottom: 15px; - } - - #fact-check-url { - margin: 0 0 0.7em; - } - - #source-para { - margin: 0 0 0.7em; - } - - #distance-para { - margin: 0 0 0.7em; - font: 11px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - } - /* ======= Search Box ======= */ - - .search { - border: 2px solid #CF5C3F; - overflow: auto; - max-width: 600px; - margin-top: 15px; - margin-left: 10px; - margin-bottom: 10px; - border-radius: 5px; - } - - .search input[type="text"] { - border: 0; - width: calc(100% - 65px); - padding: 10px; - } - - .search input[type="text"]:focus { - outline: 0; - } - - .search input[type="submit"] { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - float: right; - padding: 10px; - -moz-border-radius-top-right: 5px; - -webkit-border-radius-top-right: 5px; - -moz-border-radius-bottom-right: 5px; - -webkit-border-radius-bottom-right: 5px; - cursor:pointer; - } - - /* ======= Accordion ======= */ - - .accordion { - max-width: 65em; - #margin-bottom: 1em; - } - - .accordion > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion .reference-content { - font-size: 15px; - font-family: serif; - } - - .accordion > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion .handle { - margin: 0; - font-size: 1.0em; - } - - .accordion label { - display: block; - font-weight: normal; - border: 2px solid #000; - #padding: 12px; - background: #4490b8ab; - #border-radius: 15px; - padding: 5px; - #background: #027f023b; - border-radius: 10px; - } - - .accordion label:hover, - .accordion label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: 1.556em; - vertical-align: middle; - } - - .accordion > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion p:last-child { - margin-bottom: 0; - } - - /* ======= Accordion Source ======= */ - - .accordion-source { - max-width: 65em; - margin-bottom: 1em; - } - - .accordion-source > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion-source .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion-source .content{ - font-size: 13px; - } - - .accordion-source > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion-source .handle { - margin: 0; - font-size: 1em; - line-height: 1.2em; - } - - .accordion-source label { - display: block; - font-weight: normal; - border: 1px solid #000; - padding: 6px; - background: #4490b8ab; - border-radius: 15px; - } - - .accordion-source label:hover, - .accordion-source label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion-source .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: 
"\2964"; - margin-right: 10px; - font-size: .58em; - line-height: .556em; - vertical-align: middle; - } - - .accordion-source > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion-source p:last-child { - margin-bottom: 0; - } - -/* Loader animation */ -/* Source: https://css-loaders.com/classic/ */ -.loader { - width: fit-content; - font-family: monospace; - font-size: 14px; - margin-left: 13px; - clip-path: inset(0 3ch 0 0); - animation: animation 1s steps(4) infinite; -} -.loader:before { - content:"Generating a response..." -} -@keyframes animation {to{clip-path: inset(0 -1ch 0 0)}} - diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui.css deleted file mode 100644 index 7267c9b22..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-chatui.css +++ /dev/null @@ -1,601 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ======= General style for HTML elements ======= */ - -body { - font: 16px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - color: #333; - font-weight: 300; - max-width: 960px; - margin: auto; - background-color: #d9d9d9; - padding-top: 15px; - padding-bottom: 15px; - } - - a { - color: #0a619a; - } - - p { - margin: 0 0 1em; - line-height: 130%; - } - - h1 { - margin: 0 0 0.5em; - font-weight: 500; - font-size: 1.3em; - margin-left: 1.0em; - margin-top: 0.3em; - } - - h2 { - margin: 0; - margin-top: 17px; - margin-bottom: 15px; - font-weight: normal; - font-size: 1.5em; - } - - h3 { - margin: 0; - margin-top: 10px; - margin-bottom: 10px; - } - - h4 { - color: #505050; - margin-top: 3px; - margin-left: 5px; - margin-bottom: 8px; - } - - li { - margin: 0 0 0.3em; - } - - code { - font-family: math; - color: darkgreen; - text-wrap: pretty; - } - - /* ======= Style layout by ID ======= */ - - #callout-box { - margin: auto; - max-width: 800px; - font: 13px arial, sans-serif; - background-color: white; - border-style: solid; - border-width: 1px; - padding: 10px 25px; - box-shadow: 5px 5px 5px grey; - border-radius: 15px; - } - - #important-box { - font-size: 0.9em; - font-family: system-ui; - line-height: 150%; - word-break: break-word; - padding: 4px; - } - - #response-box { - font-size: 1.0em; - font-family: sans-serif; - line-height: 140%; - margin-top: 10px; - } - - #suggested-questions { - font-family: sans-serif; - word-break: break-word; - } - - #context-content{ - background: #d7dbd7; - font-family: sans-serif; - word-break: break-all; - } - - #context-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - margin-top: 0.3px; - } - - #probability-box { - font-size: small; - padding: 4px; - margin-bottom: 10px; - } - - #grounding-box { - font-size: small; - padding: 4px; - word-break: break-all; - } - - #grounding-pre { - font-size: small; - font-family: monospace; 
- text-wrap: pretty; - } - - #reference-box { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - margin-bottom: 12px; - line-height: 1.5em; - } - - #reference-box-no-aqa { - font-size: 0.9em; - font-family: system-ui; - text-wrap: pretty; - word-break: break-all; - line-height: 1.5em; - } - - #aqa-content{ - background: #9fc7db; - font-family: math; - } - - #aqa-label{ - background: #49a5d2; - } - - #aqa-json { - font-family: system-ui; - font-size: small; - text-wrap: pretty; - word-break: break-all; - margin: 0; - } - - #rewrite-buttons-box { - margin-top: 12px; - } - - #feedback-buttons-box { - margin-top: 12px; - } - - #answerable-span { - font-size: small; - font-family: system-ui; - float: right; - padding: 10px; - } - - /* ======= Style by class ======= */ - - .hidden { - display: none; - } - - .disable { - display: none; - } - - .header-wrapper { - display: flex; - } - - .loading { - font: 15px arial, sans-serif; - width: 100%; - margin-left: 12px; - color: #505050; - padding: 2px; - } - - .notselected { - background-color: #303936e6; - padding-top: 3px; - padding-bottom: 5px; - } - - .notselected:hover { - background-color: #121a17e6; - cursor:pointer; - } - - #like-button.selected { - background-color: #1e6a9c; - padding-top: 7px; - padding-bottom: 7px; - } - - #dislike-button.selected { - background-color: #CF5C3F; - padding-top: 7px; - padding-bottom: 7px; - } - - .selected:hover { - background-color: #0a619a; - cursor:pointer; - } - - .rewrite { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .feedback { - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .question, .response, .response-text, .fact-checked-text { - max-width: 700px; - margin-left: 3px; - } - - .full-response { - max-width: 700px; - margin-left: 10px; - } - - .related-questions { - margin-bottom: 20px; - font-size: 0.9em; - line-height: 140%; - } - - /* ======= Style buttons by ID ======= */ - - #rewrite-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #rewrite-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-button { - border: 0; - background-color: #cf633ff2; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - margin-top: 0.3em; - } - - #feedback-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #like-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #dislike-button { - border: 0; - color: #fff; - padding-left: 7px; - padding-right: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #feedback-submit-button { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - padding: 7px; - border-radius: 5px; - cursor:pointer; - } - - #feedback-submit-button:hover { - background: #ce3705f2; - cursor:pointer; - } - - #feedback-submit-result { - color: #027f02d6; - font-family: fantasy; - } - - #edit-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - 
height: 300px; - width: 650px; - padding: 8px; - } - - #feedback-text-area { - font: 13px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - max-height: 500px; - max-width: -webkit-fill-available; - height: 300px; - width: 580px; - padding: 8px; - } - - #rewrite-question-header { - margin: 0; - margin-bottom: 5px; - } - - #rewrite-response-header { - margin: 0; - margin-top: 10px; - margin-bottom: 5px; - } - - #user-id { - margin: 0; - margin-top: 10px; - margin-bottom: 15px; - } - - #fact-check-url { - margin: 0 0 0.7em; - } - - #source-para { - margin: 0 0 0.7em; - } - - #distance-para { - margin: 0 0 0.7em; - font: 11px/1.5em Overpass, "Open Sans", Helvetica, sans-serif; - } - /* ======= Search Box ======= */ - - .search { - border: 2px solid #CF5C3F; - overflow: auto; - max-width: 700px; - margin-top: 15px; - margin-left: 10px; - margin-bottom: 10px; - border-radius: 5px; - } - - .search input[type="text"] { - border: 0; - width: calc(100% - 65px); - padding: 10px; - } - - .search input[type="text"]:focus { - outline: 0; - } - - .search input[type="submit"] { - border: 0; - background: none; - background-color: #CF5C3F; - color: #fff; - float: right; - padding: 10px; - -moz-border-radius-top-right: 5px; - -webkit-border-radius-top-right: 5px; - -moz-border-radius-bottom-right: 5px; - -webkit-border-radius-bottom-right: 5px; - cursor:pointer; - } - - /* ======= Accordion ======= */ - - .accordion { - max-width: 65em; - #margin-bottom: 1em; - } - - .accordion > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion .reference-content { - font-size: 15px; - font-family: serif; - } - - .accordion > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion .handle { - margin: 0; - font-size: 1.0em; - } - - .accordion label { - display: block; - font-weight: normal; - border: 2px solid #000; - #padding: 12px; - background: #4490b8ab; - #border-radius: 15px; - padding: 5px; - #background: #027f023b; - border-radius: 10px; - } - - .accordion label:hover, - .accordion label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: 1.556em; - vertical-align: middle; - } - - .accordion > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion p:last-child { - margin-bottom: 0; - } - - /* ======= Accordion Source ======= */ - - .accordion-source { - max-width: 65em; - margin-bottom: 1em; - } - - .accordion-source > input[type="checkbox"] { - position: absolute; - left: -100vw; - } - - .accordion-source .content { - overflow-y: hidden; - height: 0; - transition: height 0.3s ease; - } - - .accordion-source .content{ - font-size: 13px; - } - - .accordion-source > input[type="checkbox"]:checked ~ .content { - height: auto; - overflow: visible; - padding: 15px; - border: 2px solid #000; - margin-top: 6px; - border-radius: 15px; - } - - .accordion-source .handle { - margin: 0; - font-size: 1em; - line-height: 1.2em; - } - - .accordion-source label { - display: block; - font-weight: normal; - border: 1px solid #000; - padding: 6px; - background: #4490b8ab; - border-radius: 15px; - } - - .accordion-source label:hover, - .accordion-source 
label:focus { - background: #d9d9d9; - cursor:pointer; - } - - .accordion-source .handle label::before { - font-family: fontawesome, sans-serif; - display: inline-block; - content: "\2964"; - margin-right: 10px; - font-size: .58em; - line-height: .556em; - vertical-align: middle; - } - - .accordion-source > input[type="checkbox"]:checked ~ .handle label::before { - content: "\2965"; - } - - .accordion-source p:last-child { - margin-bottom: 0; - } - -/* Loader animation */ -/* Source: https://css-loaders.com/classic/ */ -.loader { - width: fit-content; - font-family: monospace; - font-size: 14px; - margin-left: 13px; - clip-path: inset(0 3ch 0 0); - animation: animation 1s steps(4) infinite; -} -.loader:before { - content:"Generating a response..." -} -@keyframes animation {to{clip-path: inset(0 -1ch 0 0)}} diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-logs.css b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-logs.css deleted file mode 100644 index e0e884c0a..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/css/style-logs.css +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -.log-pre { - font-size: small; - font-family: monospace; - text-wrap: pretty; - word-break: break-all; -} diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/images/favicon.png b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/images/favicon.png deleted file mode 100644 index dd83d19ce..000000000 Binary files a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/images/favicon.png and /dev/null differ diff --git a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/javascript/app.js b/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/javascript/app.js deleted file mode 100644 index cd3e9aba9..000000000 --- a/examples/gemini/python/docs-agent/docs_agent/interfaces/chatbot/static/javascript/app.js +++ /dev/null @@ -1,342 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Display the "loading" message when a question is entered and submitted. 
-let askButton = document.getElementById('ask-button'); -let loadingDiv = document.getElementById('loading-div'); - -if (askButton != null){ - askButton.addEventListener('click',function (){ - if (loadingDiv.classList.contains("hidden")){ - loadingDiv.classList.remove("hidden"); - } - }); -} - -// Display the "loading" message when a related question is clicked. -let relatedQuestions = document.getElementById('suggested-questions'); - -if (relatedQuestions != null){ - questions = relatedQuestions.getElementsByTagName('a'); - for(i=0; i
    -  // edbacher: added a variable for indent in src <pre> block. Let style sheet do margin.
    -  var srcIndent = "";
    -
    -  var postHasImages = false;
    -
    -  var files = [];
    -
    -  // Walk through all the child elements of the doc.
    -  for (var i = 0; i < numChildren; i++) {
    -    var child = document.getActiveSection().getChild(i);
    -    var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters, image_prefix + image_foldername);
    -    globalImageCounter += (result && result.images) ? result.images.length : 0;
    -    if (result!==null) {
    -      if (result.sourceGlossary==="start" && !inSrc) {
    -        inSrc=true;
    -        text+="
    \n";
    -      } else if (result.sourceGlossary==="end" && inSrc) {
    -        inSrc=false;
    -        text+="
    \n\n"; - } else if (result.sourceFigCap==="start" && !inSrc) { - inSrc=true; - text+="
    \n";
    -      } else if (result.sourceFigCap==="end" && inSrc) {
    -        inSrc=false;
    -        text+="
    \n\n"; - } else if (result.source==="start" && !inSrc) { - inSrc=true; - text+="
    \n";
    -      } else if (result.source==="end" && inSrc) {
    -        inSrc=false;
    -        text+="
    \n\n"; - } else if (result.inClass==="start" && !inClass) { - inClass=true; - text+="
    \n";
    -      } else if (result.inClass==="end" && inClass) {
    -        inClass=false;
    -        text+="
    \n\n"; - } else if (inClass) { - text+=result.text+"\n\n"; - } else if (inSrc) { - text+=(srcIndent+escapeHTML(result.text)+"\n"); - } else if (result.text && result.text.length>0) { - text+=result.text+"\n\n"; - } - - if (result.images && result.images.length>0) { - for (var j=0; j/g, '>'); -} - -function standardQMarks(text) { - return text.replace(/\u2018|\u8216|\u2019|\u8217/g,"'").replace(/\u201c|\u8220|\u201d|\u8221/g, '"') -} - -// Process each child element (not just paragraphs). -function processParagraph(index, element, inSrc, imageCounter, listCounters, image_path) { - // First, check for things that require no processing. - if (element.getNumChildren()==0) { - return null; - } - // Skip on TOC. - if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) { - return {"text": "[[TOC]]"}; - } - - // Set up for real results. - var result = {}; - var pOut = ""; - var textElements = []; - var imagePrefix = "image_"; - - // Handle Table elements. Pretty simple-minded now, but works for simple tables. - // Note that Markdown does not process within block-level HTML, so it probably - // doesn't make sense to add markup within tables. - if (element.getType() === DocumentApp.ElementType.TABLE) { - textElements.push("\n"); - var nCols = element.getChild(0).getNumCells(); - for (var i = 0; i < element.getNumChildren(); i++) { - textElements.push(" \n"); - // process this row - for (var j = 0; j < nCols; j++) { - textElements.push(" \n"); - } - textElements.push(" \n"); - } - textElements.push("
    " + element.getChild(i).getChild(j).getText() + "
    \n"); - } - - // Process various types (ElementType). - for (var i = 0; i < element.getNumChildren(); i++) { - var t = element.getChild(i).getType(); - - if (t === DocumentApp.ElementType.TABLE_ROW) { - // do nothing: already handled TABLE_ROW - } else if (t === DocumentApp.ElementType.TEXT) { - var txt = element.getChild(i); - pOut += txt.getText(); - textElements.push(txt); - } else if (t === DocumentApp.ElementType.INLINE_IMAGE) { - var imglink = element.getChild(i).getLinkUrl(); - result.images = result.images || []; - var blob = element.getChild(i).getBlob() - var contentType = blob.getContentType(); - var extension = ""; - if (/\/png$/.test(contentType)) { - extension = ".png"; - } else if (/\/gif$/.test(contentType)) { - extension = ".gif"; - } else if (/\/jpe?g$/.test(contentType)) { - extension = ".jpg"; - } else { - throw "Unsupported image type: "+contentType; - } - - var name = imagePrefix + imageCounter + extension; - blob.setName(name); - - imageCounter++; - if (!return_string || force_save_images) { - textElements.push('![](' + image_path + '/' + name + ')'); - } else { - textElements.push('![](' + imglink + ')'); - } - //result.images.push( { - // "bytes": blob.getBytes(), - // "type": contentType, - // "name": name}); - - result.images.push({ "blob" : blob } ) - - } else if (t === DocumentApp.ElementType.PAGE_BREAK) { - // ignore - } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) { - textElements.push('* * *\n'); - } else if (t === DocumentApp.ElementType.FOOTNOTE) { - textElements.push(' ('+element.getChild(i).getFootnoteContents().getText()+')'); - } else { - throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: " - +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index; - } - } - - if (textElements.length==0) { - // Isn't result empty now? - return result; - } - - var ind_f = element.getIndentFirstLine(); - var ind_s = element.getIndentStart(); - var ind_e = element.getIndentEnd(); - var i_fse = ['ind_f','ind_s','ind_e']; - var indents = {}; - for (indt=0;indt 0) indents[indname] = eval(indname); - // lazy test, null (no indent) is not greater than zero, but becomes set if indent 'undone' - } - var inIndent = (Object.keys(indents).length > 0); - - // evb: Add glossary and figure caption too. (And abbreviations: gloss and fig-cap.) - // process source code block: - if (/^\s*---\s+gloss\s*$/.test(pOut) || /^\s*---\s+source glossary\s*$/.test(pOut)) { - result.sourceGlossary = "start"; - } else if (/^\s*---\s+fig-cap\s*$/.test(pOut) || /^\s*---\s+source fig-cap\s*$/.test(pOut)) { - result.sourceFigCap = "start"; - } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) { - result.source = "start"; - } else if (/^\s*---\s+class\s+([^ ]+)\s*$/.test(pOut)) { - result.inClass = "start"; - result.className = RegExp.$1.replace(/\./g,' '); - } else if (/^\s*---\s*$/.test(pOut)) { - result.source = "end"; - result.sourceGlossary = "end"; - result.sourceFigCap = "end"; - result.inClass = "end"; - } else if (/^\s*---\s+jsperf\s*([^ ]+)\s*$/.test(pOut)) { - result.text = ''; - } else { - - prefix = findPrefix(inSrc, element, listCounters); - - var pOut = ""; - for (var i=0; i): - if (gt === DocumentApp.GlyphType.BULLET - || gt === DocumentApp.GlyphType.HOLLOW_BULLET - || gt === DocumentApp.GlyphType.SQUARE_BULLET) { - prefix += "* "; - } else { - // Ordered list (
      ): - var key = listItem.getListId() + '.' + listItem.getNestingLevel(); - var counter = listCounters[key] || 0; - counter++; - listCounters[key] = counter; - prefix += counter+". "; - } - } - } - return prefix; -} - -function processTextElement(inSrc, txt) { - if (typeof(txt) === 'string') { - return txt; - } - - var pOut = txt.getText(); - if (! txt.getTextAttributeIndices) { - return pOut; - } - -// Logger.log("Initial String: " + pOut) - - // CRC introducing reformatted_txt to let us apply rational formatting that we can actually parse - var reformatted_txt = txt.copy(); - reformatted_txt.deleteText(0,pOut.length-1); - reformatted_txt = reformatted_txt.setText(pOut); - - var attrs = txt.getTextAttributeIndices(); - var lastOff = pOut.length; - // We will run through this loop multiple times for the things we care about. - // Font - // URL - // Then for alignment - // Then for bold - // Then for italic. - - // FONTs - var lastOff = pOut.length; // loop goes backwards, so this holds - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var font=txt.getFontFamily(off) - if (font) { - while (i>=1 && txt.getFontFamily(attrs[i-1])==font) { - // detect fonts that are in multiple pieces because of errors on formatting: - i-=1; - off=attrs[i]; - } - reformatted_txt.setFontFamily(off, lastOff-1, font); - } - lastOff=off; - } - - // URL - // XXX TODO actually convert to URL text here. - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var url=txt.getLinkUrl(off); - if (url) { - while (i>=1 && txt.getLinkUrl(attrs[i-1]) == url) { - // detect urls that are in multiple pieces because of errors on formatting: - i-=1; - off=attrs[i]; - } - reformatted_txt.setLinkUrl(off, lastOff-1, url); - } - lastOff=off; - } - - // alignment - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var alignment=txt.getTextAlignment(off); - if (alignment) { // - while (i>=1 && txt.getTextAlignment(attrs[i-1]) == alignment) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setTextAlignment(off, lastOff-1, alignment); - } - lastOff=off; - } - - // strike - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var strike=txt.isStrikethrough(off); - if (strike) { - while (i>=1 && txt.isStrikethrough(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setStrikethrough(off, lastOff-1, strike); - } - lastOff=off; - } - - // bold - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var bold=txt.isBold(off); - if (bold) { - while (i>=1 && txt.isBold(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setBold(off, lastOff-1, bold); - } - lastOff=off; - } - - // italics - var lastOff=pOut.length; - for (var i=attrs.length-1; i>=0; i--) { - var off=attrs[i]; - var italic=txt.isItalic(off); - if (italic) { - while (i>=1 && txt.isItalic(attrs[i-1])) { - i-=1; - off=attrs[i]; - } - reformatted_txt.setItalic(off, lastOff-1, italic); - } - lastOff=off; - } - - - var mOut=""; // Modified out string - var harmonized_attrs = reformatted_txt.getTextAttributeIndices(); - reformatted_txt.getTextAttributeIndices(); // @lmmx: is this a typo...? - pOut = reformatted_txt.getText(); - - - // Markdown is farily picky about how it will let you intersperse spaces around words and strong/italics chars. 
This regex (hopefully) clears this up - // Match any number of \*, followed by spaces/word boundaries against anything that is not the \*, followed by boundaries, spaces and * again. - // Test case at http://jsfiddle.net/ovqLv0s9/2/ - - var reAlignStars = /(\*+)(\s*\b)([^\*]+)(\b\s*)(\*+)/g; - - var lastOff=pOut.length; - for (var i=harmonized_attrs.length-1; i>=0; i--) { - var off=harmonized_attrs[i]; - - var raw_text = pOut.substring(off, lastOff) - - var d1 = ""; // @lmmx: build up a modifier prefix - var d2 = ""; // @lmmx: ...and suffix - - var end_font; - - var mark_bold = false; - var mark_italic = false; - var mark_code = false; - var mark_sup = false; - var mark_sub = false; - var mark_strike = false; - - // The end of the text block is a special case. - if (lastOff == pOut.length) { - end_font = reformatted_txt.getFontFamily(lastOff - 1) - if (end_font) { - if (!inSrc && end_font===end_font.COURIER_NEW) { - mark_code = true; - } - } - if (reformatted_txt.isBold(lastOff -1)) { - mark_bold = true; - } - if (reformatted_txt.isItalic(lastOff - 1)) { - // edbacher: changed this to handle bold italic properly. - mark_italic = true; - } - if (reformatted_txt.isStrikethrough(lastOff - 1)) { - mark_strike = true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } else { - end_font = reformatted_txt.getFontFamily(lastOff -1 ) - if (end_font) { - if (!inSrc && end_font===end_font.COURIER_NEW && reformatted_txt.getFontFamily(lastOff) != end_font) { - mark_code=true; - } - } - if (reformatted_txt.isBold(lastOff - 1) && !reformatted_txt.isBold(lastOff) ) { - mark_bold=true; - } - if (reformatted_txt.isStrikethrough(lastOff - 1) && !reformatted_txt.isStrikethrough(lastOff)) { - mark_strike=true; - } - if (reformatted_txt.isItalic(lastOff - 1) && !reformatted_txt.isItalic(lastOff)) { - mark_italic=true; - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUPERSCRIPT) { - if (reformatted_txt.getTextAlignment(lastOff)!==DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - } - if (reformatted_txt.getTextAlignment(lastOff - 1)===DocumentApp.TextAlignment.SUBSCRIPT) { - if (reformatted_txt.getTextAlignment(lastOff)!==DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } - } - - if (mark_code) { - d2 = '`'; // shouldn't these go last? or will it interfere w/ reAlignStars? 
- } - if (mark_bold) { - d2 = "**" + d2; - } - if (mark_italic) { - d2 = "*" + d2; - } - if (mark_strike) { - d2 = "" + d2; - } - if (mark_sup) { - d2 = '' + d2; - } - if (mark_sub) { - d2 = '' + d2; - } - - mark_bold = mark_italic = mark_code = mark_sup = mark_sub = mark_strike = false; - - var font=reformatted_txt.getFontFamily(off); - if (off == 0) { - if (font) { - if (!inSrc && font===font.COURIER_NEW) { - mark_code = true; - } - } - if (reformatted_txt.isBold(off)) { - mark_bold = true; - } - if (reformatted_txt.isItalic(off)) { - mark_italic = true; - } - if (reformatted_txt.isStrikethrough(off)) { - mark_strike = true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } else { - if (font) { - if (!inSrc && font===font.COURIER_NEW && reformatted_txt.getFontFamily(off - 1) != font) { - mark_code=true; - } - } - if (reformatted_txt.isBold(off) && !reformatted_txt.isBold(off -1) ) { - mark_bold=true; - } - if (reformatted_txt.isItalic(off) && !reformatted_txt.isItalic(off - 1)) { - mark_italic=true; - } - if (reformatted_txt.isStrikethrough(off) && !reformatted_txt.isStrikethrough(off - 1)) { - mark_strike=true; - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUPERSCRIPT) { - if (reformatted_txt.getTextAlignment(off - 1)!==DocumentApp.TextAlignment.SUPERSCRIPT) { - mark_sup = true; - } - } - if (reformatted_txt.getTextAlignment(off)===DocumentApp.TextAlignment.SUBSCRIPT) { - if (reformatted_txt.getTextAlignment(off - 1)!==DocumentApp.TextAlignment.SUBSCRIPT) { - mark_sub = true; - } - } - } - - - if (mark_code) { - d1 = '`'; - } - - if (mark_bold) { - d1 = d1 + "**"; - } - - if (mark_italic) { - d1 = d1 + "*"; - } - - if (mark_sup) { - d1 = d1 + ''; - } - - if (mark_sub) { - d1 = d1 + ''; - } - - if (mark_strike) { - d1 = d1 + ''; - } - - var url=reformatted_txt.getLinkUrl(off); - if (url) { - mOut = d1 + '['+ raw_text +']('+url+')' + d2 + mOut; - } else { - var new_text = d1 + raw_text + d2; - new_text = new_text.replace(reAlignStars, "$2$1$3$5$4"); - mOut = new_text + mOut; - } - - lastOff=off; -// Logger.log("Modified String: " + mOut) - } - - mOut = pOut.substring(0, off) + mOut; - return mOut; -} \ No newline at end of file diff --git a/examples/gemini/python/docs-agent/third_party/g2docsmd-html/patches/001-initial-changes-for-docs-agent.patch b/examples/gemini/python/docs-agent/third_party/g2docsmd-html/patches/001-initial-changes-for-docs-agent.patch deleted file mode 100644 index af6d3aa93..000000000 --- a/examples/gemini/python/docs-agent/third_party/g2docsmd-html/patches/001-initial-changes-for-docs-agent.patch +++ /dev/null @@ -1,153 +0,0 @@ ---- third_party/g2docsmd-html/exportmd.gs 2023-10-20 22:03:56.577441177 +0000 -+++ apps_script/exportmd.gs 2023-10-20 22:45:27.268431292 +0000 -@@ -1,4 +1,22 @@ --/* -+/** -+ * Copyright 2023 Google LLC -+ * -+ * Licensed under the Apache License, Version 2.0 (the "License"); -+ * you may not use this file except in compliance with the License. -+ * You may obtain a copy of the License at -+ * -+ * http://www.apache.org/licenses/LICENSE-2.0 -+ * -+ * Unless required by applicable law or agreed to in writing, software -+ * distributed under the License is distributed on an "AS IS" BASIS, -+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-+ * See the License for the specific language governing permissions and -+ * limitations under the License. -+ */ -+ -+/* Original script is from: -+https://github.com/lmmx/gdocs2md-html/blob/master/exportmd.gs -+and commit: 0d86cfa - Parsing from mangini/gdocs2md. - Modified by clearf to add files to the google directory structure. - Modified by lmmx to write Markdown, going back to HTML-incorporation. -@@ -601,7 +619,7 @@ - - } - --function convertDocumentToMarkdown(document, destination_folder, optional_switches) { -+function convertDocumentToMarkdown(document, destination_folder, frontmatter_input, optional_switches) { - // if returning a string, force_save_images will make the script continue - experimental - var possible_switches = ['return_string', 'force_save_images']; - var property_name = 'conversion_switches'; -@@ -614,8 +632,13 @@ - - var image_prefix = script_properties.getProperty("image_folder_prefix"); - var numChildren = document.getActiveSection().getNumChildren(); -- var text = ""; -- var md_filename = document.getName()+".md"; -+ if (frontmatter_input != "") { -+ var text = frontmatter_input; -+ } -+ else { -+ var text = "" -+ } -+ var md_filename = sanitizeFileName(document.getName()) + ".md"; - var image_foldername = document.getName()+"_images"; - var inSrc = false; - var inClass = false; -@@ -724,7 +747,7 @@ - } - DriveApp.removeFile(saved_file) // Removes from google drive root. - } -- -+return saved_file; - } - - function escapeHTML(text) { -@@ -738,6 +761,9 @@ - // Process each child element (not just paragraphs). - function processParagraph(index, element, inSrc, imageCounter, listCounters, image_path) { - // First, check for things that require no processing. -+ if (element.getType() === DocumentApp.ElementType.UNSUPPORTED) { -+ return null; -+ } - if (element.getNumChildren()==0) { - return null; - } -@@ -769,6 +795,11 @@ - textElements.push("\n"); - } - -+ // Need to handle this element type, return null for now -+ if (element.getType() === DocumentApp.ElementType.CODE_SNIPPET) { -+ return null -+ } -+ - // Process various types (ElementType). 
- for (var i = 0; i < element.getNumChildren(); i++) { - var t = element.getChild(i).getType(); -@@ -811,12 +842,38 @@ - - result.images.push({ "blob" : blob } ) - -- } else if (t === DocumentApp.ElementType.PAGE_BREAK) { -+ // Need to fix this case TODO -+ } else if (t === DocumentApp.ElementType.INLINE_DRAWING) { -+ -+ imageCounter++; -+ if (!return_string || force_save_images) { -+ textElements.push('![](' + "drawing" + '/' + " name" + ')'); -+ } else { -+ textElements.push('![](' + "drawing" + ')'); -+ } -+ //result.images.push( { -+ // "bytes": blob.getBytes(), -+ // "type": contentType, -+ // "name": name}); -+ -+ // result.images.push({ "blob" : blob } ) -+ -+ } -+ else if (t === DocumentApp.ElementType.PAGE_BREAK) { - // ignore - } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) { - textElements.push('* * *\n'); - } else if (t === DocumentApp.ElementType.FOOTNOTE) { - textElements.push(' ('+element.getChild(i).getFootnoteContents().getText()+')'); -+ // Fixes for new elements -+ } else if (t === DocumentApp.ElementType.DATE) { -+ textElements.push(' ('+element.getChild(i)+')'); -+ } else if (t === DocumentApp.ElementType.RICH_LINK) { -+ textElements.push(' ('+element.getChild(i).getUrl()+')'); -+ } else if (t === DocumentApp.ElementType.PERSON) { -+ textElements.push(element.getChild(i).getName() + ', '); -+ } else if (t === DocumentApp.ElementType.UNSUPPORTED) { -+ textElements.push(' '); - } else { - throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: " - +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index; -@@ -828,10 +885,17 @@ - return result; - } - -- var ind_f = element.getIndentFirstLine(); -- var ind_s = element.getIndentStart(); -- var ind_e = element.getIndentEnd(); -- var i_fse = ['ind_f','ind_s','ind_e']; -+// Fix for unrecognized command getIndentFirstLine -+ var ind_f = 0; -+ var ind_s = 0; -+ var ind_e = 0; -+ if (t === DocumentApp.ElementType.PARAGRAPH) { -+ -+ var ind_f = element.getIndentFirstLine(); -+ var ind_s = element.getIndentStart(); -+ var ind_e = element.getIndentEnd(); -+ } -+ var i_fse = [ind_f,ind_s,ind_e]; - var indents = {}; - for (indt=0;indt