feat: support prompt optimization and evaluation #198

Merged 7 commits on Jan 18, 2024

285 changes: 282 additions & 3 deletions cookbook/prompt.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/cli.md
@@ -244,7 +244,7 @@ $ qianfan dataset view [OPTIONS] DATASET
* `--raw`: Show the raw data.
* `--help`: Show the help documentation.

## trainer (Training)
### trainer (Training)

**Usage**:

@@ -260,7 +260,7 @@ $ qianfan trainer [OPTIONS] COMMAND [ARGS]...

* `run`: Run a trainer task

### run
#### run (Launch a training task)

Run a trainer task.

56 changes: 54 additions & 2 deletions docs/prompt.md
@@ -114,6 +114,58 @@ prompt, _ = p.render(usage="测试")
print(prompt) # => 这是一个用于测试的 Prompt
```

### Optimize a Prompt

To improve the accuracy of model inference, a Prompt can be optimized with the `optimize` method.

```python
prompt = Prompt(template="帮我写一份{job}年终总结")
optimized_prompt = prompt.optimize()
```

Optimization supports several options (a sketch of passing them follows the list):

- `optimize_quality`: optimize the quality of the prompt
- `simplify_prompt`: simplify the prompt by dropping filler words such as "的" and "吧" that carry little meaning, condensing the text and reducing inference cost
- `iteration_round`: number of optimization iterations, defaults to 1
- `enable_cot`: enable chain of thought, which guides the model to break the prompt down and reason step by step; recommended only for scenarios such as mathematical calculation and logical reasoning
- `app_id`: App ID to use during optimization, optional
- `service_name`: name of the service to use during optimization, optional
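
Continuing the `prompt` object from the snippet above, the following is a minimal sketch of passing these options; the particular combination is illustrative rather than a recommendation.

```python
# Two optimization rounds with chain of thought enabled; chain of thought
# mainly helps for reasoning-heavy prompts.
optimized_prompt = prompt.optimize(
    optimize_quality=True,
    simplify_prompt=False,
    iteration_round=2,
    enable_cot=True,
)
print(optimized_prompt.template)  # the optimized template text
```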

### Evaluate Prompts

The SDK provides an `evaluate` method for assessing the quality of different Prompts across different scenes.

```python
prompts = [old_prompt, new_prompt]
scenes = [
{
"args": {"job": "程序员"},
"expected": "代码"
},
{
"args": {"job": "产品经理"},
"expected": "用户量"
}
]
model = Completion()

results = Prompt.evaluate(prompts, scenes, model)
```

Here `prompts` is a list of Prompts, `model` is the model object used for generation, and `scenes` is a list of scenes. Each scene contains two fields: `args`, the variable arguments for the Prompt, and `expected`, the expected output for that Prompt.

The output is a list in which each element is a `PromptEvaluateResult` object holding the evaluation result for one Prompt, with the following fields (a sketch of reading them follows the list):

- `prompt`: the evaluated Prompt object
- `scene`: the performance under each scene, a list of entries with
  - `new_prompt`: the new prompt after variable substitution
  - `variables`: the variables used
  - `expected_target`: the expected output
  - `response`: the output generated by the model
  - `score`: the score given to the model output
- `summary`: a summary of the Prompt's overall performance
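
A minimal sketch of consuming these fields, continuing the `results` variable from the example above:

```python
# Field names follow the list above.
for result in results:
    print(f"Prompt: {result.prompt.template}")
    print(f"Summary: {result.summary}")
    for scene in result.scene:
        print(f"  variables: {scene['variables']}")
        print(f"  response:  {scene['response']}")
        print(f"  score:     {scene['score']}")
```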

### Framework Types

To help the model better understand the input, sufficiently detailed information needs to be provided. Qianfan offers several Prompt framework types that help users write high-quality Prompts. The SDK currently supports the following framework types:
@@ -187,15 +239,15 @@ from qianfan.resources import Prompt

### Create a Prompt

The simplest way is to call `Prompt.create` directly, passing the Prompt name and template content; variables to be filled are marked with `{}` in the template. The template ID can be read from the `['result']['templateId']` field of the response. See the [API documentation](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Hlp7waib4) for the returned fields.
The simplest way is to call `Prompt.create` directly, passing the Prompt name and template content; variables to be filled are marked with `{}` in the template. The template ID can be read from the `['result']['templatePK']` field of the response. See the [API documentation](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Hlp7waib4) for the returned fields.

```python
resp = Prompt.create(
name="example_prompt",
# Variables must start with a letter, contain only letters, digits and underscores, and be 2-30 characters long
template="example template {var1}",
)
print(resp['result']['templateId'])
print(resp['result']['templatePK'])
```

The SDK also provides other parameters for creating Prompts as needed, for example the `identifier` field specifies the symbols used to mark variables; a sketch follows.
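
The original example is cut off in this diff view; the following is a minimal sketch of such a call, assuming `()` is among the supported identifier styles (it appears in the `identifier` literal in `prompt.py`) and reusing the `templatePK` field shown above.

```python
# Sketch only: use "()" as the variable identifier instead of the default "{}".
resp = Prompt.create(
    name="example_prompt_with_identifier",
    template="example template (var1)",
    identifier="()",
)
print(resp['result']['templatePK'])
```
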
215 changes: 206 additions & 9 deletions src/qianfan/common/prompt/prompt.py
@@ -12,14 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, cast

from qianfan.common.hub.interface import HubSerializable
from qianfan.consts import PromptFrameworkType, PromptSceneType, PromptType
from qianfan.errors import InvalidArgumentError
from qianfan.consts import (
PromptFrameworkType,
PromptSceneType,
PromptScoreStandard,
PromptType,
)
from qianfan.errors import InvalidArgumentError, RequestError
from qianfan.resources.console.prompt import Prompt as PromptResource
from qianfan.resources.typing import Literal
from qianfan.resources.llm.completion import Completion
from qianfan.resources.typing import Literal, QfResponse
from qianfan.utils import log_warn


Expand Down Expand Up @@ -48,7 +55,7 @@ class Prompt(HubSerializable):
Prompt
"""

id: Optional[int] = None
id: Optional[str] = None
name: Optional[str] = None
template: str
variables: List[str]
@@ -66,7 +73,7 @@ def __init__(
self,
template: Optional[str] = None,
name: Optional[str] = None,
id: Optional[int] = None,
id: Optional[str] = None,
identifier: Literal["{}", "{{}}", "[]", "[[]]", "()", "(())"] = "{}",
variables: Optional[List[str]] = None,
labels: List[PromptLabel] = [],
@@ -160,7 +167,7 @@ def _hub_pull(cls, name: str) -> "Prompt":

prompt = cls(
name=prompt_info["templateName"],
id=prompt_info["templateId"],
id=prompt_info["templatePK"],
template=prompt_info["templateContent"],
variables=(
[]
@@ -245,7 +252,7 @@ def _hub_push(self) -> None:
raise InvalidArgumentError(
f"Failed to create prompt: {resp['message']['global']}"
)
self.id = resp["result"]["templateId"]
self.id = resp["result"]["templatePK"]
self._mode = "remote"
else:
if self.name is None:
@@ -264,7 +271,7 @@ def _hub_push(self) -> None:
raise InvalidArgumentError(
f"Failed to update prompt: {resp['message']['global']}"
)
self.id = resp["result"]["templateId"]
self.id = resp["result"]["templatePK"]

def render(self, **kwargs: str) -> Tuple[str, Optional[str]]:
"""
@@ -460,3 +467,193 @@ def fewshot_prompt(
if prompt != "":
output = prompt + "\n\n" + output
return output

def optimize(
self,
optimize_quality: bool = True,
simplify_prompt: bool = False,
iteration_round: Literal[1, 2] = 1,
enable_cot: bool = False,
app_id: Optional[int] = None,
service_name: Optional[str] = None,
**kwargs: Any,
) -> "Prompt":
"""
Optimize a prompt for better performance and effectiveness.

Parameters:
optimize_quality (bool):
Flag indicating whether to optimize for prompt quality.
simplify_prompt (bool):
Flag indicating whether to simplify the prompt structure.
iteration_round (Literal[1, 2]):
The number of optimization iterations to perform (1 or 2).
enable_cot (bool):
Flag indicating whether to enable chain of thought.
app_id (Optional[int]):
Optional application ID for context-specific optimizations.
service_name (Optional[str]):
Optional service used for optimizing.
**kwargs (Any):
Additional keyword arguments for future extensibility.

Returns:
Prompt:
The optimized Prompt object.

Please refer to the following link for more details about prompt optimization.

API Doc: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/olr8svd33
"""
operations = []
operations.append(
{
"opType": 1,
"payload": 1 if optimize_quality else 0,
}
)
operations.append(
{
"opType": 2,
"payload": 1 if simplify_prompt else 0,
}
)
operations.append({"opType": 3, "payload": iteration_round})
operations.append(
{
"opType": 4,
"payload": 1 if enable_cot else 0,
}
)
resp = PromptResource.create_optimiztion_task(
self.template,
operations,
app_id=app_id,
service_name=service_name,
**kwargs,
)
task_id = resp["result"]["id"]
while True:
resp = PromptResource.get_optimization_task(task_id, **kwargs)
status = resp["result"]["processStatus"]
if status == 2:  # finished
break
elif status == 3: # failed
raise RequestError("Prompt optimization task failed.")
time.sleep(1)
optimized_prompt = resp["result"]["optimizeContent"]

return Prompt(optimized_prompt)

@dataclass
class PromptEvaluateResult(object):
"""
Evaluation result of a prompt
"""

prompt: "Prompt"
scene: List[Dict[str, Any]]
summary: str

@classmethod
def evaluate(
cls,
prompt_list: List["Prompt"],
scenes: List[Dict[str, Any]],
model: Completion,
standard: PromptScoreStandard = PromptScoreStandard.Semantic,
) -> List[PromptEvaluateResult]:
"""
Evaluate a list of prompts against specified scenes using the given model.

Parameters:
prompt_list (List["Prompt"]):
A list of prompt templates to be evaluated.
scenes (List[Dict[str, Any]]):
List of scenes represented as dictionaries containing relevant information.
The dict should contain the following keys:
- args: A dict containing the variables to be replaced in the prompt.
- expected: The expected output of the prompt.
model (Completion):
An instance of the Completion client.
standard (PromptScoreStandard, optional):
The scoring standard to be used for evaluating prompts.

Returns:
List[PromptEvaluateResult]:
A list of evaluation results, each containing the original prompt, the
result of each scene, and a summary string.

Example:
result_list = Prompt.evaluate(
prompt_list=[prompt1, prompt2, prompt3],
scenes=[{
"args": {"name": "Alice"},
"expected": "Hello, Alice!"
}],
model=Completion(model="ERNIE-Bot-4"),
standard=PromptScoreStandard.Semantic
)
"""
results = [
Prompt.PromptEvaluateResult(
scene=[
{
"new_prompt": prompt.render(**scene["args"])[0],
"variables": scene["args"],
"expected_target": scene["expected"],
}
for scene in scenes
],
prompt=prompt,
summary="",
)
for prompt in prompt_list
]

for i in range(len(results)):
for j in range(len(results[i].scene)):
resp = cast(QfResponse, model.do(results[i].scene[j]["new_prompt"]))
results[i].scene[j]["response"] = resp["result"]

eval_summary_req = [
{
"prompt": prompt.prompt.template,
"scenes": [
{
"variables": scene["variables"],
"expected_target": scene["expected_target"],
"response": scene["response"],
"new_prompt": scene["new_prompt"],
}
for scene in prompt.scene
],
"response_list": [r["response"] for r in prompt.scene],
}
for prompt in results
]

eval_score_req = [
{
"scene": scenes[j]["expected"],
"response_list": [
results[i].scene[j]["response"] for i in range(len(prompt_list))
],
}
for j in range(len(scenes))
]

summary_resp = PromptResource.evaluation_summary(eval_summary_req)
summary = summary_resp["result"]["responses"]

for i in range(len(results)):
results[i].summary = summary[i]["response"]

score_resp = PromptResource.evaluation_score(standard.value, eval_score_req)
score = score_resp["result"]["scores"]

for i in range(len(results)):
for j in range(len(results[i].scene)):
results[i].scene[j]["score"] = score[j][i]

return results
6 changes: 6 additions & 0 deletions src/qianfan/config.py
Original file line number Diff line number Diff line change
@@ -92,10 +92,16 @@ class Config:
LLM_API_RETRY_TIMEOUT: int = Field(default=DefaultValue.RetryTimeout)
LLM_API_RETRY_BACKOFF_FACTOR: float = Field(default=DefaultValue.RetryBackoffFactor)
LLM_API_RETRY_JITTER: float = Field(default=DefaultValue.RetryJitter)
LLM_API_RETRY_MAX_WAIT_INTERVAL: float = Field(
default=DefaultValue.RetryMaxWaitInterval
)
LLM_API_RETRY_ERR_CODES: set = Field(default=DefaultValue.RetryErrCodes)
CONSOLE_API_RETRY_COUNT: int = Field(default=DefaultValue.ConsoleRetryCount)
CONSOLE_API_RETRY_TIMEOUT: int = Field(default=DefaultValue.ConsoleRetryTimeout)
CONSOLE_API_RETRY_JITTER: float = Field(default=DefaultValue.ConsoleRetryJitter)
CONSOLE_API_RETRY_MAX_WAIT_INTERVAL: float = Field(
default=DefaultValue.ConsoleRetryMaxWaitInterval
)
CONSOLE_API_RETRY_ERR_CODES: set = Field(default=DefaultValue.ConsoleRetryErrCodes)
CONSOLE_API_RETRY_BACKOFF_FACTOR: int = Field(
default=DefaultValue.ConsoleRetryBackoffFactor
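
The retry fields above are plain `Config` fields. As a hedged sketch (assuming the SDK's usual `QIANFAN_`-prefixed environment variables and the `qianfan.get_config()` accessor, neither of which is shown in this diff), the new maximum-wait intervals could be configured like this:

```python
import os

# Assumed env-var names: the field name with a QIANFAN_ prefix.
# Set them before the SDK loads its configuration.
os.environ["QIANFAN_LLM_API_RETRY_MAX_WAIT_INTERVAL"] = "120"
os.environ["QIANFAN_CONSOLE_API_RETRY_MAX_WAIT_INTERVAL"] = "60"

import qianfan  # imported after setting the env vars on purpose

print(qianfan.get_config().LLM_API_RETRY_MAX_WAIT_INTERVAL)
```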