refactor: replace OPRO with simple iterative refinement
Major changes: - Remove fake OPRO evaluation (no more fake 0.5 scores) - Add simple refinement based on user selection - New endpoint: POST /opro/refine (selected + rejected instructions) - Update prompt generation to focus on comprehensive coverage instead of style variety - All generated instructions now start with role definition (你是一个...) - Update README to reflect new approach and API endpoints Technical details: - Added refine_based_on_selection() in prompt_utils.py - Added refine_instruction_candidates() in user_prompt_optimizer.py - Added OPRORefineReq model and /opro/refine endpoint in api.py - Updated frontend handleContinueOptimize() to use new refinement flow - Changed prompt requirements from 'different styles' to 'comprehensive coverage' - Added role definition requirement as first item in all prompt templates
This commit is contained in:
@@ -24,7 +24,8 @@ from .opro.session_state import (
|
||||
from .opro.user_prompt_optimizer import generate_candidates
|
||||
from .opro.user_prompt_optimizer import (
|
||||
generate_system_instruction_candidates,
|
||||
evaluate_system_instruction
|
||||
evaluate_system_instruction,
|
||||
refine_instruction_candidates
|
||||
)
|
||||
|
||||
from .opro.ollama_client import call_qwen
|
||||
@@ -159,6 +160,15 @@ class OPROExecuteReq(BaseModel):
|
||||
model_name: Optional[str] = None
|
||||
|
||||
|
||||
class OPRORefineReq(BaseModel):
    """Request to refine based on selected instruction (simple iterative refinement, NOT OPRO)."""
    run_id: str  # id of an existing OPRO run (looked up via get_opro_run in the endpoint)
    selected_instruction: str  # the candidate the user picked; new candidates are based on it
    rejected_instructions: List[str]  # candidates the user passed over; directions to avoid
    top_k: Optional[int] = None  # number of candidates to return; endpoint falls back to config.TOP_K
    pool_size: Optional[int] = None  # generation pool size; endpoint falls back to config.GENERATION_POOL_SIZE
|
||||
|
||||
# ============================================================================
|
||||
# LEGACY ENDPOINTS (Query Rewriting - NOT true OPRO)
|
||||
# ============================================================================
|
||||
@@ -696,3 +706,44 @@ def opro_execute(req: OPROExecuteReq):
|
||||
})
|
||||
except Exception as e:
|
||||
raise AppException(500, f"Execution failed: {e}", "EXECUTION_ERROR")
|
||||
|
||||
|
||||
@app.post("/opro/refine", tags=["opro-true"])
def opro_refine(req: OPRORefineReq):
    """
    Simple iterative refinement based on user selection (NOT OPRO).

    Generates new candidates derived from the selected instruction while
    steering away from the rejected ones. No scoring, no trajectory — just
    straightforward refinement driven by user preference.

    Raises:
        AppException: 404 when the run id is unknown, 500 when refinement fails.
    """
    run_state = get_opro_run(req.run_id)
    if not run_state:
        raise AppException(404, "OPRO run not found", "RUN_NOT_FOUND")

    # Per-request overrides win; otherwise use the global configuration.
    effective_top_k = req.top_k or config.TOP_K
    effective_pool = req.pool_size or config.GENERATION_POOL_SIZE

    try:
        refined = refine_instruction_candidates(
            task_description=run_state["task_description"],
            selected_instruction=req.selected_instruction,
            rejected_instructions=req.rejected_instructions,
            top_k=effective_top_k,
            pool_size=effective_pool,
            model_name=run_state["model_name"],
        )

        # Bump the iteration counter, then re-read the run for fresh state.
        update_opro_iteration(req.run_id, refined)
        run_state = get_opro_run(req.run_id)

        payload = {
            "run_id": req.run_id,
            "iteration": run_state["iteration"],
            # score is always None: this flow performs no evaluation.
            "candidates": [{"instruction": text, "score": None} for text in refined],
            "task_description": run_state["task_description"],
        }
        return ok(payload)
    except Exception as e:
        raise AppException(500, f"Refinement failed: {e}", "REFINEMENT_ERROR")
||||
@@ -56,14 +56,15 @@ def generate_initial_system_instruction_candidates(task_description: str, pool_s
|
||||
目标任务描述:
|
||||
【{task_description}】
|
||||
|
||||
请根据以上任务,生成 {pool_size} 条高质量、风格各异的"System Instruction"候选指令。
|
||||
请根据以上任务,生成 {pool_size} 条高质量、全面的"System Instruction"候选指令。
|
||||
|
||||
要求:
|
||||
1. 每条指令必须有明显不同的风格和侧重点
|
||||
2. 覆盖不同的实现策略(例如:简洁型、详细型、示例型、角色扮演型、步骤型等)
|
||||
3. 这些指令应指导LLM的行为和输出格式,以最大化任务性能
|
||||
4. 每条指令单独成行,不包含编号或额外说明
|
||||
5. 所有生成的指令必须使用简体中文
|
||||
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
|
||||
2. 每条指令必须全面覆盖任务的所有要求和细节
|
||||
3. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
|
||||
4. 确保指令包含必要的行为规范、输出格式、注意事项等
|
||||
5. 每条指令单独成行,不包含编号或额外说明
|
||||
6. 所有生成的指令必须使用简体中文
|
||||
|
||||
生成 {pool_size} 条指令:
|
||||
"""
|
||||
@@ -120,11 +121,68 @@ def generate_optimized_system_instruction(
|
||||
然后,生成 {pool_size} 条新的、有潜力超越 {highest_score:.4f} 分的System Instruction。
|
||||
|
||||
要求:
|
||||
1. 每条指令必须有明显不同的改进策略
|
||||
2. 结合高分指令的优点,避免低分指令的缺陷
|
||||
3. 探索新的优化方向和表达方式
|
||||
4. 每条指令单独成行,不包含编号或额外说明
|
||||
5. 所有生成的指令必须使用简体中文
|
||||
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
|
||||
2. 每条指令必须全面覆盖任务的所有要求和细节
|
||||
3. 结合高分指令的优点,避免低分指令的缺陷
|
||||
4. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
|
||||
5. 每条指令单独成行,不包含编号或额外说明
|
||||
6. 所有生成的指令必须使用简体中文
|
||||
|
||||
生成 {pool_size} 条优化后的指令:
|
||||
"""
|
||||
|
||||
|
||||
def refine_based_on_selection(
    task_description: str,
    selected_instruction: str,
    rejected_instructions: List[str],
    pool_size: int = None
) -> str:
    """
    Simple refinement: Generate variations based on selected instruction while avoiding rejected ones.

    This is NOT OPRO - it's straightforward iterative refinement based on user preference.
    No scoring, no trajectory, just: "I like this one, give me more like it (but not like those)."

    Args:
        task_description: Description of the task
        selected_instruction: The instruction the user selected
        rejected_instructions: The instructions the user didn't select
        pool_size: Number of new candidates to generate; falls back to
            config.GENERATION_POOL_SIZE when omitted or falsy

    Returns:
        Prompt for generating refined candidates
    """
    if not pool_size:
        # Lazy import: config is only needed for the default, so callers that
        # pass pool_size explicitly never touch it.
        # NOTE(review): assumes a top-level `config` module on the path — confirm.
        import config
        pool_size = config.GENERATION_POOL_SIZE

    # Only mention rejected instructions when there are any, so the prompt
    # stays clean when nothing was rejected.
    rejected_text = ""
    if rejected_instructions:
        rejected_formatted = "\n".join(f"- {inst}" for inst in rejected_instructions)
        rejected_text = f"""
**用户未选择的指令(避免这些方向):**
{rejected_formatted}
"""

    return f"""
你是一个"System Prompt 改进助手"。
目标任务描述:
【{task_description}】

**用户选择的指令(基于此改进):**
{selected_instruction}
{rejected_text}

请基于用户选择的指令,生成 {pool_size} 条改进版本。

要求:
1. 每条指令必须以角色定义开头(例如:"你是一个..."、"你是..."等)
2. 保留用户选择指令的核心优点
3. 每条指令必须全面覆盖任务的所有要求和细节
4. 指令应清晰、具体、可执行,能够有效指导LLM完成任务
5. 避免与未选择指令相似的方向
6. 每条指令单独成行,不包含编号或额外说明
7. 所有生成的指令必须使用简体中文

生成 {pool_size} 条改进后的指令:
"""
||||
@@ -11,7 +11,8 @@ from .prompt_utils import (
|
||||
refine_instruction,
|
||||
refine_instruction_with_history,
|
||||
generate_initial_system_instruction_candidates,
|
||||
generate_optimized_system_instruction
|
||||
generate_optimized_system_instruction,
|
||||
refine_based_on_selection
|
||||
)
|
||||
|
||||
def parse_candidates(raw: str) -> list:
|
||||
@@ -147,3 +148,46 @@ def evaluate_system_instruction(
|
||||
correct += 1
|
||||
|
||||
return correct / total
|
||||
|
||||
|
||||
def refine_instruction_candidates(
    task_description: str,
    selected_instruction: str,
    rejected_instructions: List[str],
    top_k: int = config.TOP_K,
    pool_size: int = None,
    model_name: str = None
) -> List[str]:
    """
    Simple refinement: Generate new candidates based on user's selection.

    This is NOT OPRO - just straightforward iterative refinement.
    User picks a favorite, we generate variations of it while avoiding rejected ones.

    Args:
        task_description: Description of the task
        selected_instruction: The instruction the user selected
        rejected_instructions: The instructions the user didn't select
        top_k: Number of diverse candidates to return
        pool_size: Number of candidates to generate before clustering
        model_name: Optional model name to use

    Returns:
        List of refined instruction candidates
    """
    effective_pool = pool_size or config.GENERATION_POOL_SIZE

    # Build the meta-prompt: variations of the selected instruction,
    # steering away from the rejected ones.
    prompt = refine_based_on_selection(
        task_description=task_description,
        selected_instruction=selected_instruction,
        rejected_instructions=rejected_instructions,
        pool_size=effective_pool,
    )

    # Ask the LLM for the refined candidate pool.
    response = call_qwen(prompt, temperature=0.9, max_tokens=1024, model_name=model_name)

    # Parse the raw completion, then keep a diverse top_k via clustering.
    parsed = parse_candidates(response)
    return cluster_and_select(parsed, top_k=top_k)
||||
Reference in New Issue
Block a user