feat: implement true OPRO with Gemini-style UI
- Add true OPRO system instruction optimization (vs query rewriting)
- Implement iterative optimization with performance trajectory
- Add new OPRO API endpoints (/opro/create, /opro/generate_and_evaluate, /opro/execute)
- Create modern Gemini-style chat UI (frontend/opro.html)
- Optimize performance: reduce candidates from 20 to 10 (2x faster)
- Add model selector in UI toolbar
- Add collapsible sidebar with session management
- Add copy button for instructions
- Ensure all generated prompts use simplified Chinese
- Update README with comprehensive documentation
- Add .gitignore for local_docs folder
This commit is contained in:
184
test_opro_api.py
Normal file
184
test_opro_api.py
Normal file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for TRUE OPRO API endpoints.
|
||||
|
||||
This script tests the complete OPRO workflow:
|
||||
1. Create OPRO run
|
||||
2. Generate initial candidates
|
||||
3. Evaluate candidates
|
||||
4. Generate optimized candidates
|
||||
5. View results
|
||||
|
||||
Usage:
|
||||
python test_opro_api.py
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
|
||||
# Base URL of the locally running OPRO API server
# (start it with: uvicorn _qwen_xinference_demo.api:app --host 127.0.0.1 --port 8010).
BASE_URL = "http://127.0.0.1:8010"
||||
def print_section(title):
    """Print *title* framed between two 60-character horizontal rules."""
    rule = "=" * 60
    print("\n" + rule)
    print(f" {title}")
    print(rule)
||||
def test_opro_workflow():
    """Run the complete OPRO workflow end-to-end against the local API.

    Steps:
        1. Create an OPRO run (Chinese -> English translation task).
        2. Generate initial instruction candidates.
        3. Evaluate each candidate and track scores.
        4. Generate a second, optimized batch of candidates.
        5. Display run details (best instruction, trajectory).
        6. List all runs.

    Aborts early (plain ``return``) if any step reports failure.
    Requires the API server at BASE_URL to be reachable.
    """
    print_section("1. Create OPRO Run")

    # Create a new OPRO run with a small translation task and 5 test cases.
    create_req = {
        "task_description": "将用户输入的中文翻译成英文,要求准确自然",
        "test_cases": [
            {"input": "你好", "expected_output": "Hello"},
            {"input": "谢谢", "expected_output": "Thank you"},
            {"input": "早上好", "expected_output": "Good morning"},
            {"input": "晚安", "expected_output": "Good night"},
            {"input": "再见", "expected_output": "Goodbye"}
        ]
    }

    response = requests.post(f"{BASE_URL}/opro/create", json=create_req)
    result = response.json()

    if not result.get("success"):
        print(f"❌ Failed to create OPRO run: {result}")
        return

    run_id = result["data"]["run_id"]
    print(f"✅ Created OPRO run: {run_id}")
    print(f" Task: {result['data']['task_description']}")
    print(f" Test cases: {result['data']['num_test_cases']}")

    # ========================================================================
    print_section("2. Generate Initial Candidates")

    iterate_req = {"run_id": run_id, "top_k": 5}
    response = requests.post(f"{BASE_URL}/opro/iterate", json=iterate_req)
    result = response.json()

    if not result.get("success"):
        print(f"❌ Failed to generate candidates: {result}")
        return

    candidates = result["data"]["candidates"]
    print(f"✅ Generated {len(candidates)} initial candidates:")
    for i, candidate in enumerate(candidates, 1):
        print(f"\n [{i}] {candidate[:100]}...")

    # ========================================================================
    print_section("3. Evaluate Candidates")

    scores = []
    for i, candidate in enumerate(candidates, 1):
        print(f"\n Evaluating candidate {i}/{len(candidates)}...")

        eval_req = {
            "run_id": run_id,
            "instruction": candidate
        }

        response = requests.post(f"{BASE_URL}/opro/evaluate", json=eval_req)
        result = response.json()

        if result.get("success"):
            score = result["data"]["score"]
            scores.append(score)
            is_best = "🏆" if result["data"]["is_new_best"] else ""
            print(f" ✅ Score: {score:.4f} {is_best}")
        else:
            print(f" ❌ Evaluation failed: {result}")

        time.sleep(0.5)  # Small delay to avoid overwhelming the API

    # Guard: if every evaluation failed (or no candidates were returned),
    # scores is empty and sum()/len() and max() would raise.
    if scores:
        print(f"\n Average score: {sum(scores)/len(scores):.4f}")
        print(f" Best score: {max(scores):.4f}")
    else:
        print("\n ⚠️ No candidates were scored successfully")

    # ========================================================================
    print_section("4. Generate Optimized Candidates (Iteration 2)")

    print(" Generating candidates based on performance trajectory...")

    iterate_req = {"run_id": run_id, "top_k": 5}
    response = requests.post(f"{BASE_URL}/opro/iterate", json=iterate_req)
    result = response.json()

    if not result.get("success"):
        print(f"❌ Failed to generate optimized candidates: {result}")
        return

    optimized_candidates = result["data"]["candidates"]
    print(f"✅ Generated {len(optimized_candidates)} optimized candidates:")
    for i, candidate in enumerate(optimized_candidates, 1):
        print(f"\n [{i}] {candidate[:100]}...")

    # ========================================================================
    print_section("5. View Run Details")

    response = requests.get(f"{BASE_URL}/opro/run/{run_id}")
    result = response.json()

    if not result.get("success"):
        print(f"❌ Failed to get run details: {result}")
        return

    data = result["data"]
    print("✅ OPRO Run Details:")
    print(f" Run ID: {data['run_id']}")
    print(f" Task: {data['task_description']}")
    print(f" Iteration: {data['iteration']}")
    print(f" Status: {data['status']}")
    print(f" Best Score: {data['best_score']:.4f}")
    print("\n Best Instruction:")
    print(f" {data['best_instruction'][:200]}...")

    print("\n Top 5 Trajectory:")
    for i, item in enumerate(data['trajectory'][:5], 1):
        print(f" [{i}] Score: {item['score']:.4f}")
        print(f" {item['instruction'][:80]}...")

    # ========================================================================
    print_section("6. List All Runs")

    response = requests.get(f"{BASE_URL}/opro/runs")
    result = response.json()

    if result.get("success"):
        runs = result["data"]["runs"]
        print(f"✅ Total OPRO runs: {result['data']['total']}")
        for run in runs:
            print(f"\n Run: {run['run_id']}")
            print(f" Task: {run['task_description'][:50]}...")
            print(f" Iteration: {run['iteration']}, Best Score: {run['best_score']:.4f}")

    print_section("✅ OPRO Workflow Test Complete!")
    print(f"\nRun ID: {run_id}")
    print("You can view details at:")
    print(f" {BASE_URL}/opro/run/{run_id}")
||||
# Script entry point: announce the target server, give the operator a few
# seconds to abort, then run the end-to-end OPRO workflow test.
if __name__ == "__main__":
    print("=" * 60)
    print(" TRUE OPRO API Test")
    print("=" * 60)
    print(f"\nBase URL: {BASE_URL}")
    print("\nMake sure the API server is running:")
    print(" uvicorn _qwen_xinference_demo.api:app --host 127.0.0.1 --port 8010")
    print("\nStarting test in 3 seconds...")
    # Deliberate pause so the message above can be read / the run aborted.
    time.sleep(3)

    try:
        test_opro_workflow()
    except requests.exceptions.ConnectionError:
        # Most common failure mode: the API server is not running at BASE_URL.
        print("\n❌ ERROR: Could not connect to API server")
        print("Please start the server first:")
        print(" uvicorn _qwen_xinference_demo.api:app --host 127.0.0.1 --port 8010")
    except Exception as e:
        # Top-level boundary for a test script: report anything unexpected
        # with a full traceback rather than crashing silently.
        print(f"\n❌ ERROR: {e}")
        import traceback
        traceback.print_exc()
||||
Reference in New Issue
Block a user