refactor: replace OPRO with simple iterative refinement

Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (selected + rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个...)
- Update README to reflect new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added a role-definition requirement as the first item in all prompt templates
This commit is contained in:
2025-12-08 09:43:20 +08:00
parent 602875b08c
commit 65cdcf29dc
5 changed files with 315 additions and 86 deletions

View File

@@ -304,41 +304,55 @@
createNewRun(msg);
}
// NOTE(review): this span is a diff hunk rendered without +/- markers, so the
// pre-commit and post-commit versions of handleContinueOptimize are interleaved
// and the text is not runnable as-is. Judging by the commit message and the
// updated onClick call site, the (selectedInstruction, allCandidates) signature
// and the /opro/refine request look like the added side, while the
// (selectedInstruction, selectedScore) signature and the /opro/evaluate request
// look like the removed side — confirm against the raw diff.
async function handleContinueOptimize(selectedInstruction, selectedScore) {
if (!currentRunId || loading) return;
/**
 * Refinement variant of the handler: POSTs the picked instruction together
 * with the remaining candidates' instructions to `${API_BASE}/opro/refine`
 * and appends the returned candidates to the message list as a new
 * assistant message of type 'candidates'.
 *
 * @param {string} selectedInstruction - Instruction text the user picked; it is
 *   compared with `!==` against each candidate's `instruction`.
 * @param {Array<{instruction: string}>} allCandidates - Candidates the pick came
 *   from; only the `instruction` property of each entry is read here.
 */
async function handleContinueOptimize(selectedInstruction, allCandidates) {
// Do nothing when there is no current run, while `loading` is set, or when no
// instruction was passed in.
if (!currentRunId || loading || !selectedInstruction) return;
// First, evaluate the selected instruction to add it to trajectory
if (selectedInstruction) {
setLoading(true);
try {
// Add the selected instruction to trajectory
const res = await fetch(`${API_BASE}/opro/evaluate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
run_id: currentRunId,
instruction: selectedInstruction
})
});
const data = await res.json();
setLoading(true);
try {
// Get rejected instructions (all except the selected one)
// The filter compares instruction text, so every candidate whose text equals
// the selection is excluded, not just the clicked entry.
const rejectedInstructions = allCandidates
.map(c => c.instruction)
.filter(inst => inst !== selectedInstruction);
if (!data.success) {
throw new Error(data.error || 'Failed to evaluate instruction');
}
// Call the refinement endpoint
const res = await fetch(`${API_BASE}/opro/refine`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
run_id: currentRunId,
selected_instruction: selectedInstruction,
rejected_instructions: rejectedInstructions
})
});
// The body is parsed as JSON without looking at res.ok; failure is detected
// only through the `success` flag checked further down.
const data = await res.json();
console.log('Evaluated instruction, score:', data.data.score);
} catch (err) {
alert('评估指令失败: ' + err.message);
console.error('Error evaluating instruction:', err);
setLoading(false);
return;
} finally {
setLoading(false);
if (!data.success) {
throw new Error(data.error || 'Failed to refine instruction');
}
}
// Then generate new candidates based on updated trajectory
await generateCandidates(currentRunId);
// Add refined candidates to messages
const newMessage = {
role: 'assistant',
type: 'candidates',
iteration: data.data.iteration,
candidates: data.data.candidates
};
// setSessionMessages is called from inside the setMessages updater so both
// pieces of state receive the same `updated` array; currentSessionId is read
// from the enclosing scope at the time this handler was created.
setMessages(prev => {
const updated = [...prev, newMessage];
// Save to session messages
setSessionMessages(prevSessions => ({
...prevSessions,
[currentSessionId]: updated
}));
return updated;
});
} catch (err) {
alert('优化失败: ' + err.message);
console.error('Error refining instruction:', err);
} finally {
// Clear the loading flag whether the request succeeded or threw.
setLoading(false);
}
}
function handleExecute(instruction) {
@@ -537,7 +551,7 @@
),
React.createElement('div', { className: 'flex gap-2' },
React.createElement('button', {
onClick: () => handleContinueOptimize(cand.instruction, cand.score),
onClick: () => handleContinueOptimize(cand.instruction, msg.candidates),
disabled: loading,
className: 'px-4 py-2 bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 disabled:bg-gray-100 disabled:text-gray-400 disabled:cursor-not-allowed transition-colors text-sm font-medium'
}, '继续优化'),