refactor: replace OPRO with simple iterative refinement

Major changes:
- Remove fake OPRO evaluation (no more fake 0.5 scores)
- Add simple refinement based on user selection
- New endpoint: POST /opro/refine (takes the selected and rejected instructions)
- Update prompt generation to focus on comprehensive coverage instead of style variety
- All generated instructions now start with a role definition (你是一个...)
- Update README to reflect the new approach and API endpoints

Technical details:
- Added refine_based_on_selection() in prompt_utils.py
- Added refine_instruction_candidates() in user_prompt_optimizer.py
- Added OPRORefineReq model and /opro/refine endpoint in api.py
- Updated frontend handleContinueOptimize() to use the new refinement flow
- Changed prompt requirements from 'different styles' to 'comprehensive coverage'
- Added the role definition requirement as the first item in all prompt templates
2025-12-08 09:43:20 +08:00
parent 602875b08c
commit 65cdcf29dc
5 changed files with 315 additions and 86 deletions
--- a/frontend/opro.html
+++ b/frontend/opro.html
@@ -304,41 +304,55 @@
                createNewRun(msg);
            }

-            async function handleContinueOptimize(selectedInstruction, selectedScore) {
-                if (!currentRunId || loading) return;
+            async function handleContinueOptimize(selectedInstruction, allCandidates) {
+                if (!currentRunId || loading || !selectedInstruction) return;

-                // First, evaluate the selected instruction to add it to trajectory
-                if (selectedInstruction) {
-                    setLoading(true);
-                    try {
-                        // Add the selected instruction to trajectory
-                        const res = await fetch(`${API_BASE}/opro/evaluate`, {
-                            method: 'POST',
-                            headers: { 'Content-Type': 'application/json' },
-                            body: JSON.stringify({
-                                run_id: currentRunId,
-                                instruction: selectedInstruction
-                            })
-                        });
-                        const data = await res.json();
+                setLoading(true);
+                try {
+                    // Get rejected instructions (all except the selected one)
+                    const rejectedInstructions = allCandidates
+                        .map(c => c.instruction)
+                        .filter(inst => inst !== selectedInstruction);

-                        if (!data.success) {
-                            throw new Error(data.error || 'Failed to evaluate instruction');
-                        }
+                    // Call the refinement endpoint
+                    const res = await fetch(`${API_BASE}/opro/refine`, {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({
+                            run_id: currentRunId,
+                            selected_instruction: selectedInstruction,
+                            rejected_instructions: rejectedInstructions
+                        })
+                    });
+                    const data = await res.json();

-                        console.log('Evaluated instruction, score:', data.data.score);
-                    } catch (err) {
-                        alert('评估指令失败: ' + err.message);
-                        console.error('Error evaluating instruction:', err);
-                        setLoading(false);
-                        return;
-                    } finally {
-                        setLoading(false);
+                    if (!data.success) {
+                        throw new Error(data.error || 'Failed to refine instruction');
                    }
-                }

-                // Then generate new candidates based on updated trajectory
-                await generateCandidates(currentRunId);
+                    // Add refined candidates to messages
+                    const newMessage = {
+                        role: 'assistant',
+                        type: 'candidates',
+                        iteration: data.data.iteration,
+                        candidates: data.data.candidates
+                    };
+
+                    setMessages(prev => {
+                        const updated = [...prev, newMessage];
+                        // Save to session messages
+                        setSessionMessages(prevSessions => ({
+                            ...prevSessions,
+                            [currentSessionId]: updated
+                        }));
+                        return updated;
+                    });
+                } catch (err) {
+                    alert('优化失败: ' + err.message);
+                    console.error('Error refining instruction:', err);
+                } finally {
+                    setLoading(false);
+                }
            }

            function handleExecute(instruction) {
@@ -537,7 +551,7 @@
                                                        ),
                                                        React.createElement('div', { className: 'flex gap-2' },
                                                            React.createElement('button', {
-                                                                onClick: () => handleContinueOptimize(cand.instruction, cand.score),
+                                                                onClick: () => handleContinueOptimize(cand.instruction, msg.candidates),
                                                                disabled: loading,
                                                                className: 'px-4 py-2 bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 disabled:bg-gray-100 disabled:text-gray-400 disabled:cursor-not-allowed transition-colors text-sm font-medium'
                                                            }, '继续优化'),