More Ollama fixes: replace streamed LLM responses with a single JSON response

2026-05-14 21:55:19 -04:00
parent 323e7593ef
commit dab3f293a1
1 changed files with 102 additions and 285 deletions
@@ -1,11 +1,11 @@
-import json, os, logging, base64, time, hashlib, codecs, re, http.client, ssl
+import json, os, logging, base64, time, hashlib, re, http.client, ssl
 from urllib.parse import urlparse
 from searx import network
 try:
    from searx.network import get_network
 except ImportError:
    get_network = None
-from flask import Response, request, abort, jsonify, stream_with_context
+from flask import Response, request, abort, jsonify
 from searx.plugins import Plugin, PluginInfo
 from searx.result_types import EngineResults
 from searx import settings
@@ -537,8 +537,46 @@ FRONTEND_JS_TEMPLATE = r"""
                return;
            }
-            const reader = res.body.getReader();
+            const respJson = await res.json();
-            const decoder = new TextDecoder();
+
            if (respJson.error) {
                const cursorErr = data.querySelector('.sxng-cursor');
                if (cursorErr) cursorErr.remove();
                const errSpan = document.createElement('span');
                errSpan.style.color = '#bf616a';
                errSpan.textContent = "⚠️ " + respJson.error;
                data.appendChild(errSpan);
                return;
            }
            const fullText = (respJson.text || '').trim();
            if (!fullText) {
                const cursorErr = data.querySelector('.sxng-cursor');
                if (cursorErr) cursorErr.remove();
                const errSpan = document.createElement('span');
                errSpan.style.color = '#bf616a';
                errSpan.textContent = 'No response received. Check API configuration and server logs.';
                data.appendChild(errSpan);
                return;
            }
            let mainText = fullText;
            const thinkMatch = mainText.match(/^<think>([\s\S]*?)<\/think>\s*/);
            if (thinkMatch) {
                const cursorTh = data.querySelector('.sxng-cursor');
                const details = document.createElement('details');
                details.className = 'sxng-reasoning';
                details.innerHTML = '<summary>Thought Process</summary>';
                const thoughtDiv = document.createElement('div');
                thoughtDiv.className = 'sxng-thought-content';
                thoughtDiv.textContent = thinkMatch[1];
                details.appendChild(thoughtDiv);
                if (cursorTh) cursorTh.before(details);
                else data.appendChild(details);
                mainText = mainText.substring(thinkMatch[0].length);
            }
            let cursor = data.querySelector('.sxng-cursor');
            if (!cursor) {
                cursor = document.createElement('span');
@@ -546,10 +584,6 @@ FRONTEND_JS_TEMPLATE = r"""
                data.appendChild(cursor);
            }
            let started = false;
            let collectedResponse = '';
            let isThinking = false, thoughtDiv = null;
            let buffer = '';
            const flushBuffer = (force = false) => {
                if (!buffer) return;
@@ -611,105 +645,23 @@ FRONTEND_JS_TEMPLATE = r"""
                }
            };
-            let streamBuffer = '';
+            let twPos = 0;
-            while (true) {
+            const twBatch = 4;
-                const {done, value} = await reader.read();
+            await new Promise(resolve => {
-                if (done) break;
+                function twTick() {
-
+                    if (twPos >= mainText.length) {
-                clearTimeout(timeoutId);
+                        flushBuffer(true);
-                timeoutId = setTimeout(() => controller.abort(), 60000);
+                        resolve();
-
+                        return;
                const chunk = decoder.decode(value, {stream: true});
                if (!chunk) continue;
                streamBuffer += chunk;
                if (streamBuffer.match(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/)) {
                    continue; 
                }
                while (true) {
                    const openIdx = streamBuffer.indexOf('<think>');
                    const closeIdx = streamBuffer.indexOf('</think>');
                    if (openIdx === -1 && closeIdx === -1) break;
                    if (!isThinking) {
                        if (openIdx !== -1 && (closeIdx === -1 || openIdx < closeIdx)) {
                            const preTag = streamBuffer.substring(0, openIdx);
                            if (preTag) {
                                if (!started) {
                                    const trimmed = preTag.replace(/^[\s.,;:!?]+/, '');
                                    if (trimmed || collectedResponse.trim()) {
                                        if (cursor && !cursor.isConnected) data.appendChild(cursor);
                                        started = true;
                                    }
                                }
                                if (started) {
                                    buffer += preTag;
                                    flushBuffer(false);
                                }
                                collectedResponse += preTag;
                            }
                            isThinking = true;
                            const details = document.createElement('details');
                            details.className = 'sxng-reasoning';
                            details.innerHTML = '<summary>Thought Process</summary>';
                            thoughtDiv = document.createElement('div');
                            thoughtDiv.className = 'sxng-thought-content';
                            details.appendChild(thoughtDiv);
                            (cursor ? cursor.before(details) : data.appendChild(details));
                            streamBuffer = streamBuffer.substring(openIdx + 7);
                        } else {
                            streamBuffer = streamBuffer.replace('</think>', '');
                        }
                    } else {
                        if (closeIdx !== -1 && (openIdx === -1 || closeIdx < openIdx)) {
                            const thoughtText = streamBuffer.substring(0, closeIdx);
                            if (thoughtDiv) thoughtDiv.textContent += thoughtText;
                            isThinking = false;
                            streamBuffer = streamBuffer.substring(closeIdx + 8);
                        } else {
                            streamBuffer = streamBuffer.replace('<think>', '');
                        }
                    }
                    const end = Math.min(twPos + twBatch, mainText.length);
                    buffer += mainText.substring(twPos, end);
                    twPos = end;
                    flushBuffer(false);
                    setTimeout(twTick, 8);
                }
-
+                twTick();
-                if (streamBuffer.length > 0) {
+            });
                    if (isThinking && thoughtDiv) {
                        thoughtDiv.textContent += streamBuffer;
                    } else {
                        if (!started) {
                            const trimmed = streamBuffer.replace(/^[\s.,;:!?]+/, '');
                            if (trimmed || collectedResponse.trim()) {
                                if (cursor && !cursor.isConnected) data.appendChild(cursor);
                                started = true;
                            }
                        }
                        if (started) {
                            buffer += streamBuffer;
                            flushBuffer(false);
                        }
                        collectedResponse += streamBuffer; 
                    }
                    streamBuffer = '';
                }
            }
            if (streamBuffer.length > 0) {
                streamBuffer = streamBuffer.replace(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/, '');
                if (streamBuffer.length > 0) {
                    if (isThinking && thoughtDiv) {
                        thoughtDiv.textContent += streamBuffer;
                    } else {
                        buffer += streamBuffer;
                        collectedResponse += streamBuffer;
                    }
                }
            }
            flushBuffer(true);
            if (cursor) cursor.remove();
@@ -725,21 +677,7 @@ FRONTEND_JS_TEMPLATE = r"""
                }
            }
-            if (!started && !collectedResponse.trim()) {
+            const collectedResponse = mainText;
                const cursor = data.querySelector('.sxng-cursor');
                if (cursor) cursor.remove();
                const errSpan = document.createElement('span');
                if (thoughtDiv && thoughtDiv.textContent.trim().length > 0) {
                    errSpan.style.color = '#ebcb8b';
                    errSpan.textContent = 'Model provided reasoning but stopped before the final answer. Try adjusting token limits.';
                } else {
                    errSpan.style.color = '#bf616a';
                    errSpan.textContent = 'No response received. Check API configuration and server logs.';
                }
                data.appendChild(errSpan);
                return;
            }
            __INTERACTIVE_JS_COMPLETE__
@@ -1163,103 +1101,48 @@ class SXNGPlugin(Plugin):
 {numbered_instructions}
 </CORE_DIRECTIVES>"""
-            def stream_gemini():
+            def call_gemini():
-                yield ""
+                base = self.endpoint_url.replace('streamGenerateContent', 'generateContent')
-                if '?' in self.endpoint_url:
+                url = f"{base}&key={self.api_key}" if '?' in base else f"{base}?key={self.api_key}"
                    url = f"{self.endpoint_url}&key={self.api_key}"
                else:
                    url = f"{self.endpoint_url}?key={self.api_key}"
                conn = None
                try:
                    conn, path = _get_streaming_connection(url)
-                    payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}})
+                    payload = json.dumps({
                        "contents": [{"parts": [{"text": prompt}]}],
                        "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}
                    })
                    conn.request("POST", path, body=payload.encode('utf-8'), headers={"Content-Type": "application/json"})
                    res = conn.getresponse()
                    if res.status != 200:
                        body = res.read(2048).decode('utf-8', errors='replace')[:500]
                        logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}: {body}")
-                        yield f"\n⚠️ API error {res.status}. Check server logs.\n"
+                        return '', f"API error {res.status}. Check server logs."
-                        return
+                    obj = json.loads(res.read().decode('utf-8', errors='replace'))
-
+                    if obj.get('promptFeedback', {}).get('blockReason'):
-                    decoder = json.JSONDecoder()
+                        return '', f"Gemini blocked prompt: {obj['promptFeedback']['blockReason']}"
-                    utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
+                    candidates = obj.get('candidates', [])
-                    buffer = ""
+                    if not candidates:
-                    while True:
+                        return '', "No candidates in Gemini response."
-                        chunk = res.read(STREAM_CHUNK_SIZE)
+                    first = candidates[0]
-                        if not chunk: 
+                    if first.get('finishReason') == 'SAFETY':
-                            buffer += utf8_decoder.decode(b'', final=True)
+                        return '', "Gemini stopped generation due to safety filters."
-                            break
+                    parts = first.get('content', {}).get('parts', [])
-                        buffer += utf8_decoder.decode(chunk)
+                    text = ''.join(p.get('text', '') for p in parts if isinstance(p, dict))
-                        while buffer:
+                    return text, None
                            buffer = buffer.lstrip()
                            if buffer.startswith('['):
                                buffer = buffer[1:].lstrip()
                            elif buffer.startswith(','):
                                buffer = buffer[1:].lstrip()
                            elif buffer.startswith(']'):
                                buffer = buffer[1:].lstrip()
                            if not buffer: break
                            try:
                                obj, idx = decoder.raw_decode(buffer)
                                items = obj if isinstance(obj, list) else [obj]
                                for item in items:
                                    if not isinstance(item, dict):
                                        continue
                                    if 'promptFeedback' in item and item['promptFeedback'].get('blockReason'):
                                        yield f"\n⚠️ Gemini blocked prompt. Reason: {item['promptFeedback']['blockReason']}\n"
                                        return
                                    candidates = item.get('candidates')
                                    if not isinstance(candidates, list) or len(candidates) == 0:
                                        continue
                                    first_candidate = candidates[0]
                                    if not isinstance(first_candidate, dict):
                                        continue
                                    if first_candidate.get('finishReason') == 'SAFETY':
                                        yield "\n⚠️ Gemini stopped generation due to safety filters.\n"
                                        return
                                    content = first_candidate.get('content')
                                    if not isinstance(content, dict):
                                        continue
                                    parts = content.get('parts')
                                    if not isinstance(parts, list) or len(parts) == 0:
                                        continue
                                    first_part = parts[0]
                                    if isinstance(first_part, dict):
                                        text = first_part.get('text')
                                        if text and isinstance(text, str):
                                            yield text
                                buffer = buffer[idx:]
                            except json.JSONDecodeError: 
                                break
                            except Exception as parse_err:
                                logger.debug(f"{PLUGIN_NAME}: Ignored malformed Gemini chunk. Error: {parse_err}")
                                break
                except Exception as e:
-                    logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}", exc_info=True)
+                    logger.error(f"{PLUGIN_NAME}: Gemini call error: {e}", exc_info=True)
-                    yield f"\n⚠️ Connection Error: {e}\n"
+                    return '', f"Connection Error: {e}"
                finally:
                    if conn: conn.close()
-            def stream_openai_compatible():
+            def call_openai_compatible():
                yield ""
                conn = None
                try:
                    conn, path = _get_streaming_connection(self.endpoint_url)
                    payload_dict = {
                        "model": effective_model,
                        "messages": [{"role": "user", "content": prompt}],
-                        "stream": True,
+                        "stream": False,
                        "max_tokens": self.max_tokens,
                        "temperature": self.temperature
                    }
@@ -1268,7 +1151,6 @@ class SXNGPlugin(Plugin):
                    payload = json.dumps(payload_dict)
                    headers = {
                        "Content-Type": "application/json",
                        "Accept": "text/event-stream",
                        "HTTP-Referer": "https://github.com/searxng/searxng",
                        "X-Title": "SearXNG"
                    }
@@ -1278,101 +1160,36 @@ class SXNGPlugin(Plugin):
                        headers['Authorization'] = f"Bearer {self.api_key}"
                    conn.request("POST", path, body=payload.encode('utf-8'), headers=headers)
                    res = conn.getresponse()
                    if res.status != 200:
                        body = res.read(2048).decode('utf-8', errors='replace')[:500]
                        logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}: {body}")
-                        yield f"\n⚠️ API error {res.status}. Check server logs.\n"
+                        return '', f"API error {res.status}. Check server logs."
-                        return
+                    obj = json.loads(res.read().decode('utf-8', errors='replace'))
-
+                    if "error" in obj:
-                    decoder = json.JSONDecoder()
+                        err = obj["error"]
-                    in_reasoning_block = False
+                        msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
-                    
+                        return '', f"API Error: {msg}"
-                    while True:
+                    choices = obj.get("choices", [])
-                        line_bytes = res.readline()
+                    if not choices:
-                        if not line_bytes: break
+                        return '', "No choices in API response."
-                        
+                    message = choices[0].get("message", {})
-                        line = line_bytes.decode('utf-8', errors='replace').strip()
+                    content = message.get("content") or ""
-                        if not line: 
+                    reasoning = message.get("reasoning_content") or ""
-                            continue
+                    full = (f"<think>\n{reasoning}\n</think>\n\n" if reasoning else "") + content
-                            
+                    return full, None
                        if line.startswith("data: "):
                            data_str = line[6:].strip()
                            if data_str == "[DONE]":
                                if in_reasoning_block:
                                    yield "\n</think>\n\n"
                                return
                            try:
                                obj, _ = decoder.raw_decode(data_str)
                                if not isinstance(obj, dict):
                                    continue
                                # Catch upstream errors
                                if "error" in obj:
                                    err_msg = obj["error"].get("message", str(obj["error"])) if isinstance(obj["error"], dict) else str(obj["error"])
                                    yield f"\n⚠️ API Error: {err_msg}\n"
                                    return
                                choices = obj.get("choices")
                                if not isinstance(choices, list) or len(choices) == 0:
                                    continue
                                choice = choices[0]
                                if not isinstance(choice, dict):
                                    continue
                                delta = choice.get("delta")
                                if not isinstance(delta, dict):
                                    continue
                                reasoning = delta.get("reasoning_content")
                                content = delta.get("content")
                                if reasoning and isinstance(reasoning, str):
                                    if not in_reasoning_block:
                                        yield "<think>\n"
                                        in_reasoning_block = True
                                    yield reasoning
                                if content and isinstance(content, str):
                                    if in_reasoning_block:
                                        yield "\n</think>\n\n"
                                        in_reasoning_block = False
                                    yield content
                            except json.JSONDecodeError:
                                pass
                            except Exception as parse_err:
                                logger.debug(f"{PLUGIN_NAME}: Ignored malformed OpenAI chunk. Error: {parse_err}")
                                pass
                    if in_reasoning_block:
                        yield "\n</think>\n\n"
                except Exception as e:
-                    logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}", exc_info=True)
+                    logger.error(f"{PLUGIN_NAME}: {self.provider} call error: {e}", exc_info=True)
-                    yield f"\n⚠️ Connection Error: {e}\n"
+                    return '', f"Connection Error: {e}"
                finally:
                    if conn: conn.close()
-            generator = stream_gemini if self.is_gemini else stream_openai_compatible
+            call_fn = call_gemini if self.is_gemini else call_openai_compatible
            text, error = call_fn()
            if self.provider == 'ollama' and getattr(self, 'ollama_unload_after', False):
                self._ollama_unload_model()
-                gen_fn = generator
+            return jsonify({"text": text, "error": error})
                def generator():
                    try:
                        yield from gen_fn()
                    finally:
                        self._ollama_unload_model()
            return Response(stream_with_context(generator()), mimetype='text/event-stream', headers={
                'X-Accel-Buffering': 'no',
                'Cache-Control': 'no-cache, no-store',
                'Connection': 'keep-alive'
            })
        return True
    def _assemble_context(self, clean_results, infoboxes, answers, offset=0) -> tuple[str, list]: