From dab3f293a14ba7a533e58192ab40dd1fd588fbd3 Mon Sep 17 00:00:00 2001 From: Tyler <68524461+TySP-Dev@users.noreply.github.com> Date: Thu, 14 May 2026 21:55:19 -0400 Subject: [PATCH] More ollama fixes --- ai_answers.py | 387 +++++++++++++------------------------------------- 1 file changed, 102 insertions(+), 285 deletions(-) diff --git a/ai_answers.py b/ai_answers.py index 915d6f1..eb2778f 100644 --- a/ai_answers.py +++ b/ai_answers.py @@ -1,11 +1,11 @@ -import json, os, logging, base64, time, hashlib, codecs, re, http.client, ssl +import json, os, logging, base64, time, hashlib, re, http.client, ssl from urllib.parse import urlparse from searx import network try: from searx.network import get_network except ImportError: get_network = None -from flask import Response, request, abort, jsonify, stream_with_context +from flask import Response, request, abort, jsonify from searx.plugins import Plugin, PluginInfo from searx.result_types import EngineResults from searx import settings @@ -537,8 +537,46 @@ FRONTEND_JS_TEMPLATE = r""" return; } - const reader = res.body.getReader(); - const decoder = new TextDecoder(); + const respJson = await res.json(); + + if (respJson.error) { + const cursorErr = data.querySelector('.sxng-cursor'); + if (cursorErr) cursorErr.remove(); + const errSpan = document.createElement('span'); + errSpan.style.color = '#bf616a'; + errSpan.textContent = "⚠️ " + respJson.error; + data.appendChild(errSpan); + return; + } + + const fullText = (respJson.text || '').trim(); + + if (!fullText) { + const cursorErr = data.querySelector('.sxng-cursor'); + if (cursorErr) cursorErr.remove(); + const errSpan = document.createElement('span'); + errSpan.style.color = '#bf616a'; + errSpan.textContent = 'No response received. Check API configuration and server logs.'; + data.appendChild(errSpan); + return; + } + + let mainText = fullText; + const thinkMatch = mainText.match(/^([\s\S]*?)<\/think>\s*/); + if (thinkMatch) { + const cursorTh = data.querySelector('.sxng-cursor'); + const details = document.createElement('details'); + details.className = 'sxng-reasoning'; + details.innerHTML = 'Thought Process'; + const thoughtDiv = document.createElement('div'); + thoughtDiv.className = 'sxng-thought-content'; + thoughtDiv.textContent = thinkMatch[1]; + details.appendChild(thoughtDiv); + if (cursorTh) cursorTh.before(details); + else data.appendChild(details); + mainText = mainText.substring(thinkMatch[0].length); + } + let cursor = data.querySelector('.sxng-cursor'); if (!cursor) { cursor = document.createElement('span'); @@ -546,10 +584,6 @@ FRONTEND_JS_TEMPLATE = r""" data.appendChild(cursor); } - let started = false; - let collectedResponse = ''; - let isThinking = false, thoughtDiv = null; - let buffer = ''; const flushBuffer = (force = false) => { if (!buffer) return; @@ -611,106 +645,24 @@ FRONTEND_JS_TEMPLATE = r""" } }; - let streamBuffer = ''; - while (true) { - const {done, value} = await reader.read(); - if (done) break; - - clearTimeout(timeoutId); - timeoutId = setTimeout(() => controller.abort(), 60000); - - const chunk = decoder.decode(value, {stream: true}); - if (!chunk) continue; - - streamBuffer += chunk; - - if (streamBuffer.match(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/)) { - continue; - } - - while (true) { - const openIdx = streamBuffer.indexOf(''); - const closeIdx = streamBuffer.indexOf(''); - - if (openIdx === -1 && closeIdx === -1) break; - - if (!isThinking) { - if (openIdx !== -1 && (closeIdx === -1 || openIdx < closeIdx)) { - const preTag = streamBuffer.substring(0, openIdx); - if (preTag) { - if (!started) { - const trimmed = preTag.replace(/^[\s.,;:!?]+/, ''); - if (trimmed || collectedResponse.trim()) { - if (cursor && !cursor.isConnected) data.appendChild(cursor); - started = true; - } - } - if (started) { - buffer += preTag; - flushBuffer(false); - } - collectedResponse += preTag; - } - isThinking = true; - const details = document.createElement('details'); - details.className = 'sxng-reasoning'; - details.innerHTML = 'Thought Process'; - thoughtDiv = document.createElement('div'); - thoughtDiv.className = 'sxng-thought-content'; - details.appendChild(thoughtDiv); - (cursor ? cursor.before(details) : data.appendChild(details)); - - streamBuffer = streamBuffer.substring(openIdx + 7); - } else { - streamBuffer = streamBuffer.replace('', ''); - } - } else { - if (closeIdx !== -1 && (openIdx === -1 || closeIdx < openIdx)) { - const thoughtText = streamBuffer.substring(0, closeIdx); - if (thoughtDiv) thoughtDiv.textContent += thoughtText; - isThinking = false; - streamBuffer = streamBuffer.substring(closeIdx + 8); - } else { - streamBuffer = streamBuffer.replace('', ''); - } + let twPos = 0; + const twBatch = 4; + await new Promise(resolve => { + function twTick() { + if (twPos >= mainText.length) { + flushBuffer(true); + resolve(); + return; } + const end = Math.min(twPos + twBatch, mainText.length); + buffer += mainText.substring(twPos, end); + twPos = end; + flushBuffer(false); + setTimeout(twTick, 8); } + twTick(); + }); - if (streamBuffer.length > 0) { - if (isThinking && thoughtDiv) { - thoughtDiv.textContent += streamBuffer; - } else { - if (!started) { - const trimmed = streamBuffer.replace(/^[\s.,;:!?]+/, ''); - if (trimmed || collectedResponse.trim()) { - if (cursor && !cursor.isConnected) data.appendChild(cursor); - started = true; - } - } - if (started) { - buffer += streamBuffer; - flushBuffer(false); - } - collectedResponse += streamBuffer; - } - streamBuffer = ''; - } - } - - if (streamBuffer.length > 0) { - streamBuffer = streamBuffer.replace(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/, ''); - if (streamBuffer.length > 0) { - if (isThinking && thoughtDiv) { - thoughtDiv.textContent += streamBuffer; - } else { - buffer += streamBuffer; - collectedResponse += streamBuffer; - } - } - } - - flushBuffer(true); - if (cursor) cursor.remove(); let last = data.lastChild; @@ -725,21 +677,7 @@ FRONTEND_JS_TEMPLATE = r""" } } - if (!started && !collectedResponse.trim()) { - const cursor = data.querySelector('.sxng-cursor'); - if (cursor) cursor.remove(); - - const errSpan = document.createElement('span'); - if (thoughtDiv && thoughtDiv.textContent.trim().length > 0) { - errSpan.style.color = '#ebcb8b'; - errSpan.textContent = 'Model provided reasoning but stopped before the final answer. Try adjusting token limits.'; - } else { - errSpan.style.color = '#bf616a'; - errSpan.textContent = 'No response received. Check API configuration and server logs.'; - } - data.appendChild(errSpan); - return; - } + const collectedResponse = mainText; __INTERACTIVE_JS_COMPLETE__ @@ -1163,103 +1101,48 @@ class SXNGPlugin(Plugin): {numbered_instructions} """ - def stream_gemini(): - yield "​" - if '?' in self.endpoint_url: - url = f"{self.endpoint_url}&key={self.api_key}" - else: - url = f"{self.endpoint_url}?key={self.api_key}" - + def call_gemini(): + base = self.endpoint_url.replace('streamGenerateContent', 'generateContent') + url = f"{base}&key={self.api_key}" if '?' in base else f"{base}?key={self.api_key}" conn = None try: conn, path = _get_streaming_connection(url) - payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}}) + payload = json.dumps({ + "contents": [{"parts": [{"text": prompt}]}], + "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature} + }) conn.request("POST", path, body=payload.encode('utf-8'), headers={"Content-Type": "application/json"}) res = conn.getresponse() - if res.status != 200: body = res.read(2048).decode('utf-8', errors='replace')[:500] logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}: {body}") - yield f"\n⚠️ API error {res.status}. Check server logs.\n" - return - - decoder = json.JSONDecoder() - utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace') - buffer = "" - while True: - chunk = res.read(STREAM_CHUNK_SIZE) - if not chunk: - buffer += utf8_decoder.decode(b'', final=True) - break - buffer += utf8_decoder.decode(chunk) - while buffer: - buffer = buffer.lstrip() - if buffer.startswith('['): - buffer = buffer[1:].lstrip() - elif buffer.startswith(','): - buffer = buffer[1:].lstrip() - elif buffer.startswith(']'): - buffer = buffer[1:].lstrip() - - if not buffer: break - try: - obj, idx = decoder.raw_decode(buffer) - items = obj if isinstance(obj, list) else [obj] - for item in items: - if not isinstance(item, dict): - continue - - if 'promptFeedback' in item and item['promptFeedback'].get('blockReason'): - yield f"\n⚠️ Gemini blocked prompt. Reason: {item['promptFeedback']['blockReason']}\n" - return - - candidates = item.get('candidates') - if not isinstance(candidates, list) or len(candidates) == 0: - continue - - first_candidate = candidates[0] - if not isinstance(first_candidate, dict): - continue - - if first_candidate.get('finishReason') == 'SAFETY': - yield "\n⚠️ Gemini stopped generation due to safety filters.\n" - return - - content = first_candidate.get('content') - if not isinstance(content, dict): - continue - - parts = content.get('parts') - if not isinstance(parts, list) or len(parts) == 0: - continue - - first_part = parts[0] - if isinstance(first_part, dict): - text = first_part.get('text') - if text and isinstance(text, str): - yield text - - buffer = buffer[idx:] - except json.JSONDecodeError: - break - except Exception as parse_err: - logger.debug(f"{PLUGIN_NAME}: Ignored malformed Gemini chunk. Error: {parse_err}") - break + return '', f"API error {res.status}. Check server logs." + obj = json.loads(res.read().decode('utf-8', errors='replace')) + if obj.get('promptFeedback', {}).get('blockReason'): + return '', f"Gemini blocked prompt: {obj['promptFeedback']['blockReason']}" + candidates = obj.get('candidates', []) + if not candidates: + return '', "No candidates in Gemini response." + first = candidates[0] + if first.get('finishReason') == 'SAFETY': + return '', "Gemini stopped generation due to safety filters." + parts = first.get('content', {}).get('parts', []) + text = ''.join(p.get('text', '') for p in parts if isinstance(p, dict)) + return text, None except Exception as e: - logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}", exc_info=True) - yield f"\n⚠️ Connection Error: {e}\n" + logger.error(f"{PLUGIN_NAME}: Gemini call error: {e}", exc_info=True) + return '', f"Connection Error: {e}" finally: if conn: conn.close() - def stream_openai_compatible(): - yield "​" + def call_openai_compatible(): conn = None try: conn, path = _get_streaming_connection(self.endpoint_url) payload_dict = { "model": effective_model, "messages": [{"role": "user", "content": prompt}], - "stream": True, + "stream": False, "max_tokens": self.max_tokens, "temperature": self.temperature } @@ -1268,7 +1151,6 @@ class SXNGPlugin(Plugin): payload = json.dumps(payload_dict) headers = { "Content-Type": "application/json", - "Accept": "text/event-stream", "HTTP-Referer": "https://github.com/searxng/searxng", "X-Title": "SearXNG" } @@ -1278,101 +1160,36 @@ class SXNGPlugin(Plugin): headers['Authorization'] = f"Bearer {self.api_key}" conn.request("POST", path, body=payload.encode('utf-8'), headers=headers) res = conn.getresponse() - if res.status != 200: body = res.read(2048).decode('utf-8', errors='replace')[:500] logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}: {body}") - yield f"\n⚠️ API error {res.status}. Check server logs.\n" - return - - decoder = json.JSONDecoder() - in_reasoning_block = False - - while True: - line_bytes = res.readline() - if not line_bytes: break - - line = line_bytes.decode('utf-8', errors='replace').strip() - if not line: - continue - - if line.startswith("data: "): - data_str = line[6:].strip() - if data_str == "[DONE]": - if in_reasoning_block: - yield "\n\n\n" - return - try: - obj, _ = decoder.raw_decode(data_str) - if not isinstance(obj, dict): - continue - - # Catch upstream errors - if "error" in obj: - err_msg = obj["error"].get("message", str(obj["error"])) if isinstance(obj["error"], dict) else str(obj["error"]) - yield f"\n⚠️ API Error: {err_msg}\n" - return - - choices = obj.get("choices") - if not isinstance(choices, list) or len(choices) == 0: - continue - - choice = choices[0] - if not isinstance(choice, dict): - continue - - delta = choice.get("delta") - if not isinstance(delta, dict): - continue - - reasoning = delta.get("reasoning_content") - content = delta.get("content") - - if reasoning and isinstance(reasoning, str): - if not in_reasoning_block: - yield "\n" - in_reasoning_block = True - yield reasoning - - if content and isinstance(content, str): - if in_reasoning_block: - yield "\n\n\n" - in_reasoning_block = False - yield content - except json.JSONDecodeError: - pass - except Exception as parse_err: - logger.debug(f"{PLUGIN_NAME}: Ignored malformed OpenAI chunk. Error: {parse_err}") - pass - - if in_reasoning_block: - yield "\n\n\n" + return '', f"API error {res.status}. Check server logs." + obj = json.loads(res.read().decode('utf-8', errors='replace')) + if "error" in obj: + err = obj["error"] + msg = err.get("message", str(err)) if isinstance(err, dict) else str(err) + return '', f"API Error: {msg}" + choices = obj.get("choices", []) + if not choices: + return '', "No choices in API response." + message = choices[0].get("message", {}) + content = message.get("content") or "" + reasoning = message.get("reasoning_content") or "" + full = (f"\n{reasoning}\n\n\n" if reasoning else "") + content + return full, None except Exception as e: - logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}", exc_info=True) - yield f"\n⚠️ Connection Error: {e}\n" + logger.error(f"{PLUGIN_NAME}: {self.provider} call error: {e}", exc_info=True) + return '', f"Connection Error: {e}" finally: if conn: conn.close() - generator = stream_gemini if self.is_gemini else stream_openai_compatible + call_fn = call_gemini if self.is_gemini else call_openai_compatible + text, error = call_fn() if self.provider == 'ollama' and getattr(self, 'ollama_unload_after', False): + self._ollama_unload_model() - gen_fn = generator - - def generator(): - - try: - - yield from gen_fn() - - finally: - - self._ollama_unload_model() - return Response(stream_with_context(generator()), mimetype='text/event-stream', headers={ - 'X-Accel-Buffering': 'no', - 'Cache-Control': 'no-cache, no-store', - 'Connection': 'keep-alive' - }) + return jsonify({"text": text, "error": error}) return True def _assemble_context(self, clean_results, infoboxes, answers, offset=0) -> tuple[str, list]: