More Ollama fixes
CI Test Guard / validate-code (push) Has been cancelled

This commit is contained in:
Tyler
2026-05-14 21:55:19 -04:00
parent 323e7593ef
commit dab3f293a1
+102 -285
View File
@@ -1,11 +1,11 @@
import json, os, logging, base64, time, hashlib, codecs, re, http.client, ssl import json, os, logging, base64, time, hashlib, re, http.client, ssl
from urllib.parse import urlparse from urllib.parse import urlparse
from searx import network from searx import network
try: try:
from searx.network import get_network from searx.network import get_network
except ImportError: except ImportError:
get_network = None get_network = None
from flask import Response, request, abort, jsonify, stream_with_context from flask import Response, request, abort, jsonify
from searx.plugins import Plugin, PluginInfo from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults from searx.result_types import EngineResults
from searx import settings from searx import settings
@@ -537,8 +537,46 @@ FRONTEND_JS_TEMPLATE = r"""
return; return;
} }
const reader = res.body.getReader(); const respJson = await res.json();
const decoder = new TextDecoder();
if (respJson.error) {
const cursorErr = data.querySelector('.sxng-cursor');
if (cursorErr) cursorErr.remove();
const errSpan = document.createElement('span');
errSpan.style.color = '#bf616a';
errSpan.textContent = "⚠️ " + respJson.error;
data.appendChild(errSpan);
return;
}
const fullText = (respJson.text || '').trim();
if (!fullText) {
const cursorErr = data.querySelector('.sxng-cursor');
if (cursorErr) cursorErr.remove();
const errSpan = document.createElement('span');
errSpan.style.color = '#bf616a';
errSpan.textContent = 'No response received. Check API configuration and server logs.';
data.appendChild(errSpan);
return;
}
let mainText = fullText;
const thinkMatch = mainText.match(/^<think>([\s\S]*?)<\/think>\s*/);
if (thinkMatch) {
const cursorTh = data.querySelector('.sxng-cursor');
const details = document.createElement('details');
details.className = 'sxng-reasoning';
details.innerHTML = '<summary>Thought Process</summary>';
const thoughtDiv = document.createElement('div');
thoughtDiv.className = 'sxng-thought-content';
thoughtDiv.textContent = thinkMatch[1];
details.appendChild(thoughtDiv);
if (cursorTh) cursorTh.before(details);
else data.appendChild(details);
mainText = mainText.substring(thinkMatch[0].length);
}
let cursor = data.querySelector('.sxng-cursor'); let cursor = data.querySelector('.sxng-cursor');
if (!cursor) { if (!cursor) {
cursor = document.createElement('span'); cursor = document.createElement('span');
@@ -546,10 +584,6 @@ FRONTEND_JS_TEMPLATE = r"""
data.appendChild(cursor); data.appendChild(cursor);
} }
let started = false;
let collectedResponse = '';
let isThinking = false, thoughtDiv = null;
let buffer = ''; let buffer = '';
const flushBuffer = (force = false) => { const flushBuffer = (force = false) => {
if (!buffer) return; if (!buffer) return;
@@ -611,105 +645,23 @@ FRONTEND_JS_TEMPLATE = r"""
} }
}; };
let streamBuffer = ''; let twPos = 0;
while (true) { const twBatch = 4;
const {done, value} = await reader.read(); await new Promise(resolve => {
if (done) break; function twTick() {
if (twPos >= mainText.length) {
clearTimeout(timeoutId); flushBuffer(true);
timeoutId = setTimeout(() => controller.abort(), 60000); resolve();
return;
const chunk = decoder.decode(value, {stream: true});
if (!chunk) continue;
streamBuffer += chunk;
if (streamBuffer.match(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/)) {
continue;
}
while (true) {
const openIdx = streamBuffer.indexOf('<think>');
const closeIdx = streamBuffer.indexOf('</think>');
if (openIdx === -1 && closeIdx === -1) break;
if (!isThinking) {
if (openIdx !== -1 && (closeIdx === -1 || openIdx < closeIdx)) {
const preTag = streamBuffer.substring(0, openIdx);
if (preTag) {
if (!started) {
const trimmed = preTag.replace(/^[\s.,;:!?]+/, '');
if (trimmed || collectedResponse.trim()) {
if (cursor && !cursor.isConnected) data.appendChild(cursor);
started = true;
}
}
if (started) {
buffer += preTag;
flushBuffer(false);
}
collectedResponse += preTag;
}
isThinking = true;
const details = document.createElement('details');
details.className = 'sxng-reasoning';
details.innerHTML = '<summary>Thought Process</summary>';
thoughtDiv = document.createElement('div');
thoughtDiv.className = 'sxng-thought-content';
details.appendChild(thoughtDiv);
(cursor ? cursor.before(details) : data.appendChild(details));
streamBuffer = streamBuffer.substring(openIdx + 7);
} else {
streamBuffer = streamBuffer.replace('</think>', '');
}
} else {
if (closeIdx !== -1 && (openIdx === -1 || closeIdx < openIdx)) {
const thoughtText = streamBuffer.substring(0, closeIdx);
if (thoughtDiv) thoughtDiv.textContent += thoughtText;
isThinking = false;
streamBuffer = streamBuffer.substring(closeIdx + 8);
} else {
streamBuffer = streamBuffer.replace('<think>', '');
}
} }
const end = Math.min(twPos + twBatch, mainText.length);
buffer += mainText.substring(twPos, end);
twPos = end;
flushBuffer(false);
setTimeout(twTick, 8);
} }
twTick();
if (streamBuffer.length > 0) { });
if (isThinking && thoughtDiv) {
thoughtDiv.textContent += streamBuffer;
} else {
if (!started) {
const trimmed = streamBuffer.replace(/^[\s.,;:!?]+/, '');
if (trimmed || collectedResponse.trim()) {
if (cursor && !cursor.isConnected) data.appendChild(cursor);
started = true;
}
}
if (started) {
buffer += streamBuffer;
flushBuffer(false);
}
collectedResponse += streamBuffer;
}
streamBuffer = '';
}
}
if (streamBuffer.length > 0) {
streamBuffer = streamBuffer.replace(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/, '');
if (streamBuffer.length > 0) {
if (isThinking && thoughtDiv) {
thoughtDiv.textContent += streamBuffer;
} else {
buffer += streamBuffer;
collectedResponse += streamBuffer;
}
}
}
flushBuffer(true);
if (cursor) cursor.remove(); if (cursor) cursor.remove();
@@ -725,21 +677,7 @@ FRONTEND_JS_TEMPLATE = r"""
} }
} }
if (!started && !collectedResponse.trim()) { const collectedResponse = mainText;
const cursor = data.querySelector('.sxng-cursor');
if (cursor) cursor.remove();
const errSpan = document.createElement('span');
if (thoughtDiv && thoughtDiv.textContent.trim().length > 0) {
errSpan.style.color = '#ebcb8b';
errSpan.textContent = 'Model provided reasoning but stopped before the final answer. Try adjusting token limits.';
} else {
errSpan.style.color = '#bf616a';
errSpan.textContent = 'No response received. Check API configuration and server logs.';
}
data.appendChild(errSpan);
return;
}
__INTERACTIVE_JS_COMPLETE__ __INTERACTIVE_JS_COMPLETE__
@@ -1163,103 +1101,48 @@ class SXNGPlugin(Plugin):
{numbered_instructions} {numbered_instructions}
</CORE_DIRECTIVES>""" </CORE_DIRECTIVES>"""
def stream_gemini(): def call_gemini():
yield "" base = self.endpoint_url.replace('streamGenerateContent', 'generateContent')
if '?' in self.endpoint_url: url = f"{base}&key={self.api_key}" if '?' in base else f"{base}?key={self.api_key}"
url = f"{self.endpoint_url}&key={self.api_key}"
else:
url = f"{self.endpoint_url}?key={self.api_key}"
conn = None conn = None
try: try:
conn, path = _get_streaming_connection(url) conn, path = _get_streaming_connection(url)
payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}}) payload = json.dumps({
"contents": [{"parts": [{"text": prompt}]}],
"generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}
})
conn.request("POST", path, body=payload.encode('utf-8'), headers={"Content-Type": "application/json"}) conn.request("POST", path, body=payload.encode('utf-8'), headers={"Content-Type": "application/json"})
res = conn.getresponse() res = conn.getresponse()
if res.status != 200: if res.status != 200:
body = res.read(2048).decode('utf-8', errors='replace')[:500] body = res.read(2048).decode('utf-8', errors='replace')[:500]
logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}: {body}") logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}: {body}")
yield f"\n⚠️ API error {res.status}. Check server logs.\n" return '', f"API error {res.status}. Check server logs."
return obj = json.loads(res.read().decode('utf-8', errors='replace'))
if obj.get('promptFeedback', {}).get('blockReason'):
decoder = json.JSONDecoder() return '', f"Gemini blocked prompt: {obj['promptFeedback']['blockReason']}"
utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace') candidates = obj.get('candidates', [])
buffer = "" if not candidates:
while True: return '', "No candidates in Gemini response."
chunk = res.read(STREAM_CHUNK_SIZE) first = candidates[0]
if not chunk: if first.get('finishReason') == 'SAFETY':
buffer += utf8_decoder.decode(b'', final=True) return '', "Gemini stopped generation due to safety filters."
break parts = first.get('content', {}).get('parts', [])
buffer += utf8_decoder.decode(chunk) text = ''.join(p.get('text', '') for p in parts if isinstance(p, dict))
while buffer: return text, None
buffer = buffer.lstrip()
if buffer.startswith('['):
buffer = buffer[1:].lstrip()
elif buffer.startswith(','):
buffer = buffer[1:].lstrip()
elif buffer.startswith(']'):
buffer = buffer[1:].lstrip()
if not buffer: break
try:
obj, idx = decoder.raw_decode(buffer)
items = obj if isinstance(obj, list) else [obj]
for item in items:
if not isinstance(item, dict):
continue
if 'promptFeedback' in item and item['promptFeedback'].get('blockReason'):
yield f"\n⚠️ Gemini blocked prompt. Reason: {item['promptFeedback']['blockReason']}\n"
return
candidates = item.get('candidates')
if not isinstance(candidates, list) or len(candidates) == 0:
continue
first_candidate = candidates[0]
if not isinstance(first_candidate, dict):
continue
if first_candidate.get('finishReason') == 'SAFETY':
yield "\n⚠️ Gemini stopped generation due to safety filters.\n"
return
content = first_candidate.get('content')
if not isinstance(content, dict):
continue
parts = content.get('parts')
if not isinstance(parts, list) or len(parts) == 0:
continue
first_part = parts[0]
if isinstance(first_part, dict):
text = first_part.get('text')
if text and isinstance(text, str):
yield text
buffer = buffer[idx:]
except json.JSONDecodeError:
break
except Exception as parse_err:
logger.debug(f"{PLUGIN_NAME}: Ignored malformed Gemini chunk. Error: {parse_err}")
break
except Exception as e: except Exception as e:
logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}", exc_info=True) logger.error(f"{PLUGIN_NAME}: Gemini call error: {e}", exc_info=True)
yield f"\n⚠️ Connection Error: {e}\n" return '', f"Connection Error: {e}"
finally: finally:
if conn: conn.close() if conn: conn.close()
def stream_openai_compatible(): def call_openai_compatible():
yield ""
conn = None conn = None
try: try:
conn, path = _get_streaming_connection(self.endpoint_url) conn, path = _get_streaming_connection(self.endpoint_url)
payload_dict = { payload_dict = {
"model": effective_model, "model": effective_model,
"messages": [{"role": "user", "content": prompt}], "messages": [{"role": "user", "content": prompt}],
"stream": True, "stream": False,
"max_tokens": self.max_tokens, "max_tokens": self.max_tokens,
"temperature": self.temperature "temperature": self.temperature
} }
@@ -1268,7 +1151,6 @@ class SXNGPlugin(Plugin):
payload = json.dumps(payload_dict) payload = json.dumps(payload_dict)
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"Accept": "text/event-stream",
"HTTP-Referer": "https://github.com/searxng/searxng", "HTTP-Referer": "https://github.com/searxng/searxng",
"X-Title": "SearXNG" "X-Title": "SearXNG"
} }
@@ -1278,101 +1160,36 @@ class SXNGPlugin(Plugin):
headers['Authorization'] = f"Bearer {self.api_key}" headers['Authorization'] = f"Bearer {self.api_key}"
conn.request("POST", path, body=payload.encode('utf-8'), headers=headers) conn.request("POST", path, body=payload.encode('utf-8'), headers=headers)
res = conn.getresponse() res = conn.getresponse()
if res.status != 200: if res.status != 200:
body = res.read(2048).decode('utf-8', errors='replace')[:500] body = res.read(2048).decode('utf-8', errors='replace')[:500]
logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}: {body}") logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}: {body}")
yield f"\n⚠️ API error {res.status}. Check server logs.\n" return '', f"API error {res.status}. Check server logs."
return obj = json.loads(res.read().decode('utf-8', errors='replace'))
if "error" in obj:
decoder = json.JSONDecoder() err = obj["error"]
in_reasoning_block = False msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
return '', f"API Error: {msg}"
while True: choices = obj.get("choices", [])
line_bytes = res.readline() if not choices:
if not line_bytes: break return '', "No choices in API response."
message = choices[0].get("message", {})
line = line_bytes.decode('utf-8', errors='replace').strip() content = message.get("content") or ""
if not line: reasoning = message.get("reasoning_content") or ""
continue full = (f"<think>\n{reasoning}\n</think>\n\n" if reasoning else "") + content
return full, None
if line.startswith("data: "):
data_str = line[6:].strip()
if data_str == "[DONE]":
if in_reasoning_block:
yield "\n</think>\n\n"
return
try:
obj, _ = decoder.raw_decode(data_str)
if not isinstance(obj, dict):
continue
# Catch upstream errors
if "error" in obj:
err_msg = obj["error"].get("message", str(obj["error"])) if isinstance(obj["error"], dict) else str(obj["error"])
yield f"\n⚠️ API Error: {err_msg}\n"
return
choices = obj.get("choices")
if not isinstance(choices, list) or len(choices) == 0:
continue
choice = choices[0]
if not isinstance(choice, dict):
continue
delta = choice.get("delta")
if not isinstance(delta, dict):
continue
reasoning = delta.get("reasoning_content")
content = delta.get("content")
if reasoning and isinstance(reasoning, str):
if not in_reasoning_block:
yield "<think>\n"
in_reasoning_block = True
yield reasoning
if content and isinstance(content, str):
if in_reasoning_block:
yield "\n</think>\n\n"
in_reasoning_block = False
yield content
except json.JSONDecodeError:
pass
except Exception as parse_err:
logger.debug(f"{PLUGIN_NAME}: Ignored malformed OpenAI chunk. Error: {parse_err}")
pass
if in_reasoning_block:
yield "\n</think>\n\n"
except Exception as e: except Exception as e:
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}", exc_info=True) logger.error(f"{PLUGIN_NAME}: {self.provider} call error: {e}", exc_info=True)
yield f"\n⚠️ Connection Error: {e}\n" return '', f"Connection Error: {e}"
finally: finally:
if conn: conn.close() if conn: conn.close()
generator = stream_gemini if self.is_gemini else stream_openai_compatible call_fn = call_gemini if self.is_gemini else call_openai_compatible
text, error = call_fn()
if self.provider == 'ollama' and getattr(self, 'ollama_unload_after', False): if self.provider == 'ollama' and getattr(self, 'ollama_unload_after', False):
self._ollama_unload_model()
gen_fn = generator return jsonify({"text": text, "error": error})
def generator():
try:
yield from gen_fn()
finally:
self._ollama_unload_model()
return Response(stream_with_context(generator()), mimetype='text/event-stream', headers={
'X-Accel-Buffering': 'no',
'Cache-Control': 'no-cache, no-store',
'Connection': 'keep-alive'
})
return True return True
def _assemble_context(self, clean_results, infoboxes, answers, offset=0) -> tuple[str, list]: def _assemble_context(self, clean_results, infoboxes, answers, offset=0) -> tuple[str, list]: