From d0c29f4b34be13c8ee073bfa2921b1ad4036902e Mon Sep 17 00:00:00 2001 From: cra88y/pc Date: Sat, 2 May 2026 21:01:37 -0500 Subject: [PATCH] variety of changes: custom system prompt, some bugs, dev qol --- .github/workflows/validate.yml | 75 ++++ README.md | 4 +- ai_answers.py | 754 ++++++++++++++++++++------------- tests/test.py | 383 ----------------- 4 files changed, 543 insertions(+), 673 deletions(-) create mode 100644 .github/workflows/validate.yml delete mode 100644 tests/test.py diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml new file mode 100644 index 0000000..5c298d6 --- /dev/null +++ b/.github/workflows/validate.yml @@ -0,0 +1,75 @@ +name: CI Test Guard + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + validate-code: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Set up Node.js (For JS Validation) + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Python Linters + run: | + python -m pip install --upgrade pip + pip install flake8 + + - name: Python Syntax Check + run: python -m py_compile ai_answers.py + + - name: Python Undefined Variable Check + run: flake8 ai_answers.py --select=E9,F63,F7,F82 --show-source + + - name: JavaScript Extraction & Syntax Check + run: | + python -c ' + import re, sys + with open("ai_answers.py", "r", encoding="utf-8") as f: + content = f.read() + + match = re.search(r"FRONTEND_JS_TEMPLATE\s*=\s*r\"\"\"(.*?)\"\"\"", content, re.DOTALL) + if not match: + print("Could not find FRONTEND_JS_TEMPLATE") + sys.exit(1) + + js_code = match.group(1) + + replacements = { + "__IS_INTERACTIVE__": "true", + "__JS_Q__": "\"dummy_query\"", + "__JS_LANG__": "\"en\"", + "__JS_URLS__": "[]", + "__B64_CONTEXT__": "\"YmFzZTY0\"", + "__TK__": "\"dummy_token\"", + "__SCRIPT_ROOT__": "\"/searxng\"", + "__CITATION_HELPER_JS__": "/* citation helper */", + "__INTERACTIVE_JS_INIT__": "/* init */", + "__STREAM_FN_SIG__": "async function startStream(overrideQ = null, prevAnswer = null, auxContext = null)", + "__STREAM_Q__": "\"dummy_q\"", + "__STREAM_BODY__": "", + "__INTERACTIVE_JS_COMPLETE__": "/* complete */" + } + + for key, val in replacements.items(): + js_code = js_code.replace(key, val) + + with open("frontend_test.js", "w", encoding="utf-8") as f: + f.write(js_code) + ' + + node --check frontend_test.js \ No newline at end of file diff --git a/README.md b/README.md index 9ce9d76..4c6e075 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Configure via the environment variables: - `LLM_MODEL`: Model identifier. Defaults vary. Recommended: 10-30B dense or 5-15B MoE activated. - `LLM_URL`: Overrides endpoint URL for any provider preset. +- `LLM_SYSTEM_PROMPT`: Overrides some of the system prompt. Default `You are a direct, citation-accurate search synthesis engine.`. - `LLM_MAX_TOKENS`: Default `500`. - `LLM_TEMPERATURE`: Default `0.2`. - `LLM_CONTEXT_DEEP_COUNT`: results as context with full snippets. Default `5`. @@ -105,6 +106,5 @@ LLM_MODEL=meta-llama/Meta-Llama-3-8B-Instruct ```bash pip install flask flask-babel -python tests/demo.py # Interactive demo at localhost:5000 -python tests/test.py # One-shot test suite +python tests/demo.py # UI demo at localhost:5000 ``` diff --git a/ai_answers.py b/ai_answers.py index 472d813..12819af 100644 --- a/ai_answers.py +++ b/ai_answers.py @@ -8,13 +8,14 @@ except ImportError: from flask import Response, request, abort, jsonify from searx.plugins import Plugin, PluginInfo from searx.result_types import EngineResults +from searx import settings from flask_babel import gettext from markupsafe import Markup logger = logging.getLogger(__name__) TOKEN_EXPIRY_SEC = 3600 -STREAM_CHUNK_SIZE = 128 +STREAM_CHUNK_SIZE = 4096 # Increased from 128 for I/O efficiency STREAM_TIMEOUT_SEC = 60 def _get_streaming_connection(url: str): @@ -55,7 +56,7 @@ PROVIDER_PRESETS = { 'huggingface': {'url': 'https://api-inference.huggingface.co/models/{model}/v1/chat/completions', 'model': 'meta-llama/Meta-Llama-3-8B-Instruct'} } -# UI assets (inlined for single-file install) +# UI assets INTERACTIVE_CSS = ''' @keyframes sxng-fade-in-up { @@ -163,6 +164,15 @@ INTERACTIVE_CSS = ''' } .sxng-input-submit svg { width: 18px; height: 18px; fill: currentColor; } .sxng-input-submit svg { width: 18px; height: 18px; fill: currentColor; } + .sxng-reasoning { + margin: 0.5rem 0; padding: 0.5rem; + border-left: 2px solid var(--color-result-link, #5e81ac); + background: var(--color-base-background-hover, rgba(0,0,0,0.03)); + font-size: 0.85rem; opacity: 0.7; transition: opacity 0.2s; + } + .sxng-reasoning:hover { opacity: 1; } + .sxng-reasoning summary { cursor: pointer; font-weight: bold; color: var(--color-result-link, #5e81ac); } + .sxng-thought-content { margin-top: 0.5rem; white-space: pre-wrap; font-family: monospace; } ''' INTERACTIVE_HTML = ''' @@ -240,9 +250,9 @@ CITATION_HELPER_JS = r''' INTERACTIVE_JS = r''' const footer = document.getElementById('sxng-footer'); const input = document.getElementById('sxng-action-input'); - // Inherited from outer scope: box, data, conversation + // Closure inheritance: box, data, conversation references injected from outer scope. - // Theme detection: inherit host accent color + // Dynamic theme propagation: extract and bind host CSS variables for UI cohesion. if (window.getComputedStyle && box) { try { const docStyles = getComputedStyle(document.documentElement); @@ -260,7 +270,7 @@ INTERACTIVE_JS = r''' } catch(e) {} } - // Persist conversation state to URL hash (stateless) + // Stateless persistence: encode conversation matrix as base64 URL fragment. const updateState = () => { try { const state = { @@ -292,7 +302,7 @@ INTERACTIVE_JS = r''' ts: 0 })); - // Helper function to inject citations into text + // Citation rendering proxy const injectCitations = (text) => { return renderCitations(text, urls); }; @@ -310,7 +320,7 @@ INTERACTIVE_JS = r''' data.appendChild(clr); } } else { - // Inject citations for assistant responses + // Execute citation routing for synthesized payload data.appendChild(injectCitations(turn.content)); } }); @@ -372,10 +382,10 @@ INTERACTIVE_JS = r''' const synthesized = synthesizeQuery(q_init, val); let auxContext = null; try { - const auxData = await fetch('/ai-auxiliary-search', { + const auxData = await fetch(script_root + '/ai-auxiliary-search', { method: 'POST', headers: {'Content-Type': 'application/json'}, - body: JSON.stringify({query: synthesized, lang: lang_init, offset: urls.length}) + body: JSON.stringify({query: synthesized, lang: lang_init, offset: urls.length, tk: tk_init}) }).then(r => r.json()); if (auxData.context) { const originalBackground = conversation.originalContext.substring(0, 1500); @@ -416,6 +426,327 @@ INTERACTIVE_JS = r''' }; ''' +FRONTEND_JS_TEMPLATE = r""" +(async () => { + const is_interactive = __IS_INTERACTIVE__; + const q_init = __JS_Q__; + const lang_init = __JS_LANG__; + let urls = __JS_URLS__; + const b64_init = __B64_CONTEXT__; + const tk_init = __TK__; + const script_root = __SCRIPT_ROOT__; + const conversation = { + originalQuery: q_init, + originalContext: new TextDecoder().decode(Uint8Array.from(atob(b64_init), c => c.charCodeAt(0))), + originalSources: [...urls], + turns: [{role: 'user', content: q_init, ts: Date.now()}] + }; + const box = document.getElementById('sxng-stream-box'); + const data = document.getElementById('sxng-stream-data'); + const wrapper = box.closest('.answer'); + if (wrapper) wrapper.style.display = 'none'; + let restored = false; + let isStreaming = false; + + __CITATION_HELPER_JS__ + + __INTERACTIVE_JS_INIT__ + + function synthesizeQuery(original, followup) { + // Strip deterministic NLP prefixes to isolate primary entities + const cleanOrig = original.replace(/^(what|how|why|when|where|who|which|is|are|can|does|do)(\s+(is|are|do|does|can|to|a|an|the))?\s+/i, ''); + const origWords = cleanOrig.split(' ').slice(0, 12); + return `${origWords.join(' ')} ${followup}`.trim(); + } + + __STREAM_FN_SIG__ { + if (isStreaming) { + console.warn('[AI Answers] Stream already in progress, ignoring duplicate call'); + return; + } + + isStreaming = true; + try { + const ctx = auxContext || conversation.originalContext; + if (wrapper) wrapper.style.display = ''; + box.style.display = 'block'; + + const controller = new AbortController(); + let timeoutId = setTimeout(() => controller.abort(), 60000); + const finalQ = __STREAM_Q__; + + const bodyObj = { q: finalQ, lang: lang_init, context: ctx, tk: tk_init__STREAM_BODY__ }; + const res = await fetch(script_root + '/ai-stream', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(bodyObj), + signal: controller.signal + }); + + clearTimeout(timeoutId); + if (!res.ok) { + const errSpan = document.createElement('span'); + errSpan.style.color = '#bf616a'; + errSpan.textContent = "Error: " + res.statusText; + data.appendChild(errSpan); + return; + } + + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let cursor = data.querySelector('.sxng-cursor'); + if (!cursor) { + cursor = document.createElement('span'); + cursor.className = 'sxng-cursor'; + data.appendChild(cursor); + } + + let started = false; + let pendingSpace = ''; + let lastScrollKick = 0; + let collectedResponse = ''; + let isThinking = false, thoughtDiv = null; + + let buffer = ''; + const flushBuffer = (force = false) => { + if (!buffer) return; + + if (force) { + const fragment = renderCitations(buffer, urls); + if (cursor) cursor.before(fragment); + else data.appendChild(fragment); + buffer = ''; + return; + } + + while (true) { + const match = buffer.match(/(\[\d+(?:,\s*\d+)*\])/); + + if (!match) break; + + const preText = buffer.substring(0, match.index); + if (preText) { + const s = document.createElement('span'); + s.className = 'sxng-chunk'; + s.textContent = preText; + cursor.before(s); + } + + const citationText = match[0]; + const fragment = renderCitations(citationText, urls); + cursor.before(fragment); + + buffer = buffer.substring(match.index + match[0].length); + } + + const openIdx = buffer.lastIndexOf('['); + if (openIdx === -1) { + if (buffer) { + const s = document.createElement('span'); + s.className = 'sxng-chunk'; + s.textContent = buffer; + cursor.before(s); + buffer = ''; + } + } else { + const safeChunk = buffer.substring(0, openIdx); + if (safeChunk) { + const s = document.createElement('span'); + s.className = 'sxng-chunk'; + s.textContent = safeChunk; + cursor.before(s); + } + buffer = buffer.substring(openIdx); + + if (buffer.length > 50) { + const s = document.createElement('span'); + s.className = 'sxng-chunk'; + s.textContent = buffer[0]; + cursor.before(s); + buffer = buffer.substring(1); + } + } + }; + + let streamBuffer = ''; + while (true) { + const {done, value} = await reader.read(); + if (done) break; + + clearTimeout(timeoutId); + timeoutId = setTimeout(() => controller.abort(), 60000); + + const chunk = decoder.decode(value, {stream: true}); + if (!chunk) continue; + + streamBuffer += chunk; + + // Truncation suspension: prevent evaluation of fragmented SGML tags at chunk boundaries. + if (streamBuffer.match(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/)) { + continue; + } + + // Deterministic tag extraction: mitigate infinite recursion on malformed stream boundaries. + while (true) { + const openIdx = streamBuffer.indexOf(''); + const closeIdx = streamBuffer.indexOf(''); + + if (openIdx === -1 && closeIdx === -1) break; + + if (!isThinking) { + if (openIdx !== -1 && (closeIdx === -1 || openIdx < closeIdx)) { + const preTag = streamBuffer.substring(0, openIdx); + if (preTag) { + if (!started) { + const trimmed = preTag.replace(/^[\s.,;:!?]+/, ''); + if (trimmed || collectedResponse.trim()) { + if (cursor && !cursor.isConnected) data.appendChild(cursor); + started = true; + } + } + if (started) { + buffer += preTag; + flushBuffer(false); + } + collectedResponse += preTag; + } + isThinking = true; + const details = document.createElement('details'); + details.className = 'sxng-reasoning'; + details.innerHTML = 'Thought Process'; + thoughtDiv = document.createElement('div'); + thoughtDiv.className = 'sxng-thought-content'; + details.appendChild(thoughtDiv); + (cursor ? cursor.before(details) : data.appendChild(details)); + + streamBuffer = streamBuffer.substring(openIdx + 7); + } else { + // Recover from hallucinated tag boundaries without blocking execution. + streamBuffer = streamBuffer.replace('', ''); + } + } else { + if (closeIdx !== -1 && (openIdx === -1 || closeIdx < openIdx)) { + const thoughtText = streamBuffer.substring(0, closeIdx); + if (thoughtDiv) thoughtDiv.textContent += thoughtText; + isThinking = false; + streamBuffer = streamBuffer.substring(closeIdx + 8); + } else { + // Drop anomalous nested tag states. + streamBuffer = streamBuffer.replace('', ''); + } + } + } + + // Evaluate remainder of validated buffer + if (streamBuffer.length > 0) { + if (isThinking && thoughtDiv) { + thoughtDiv.textContent += streamBuffer; + } else { + if (!started) { + const trimmed = streamBuffer.replace(/^[\s.,;:!?]+/, ''); + if (trimmed || collectedResponse.trim()) { + if (cursor && !cursor.isConnected) data.appendChild(cursor); + started = true; + } + } + if (started) { + buffer += streamBuffer; + flushBuffer(false); + } + // Guarantee absolute isolation between reasoning output and presentation payload. + collectedResponse += streamBuffer; + } + streamBuffer = ''; // Flush consumed buffer chunk + } + + const now = Date.now(); + if (now - lastScrollKick > 500) { + lastScrollKick = now; + void window.getComputedStyle(data).opacity; + } + } + + // Reconcile and flush suspended artifacts trailing an abruptly terminated stream. + if (streamBuffer.length > 0) { + // Strip invalid partial SGML fragments. + streamBuffer = streamBuffer.replace(/<\/?(?:t(?:h(?:i(?:n(?:k)?)?)?)?)?$/, ''); + if (streamBuffer.length > 0) { + if (isThinking && thoughtDiv) { + thoughtDiv.textContent += streamBuffer; + } else { + buffer += streamBuffer; + collectedResponse += streamBuffer; + } + } + } + + // Finalize remaining character outputs. + flushBuffer(true); + + if (cursor) cursor.remove(); + + // Dom-tree cleanup: trim residual whitespace nodes. + let last = data.lastChild; + while (last) { + if (last.textContent && last.textContent.trim().length === 0) { + const prev = last.previousSibling; + last.remove(); + last = prev; + } else { + if (last.textContent) last.textContent = last.textContent.trimEnd(); + break; + } + } + + if (!started && !collectedResponse.trim()) { + const cursor = data.querySelector('.sxng-cursor'); + if (cursor) cursor.remove(); + const errSpan = document.createElement('span'); + errSpan.style.color = '#bf616a'; + errSpan.textContent = 'No response received. Check API configuration and server logs.'; + data.appendChild(errSpan); + return; + } + + __INTERACTIVE_JS_COMPLETE__ + + if (collectedResponse) { + conversation.turns.push({role: 'assistant', content: collectedResponse.trim(), ts: Date.now()}); + } + + } catch (e) { + console.error('[AI Answers] Fatal stream exception:', e); + const errSpan = document.createElement('span'); + errSpan.style.cssText = 'color: #bf616a; font-weight: bold; display: block; margin-top: 0.5rem;'; + + if (e.name === 'AbortError') { + errSpan.textContent = "⚠️ Connection to AI provider timed out."; + } else { + errSpan.textContent = "⚠️ AI Widget encountered a fatal error. Check browser console."; + } + + if (data) { + const cursor = data.querySelector('.sxng-cursor'); + if (cursor) cursor.remove(); + data.appendChild(errSpan); + } + } finally { + // Deallocate stream lock state unconditionally. + isStreaming = false; + } + } + + // Initialize background connection warmup execution. + fetch(script_root + '/ai-stream', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({warmup: true}), + keepalive: true + }).catch(() => {}); + + if (!restored) startStream(); +})(); +""" import typing if typing.TYPE_CHECKING: @@ -439,10 +770,6 @@ class SXNGPlugin(Plugin): def _ollama_unload_model(self) -> None: - """ - Force-unload an Ollama model after a response by calling the native /api/chat endpoint: - {"model": "...", "messages": [], "keep_alive": 0} - """ try: if self.provider != 'ollama': return @@ -455,18 +782,18 @@ class SXNGPlugin(Plugin): conn = None try: conn, path = _get_streaming_connection(unload_url) + conn.timeout = 2.0 payload = json.dumps({ "model": self.model, "messages": [], "keep_alive": 0 }) headers = {"Content-Type": "application/json"} - # Optional: if Ollama is behind auth, reuse LLM_KEY if self.api_key and self.api_key not in ('none', 'ollama'): headers["Authorization"] = f"Bearer {self.api_key}" conn.request("POST", path, body=payload, headers=headers) res = conn.getresponse() - res.read() # drain + res.read() if res.status >= 400: logger.warning(f"{PLUGIN_NAME}: Ollama unload failed: {res.status} {res.reason}") finally: @@ -496,7 +823,7 @@ class SXNGPlugin(Plugin): elif 'huggingface.co' in url_lower: raw_provider = 'huggingface' else: - # Unknown URL fallback to OpenAI-compatible + # fallback to OpenAI-compatible raw_provider = 'openai' logger.info(f"{PLUGIN_NAME}: Using OpenAI-compatible mode for custom URL") @@ -521,20 +848,24 @@ class SXNGPlugin(Plugin): self.model = os.getenv('LLM_MODEL', preset['model']).strip() try: - self.max_tokens = int(os.getenv('LLM_MAX_TOKENS', 500)) + self.max_tokens = max(1, int(os.getenv('LLM_MAX_TOKENS', 500))) except ValueError: + logger.warning(f"{PLUGIN_NAME}: Invalid LLM_MAX_TOKENS value. Enforcing default (500).") self.max_tokens = 500 try: self.temperature = float(os.getenv('LLM_TEMPERATURE', 0.2)) except ValueError: + logger.warning(f"{PLUGIN_NAME}: Invalid LLM_TEMPERATURE value. Enforcing default (0.2).") self.temperature = 0.2 try: self.context_deep_count = max(0, int(os.getenv('LLM_CONTEXT_DEEP_COUNT', 5))) except ValueError: + logger.warning(f"{PLUGIN_NAME}: Invalid LLM_CONTEXT_DEEP_COUNT value. Enforcing default (5).") self.context_deep_count = 5 try: self.context_shallow_count = max(0, int(os.getenv('LLM_CONTEXT_SHALLOW_COUNT', 15))) except ValueError: + logger.warning(f"{PLUGIN_NAME}: Invalid LLM_CONTEXT_SHALLOW_COUNT value. Enforcing default (15).") self.context_shallow_count = 15 self.allowed_tabs = set(t.strip() for t in os.getenv('LLM_TABS', DEFAULT_TABS).split(',')) @@ -548,9 +879,6 @@ class SXNGPlugin(Plugin): raw_url = f"https://{raw_url}" self.endpoint_url = raw_url - - # Ollama: optional "unload after response" (frees VRAM between queries). - # Enable with: LLM_OLLAMA_UNLOAD_AFTER=true self.ollama_unload_after = os.getenv('LLM_OLLAMA_UNLOAD_AFTER', 'false').lower().strip() in ('true', '1', 'yes', 'on') self.ollama_unload_url = '' if self.provider == 'ollama' and self.ollama_unload_after: @@ -563,10 +891,10 @@ class SXNGPlugin(Plugin): self.ollama_unload_url = f"{scheme}://{netloc}/api/chat" except Exception: self.ollama_unload_url = "http://localhost:11434/api/chat" - if self.api_key: - self.secret = os.getenv('SXNG_LLM_SECRET') or hashlib.sha256(self.api_key.encode()).hexdigest() - else: - self.secret = os.getenv('SXNG_LLM_SECRET', '') + server_secret = settings.get('server', {}).get('secret_key', '') + self.secret = hashlib.sha256(f"ai_answers_{server_secret}".encode()).hexdigest() + + self.system_prompt = os.getenv('LLM_SYSTEM_PROMPT', '').strip() def _parse_aux_results(self, raw_results, raw_infoboxes, raw_answers): results = [] @@ -598,13 +926,15 @@ class SXNGPlugin(Plugin): 'attributes': ib.get('attributes', []) }) - # Only extract simple Answer types (skip Translations, WeatherAnswer etc.) answers = [] for a in list(raw_answers)[:2]: + ans_text = "" if hasattr(a, 'answer') and isinstance(getattr(a, 'answer', None), str): - answers.append(a.answer) + ans_text = a.answer elif isinstance(a, dict) and a.get('answer'): - answers.append(str(a['answer'])) + ans_text = str(a['answer']) + if ans_text and 'id="sxng-stream-box"' not in ans_text and not ans_text.strip().startswith('<'): + answers.append(ans_text) return results, infoboxes, answers @@ -620,6 +950,16 @@ class SXNGPlugin(Plugin): abort(403) data = request.json or {} + token = data.get('tk', '') + + # Cryptographic Access Control + try: + ts, sig = token.rsplit('.', 1) + expected = hashlib.sha256(f"{ts}{self.secret}".encode()).hexdigest() + if sig != expected or (time.time() - float(ts)) > TOKEN_EXPIRY_SEC: + abort(403) + except (ValueError, KeyError, AttributeError): + abort(403) query = data.get('query', '').strip() lang = data.get('lang', 'all') categories = data.get('categories', 'general') @@ -627,7 +967,6 @@ class SXNGPlugin(Plugin): if not query: return jsonify({'results': []}) - # Direct kernel access (bypasses HTTP loopback) try: from searx.search import SearchWithPlugins from searx.search.models import SearchQuery @@ -649,7 +988,6 @@ class SXNGPlugin(Plugin): lang=lang, pageno=1, ) - # Empty plugins list prevents recursion search_obj = SearchWithPlugins(sq, request, user_plugins=[]) result_container = search_obj.search() @@ -708,9 +1046,11 @@ class SXNGPlugin(Plugin): 'query': query }) except Exception as e: - return jsonify({'results': [], 'error': str(e)}) + logger.error(f"{PLUGIN_NAME}: Auxiliary search HTTP fallback failed: {e}") + return jsonify({'results': [], 'error': str(e)}), 500 except Exception as e: - return jsonify({'results': [], 'error': str(e)}) + logger.error(f"{PLUGIN_NAME}: Auxiliary search loopback failed: {e}") + return jsonify({'results': [], 'error': str(e)}), 500 @app.route('/ai-stream', methods=['POST']) def handle_ai_stream(): @@ -740,7 +1080,8 @@ class SXNGPlugin(Plugin): target_words = int(self.max_tokens * 0.4) lang_instruction = f" Respond in {lang}." if lang not in ('all', 'auto') else "" - SYSTEM = f"You are a direct, citation-accurate search synthesis engine. Today is {today}.{lang_instruction}" + base_sys = self.system_prompt if self.system_prompt else "You are a direct, citation-accurate search synthesis engine." + SYSTEM = f"{base_sys} Today is {today}.{lang_instruction}" max_source_idx = 0 if context_text: indices = re.findall(r'\[(\d+)\]', context_text) @@ -799,7 +1140,7 @@ class SXNGPlugin(Plugin): conn = None try: conn, path = _get_streaming_connection(url) - payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature, "stopSequences": [""]}}) + payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature, "stopSequences": [""]}}) conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"}) res = conn.getresponse() @@ -810,13 +1151,23 @@ class SXNGPlugin(Plugin): return decoder = json.JSONDecoder() + utf8_decoder = codecs.getincrementaldecoder('utf-8')(errors='replace') buffer = "" while True: chunk = res.read(STREAM_CHUNK_SIZE) - if not chunk: break - buffer += chunk.decode('utf-8', errors='replace') + if not chunk: + buffer += utf8_decoder.decode(b'', final=True) + break + buffer += utf8_decoder.decode(chunk) while buffer: buffer = buffer.lstrip() + if buffer.startswith('['): + buffer = buffer[1:].lstrip() + elif buffer.startswith(','): + buffer = buffer[1:].lstrip() + elif buffer.startswith(']'): + buffer = buffer[1:].lstrip() + if not buffer: break try: obj, idx = decoder.raw_decode(buffer) @@ -844,7 +1195,7 @@ class SXNGPlugin(Plugin): "model": self.model, "messages": [{"role": "user", "content": prompt}], "stream": True, - "max_tokens": self.max_tokens, + "max_tokens": min(self.max_tokens * 4, 8192), # 4x headroom for reasoning models "temperature": self.temperature, "stop": [""] }) @@ -868,6 +1219,8 @@ class SXNGPlugin(Plugin): decoder = json.JSONDecoder() buffer = b"" + tokens_yielded = 0 + in_reasoning_block = False while True: chunk = res.read(STREAM_CHUNK_SIZE) if not chunk: break @@ -877,13 +1230,43 @@ class SXNGPlugin(Plugin): line = line_bytes.decode('utf-8', errors='replace') if line.startswith("data: "): data_str = line[6:].strip() - if data_str == "[DONE]": return + if data_str == "[DONE]": + if in_reasoning_block: + yield "\n\n\n" + if tokens_yielded == 0: + logger.warning(f"{PLUGIN_NAME}: Stream completed but yielded 0 tokens.") + return try: obj, _ = decoder.raw_decode(data_str) - content = obj.get("choices", [{}])[0].get("delta", {}).get("content", "") - if content: yield content - except json.JSONDecodeError: + choices = obj.get("choices", []) + choice = choices[0] if choices else {} + delta = choice.get("delta", {}) if isinstance(choice, dict) else {} + reasoning = delta.get("reasoning_content", "") + content = delta.get("content", "") + + if reasoning: + if not in_reasoning_block: + yield "\n" + in_reasoning_block = True + yield reasoning + tokens_yielded += 1 + + if content: + if in_reasoning_block: + yield "\n\n\n" + in_reasoning_block = False + yield content + tokens_yielded += 1 + except json.JSONDecodeError as e: + if data_str.strip(): + logger.debug(f"{PLUGIN_NAME}: Upstream JSON parse error: {e} | Payload: {data_str[:200]}") pass + + # automatically inject closure bounds upon upstream socket failure. + if in_reasoning_block: + yield "\n\n\n" + if tokens_yielded == 0: + logger.warning(f"{PLUGIN_NAME}: Stream disconnected abruptly and yielded 0 tokens.") except Exception as e: logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}") finally: @@ -891,9 +1274,6 @@ class SXNGPlugin(Plugin): generator = stream_gemini if self.is_gemini else stream_openai_compatible - - # Force-unload Ollama model after stream via keep_alive=0 - if self.provider == 'ollama' and getattr(self, 'ollama_unload_after', False): gen_fn = generator @@ -916,12 +1296,11 @@ class SXNGPlugin(Plugin): }) return True - def _assemble_context(self, raw_results, infoboxes, answers, offset=0) -> tuple[str, list]: + def _assemble_context(self, clean_results, infoboxes, answers, offset=0) -> tuple[str, list]: """Builds context string from normalized search data. Returns (context_str, urls).""" context_parts = [] result_urls = [] - # Knowledge graph knowledge_graph_lines = [] for ib in infoboxes: ib_name = ib.get('name', '') or ib.get('infobox', '') or ib.get('title', '') @@ -946,32 +1325,30 @@ class SXNGPlugin(Plugin): if knowledge_graph_lines: context_parts.append("KNOWLEDGE GRAPH:\n" + "\n".join(knowledge_graph_lines)) - # Deep sources: full content deep_lines = [] - for i, r in enumerate(raw_results[:self.context_deep_count]): - url = getattr(r, 'url', '') if hasattr(r, 'url') else r.get('url', '') + for i, r in enumerate(clean_results[:self.context_deep_count]): + url = r.get('url', '') result_urls.append(url) domain = urlparse(url).netloc.replace('www.', '') - date = getattr(r, 'publishedDate', '') if hasattr(r, 'publishedDate') else r.get('publishedDate') - date_str = f" ({date})" if date else "" - title = (getattr(r, 'title', '') if hasattr(r, 'title') else r.get('title') or "").replace('\n', ' ').strip() - content = str(getattr(r, 'content', '') if hasattr(r, 'content') else r.get('content', '')).replace('\n', ' ').strip()[:800] + date_str = f" ({r.get('publishedDate')})" if r.get('publishedDate') else "" + title = r.get('title', '').replace('\n', ' ').strip() + content = str(r.get('content', '')).replace('\n', ' ').strip()[:800] idx = i + 1 + offset deep_lines.append(f"[{idx}] {domain}{date_str}: {title}: {content}") if deep_lines: context_parts.append("DEEP SOURCES:\n" + "\n".join(deep_lines)) - # Shallow sources: headlines only + # Low-latency headline heuristics if self.context_shallow_count > 0: shallow_lines = [] start_idx = self.context_deep_count end_idx = self.context_deep_count + self.context_shallow_count - for i, r in enumerate(raw_results[start_idx:end_idx]): - url = getattr(r, 'url', '') if hasattr(r, 'url') else r.get('url', '') + for i, r in enumerate(clean_results[start_idx:end_idx]): + url = r.get('url', '') result_urls.append(url) domain = urlparse(url).netloc.replace('www.', '') - title = (getattr(r, 'title', '') if hasattr(r, 'title') else r.get('title') or '').replace('\n', ' ').strip()[:60] + title = r.get('title', '').replace('\n', ' ').strip()[:60] idx = i + 1 + start_idx + offset shallow_lines.append(f"[{idx}] {domain}: {title}") @@ -1003,9 +1380,8 @@ class SXNGPlugin(Plugin): raw_answers = getattr(search.result_container, 'answers', []) # Normalize for unified context assembly - _, infoboxes, answers = self._parse_aux_results(raw_results, raw_infoboxes, raw_answers) - context_str, _ = self._assemble_context(raw_results, infoboxes, answers) - + clean_results, infoboxes, answers = self._parse_aux_results(raw_results, raw_infoboxes, raw_answers) + context_str, _ = self._assemble_context(clean_results, infoboxes, answers) ts = str(int(time.time())) q_clean = search.search_query.query.strip() @@ -1013,11 +1389,21 @@ class SXNGPlugin(Plugin): sig = hashlib.sha256(f"{ts}{self.secret}".encode()).hexdigest() tk = f"{ts}.{sig}" + # XSS & Syntax Prevention: Safely serialize data for inline ''' search.result_container.answers.add(results.types.Answer(answer=Markup(html_payload))) except Exception as e: logger.error(f"{PLUGIN_NAME}: {e}") - return results - - - + return results \ No newline at end of file diff --git a/tests/test.py b/tests/test.py deleted file mode 100644 index ef6775c..0000000 --- a/tests/test.py +++ /dev/null @@ -1,383 +0,0 @@ -""" -AI Answers Plugin - Comprehensive Test -Test suite that verifies both 'interactive' and 'simple' modes, -checks configuration, and validates LLM integration. - -Usage: python test.py -Requires: pip install flask flask-babel python-dotenv -""" - -import os -import sys - -# Add parent directory to path to find ai_answers.py -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import re -import time -import logging -import subprocess -import tempfile -from types import ModuleType - -import warnings -warnings.filterwarnings("ignore", category=SyntaxWarning) - -# Suppress Flask noise during test -logging.getLogger('werkzeug').setLevel(logging.ERROR) -logging.basicConfig(level=logging.INFO, format='%(message)s') - -# --- MOCKS START --- - -# SearXNG module mocks -searx = ModuleType("searx") -searx_plugins = ModuleType("searx.plugins") -searx_results = ModuleType("searx.result_types") - -class MockPlugin: - def __init__(self, cfg): - self.active = getattr(cfg, 'active', True) - -class MockPluginInfo: - def __init__(self, **kwargs): - self.meta = kwargs - -class MockEngineResults: - def __init__(self): - self.types = ModuleType("types") - self.types.Answer = lambda *args, **kwargs: kwargs.get('answer', args[0] if args else "") - self._results = [] - - def add(self, res): - self._results.append(res) - - def get_ordered_results(self): - return self._results - -searx_plugins.Plugin = MockPlugin -searx_plugins.PluginInfo = MockPluginInfo -searx_results.EngineResults = MockEngineResults - -# Internal search API mocks -searx_search = ModuleType("searx.search") -searx_search_models = ModuleType("searx.search.models") -searx_query = ModuleType("searx.query") -searx_webadapter = ModuleType("searx.webadapter") - -class MockSearchWithPlugins: - def __init__(self, search_query, request, user_plugins): - self.search_query = search_query - self.result_container = MockEngineResults() - # Add some mock results - self.result_container.add({"title": "Mock Aux Result", "url": "https://test.com", "content": "Test content", "publishedDate": "2026"}) - - # Add mock infoboxes/answers - self.result_container.infoboxes = [{"infobox": "Test Box", "content": "Box Content", "attributes": []}] - self.result_container.answers = set() - self.result_container.answers_list = ["Test Answer"] # Simulating raw answers list if needed - - def search(self): - return self.result_container - -class MockSearchQuery: - def __init__(self, query, engineref_list, **kwargs): - self.query = query - -class MockRawTextQuery: - def __init__(self, query, disabled_engines): - self.query = query - def getQuery(self): - return self.query - -searx_search.SearchWithPlugins = MockSearchWithPlugins -searx_search.models = searx_search_models -searx_search_models.SearchQuery = MockSearchQuery -searx_query.RawTextQuery = MockRawTextQuery -searx_webadapter.get_engineref_from_category_list = lambda cats, disabled: [] - -sys.modules["searx.search"] = searx_search -sys.modules["searx.search.models"] = searx_search_models -sys.modules["searx.query"] = searx_query -sys.modules["searx.webadapter"] = searx_webadapter - -# Network module mock -searx_network = ModuleType("searx.network") -def mock_network_call(method, url, **kwargs): - import http.client, ssl, json - from urllib.parse import urlparse - - parsed = urlparse(url) - port = parsed.port or (443 if parsed.scheme=='https' else 80) - target = f"{parsed.hostname}:{port}" - - if parsed.scheme == 'https': - conn = http.client.HTTPSConnection(target, timeout=30, context=ssl.create_default_context()) - else: - conn = http.client.HTTPConnection(target, timeout=30) - - headers = kwargs.get('headers', {}) - body = None - if kwargs.get('json'): - body = json.dumps(kwargs['json']) - elif kwargs.get('data'): - body = kwargs['data'] - - path = parsed.path - if parsed.query: - path += f"?{parsed.query}" - - if kwargs.get('params'): - from urllib.parse import urlencode - query_str = urlencode(kwargs['params']) - if '?' in path: - path += f"&{query_str}" - else: - path += f"?{query_str}" - - conn.request(method, path, body=body, headers=headers) - print(f" [DEBUG] Network Call: {method} {target}{path}") - print(f" [DEBUG] Headers: {headers}") - # print(f" [DEBUG] Body: {body}") - return conn.getresponse() - -def mock_stream(method, url, **kwargs): - res = mock_network_call(method, url, **kwargs) - - class MockResponse: - def __init__(self, r): - self.status_code = r.status - self.text = "Mock Response" # Stub - self._r = r - - def generator(): - while True: - chunk = res.read(128) - if not chunk: break - yield chunk - - return MockResponse(res), generator() - -def mock_get(url, **kwargs): - import json - res = mock_network_call('GET', url, **kwargs) - - class MockResponse: - def __init__(self, r): - self.status_code = r.status - self._content = r.read() - self.text = self._content.decode('utf-8') - - def json(self): - return json.loads(self.text) - - return MockResponse(res) - -searx_network.stream = mock_stream -searx_network.get = mock_get -sys.modules["searx.network"] = searx_network - -sys.modules["searx"] = searx -sys.modules["searx.plugins"] = searx_plugins -sys.modules["searx.result_types"] = searx_results - -# --- MOCKS END --- - -from flask import Flask -from flask_babel import Babel -from ai_answers import SXNGPlugin - -def check_js_syntax(js_code): - """Returns (valid, error_msg)""" - try: - with tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8') as f: - f.write(js_code) - temp_path = f.name - - result = subprocess.run( - ['node', '--check', temp_path], - capture_output=True, - text=True, - timeout=5 - ) - os.unlink(temp_path) - - if result.returncode == 0: - return True, None - else: - return False, result.stderr.strip() - except (FileNotFoundError, subprocess.TimeoutExpired, Exception) as e: - return True, f"[SKIP] {e}" # Skip if node not found - -def run_tests(): - print("AI Answers - Test Suite\n") - - print("[Syntax]") - - import py_compile - try: - target_file = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'ai_answers.py') - py_compile.compile(target_file, doraise=True) - print(" Python: OK") - except py_compile.PyCompileError as e: - print(f" Syntax: [FAIL] {e}") - return False - - modes = ['interactive', 'simple'] - - for mode in modes: - app = Flask(__name__) - Babel(app) - - # Set LLM_INTERACTIVE based on mode - os.environ['LLM_INTERACTIVE'] = 'true' if mode == 'interactive' else 'false' - - # Override env var for this iteration - # os.environ['LLM_STYLE'] = mode # Legacy - os.environ['LLM_INTERACTIVE'] = 'true' if mode == 'interactive' else 'false' - - class MockConfig: - active = True - - # Re-init plugin with new env var in effect - plugin = SXNGPlugin(MockConfig()) - plugin.init(app) - - if mode == 'interactive': - print(f"\n[Config]") - print(f" Provider: {plugin.provider or 'NOT SET'}") - print(f" API Key: {'OK' if plugin.api_key else 'MISSING'}") - - # Construct Search - class MockSearchQuery: - pageno = 1 - query = "test query" - lang = 'en' - categories = ['general'] - - class MockSearch: - search_query = MockSearchQuery() - class MockResultContainer: - def __init__(self): - self.answers = set() - self.infoboxes = [] - def get_ordered_results(self): - return [ - {"title": "T1", "content": "C1", "url": "https://a.com/1", "publishedDate": "2026-01-15"}, - {"title": "T2", "content": "C2", "url": "https://a.com/2", "publishedDate": "2026-01-10"}, - ] - result_container = MockResultContainer() - - search = MockSearch() - plugin.post_search(None, search) - - if not search.result_container.answers: - print(" FAIL: No HTML injected") - return False - - html = str(list(search.result_container.answers)[0]) - - # Mode-specific basic validations - has_box = 'id="sxng-stream-box"' in html - has_footer = 'id="sxng-footer"' in html - - if mode == 'interactive': - if has_box and has_footer: - print("\n[Render: interactive]") - print(" UI: OK") - else: - print(f" FAIL: Box={has_box}, Footer={has_footer}") - return False - else: - if has_box and not has_footer: - print("\n[Render: simple]") - print(" UI: OK") - else: - print(f" FAIL: Box={has_box}, Footer={has_footer}") - return False - - # JS Verification - js_match = re.search(r'', html, re.DOTALL) - if not js_match: - print(" FAIL: No script tag found") - return False - - js_code = js_match.group(1).strip() - valid, err = check_js_syntax(js_code) - - if valid: - print(" JS: OK") - else: - print(" JS Syntax: [FAIL]") - print(f" Error: {err.splitlines()[0][:80]}...") - return False - - print(f" Size: {len(html):,} bytes") - - # Verify Critical Fix: Function Signature - # simple mode caused reference error if signature wasn't unified - if 'async function startStream(overrideQ = null, prevAnswer = null, auxContext = null)' in js_code: - print(" Signature: OK") - else: - print(" Signature Fix: [FAIL] Unified startStream signature MISSING") - # Not fatal for interactive per se, but fatal if consistent code is desired - # For simple mode it IS fatal in runtime. - if mode == 'simple': return False - - - # --------------------------------------------------------- - # GLOBAL ENDPOINT / INTEGRATION TESTS (Using last plugin init) - # --------------------------------------------------------- - - if not plugin.api_key: - print("\n[Skip integration: no LLM_KEY]") - return True - - print(f"\n[Stream]") - print(f" Provider: {plugin.provider}") - print(f" Model: {plugin.model}") - - # Needs a token from the last run to pass auth - token_match = re.search(r'tk_init = "(.*?)";', html) - if not token_match: - print(" FAIL: Could not extract token for stream test") - return False - - with app.test_client() as client: - payload = { - "q": "why is the sky blue", - "context": "[1] Wikipedia: The sky appears blue.", - "lang": "en", - "tk": token_match.group(1) - } - - start = time.time() - response = client.post('/ai-stream', json=payload) - elapsed = time.time() - start - - print(f" Status: {response.status_code}") - print(f" Time: {elapsed:.2f}s") - - if response.status_code != 200: - print(f" FAIL: Expected 200, got {response.status_code}") - return False - - data = response.data.decode('utf-8') - if len(data) < 5: - print(" FAIL: Empty or too short response") - return False - print(" Result: OK") - - print("\n[Aux Search]") - with app.test_client() as client: - aux_response = client.post('/ai-auxiliary-search', json={'query': 'test'}) - if aux_response.status_code == 200 and 'results' in aux_response.get_json(): - print(" Result: OK") - else: - print(" Aux Endpoint: [FAIL]") - - print("\nPASS") - return True - -if __name__ == "__main__": - success = run_tests() - sys.exit(0 if success else 1)