{interactive_html}
import json, http.client, ssl, os, logging, base64, time, hashlib
from urllib.parse import urlparse
from flask import Response, request, abort
from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults
from flask_babel import gettext
from markupsafe import Markup
# Module-level logger shared by every function in this plugin.
logger = logging.getLogger(__name__)

# Maximum age (in seconds) of a request token before the stream endpoint
# rejects it: one day.
TOKEN_EXPIRY_SEC = 24 * 60 * 60

# Timeout (in seconds) handed to every outgoing http.client connection.
CONNECTION_TIMEOUT_SEC = 30

# Default endpoint URL and model for each supported provider.
#   * 'azure' ships without a URL (None), so one must come from configuration.
#   * the 'huggingface' URL carries a '{model}' placeholder that is filled in
#     with the configured model name when the configuration is loaded.
PROVIDER_PRESETS = {
    'openai': {
        'url': 'https://api.openai.com/v1/chat/completions',
        'model': 'gpt-4o-mini',
    },
    'openrouter': {
        'url': 'https://openrouter.ai/api/v1/chat/completions',
        'model': 'google/gemma-3-27b-it:free',
    },
    'ollama': {
        'url': 'http://localhost:11434/v1/chat/completions',
        'model': 'llama3.2',
    },
    'localai': {
        'url': 'http://localhost:8080/v1/chat/completions',
        'model': 'gpt-4',
    },
    'lmstudio': {
        'url': 'http://localhost:1234/v1/chat/completions',
        'model': 'local-model',
    },
    'gemini': {
        'url': 'https://generativelanguage.googleapis.com',
        'model': 'gemma-3-27b-it',
    },
    'azure': {
        'url': None,
        'model': 'azure-deployment',
    },
    'huggingface': {
        'url': 'https://api-inference.huggingface.co/models/{model}/v1/chat/completions',
        'model': 'meta-llama/Meta-Llama-3-8B-Instruct',
    },
}
import typing
if typing.TYPE_CHECKING:
from searx.search import SearchWithPlugins
from searx.extended_types import SXNG_Request
from . import PluginCfg
class SXNGPlugin(Plugin):
"""
AI Answers Plugin for SearXNG.
Injects a real-time streaming answer box synthesized from search results using LLM providers.
Supports OpenAI, OpenRouter, Gemini, Ollama, LocalAI, LM Studio, Azure, and Hugging Face.
"""
id = "ai_answers"
def __init__(self, plg_cfg: "PluginCfg"):
    """Register the plugin metadata, load the configuration and pick the token secret.

    The secret used to sign stream tokens comes from ``SXNG_LLM_SECRET``.
    When that variable is unset or empty and an API key is configured, the
    SHA-256 hex digest of the API key is used instead. Without an API key
    the secret falls back to the empty string and a warning is logged.
    """
    super().__init__(plg_cfg)

    plugin_name = gettext("AI Answers Plugin")
    plugin_description = gettext("Live AI search answers using LLM providers.")
    self.info = PluginInfo(
        id=self.id,
        name=plugin_name,
        description=plugin_description,
        preference_section="general",
    )

    self._load_config()

    env_secret = os.getenv('SXNG_LLM_SECRET')

    # Guard clause: no key means the plugin stays inactive.
    if not self.api_key:
        self.secret = env_secret or ''
        logger.warning("AI Answers: No API key configured, plugin inactive")
        return

    if env_secret:
        self.secret = env_secret
    else:
        # Derive a stable secret from the API key when none was supplied.
        self.secret = hashlib.sha256(self.api_key.encode()).hexdigest()
def _load_config(self):
    """Load the plugin configuration from ``LLM_*`` environment variables.

    Environment variables read here:
        LLM_STYLE, LLM_PROVIDER, LLM_URL, LLM_KEY, LLM_MODEL,
        LLM_MAX_TOKENS, LLM_TEMPERATURE, LLM_CONTEXT_COUNT, LLM_TABS.

    When ``LLM_PROVIDER`` is empty the provider is guessed from well-known
    substrings of ``LLM_URL``. A provider name that is not listed in
    ``PROVIDER_PRESETS`` falls back to ``'openai'``.

    The plugin is left inactive (``provider``, ``model`` and ``api_key`` are
    empty strings, ``is_gemini`` is False) when
      * no provider could be determined, or
      * the provider has no preset URL (e.g. ``azure``) and ``LLM_URL`` is
        not set. This used to crash with an AttributeError while parsing
        the URL.

    On success the endpoint attributes are filled in by ``_parse_url``.
    """
    self.style = os.getenv('LLM_STYLE', 'interactive')

    # Start from the inactive state; only a complete configuration below
    # overrides it, so every early return leaves the same attribute values.
    self.provider = ''
    self.model = ''
    self.is_gemini = False
    self.api_key = ''

    raw_provider = os.getenv('LLM_PROVIDER', '').lower().strip()
    raw_url = os.getenv('LLM_URL', '').strip()

    if not raw_provider and raw_url:
        # Guess the provider from the URL; the first matching hint wins.
        url_lower = raw_url.lower()
        url_hints = (
            ('openai.com', 'openai'),
            ('openrouter.ai', 'openrouter'),
            (':11434', 'ollama'),
            ('generativelanguage.googleapis.com', 'gemini'),
        )
        raw_provider = next(
            (name for hint, name in url_hints if hint in url_lower), ''
        )

    if not raw_provider:
        logger.debug("AI Answers: No provider configured, plugin inactive")
        return

    provider = raw_provider if raw_provider in PROVIDER_PRESETS else 'openai'
    preset = PROVIDER_PRESETS[provider]

    if not raw_url and not preset['url']:
        # Without this check a None URL would reach _parse_url and raise.
        logger.error(
            "AI Answers: provider '%s' has no default endpoint, set LLM_URL; plugin inactive",
            provider,
        )
        return

    self.provider = provider
    self.is_gemini = (provider == 'gemini')

    # Strip before testing for emptiness so that a whitespace-only LLM_KEY
    # still gets the placeholder key for local providers.
    api_key = os.getenv('LLM_KEY', '').strip()
    if not api_key and provider in ('ollama', 'localai', 'lmstudio'):
        api_key = 'none'
    self.api_key = api_key

    # A blank LLM_MODEL falls back to the preset instead of an empty name.
    self.model = os.getenv('LLM_MODEL', '').strip() or preset['model']

    def env_number(name, default, cast):
        # Parse a numeric environment variable, keeping the default on bad input.
        try:
            return cast(os.getenv(name, default))
        except ValueError:
            return default

    self.max_tokens = env_number('LLM_MAX_TOKENS', 500, int)
    self.temperature = env_number('LLM_TEMPERATURE', 0.2, float)
    self.context_count = max(0, env_number('LLM_CONTEXT_COUNT', 5, int))

    tabs = os.getenv('LLM_TABS', 'general,science,it,news')
    self.allowed_tabs = {tab.strip() for tab in tabs.split(',')}

    preset_url = preset['url']
    if preset_url and '{model}' in preset_url:
        preset_url = preset_url.format(model=self.model)
    self._parse_url(preset_url)

    logger.info("AI Answers: %s @ %s", self.provider, self.endpoint_host)
def _parse_url(self, default_url):
    """Resolve the endpoint URL and split it into connection attributes.

    ``LLM_URL`` from the environment takes precedence over ``default_url``.
    A URL without an ``http://`` or ``https://`` prefix (compared
    case-insensitively) is assumed to be HTTPS.

    Sets ``endpoint_url``, ``endpoint_host``, ``endpoint_port``,
    ``endpoint_path`` (including the query string, if any) and
    ``endpoint_ssl``. For non-Gemini providers a warning is logged when the
    endpoint is plain HTTP on a host that is not loopback.

    Args:
        default_url: Fallback URL used when ``LLM_URL`` is unset or blank.
            May be None for providers without a preset URL.

    Raises:
        ValueError: If neither ``LLM_URL`` nor ``default_url`` yields a URL,
            or if the URL contains an invalid port.
    """
    import ipaddress

    raw_url = os.getenv('LLM_URL', '').strip() or default_url
    if not raw_url:
        # Previously a None default crashed on None.startswith.
        raise ValueError("AI Answers: no endpoint URL configured, set LLM_URL")

    if not raw_url.lower().startswith(('http://', 'https://')):
        raw_url = f"https://{raw_url}"

    parsed = urlparse(raw_url)
    self.endpoint_url = raw_url
    self.endpoint_host = parsed.hostname or 'localhost'
    self.endpoint_port = parsed.port
    self.endpoint_path = parsed.path or '/v1/chat/completions'
    if parsed.query:
        self.endpoint_path += f"?{parsed.query}"
    self.endpoint_ssl = (parsed.scheme == 'https')

    if self.is_gemini:
        return

    # Treat 'localhost' and real loopback addresses (127.0.0.0/8, ::1) as
    # local. A hostname that merely starts with "127." is not local.
    try:
        is_local = ipaddress.ip_address(self.endpoint_host).is_loopback
    except ValueError:
        is_local = (self.endpoint_host == 'localhost')

    if not self.endpoint_ssl and not is_local:
        logger.warning(
            "AI Answers: HTTP on non-localhost (%s). Credentials may be exposed.",
            self.endpoint_host,
        )
def _get_connection(self):
    """Create an http.client connection object for the configured endpoint.

    The proxy is taken from ``HTTPS_PROXY``/``https_proxy`` for HTTPS
    endpoints and from ``HTTP_PROXY``/``http_proxy`` otherwise. When a proxy
    is set, the connection is made to the proxy and tunnelled to the
    endpoint via ``set_tunnel``.

    The connection class follows the *endpoint* scheme, not the proxy
    scheme: an HTTPS endpoint must be wrapped in TLS after the tunnel is
    established, which only ``HTTPSConnection`` does. Choosing the class
    from the proxy URL sent HTTPS traffic in clear text to port 80 whenever
    the proxy URL itself was ``http://``.

    Returns:
        http.client.HTTPSConnection when ``endpoint_ssl`` is true, otherwise
        http.client.HTTPConnection.
    """
    use_tls = self.endpoint_ssl
    target_host = self.endpoint_host
    # Always pass an explicit port: with a port of None, http.client tries
    # to split "host:port" out of the host string, which mangles bare IPv6
    # literals such as "::1".
    target_port = self.endpoint_port or (443 if use_tls else 80)

    def make_connection(host, port):
        # Build a connection of the class matching the endpoint scheme.
        if use_tls:
            return http.client.HTTPSConnection(
                host,
                port,
                timeout=CONNECTION_TIMEOUT_SEC,
                context=ssl.create_default_context(),
            )
        return http.client.HTTPConnection(host, port, timeout=CONNECTION_TIMEOUT_SEC)

    if use_tls:
        proxy_url = os.getenv('HTTPS_PROXY') or os.getenv('https_proxy')
    else:
        proxy_url = os.getenv('HTTP_PROXY') or os.getenv('http_proxy')

    if proxy_url:
        if '://' not in proxy_url:
            # Accept the common scheme-less "host:port" form.
            proxy_url = f"http://{proxy_url}"
        proxy = urlparse(proxy_url)
        if proxy.hostname:
            conn = make_connection(proxy.hostname, proxy.port or 8080)
            conn.set_tunnel(target_host, target_port)
            return conn
        logger.warning("AI Answers: ignoring unparsable proxy URL, connecting directly")

    # Direct connection
    return make_connection(target_host, target_port)
def init(self, app):
@app.route('/ai-stream', methods=['POST'])
def handle_ai_stream():
data = request.json or {}
token = data.get('tk', '')
q = data.get('q', '')
lang = data.get('lang', 'all')
try:
ts, sig = token.split('.', 1)
expected = hashlib.sha256(f"{ts}{self.secret}".encode()).hexdigest()
if sig != expected or (time.time() - float(ts)) > TOKEN_EXPIRY_SEC:
abort(403)
except (ValueError, KeyError, AttributeError):
abort(403)
context_text = data.get('context', '')
prev_answer = (data.get('prev_answer') or '')[-4000:]
if not self.api_key:
logger.warning(f"AI Answers: request rejected. Key loaded: {bool(self.api_key)}, Query: {bool(q)}")
return Response("Missing API key or query", status=400)
today = time.strftime("%Y-%m-%d")
target_words = int(self.max_tokens * 0.2)
lang_instruction = f" Respond in {lang}." if lang not in ('all', 'auto') else ""
SYSTEM = f"You are a search synthesis engine. Direct, grounded, citation-accurate. Today is {today}.{lang_instruction}"
CORE_RULES = [
"DENSITY 4/5: Expert-briefing level. No filler, no transitions. Every sentence = new information.",
f"BREVITY: {target_words} words max. Complete, not verbose.",
"CITATIONS: Cite [n] only for specific facts from sources. Max 3 total. Sentence-end only. Never cite common knowledge.",
"NO HEDGE: State answers confidently. Note uncertainty only if critical.",
]
if q == "Continue":
task = "CONTINUE: Pick up exactly where previous answer stopped. No repetition. Seamless flow."
elif prev_answer:
task = "FOLLOW-UP: Address the new question using prior context. Prioritize the new query."
else:
task = "ANSWER FIRST: Lead with the direct answer. No preamble, no context-setting."
grounding = "GROUNDING: Trust sources for current events. Use knowledge for fundamentals." if context_text else "GROUNDING: No sources available. Use knowledge and note 'based on general knowledge'."
history_rule = "HISTORY: Refer to prior exchange for context. Do not repeat." if prev_answer else None
instructions = [task] + CORE_RULES + [grounding]
if history_rule:
instructions.append(history_rule)
numbered_instructions = "\n".join(f"{i+1}. {r}" for i, r in enumerate(instructions))
prompt = f"""