feat: implement RAG context awareness and non-blocking streaming

This commit is contained in:
cra88y
2026-01-10 18:25:34 -06:00
parent 531128e828
commit bf04b62577
3 changed files with 85 additions and 16 deletions
+13
View File
@@ -0,0 +1,13 @@
# SearXNG Gemini Stream Plugin
This is a SearXNG plugin that streams live AI answers from Google Gemini Flash, using the top search results for the query as context.
## Configuration
Set the following environment variables:
- `GEMINI_API_KEY`: Your Google Gemini API key.
- `GEMINI_MODEL`: (Optional) The model to use; defaults to `gemini-3-flash-preview`.
## Installation
Place the plugin's Python file in the `searx/plugins` directory of your SearXNG instance, or configure it as a plugin.
+70 -16
View File
@@ -34,12 +34,15 @@ class SXNGPlugin(Plugin):
t = request.args.get('token') t = request.args.get('token')
q = request.args.get('q', '') q = request.args.get('q', '')
# Maintenance # Maintenance: handle dict structure
current_time = time.time() current_time = time.time()
self.tokens = {k: v for k, v in self.tokens.items() if v > current_time} self.tokens = {k: v for k, v in self.tokens.items() if v['expires'] > current_time}
if t not in self.tokens or not self.api_key: if t not in self.tokens or not self.api_key:
abort(403) abort(403)
token_data = self.tokens[t]
context_text = token_data.get('context', '')
del self.tokens[t] del self.tokens[t]
def generate(): def generate():
@@ -48,37 +51,75 @@ class SXNGPlugin(Plugin):
try: try:
context = ssl.create_default_context() context = ssl.create_default_context()
conn = http.client.HTTPSConnection(host, context=context) conn = http.client.HTTPSConnection(host, context=context)
conn.request("POST", path, body=json.dumps({"contents": [{"parts": [{"text": q}]}]}),
# RAG PROMPT
prompt = f"""
You are a concise search assistant. Use the provided SEARCH RESULTS to answer the USER QUERY.
If the results don't contain the answer, use your knowledge but prioritize the results.
Keep the answer under 4 sentences.
SEARCH RESULTS:
{context_text}
USER QUERY: {q}
"""
payload = {
"contents": [{"parts": [{"text": prompt}]}],
"generationConfig": {
"maxOutputTokens": 500,
"temperature": 0.2 # Lower temperature for better factual accuracy
}
}
conn.request("POST", path, body=json.dumps(payload),
headers={"Content-Type": "application/json"}) headers={"Content-Type": "application/json"})
res = conn.getresponse() res = conn.getresponse()
buffer = "" buffer = ""
for chunk in res: for chunk in res:
if not chunk: continue
buffer += chunk.decode('utf-8') buffer += chunk.decode('utf-8')
while True: while True:
start = buffer.find('{') start = buffer.find('{')
if start == -1: break if start == -1:
brace_count, end = 0, -1 buffer = "" # Clear garbage
break
brace_count = 0
end = -1
for i in range(start, len(buffer)): for i in range(start, len(buffer)):
if buffer[i] == '{': brace_count += 1 if buffer[i] == '{': brace_count += 1
elif buffer[i] == '}': brace_count -= 1 elif buffer[i] == '}': brace_count -= 1
if brace_count == 0: if brace_count == 0:
end = i + 1 end = i + 1
break break
if end == -1: break
if end == -1: break # Wait for more data
try: try:
text = json.loads(buffer[start:end])['candidates'][0]['content']['parts'][0]['text'] raw_json = buffer[start:end]
if text: data = json.loads(raw_json)
yield text parts = data.get('candidates', [{}])[0].get('content', {}).get('parts', [])
buffer = buffer[end:] for part in parts:
except: text = part.get('text', '')
buffer = buffer[end:] if text:
yield text
except Exception:
pass
buffer = buffer[end:]
conn.close() conn.close()
except Exception as e: except Exception as e:
logger.error(f"[{self.id}] Stream error: {e}") logger.error(f"[{self.id}] Stream error: {e}")
yield f" [Error: {str(e)}]" yield f" [Error: {str(e)}]"
return Response(generate(), mimetype='text/plain') return Response(generate(), mimetype='text/plain', headers={
'Cache-Control': 'no-cache, no-store, must-revalidate',
'Pragma': 'no-cache',
'Expires': '0',
'X-Accel-Buffering': 'no'
})
@app.route('/gemini.js') @app.route('/gemini.js')
def g_script(): def g_script():
@@ -125,10 +166,23 @@ class SXNGPlugin(Plugin):
if search.search_query.pageno > 1 or not self.active or not self.api_key: if search.search_query.pageno > 1 or not self.active or not self.api_key:
return results return results
tk = secrets.token_urlsafe(16) # Extract context from top 5 search results for RAG
self.tokens[tk] = time.time() + 90 context_parts = []
raw_results = search.result_container.get_ordered_results()
for i, res in enumerate(raw_results[:5]):
title = res.get('title', 'No Title')
content = res.get('content', 'No Content')
context_parts.append(f"Source [{i+1}]: {title}\nSnippet: {content}")
logger.warning(f"[{self.id}] Injecting Answer for query: {search.search_query.query[:20]}...") context_str = "\n\n".join(context_parts)
tk = secrets.token_urlsafe(16)
self.tokens[tk] = {
"expires": time.time() + 90,
"context": context_str
}
logger.debug(f"[{self.id}] Prepared RAG context for query: {search.search_query.query[:20]}...")
# Encode query for the URL parameter in the script tag # Encode query for the URL parameter in the script tag
safe_query_param = urllib.parse.quote(search.search_query.query) safe_query_param = urllib.parse.quote(search.search_query.query)
+2
View File
@@ -0,0 +1,2 @@
flask
flask-babel