fix: use http.client for LLM streaming (bypass 64KB buffer)

This commit is contained in:
cra88y/pc
2026-01-20 21:59:31 -06:00
parent 9c5b016690
commit 25053109e2
+57 -20
View File
@@ -1,6 +1,10 @@
import json, os, logging, base64, time, hashlib, codecs, re import json, os, logging, base64, time, hashlib, codecs, re, http.client, ssl
from urllib.parse import urlparse from urllib.parse import urlparse
from searx import network from searx import network
try:
from searx.network import get_network
except ImportError:
get_network = None # Graceful fallback for test/demo environments
from flask import Response, request, abort, jsonify from flask import Response, request, abort, jsonify
from searx.plugins import Plugin, PluginInfo from searx.plugins import Plugin, PluginInfo
from searx.result_types import EngineResults from searx.result_types import EngineResults
@@ -10,6 +14,30 @@ from markupsafe import Markup
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
TOKEN_EXPIRY_SEC = 3600 TOKEN_EXPIRY_SEC = 3600
# Number of bytes requested per ``res.read()`` call in the streaming loops.
STREAM_CHUNK_SIZE = 128
# Timeout, in seconds, passed to the http.client connection constructors.
STREAM_TIMEOUT_SEC = 60
def _get_streaming_connection(url: str):
    """Create an unopened ``http.client`` connection for streaming *url*.

    Nothing is sent here; the caller issues ``conn.request(...)`` with the
    returned path and is responsible for closing the connection.

    Args:
        url: Absolute endpoint URL. ``https`` selects a TLS connection; any
            other scheme is handled as plain HTTP.

    Returns:
        A ``(conn, path)`` tuple: an ``HTTPSConnection`` or ``HTTPConnection``
        created with ``STREAM_TIMEOUT_SEC`` as its timeout, and the request
        target (path plus optional query string, never empty).

    Raises:
        ValueError: If *url* has no host component or carries an invalid port.
    """
    parsed = urlparse(url)
    host = parsed.hostname
    if not host:
        # http.client accepts host=None and would then connect to the
        # loopback address, sending the request to the wrong peer.
        raise ValueError(f"streaming URL has no host: {url!r}")

    is_https = parsed.scheme == 'https'
    port = parsed.port or (443 if is_https else 80)

    # The request target must start with "/", even for "https://host?x=1".
    path = parsed.path or '/'
    if parsed.query:
        path = f"{path}?{parsed.query}"

    if not is_https:
        return http.client.HTTPConnection(host, port, timeout=STREAM_TIMEOUT_SEC), path

    # Best effort: mirror the TLS verification setting of the searx network.
    # NOTE(review): only the truthiness of ``verify`` is honoured here; if it
    # can be a CA-bundle path, that path is not loaded -- confirm.
    verify_ssl = True
    if get_network is not None:
        try:
            verify_ssl = getattr(get_network(), 'verify', True)
        except Exception as exc:
            # Keep verification enabled when the setting cannot be read.
            logging.getLogger(__name__).debug(
                "could not read network TLS settings, verifying certificates: %s", exc
            )

    ctx = ssl.create_default_context()
    if not verify_ssl:
        # check_hostname must be disabled before verify_mode may be CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    return http.client.HTTPSConnection(host, port, timeout=STREAM_TIMEOUT_SEC, context=ctx), path
@@ -731,22 +759,23 @@ class SXNGPlugin(Plugin):
else: else:
url = f"{self.endpoint_url}?key={self.api_key}" url = f"{self.endpoint_url}?key={self.api_key}"
conn = None
try: try:
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature, "stopSequences": ["</answer>"]}} conn, path = _get_streaming_connection(url)
headers = {"Content-Type": "application/json"} payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature, "stopSequences": ["</answer>"]}})
res, chunk_gen = network.stream('POST', url, json=payload, headers=headers, timeout=60) conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"})
res = conn.getresponse()
if res.status_code != 200: if res.status != 200:
for _ in chunk_gen: pass # Drain to prevent resource leak logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}")
logger.error(f"{PLUGIN_NAME}: Gemini API {res.status_code}")
return return
decoder = json.JSONDecoder() decoder = json.JSONDecoder()
buffer = "" buffer = ""
utf8_decoder = codecs.getincrementaldecoder("utf-8")(errors='replace') while True:
for chunk in chunk_gen: chunk = res.read(STREAM_CHUNK_SIZE)
if not chunk: continue if not chunk: break
buffer += utf8_decoder.decode(chunk, final=False) buffer += chunk.decode('utf-8', errors='replace')
while buffer: while buffer:
buffer = buffer.lstrip() buffer = buffer.lstrip()
if not buffer: break if not buffer: break
@@ -765,17 +794,21 @@ class SXNGPlugin(Plugin):
except json.JSONDecodeError: break except json.JSONDecodeError: break
except Exception as e: except Exception as e:
logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}") logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}")
finally:
if conn: conn.close()
def stream_openai_compatible(): def stream_openai_compatible():
conn = None
try: try:
payload = { conn, path = _get_streaming_connection(self.endpoint_url)
payload = json.dumps({
"model": self.model, "model": self.model,
"messages": [{"role": "user", "content": prompt}], "messages": [{"role": "user", "content": prompt}],
"stream": True, "stream": True,
"max_tokens": self.max_tokens, "max_tokens": self.max_tokens,
"temperature": self.temperature, "temperature": self.temperature,
"stop": ["</answer>"] "stop": ["</answer>"]
} })
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"HTTP-Referer": "https://github.com/searxng/searxng", "HTTP-Referer": "https://github.com/searxng/searxng",
@@ -785,17 +818,18 @@ class SXNGPlugin(Plugin):
headers['api-key'] = self.api_key headers['api-key'] = self.api_key
else: else:
headers['Authorization'] = f"Bearer {self.api_key}" headers['Authorization'] = f"Bearer {self.api_key}"
res, chunk_gen = network.stream('POST', self.endpoint_url, json=payload, headers=headers, timeout=60) conn.request("POST", path, body=payload, headers=headers)
res = conn.getresponse()
if res.status_code != 200: if res.status != 200:
for _ in chunk_gen: pass logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}")
logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status_code}")
return return
decoder = json.JSONDecoder() decoder = json.JSONDecoder()
buffer = b"" buffer = b""
for chunk in chunk_gen: while True:
if not chunk: continue chunk = res.read(STREAM_CHUNK_SIZE)
if not chunk: break
buffer += chunk buffer += chunk
while b"\n" in buffer: while b"\n" in buffer:
line_bytes, buffer = buffer.split(b"\n", 1) line_bytes, buffer = buffer.split(b"\n", 1)
@@ -811,13 +845,16 @@ class SXNGPlugin(Plugin):
pass pass
except Exception as e: except Exception as e:
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}") logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
finally:
if conn: conn.close()
generator = stream_gemini if self.is_gemini else stream_openai_compatible generator = stream_gemini if self.is_gemini else stream_openai_compatible
return Response(generator(), mimetype='text/event-stream', headers={ return Response(generator(), mimetype='text/event-stream', headers={
'X-Accel-Buffering': 'no', 'X-Accel-Buffering': 'no',
'Cache-Control': 'no-cache, no-store', 'Cache-Control': 'no-cache, no-store',
'Connection': 'keep-alive', 'Connection': 'keep-alive',
'Content-Encoding': 'identity' 'Transfer-Encoding': 'chunked',
'Content-Encoding': 'identity',
}) })
return True return True