fix: use http.client for LLM streaming (bypass 64KB buffer)
This commit is contained in:
+57
-20
@@ -1,6 +1,10 @@
|
|||||||
import json, os, logging, base64, time, hashlib, codecs, re
|
import json, os, logging, base64, time, hashlib, codecs, re, http.client, ssl
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from searx import network
|
from searx import network
|
||||||
|
try:
|
||||||
|
from searx.network import get_network
|
||||||
|
except ImportError:
|
||||||
|
get_network = None # Graceful fallback for test/demo environments
|
||||||
from flask import Response, request, abort, jsonify
|
from flask import Response, request, abort, jsonify
|
||||||
from searx.plugins import Plugin, PluginInfo
|
from searx.plugins import Plugin, PluginInfo
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
@@ -10,6 +14,30 @@ from markupsafe import Markup
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
TOKEN_EXPIRY_SEC = 3600
|
TOKEN_EXPIRY_SEC = 3600
|
||||||
|
STREAM_CHUNK_SIZE = 128
|
||||||
|
STREAM_TIMEOUT_SEC = 60
|
||||||
|
|
||||||
|
def _get_streaming_connection(url: str):
|
||||||
|
parsed = urlparse(url)
|
||||||
|
host = parsed.hostname
|
||||||
|
port = parsed.port or (443 if parsed.scheme == 'https' else 80)
|
||||||
|
path = parsed.path + ('?' + parsed.query if parsed.query else '')
|
||||||
|
|
||||||
|
verify_ssl = True
|
||||||
|
if get_network is not None:
|
||||||
|
try:
|
||||||
|
net = get_network()
|
||||||
|
verify_ssl = getattr(net, 'verify', True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if parsed.scheme == 'https':
|
||||||
|
ctx = ssl.create_default_context() if verify_ssl else ssl._create_unverified_context()
|
||||||
|
conn = http.client.HTTPSConnection(host, port, timeout=STREAM_TIMEOUT_SEC, context=ctx)
|
||||||
|
else:
|
||||||
|
conn = http.client.HTTPConnection(host, port, timeout=STREAM_TIMEOUT_SEC)
|
||||||
|
|
||||||
|
return conn, path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -731,22 +759,23 @@ class SXNGPlugin(Plugin):
|
|||||||
else:
|
else:
|
||||||
url = f"{self.endpoint_url}?key={self.api_key}"
|
url = f"{self.endpoint_url}?key={self.api_key}"
|
||||||
|
|
||||||
|
conn = None
|
||||||
try:
|
try:
|
||||||
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature, "stopSequences": ["</answer>"]}}
|
conn, path = _get_streaming_connection(url)
|
||||||
headers = {"Content-Type": "application/json"}
|
payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature, "stopSequences": ["</answer>"]}})
|
||||||
res, chunk_gen = network.stream('POST', url, json=payload, headers=headers, timeout=60)
|
conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"})
|
||||||
|
res = conn.getresponse()
|
||||||
|
|
||||||
if res.status_code != 200:
|
if res.status != 200:
|
||||||
for _ in chunk_gen: pass # Drain to prevent resource leak
|
logger.error(f"{PLUGIN_NAME}: Gemini API {res.status}")
|
||||||
logger.error(f"{PLUGIN_NAME}: Gemini API {res.status_code}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
buffer = ""
|
buffer = ""
|
||||||
utf8_decoder = codecs.getincrementaldecoder("utf-8")(errors='replace')
|
while True:
|
||||||
for chunk in chunk_gen:
|
chunk = res.read(STREAM_CHUNK_SIZE)
|
||||||
if not chunk: continue
|
if not chunk: break
|
||||||
buffer += utf8_decoder.decode(chunk, final=False)
|
buffer += chunk.decode('utf-8', errors='replace')
|
||||||
while buffer:
|
while buffer:
|
||||||
buffer = buffer.lstrip()
|
buffer = buffer.lstrip()
|
||||||
if not buffer: break
|
if not buffer: break
|
||||||
@@ -765,17 +794,21 @@ class SXNGPlugin(Plugin):
|
|||||||
except json.JSONDecodeError: break
|
except json.JSONDecodeError: break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}")
|
logger.error(f"{PLUGIN_NAME}: Gemini stream error: {e}")
|
||||||
|
finally:
|
||||||
|
if conn: conn.close()
|
||||||
|
|
||||||
def stream_openai_compatible():
|
def stream_openai_compatible():
|
||||||
|
conn = None
|
||||||
try:
|
try:
|
||||||
payload = {
|
conn, path = _get_streaming_connection(self.endpoint_url)
|
||||||
|
payload = json.dumps({
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"stream": True,
|
"stream": True,
|
||||||
"max_tokens": self.max_tokens,
|
"max_tokens": self.max_tokens,
|
||||||
"temperature": self.temperature,
|
"temperature": self.temperature,
|
||||||
"stop": ["</answer>"]
|
"stop": ["</answer>"]
|
||||||
}
|
})
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"HTTP-Referer": "https://github.com/searxng/searxng",
|
"HTTP-Referer": "https://github.com/searxng/searxng",
|
||||||
@@ -785,17 +818,18 @@ class SXNGPlugin(Plugin):
|
|||||||
headers['api-key'] = self.api_key
|
headers['api-key'] = self.api_key
|
||||||
else:
|
else:
|
||||||
headers['Authorization'] = f"Bearer {self.api_key}"
|
headers['Authorization'] = f"Bearer {self.api_key}"
|
||||||
res, chunk_gen = network.stream('POST', self.endpoint_url, json=payload, headers=headers, timeout=60)
|
conn.request("POST", path, body=payload, headers=headers)
|
||||||
|
res = conn.getresponse()
|
||||||
|
|
||||||
if res.status_code != 200:
|
if res.status != 200:
|
||||||
for _ in chunk_gen: pass
|
logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status}")
|
||||||
logger.error(f"{PLUGIN_NAME}: {self.provider} API {res.status_code}")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
buffer = b""
|
buffer = b""
|
||||||
for chunk in chunk_gen:
|
while True:
|
||||||
if not chunk: continue
|
chunk = res.read(STREAM_CHUNK_SIZE)
|
||||||
|
if not chunk: break
|
||||||
buffer += chunk
|
buffer += chunk
|
||||||
while b"\n" in buffer:
|
while b"\n" in buffer:
|
||||||
line_bytes, buffer = buffer.split(b"\n", 1)
|
line_bytes, buffer = buffer.split(b"\n", 1)
|
||||||
@@ -811,13 +845,16 @@ class SXNGPlugin(Plugin):
|
|||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
|
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
|
||||||
|
finally:
|
||||||
|
if conn: conn.close()
|
||||||
|
|
||||||
generator = stream_gemini if self.is_gemini else stream_openai_compatible
|
generator = stream_gemini if self.is_gemini else stream_openai_compatible
|
||||||
return Response(generator(), mimetype='text/event-stream', headers={
|
return Response(generator(), mimetype='text/event-stream', headers={
|
||||||
'X-Accel-Buffering': 'no',
|
'X-Accel-Buffering': 'no',
|
||||||
'Cache-Control': 'no-cache, no-store',
|
'Cache-Control': 'no-cache, no-store',
|
||||||
'Connection': 'keep-alive',
|
'Connection': 'keep-alive',
|
||||||
'Content-Encoding': 'identity'
|
'Transfer-Encoding': 'chunked',
|
||||||
|
'Content-Encoding': 'identity',
|
||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user