Fix Ollama support, add "Thinking..." fallback placeholder, clean up environment variable settings
This commit is contained in:
+40
-11
@@ -26,11 +26,11 @@ class SXNGPlugin(Plugin):
|
|||||||
self.api_key = os.getenv('OPENROUTER_API_KEY') if self.provider == 'openrouter' else os.getenv('GEMINI_API_KEY')
|
self.api_key = os.getenv('OPENROUTER_API_KEY') if self.provider == 'openrouter' else os.getenv('GEMINI_API_KEY')
|
||||||
self.model = os.getenv('GEMINI_MODEL', 'gemma-3-27b-it') if self.provider == 'gemini' else os.getenv('OPENROUTER_MODEL', 'google/gemma-3-27b-it:free')
|
self.model = os.getenv('GEMINI_MODEL', 'gemma-3-27b-it') if self.provider == 'gemini' else os.getenv('OPENROUTER_MODEL', 'google/gemma-3-27b-it:free')
|
||||||
try:
|
try:
|
||||||
self.max_tokens = int(os.getenv('GEMINI_MAX_TOKENS', 500))
|
self.max_tokens = int(os.getenv('RESPONSE_MAX_TOKENS', 500))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
self.max_tokens = 500
|
self.max_tokens = 500
|
||||||
try:
|
try:
|
||||||
self.temperature = float(os.getenv('GEMINI_TEMPERATURE', 0.2))
|
self.temperature = float(os.getenv('RESPONSE_TEMPERATURE', 0.2))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
self.temperature = 0.2
|
self.temperature = 0.2
|
||||||
self.base_url = os.getenv('OPENROUTER_BASE_URL', 'openrouter.ai')
|
self.base_url = os.getenv('OPENROUTER_BASE_URL', 'openrouter.ai')
|
||||||
@@ -74,6 +74,7 @@ class SXNGPlugin(Plugin):
|
|||||||
def generate_gemini():
|
def generate_gemini():
|
||||||
host = "generativelanguage.googleapis.com"
|
host = "generativelanguage.googleapis.com"
|
||||||
path = f"/v1/models/{self.model}:streamGenerateContent?key={self.api_key}"
|
path = f"/v1/models/{self.model}:streamGenerateContent?key={self.api_key}"
|
||||||
|
conn = None
|
||||||
try:
|
try:
|
||||||
conn = http.client.HTTPSConnection(host, timeout=CONNECTION_TIMEOUT_SEC, context=ssl.create_default_context())
|
conn = http.client.HTTPSConnection(host, timeout=CONNECTION_TIMEOUT_SEC, context=ssl.create_default_context())
|
||||||
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature}}
|
payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": self.max_tokens, "temperature": self.temperature}}
|
||||||
@@ -103,12 +104,20 @@ class SXNGPlugin(Plugin):
|
|||||||
if text: yield text
|
if text: yield text
|
||||||
buffer = buffer[idx:]
|
buffer = buffer[idx:]
|
||||||
except json.JSONDecodeError: break
|
except json.JSONDecodeError: break
|
||||||
conn.close()
|
except Exception as e:
|
||||||
except Exception as e: logger.error(f"Gemini Stream Exception: {e}")
|
logger.error(f"Gemini Stream Exception: {e}")
|
||||||
|
finally:
|
||||||
|
if conn: conn.close()
|
||||||
|
|
||||||
def generate_openrouter():
|
def generate_openrouter():
|
||||||
|
conn = None
|
||||||
try:
|
try:
|
||||||
conn = http.client.HTTPSConnection(self.base_url, timeout=CONNECTION_TIMEOUT_SEC, context=ssl.create_default_context())
|
# Support HTTP for localhost/Ollama
|
||||||
|
is_local = self.base_url.startswith('localhost') or self.base_url.startswith('127.')
|
||||||
|
if is_local:
|
||||||
|
conn = http.client.HTTPConnection(self.base_url, timeout=CONNECTION_TIMEOUT_SEC)
|
||||||
|
else:
|
||||||
|
conn = http.client.HTTPSConnection(self.base_url, timeout=CONNECTION_TIMEOUT_SEC, context=ssl.create_default_context())
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
@@ -122,7 +131,9 @@ class SXNGPlugin(Plugin):
|
|||||||
"HTTP-Referer": "https://github.com/searxng/searxng",
|
"HTTP-Referer": "https://github.com/searxng/searxng",
|
||||||
"X-Title": "SearXNG LLM Plugin"
|
"X-Title": "SearXNG LLM Plugin"
|
||||||
}
|
}
|
||||||
conn.request("POST", "/api/v1/chat/completions", body=json.dumps(payload), headers=headers)
|
# Ollama uses /v1/... while OpenRouter uses /api/v1/...
|
||||||
|
api_path = "/v1/chat/completions" if is_local else "/api/v1/chat/completions"
|
||||||
|
conn.request("POST", api_path, body=json.dumps(payload), headers=headers)
|
||||||
res = conn.getresponse()
|
res = conn.getresponse()
|
||||||
if res.status != 200:
|
if res.status != 200:
|
||||||
logger.error(f"OpenRouter API Error {res.status}: {res.read().decode('utf-8')}")
|
logger.error(f"OpenRouter API Error {res.status}: {res.read().decode('utf-8')}")
|
||||||
@@ -145,8 +156,10 @@ class SXNGPlugin(Plugin):
|
|||||||
if content: yield content
|
if content: yield content
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
conn.close()
|
except Exception as e:
|
||||||
except Exception as e: logger.error(f"OpenRouter Stream Exception: {e}")
|
logger.error(f"OpenRouter Stream Exception: {e}")
|
||||||
|
finally:
|
||||||
|
if conn: conn.close()
|
||||||
|
|
||||||
generator = generate_openrouter if self.provider == 'openrouter' else generate_gemini
|
generator = generate_openrouter if self.provider == 'openrouter' else generate_gemini
|
||||||
return Response(generator(), mimetype='text/event-stream', headers={
|
return Response(generator(), mimetype='text/event-stream', headers={
|
||||||
@@ -180,12 +193,18 @@ class SXNGPlugin(Plugin):
|
|||||||
<article id="sxng-stream-box" class="answer" style="display:none; margin-bottom: 1rem;">
|
<article id="sxng-stream-box" class="answer" style="display:none; margin-bottom: 1rem;">
|
||||||
<style>
|
<style>
|
||||||
@keyframes sxng-blink {{ 0%, 100% {{ opacity: 1; }} 50% {{ opacity: 0; }} }}
|
@keyframes sxng-blink {{ 0%, 100% {{ opacity: 1; }} 50% {{ opacity: 0; }} }}
|
||||||
|
@keyframes sxng-pulse {{ 0%, 100% {{ opacity: 0.4; }} 50% {{ opacity: 0.9; }} }}
|
||||||
.sxng-cursor {{
|
.sxng-cursor {{
|
||||||
display: inline-block; width: 0.5rem; height: 1rem;
|
display: inline-block; width: 0.5rem; height: 1rem;
|
||||||
background: var(--color-result-description);
|
background: var(--color-result-description);
|
||||||
margin-left: 2px; vertical-align: middle;
|
margin-left: 2px; vertical-align: middle;
|
||||||
animation: sxng-blink 1s step-end infinite;
|
animation: sxng-blink 1s step-end infinite;
|
||||||
}}
|
}}
|
||||||
|
.sxng-thinking {{
|
||||||
|
color: var(--color-result-description);
|
||||||
|
font-style: italic;
|
||||||
|
animation: sxng-pulse 1.5s ease-in-out infinite;
|
||||||
|
}}
|
||||||
</style>
|
</style>
|
||||||
<p id="sxng-stream-data" style="white-space: pre-wrap; color: var(--color-result-description); font-size: 0.95rem;"></p>
|
<p id="sxng-stream-data" style="white-space: pre-wrap; color: var(--color-result-description); font-size: 0.95rem;"></p>
|
||||||
<script>
|
<script>
|
||||||
@@ -200,12 +219,23 @@ class SXNGPlugin(Plugin):
|
|||||||
|
|
||||||
try {{
|
try {{
|
||||||
const ctx = new TextDecoder().decode(Uint8Array.from(atob(b64), c => c.charCodeAt(0)));
|
const ctx = new TextDecoder().decode(Uint8Array.from(atob(b64), c => c.charCodeAt(0)));
|
||||||
|
|
||||||
|
// Show "Thinking..." placeholder while waiting for LLM
|
||||||
|
data.innerHTML = '<span class="sxng-thinking">Thinking...</span>';
|
||||||
|
if (wrapper) wrapper.style.display = '';
|
||||||
|
box.style.display = 'block';
|
||||||
|
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), 60000);
|
||||||
|
|
||||||
const res = await fetch('/ai-stream', {{
|
const res = await fetch('/ai-stream', {{
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {{ 'Content-Type': 'application/json' }},
|
headers: {{ 'Content-Type': 'application/json' }},
|
||||||
body: JSON.stringify({{ q: q, context: ctx, tk: tk }})
|
body: JSON.stringify({{ q: q, context: ctx, tk: tk }}),
|
||||||
|
signal: controller.signal
|
||||||
}});
|
}});
|
||||||
|
|
||||||
|
clearTimeout(timeoutId);
|
||||||
if (!res.ok) {{ if (wrapper) wrapper.remove(); else box.remove(); return; }}
|
if (!res.ok) {{ if (wrapper) wrapper.remove(); else box.remove(); return; }}
|
||||||
|
|
||||||
const reader = res.body.getReader();
|
const reader = res.body.getReader();
|
||||||
@@ -224,9 +254,8 @@ class SXNGPlugin(Plugin):
|
|||||||
if (!started) {{
|
if (!started) {{
|
||||||
text = text.replace(/^[\\s.,;:!?]+/, '');
|
text = text.replace(/^[\\s.,;:!?]+/, '');
|
||||||
if (!text) continue;
|
if (!text) continue;
|
||||||
|
data.textContent = ''; // Clear "Thinking..."
|
||||||
data.appendChild(cursor);
|
data.appendChild(cursor);
|
||||||
if (wrapper) wrapper.style.display = '';
|
|
||||||
box.style.display = 'block';
|
|
||||||
started = true;
|
started = true;
|
||||||
}}
|
}}
|
||||||
cursor.before(text);
|
cursor.before(text);
|
||||||
|
|||||||
+1
-1
@@ -139,7 +139,7 @@ class PluginTestCase(unittest.TestCase):
|
|||||||
response = self.app.post('/ai-stream', json=payload)
|
response = self.app.post('/ai-stream', json=payload)
|
||||||
self.assertEqual(response.status_code, 200)
|
self.assertEqual(response.status_code, 200)
|
||||||
|
|
||||||
# Note: If the API returns a 404/429, data will be empty due to silent error handling.
|
# If the API returns a 404/429, data will be empty due to silent error handling.
|
||||||
# This test ensures the endpoint exists and responds with 200.
|
# This test ensures the endpoint exists and responds with 200.
|
||||||
data = response.data.decode('utf-8')
|
data = response.data.decode('utf-8')
|
||||||
print(f"\n[Test] Received {len(data)} bytes from {plugin.provider}")
|
print(f"\n[Test] Received {len(data)} bytes from {plugin.provider}")
|
||||||
|
|||||||
Reference in New Issue
Block a user