remove <answer> block response format

This commit is contained in:
cra88y/pc
2026-05-03 09:33:34 -05:00
parent d0c29f4b34
commit b85e4ea76c
+21 -23
View File
@@ -15,7 +15,7 @@ from markupsafe import Markup
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
TOKEN_EXPIRY_SEC = 3600 TOKEN_EXPIRY_SEC = 3600
STREAM_CHUNK_SIZE = 4096 # Increased from 128 for I/O efficiency STREAM_CHUNK_SIZE = 256
STREAM_TIMEOUT_SEC = 60 STREAM_TIMEOUT_SEC = 60
def _get_streaming_connection(url: str): def _get_streaming_connection(url: str):
@@ -701,9 +701,15 @@ FRONTEND_JS_TEMPLATE = r"""
if (!started && !collectedResponse.trim()) { if (!started && !collectedResponse.trim()) {
const cursor = data.querySelector('.sxng-cursor'); const cursor = data.querySelector('.sxng-cursor');
if (cursor) cursor.remove(); if (cursor) cursor.remove();
const errSpan = document.createElement('span'); const errSpan = document.createElement('span');
if (thoughtDiv && thoughtDiv.textContent.trim().length > 0) {
errSpan.style.color = '#ebcb8b';
errSpan.textContent = 'Model provided reasoning but stopped before the final answer. Try adjusting token limits.';
} else {
errSpan.style.color = '#bf616a'; errSpan.style.color = '#bf616a';
errSpan.textContent = 'No response received. Check API configuration and server logs.'; errSpan.textContent = 'No response received. Check API configuration and server logs.';
}
data.appendChild(errSpan); data.appendChild(errSpan);
return; return;
} }
@@ -1127,9 +1133,7 @@ class SXNGPlugin(Plugin):
<CORE_DIRECTIVES> <CORE_DIRECTIVES>
{numbered_instructions} {numbered_instructions}
</CORE_DIRECTIVES> </CORE_DIRECTIVES>"""
<answer>"""
def stream_gemini(): def stream_gemini():
if '?' in self.endpoint_url: if '?' in self.endpoint_url:
@@ -1140,7 +1144,7 @@ class SXNGPlugin(Plugin):
conn = None conn = None
try: try:
conn, path = _get_streaming_connection(url) conn, path = _get_streaming_connection(url)
payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature, "stopSequences": ["</answer>"]}}) payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}})
conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"}) conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"})
res = conn.getresponse() res = conn.getresponse()
@@ -1195,9 +1199,8 @@ class SXNGPlugin(Plugin):
"model": self.model, "model": self.model,
"messages": [{"role": "user", "content": prompt}], "messages": [{"role": "user", "content": prompt}],
"stream": True, "stream": True,
"max_tokens": min(self.max_tokens * 4, 8192), # 4x headroom for reasoning models "max_tokens": min(self.max_tokens * 4, 8192),
"temperature": self.temperature, "temperature": self.temperature
"stop": ["</answer>"]
}) })
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
@@ -1218,23 +1221,23 @@ class SXNGPlugin(Plugin):
return return
decoder = json.JSONDecoder() decoder = json.JSONDecoder()
buffer = b""
tokens_yielded = 0 tokens_yielded = 0
in_reasoning_block = False in_reasoning_block = False
while True: while True:
chunk = res.read(STREAM_CHUNK_SIZE) # Use readline() to unblock SSE streaming immediately
if not chunk: break line_bytes = res.readline()
buffer += chunk if not line_bytes: break
while b"\n" in buffer:
line_bytes, buffer = buffer.split(b"\n", 1) line = line_bytes.decode('utf-8', errors='replace').strip()
line = line_bytes.decode('utf-8', errors='replace') if not line:
continue
if line.startswith("data: "): if line.startswith("data: "):
data_str = line[6:].strip() data_str = line[6:].strip()
if data_str == "[DONE]": if data_str == "[DONE]":
if in_reasoning_block: if in_reasoning_block:
yield "\n</think>\n\n" yield "\n</think>\n\n"
if tokens_yielded == 0:
logger.warning(f"{PLUGIN_NAME}: Stream completed but yielded 0 tokens.")
return return
try: try:
obj, _ = decoder.raw_decode(data_str) obj, _ = decoder.raw_decode(data_str)
@@ -1257,16 +1260,11 @@ class SXNGPlugin(Plugin):
in_reasoning_block = False in_reasoning_block = False
yield content yield content
tokens_yielded += 1 tokens_yielded += 1
except json.JSONDecodeError as e: except json.JSONDecodeError:
if data_str.strip():
logger.debug(f"{PLUGIN_NAME}: Upstream JSON parse error: {e} | Payload: {data_str[:200]}")
pass pass
# automatically inject closure bounds upon upstream socket failure.
if in_reasoning_block: if in_reasoning_block:
yield "\n</think>\n\n" yield "\n</think>\n\n"
if tokens_yielded == 0:
logger.warning(f"{PLUGIN_NAME}: Stream disconnected abruptly and yielded 0 tokens.")
except Exception as e: except Exception as e:
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}") logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
finally: finally: