remove <answer> block response format
This commit is contained in:
+21
-23
@@ -15,7 +15,7 @@ from markupsafe import Markup
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
TOKEN_EXPIRY_SEC = 3600
|
TOKEN_EXPIRY_SEC = 3600
|
||||||
STREAM_CHUNK_SIZE = 4096 # Increased from 128 for I/O efficiency
|
STREAM_CHUNK_SIZE = 256
|
||||||
STREAM_TIMEOUT_SEC = 60
|
STREAM_TIMEOUT_SEC = 60
|
||||||
|
|
||||||
def _get_streaming_connection(url: str):
|
def _get_streaming_connection(url: str):
|
||||||
@@ -701,9 +701,15 @@ FRONTEND_JS_TEMPLATE = r"""
|
|||||||
if (!started && !collectedResponse.trim()) {
|
if (!started && !collectedResponse.trim()) {
|
||||||
const cursor = data.querySelector('.sxng-cursor');
|
const cursor = data.querySelector('.sxng-cursor');
|
||||||
if (cursor) cursor.remove();
|
if (cursor) cursor.remove();
|
||||||
|
|
||||||
const errSpan = document.createElement('span');
|
const errSpan = document.createElement('span');
|
||||||
|
if (thoughtDiv && thoughtDiv.textContent.trim().length > 0) {
|
||||||
|
errSpan.style.color = '#ebcb8b';
|
||||||
|
errSpan.textContent = 'Model provided reasoning but stopped before the final answer. Try adjusting token limits.';
|
||||||
|
} else {
|
||||||
errSpan.style.color = '#bf616a';
|
errSpan.style.color = '#bf616a';
|
||||||
errSpan.textContent = 'No response received. Check API configuration and server logs.';
|
errSpan.textContent = 'No response received. Check API configuration and server logs.';
|
||||||
|
}
|
||||||
data.appendChild(errSpan);
|
data.appendChild(errSpan);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1127,9 +1133,7 @@ class SXNGPlugin(Plugin):
|
|||||||
|
|
||||||
<CORE_DIRECTIVES>
|
<CORE_DIRECTIVES>
|
||||||
{numbered_instructions}
|
{numbered_instructions}
|
||||||
</CORE_DIRECTIVES>
|
</CORE_DIRECTIVES>"""
|
||||||
|
|
||||||
<answer>"""
|
|
||||||
|
|
||||||
def stream_gemini():
|
def stream_gemini():
|
||||||
if '?' in self.endpoint_url:
|
if '?' in self.endpoint_url:
|
||||||
@@ -1140,7 +1144,7 @@ class SXNGPlugin(Plugin):
|
|||||||
conn = None
|
conn = None
|
||||||
try:
|
try:
|
||||||
conn, path = _get_streaming_connection(url)
|
conn, path = _get_streaming_connection(url)
|
||||||
payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature, "stopSequences": ["</answer>"]}})
|
payload = json.dumps({"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"maxOutputTokens": min(self.max_tokens * 4, 8192), "temperature": self.temperature}})
|
||||||
conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"})
|
conn.request("POST", path, body=payload, headers={"Content-Type": "application/json"})
|
||||||
res = conn.getresponse()
|
res = conn.getresponse()
|
||||||
|
|
||||||
@@ -1195,9 +1199,8 @@ class SXNGPlugin(Plugin):
|
|||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"stream": True,
|
"stream": True,
|
||||||
"max_tokens": min(self.max_tokens * 4, 8192), # 4x headroom for reasoning models
|
"max_tokens": min(self.max_tokens * 4, 8192),
|
||||||
"temperature": self.temperature,
|
"temperature": self.temperature
|
||||||
"stop": ["</answer>"]
|
|
||||||
})
|
})
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@@ -1218,23 +1221,23 @@ class SXNGPlugin(Plugin):
|
|||||||
return
|
return
|
||||||
|
|
||||||
decoder = json.JSONDecoder()
|
decoder = json.JSONDecoder()
|
||||||
buffer = b""
|
|
||||||
tokens_yielded = 0
|
tokens_yielded = 0
|
||||||
in_reasoning_block = False
|
in_reasoning_block = False
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
chunk = res.read(STREAM_CHUNK_SIZE)
|
# Use readline() to unblock SSE streaming immediately
|
||||||
if not chunk: break
|
line_bytes = res.readline()
|
||||||
buffer += chunk
|
if not line_bytes: break
|
||||||
while b"\n" in buffer:
|
|
||||||
line_bytes, buffer = buffer.split(b"\n", 1)
|
line = line_bytes.decode('utf-8', errors='replace').strip()
|
||||||
line = line_bytes.decode('utf-8', errors='replace')
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
if line.startswith("data: "):
|
if line.startswith("data: "):
|
||||||
data_str = line[6:].strip()
|
data_str = line[6:].strip()
|
||||||
if data_str == "[DONE]":
|
if data_str == "[DONE]":
|
||||||
if in_reasoning_block:
|
if in_reasoning_block:
|
||||||
yield "\n</think>\n\n"
|
yield "\n</think>\n\n"
|
||||||
if tokens_yielded == 0:
|
|
||||||
logger.warning(f"{PLUGIN_NAME}: Stream completed but yielded 0 tokens.")
|
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
obj, _ = decoder.raw_decode(data_str)
|
obj, _ = decoder.raw_decode(data_str)
|
||||||
@@ -1257,16 +1260,11 @@ class SXNGPlugin(Plugin):
|
|||||||
in_reasoning_block = False
|
in_reasoning_block = False
|
||||||
yield content
|
yield content
|
||||||
tokens_yielded += 1
|
tokens_yielded += 1
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError:
|
||||||
if data_str.strip():
|
|
||||||
logger.debug(f"{PLUGIN_NAME}: Upstream JSON parse error: {e} | Payload: {data_str[:200]}")
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# If the upstream stream ends without a [DONE] marker (e.g. socket failure), automatically emit the closing </think> tag for any reasoning block that is still open.
|
|
||||||
if in_reasoning_block:
|
if in_reasoning_block:
|
||||||
yield "\n</think>\n\n"
|
yield "\n</think>\n\n"
|
||||||
if tokens_yielded == 0:
|
|
||||||
logger.warning(f"{PLUGIN_NAME}: Stream disconnected abruptly and yielded 0 tokens.")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
|
logger.error(f"{PLUGIN_NAME}: {self.provider} stream error: {e}")
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Reference in New Issue
Block a user