feat: add optional ollama unload after response, PR credit to @aestheticjmack
This commit is contained in:
@@ -44,8 +44,8 @@ Configure via the environment variables:
|
|||||||
- `LLM_CONTEXT_SHALLOW_COUNT`: Results with headlines only (additional breadth). Default `15`.
|
- `LLM_CONTEXT_SHALLOW_COUNT`: Results with headlines only (additional breadth). Default `15`.
|
||||||
- `LLM_TABS`: Tab whitelist, comma delimiter. Default `general,science,it,news`.
|
- `LLM_TABS`: Tab whitelist, comma delimiter. Default `general,science,it,news`.
|
||||||
- `LLM_INTERACTIVE`: UI mode. Default is `true` (interactive: copy, regenerate, follow up). Set to `false` for simple response only mode.
|
- `LLM_INTERACTIVE`: UI mode. Default is `true` (interactive: copy, regenerate, follow up). Set to `false` for simple response only mode.
|
||||||
* `LLM_OLLAMA_UNLOAD_AFTER`: If true, unload Ollama model immediately after each response (calls `/api/chat` with `keep_alive: 0`).
|
- `LLM_QUESTION_MARK_REQUIRED`: Only trigger AI answers when the query contains `?`. Default `false`.
|
||||||
* `LLM_OLLAMA_UNLOAD_URL`: Override unload endpoint (default derived from `LLM_URL` host/port).
|
- `LLM_OLLAMA_UNLOAD_AFTER`: Unload Ollama model after each response. Default `false`.
|
||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
1 user initial search
|
1 user initial search
|
||||||
|
|||||||
+12
-15
@@ -17,11 +17,6 @@ TOKEN_EXPIRY_SEC = 3600
|
|||||||
STREAM_CHUNK_SIZE = 128
|
STREAM_CHUNK_SIZE = 128
|
||||||
STREAM_TIMEOUT_SEC = 60
|
STREAM_TIMEOUT_SEC = 60
|
||||||
|
|
||||||
def _env_flag(name: str, default: bool = False) -> bool:
|
|
||||||
v = os.getenv(name)
|
|
||||||
if v is None:
|
|
||||||
return default
|
|
||||||
return v.strip().lower() in ('1', 'true', 'yes', 'on')
|
|
||||||
def _get_streaming_connection(url: str):
|
def _get_streaming_connection(url: str):
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
host = parsed.hostname
|
host = parsed.hostname
|
||||||
@@ -482,6 +477,7 @@ class SXNGPlugin(Plugin):
|
|||||||
|
|
||||||
def _load_config(self):
|
def _load_config(self):
|
||||||
self.interactive = os.getenv('LLM_INTERACTIVE', 'true').lower().strip() in ('true', '1', 'yes', 'on')
|
self.interactive = os.getenv('LLM_INTERACTIVE', 'true').lower().strip() in ('true', '1', 'yes', 'on')
|
||||||
|
self.question_mark_required = os.getenv('LLM_QUESTION_MARK_REQUIRED', 'false').lower().strip() in ('true', '1', 'yes', 'on')
|
||||||
raw_provider = os.getenv('LLM_PROVIDER', '').lower().strip()
|
raw_provider = os.getenv('LLM_PROVIDER', '').lower().strip()
|
||||||
|
|
||||||
raw_url = os.getenv('LLM_URL', '').strip()
|
raw_url = os.getenv('LLM_URL', '').strip()
|
||||||
@@ -553,16 +549,11 @@ class SXNGPlugin(Plugin):
|
|||||||
self.endpoint_url = raw_url
|
self.endpoint_url = raw_url
|
||||||
|
|
||||||
|
|
||||||
# Ollama: optional "unload after response" behavior (plugin-specific).
|
# Ollama: optional "unload after response" (frees VRAM between queries).
|
||||||
# Enable with:
|
# Enable with: LLM_OLLAMA_UNLOAD_AFTER=true
|
||||||
# LLM_OLLAMA_UNLOAD_AFTER=1
|
self.ollama_unload_after = os.getenv('LLM_OLLAMA_UNLOAD_AFTER', 'false').lower().strip() in ('true', '1', 'yes', 'on')
|
||||||
# Optional override:
|
self.ollama_unload_url = ''
|
||||||
# LLM_OLLAMA_UNLOAD_URL=http://<host>:11434/api/chat
|
if self.provider == 'ollama' and self.ollama_unload_after:
|
||||||
self.ollama_unload_after = (
|
|
||||||
_env_flag('LLM_OLLAMA_UNLOAD_AFTER', False) or _env_flag('OLLAMA_UNLOAD_AFTER', False)
|
|
||||||
)
|
|
||||||
self.ollama_unload_url = (os.getenv('LLM_OLLAMA_UNLOAD_URL') or os.getenv('OLLAMA_UNLOAD_URL') or '').strip()
|
|
||||||
if self.provider == 'ollama' and self.ollama_unload_after and not self.ollama_unload_url:
|
|
||||||
try:
|
try:
|
||||||
p = urlparse(self.endpoint_url)
|
p = urlparse(self.endpoint_url)
|
||||||
scheme = p.scheme or 'http'
|
scheme = p.scheme or 'http'
|
||||||
@@ -980,6 +971,12 @@ class SXNGPlugin(Plugin):
|
|||||||
if request and hasattr(request, 'headers') and request.headers.get('X-AI-Auxiliary'):
|
if request and hasattr(request, 'headers') and request.headers.get('X-AI-Auxiliary'):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
if request and request.form.get('format', 'html') != 'html':
|
||||||
|
return results
|
||||||
|
|
||||||
|
if self.question_mark_required and '?' not in search.search_query.query:
|
||||||
|
return results
|
||||||
|
|
||||||
current_tabs = set(search.search_query.categories)
|
current_tabs = set(search.search_query.categories)
|
||||||
if not current_tabs: current_tabs = {'general'}
|
if not current_tabs: current_tabs = {'general'}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user