diff --git a/.gitignore b/.gitignore index f4c2ae9..b13f288 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ __pycache__/ *$py.class venv/ .env +dev/.env .idea/ .vscode/ .agent/ \ No newline at end of file diff --git a/README.md b/README.md index 71bc7c8..99209b5 100644 --- a/README.md +++ b/README.md @@ -104,14 +104,44 @@ Configure via environment variables. ## Known Issues +- [ ] Update README with all updates + - [x] When asking a follow up question the previous output disappears -- [ ] Parts of the UI are not theme aware resulting in a unpolished look when not using a dark theme + +- [x] Parts of the UI are not theme-aware, resulting in an unpolished look when not using a dark theme + - [x] When SearXNG provides a info blob for a search it appears on top of the overview i.e. `Wikipedia` or `Linux` For any issues not stated here please create an issue ticket on [Gitea](https://git.tysstech.com/TySS-Dev/ollama-ai-answers-searxng/issues) or [GitHub](https://github.com/TySP-Dev/ollama-ai-answers-searxng/issues) and add the `bug` tag. 
## Roadmap +### Dev Server + +- [x] Stream viewer — show tokens arriving in real time in the debug panel as they come out of Valkey, so you can see exactly what the LLM is generating chunk by chunk + +- [x] TF-IDF score visualizer — show a table of which URLs were fetched, their scores, and which chunks were selected for context + +- [ ] Intent detection display — show what intent was detected and which system prompt was used for each query + +- [ ] Saved queries — save/load test queries so you can quickly re-run the same set of searches after making changes to the plugin + +- [ ] A/B model comparison — run the same query against two different models simultaneously and show both responses side by side + +- [ ] Response time breakdown — show how long each phase took: SearXNG fetch, page fetching, TF-IDF scoring, LLM stream start, stream complete + +- [ ] Context inspector — show the full assembled context string that gets sent to the LLM, so you can see exactly what it's working with + +- [ ] Prompt viewer — show the full system prompt + user prompt that gets sent to Ollama + +- [ ] Export button — copy the full context + prompt + response as a JSON blob for bug reports + +- [ ] Per-intent system prompt editor — edit the system prompts for each intent type live without restarting + +- [ ] Token counter — show estimated token count of the context being sent + +### Plugin + - [ ] Working on feature plans ## Architecture diff --git a/ollama_answers.py b/ollama_answers.py index 2aa4d76..0c19211 100644 --- a/ollama_answers.py +++ b/ollama_answers.py @@ -1725,6 +1725,16 @@ class SXNGPlugin(Plugin): job_id = hashlib.sha256(f"{time.time()}{q}".encode()).hexdigest()[:16] + # Persist intent for dev UI + logger.warning(f"INTENT BEFORE PERSIST: {repr(intent)}") + logger.warning(f"JOB_ID BEFORE PERSIST: {repr(job_id)}") + try: + vk = _get_valkey() + vk.setex(f"ai:job:{job_id}:intent", 3600, intent) + logger.debug(f"{PLUGIN_NAME}: persisted intent '{intent}' for job {job_id}") + 
except Exception: + logger.exception(f"{PLUGIN_NAME}: failed to persist intent") + payload_dict = { "model": effective_model, "messages": [ @@ -2038,6 +2048,17 @@ class SXNGPlugin(Plugin): detected_intent = _detect_intent(q_clean) js_intent = safe_json(detected_intent) + # Persist intent for dev tooling / UI + try: + vk = _get_valkey() + vk.setex( + f"ai:job:{job_id}:intent", + 1800, + detected_intent + ) + except Exception as e: + logger.debug(f"{PLUGIN_NAME}: failed to persist intent: {e}") + b64_context = base64.b64encode(context_str.encode('utf-8')).decode('utf-8') total_context_count = self.context_deep_count + self.context_shallow_count diff --git a/requirements.txt b/requirements.txt index 4912bfe..441583d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ flask flask-babel certifi +python-dotenv