Skip to content

[Feature]: Add Searxng as a default web search provider (alongside firecrawl, tavily, etc) #5941

@RoboRiley

Description

@RoboRiley

Problem or Use Case

Add SearXNG support to web_tools.py, and to the setup flow and environment configuration, as an alternative to Firecrawl, Tavily, etc.

I would love to add a reranker step as well, but I haven't started that project yet.

Proposed Solution

Modify web_tools.py to include Searxng.

In Backend Selection:

def _has_searxng_config() -> bool:
    """Report whether ``SEARXNG_BASE_URL`` holds a non-blank value.

    An unset variable and a whitespace-only value both count as
    "not configured".
    """
    base_url = os.environ.get("SEARXNG_BASE_URL")
    if base_url is None:
        return False
    # Whitespace-only values are treated the same as an empty string.
    return base_url.strip() != ""

and

def _get_backend() -> str:
    """Resolve the name of the web backend to use.

    An explicit ``web.backend`` entry in config.yaml (set by ``hermes tools``)
    wins whenever it names one of the supported backends. Otherwise the
    backend is inferred from whichever credentials are present, which covers
    users who configured keys manually without running setup.
    """
    supported = ("parallel", "firecrawl", "tavily", "exa", "searxng")
    choice = (_load_web_config().get("backend") or "").lower().strip()
    if choice in supported:
        return choice

    # Manual / legacy config: probe every backend, listed from highest to
    # lowest priority. Firecrawl also counts as available when the managed
    # tool gateway is configured for Nous subscribers.
    searxng_ready = _has_searxng_config()
    firecrawl_ready = (
        _has_env("FIRECRAWL_API_KEY")
        or _has_env("FIRECRAWL_API_URL")
        or _is_tool_gateway_ready()
    )
    parallel_ready = _has_env("PARALLEL_API_KEY")
    tavily_ready = _has_env("TAVILY_API_KEY")
    exa_ready = _has_env("EXA_API_KEY")

    probes = (
        ("searxng", searxng_ready),
        ("firecrawl", firecrawl_ready),
        ("parallel", parallel_ready),
        ("tavily", tavily_ready),
        ("exa", exa_ready),
    )
    detected = [name for name, ready in probes if ready]

    # Nothing detected: keep the historical default (backward compat).
    return detected[0] if detected else "firecrawl"

and

def _is_backend_available(backend: str) -> bool:
    """Tell whether the named backend can be used right now.

    Each known backend maps to a zero-argument probe; only the probe for the
    requested backend is executed. Unknown names are reported as unavailable.
    """
    # Lambdas defer every helper lookup/call until the probe is selected.
    probes = {
        "exa": lambda: _has_env("EXA_API_KEY"),
        "parallel": lambda: _has_env("PARALLEL_API_KEY"),
        "firecrawl": lambda: check_firecrawl_api_key(),
        "tavily": lambda: _has_env("TAVILY_API_KEY"),
        "searxng": lambda: _has_searxng_config(),
    }
    probe = probes.get(backend)
    if probe is None:
        return False
    return probe()

In "Firecrawl Client" section:

def _web_requires_env() -> list[str]:
    """List the environment variable names advertised in the web tool metadata.

    The provider variables are always included; the tool-gateway variables
    are appended only when ``managed_nous_tools_enabled()`` is true.
    """
    provider_vars = [
        "SEARXNG_BASE_URL",
        "SEARXNG_API_KEY",
        "EXA_API_KEY",
        "PARALLEL_API_KEY",
        "TAVILY_API_KEY",
        "FIRECRAWL_API_KEY",
        "FIRECRAWL_API_URL",
    ]
    if not managed_nous_tools_enabled():
        return provider_vars

    gateway_vars = [
        "FIRECRAWL_GATEWAY_URL",
        "TOOL_GATEWAY_DOMAIN",
        "TOOL_GATEWAY_SCHEME",
        "TOOL_GATEWAY_USER_TOKEN",
    ]
    return provider_vars + gateway_vars

In Parallel Search section:

def _normalize_searxng_results(payload: dict, limit: int) -> list[dict]:
    """Convert a decoded SearXNG JSON payload into at most *limit* result rows.

    A missing, null or non-list ``results`` value yields an empty list, as
    does a non-positive *limit*. Entries that are not JSON objects are
    skipped, and null ``url``/``title``/``content`` fields become ``""``.
    Positions are 1-based and count only the rows that are kept.
    """
    raw_results = payload.get("results")
    if not isinstance(raw_results, list):
        return []

    rows: list[dict] = []
    for entry in raw_results:
        # Checked first so that limit <= 0 returns nothing instead of
        # slicing from the end of the list.
        if len(rows) >= limit:
            break
        if not isinstance(entry, dict):
            continue
        rows.append({
            "url": entry.get("url") or "",
            "title": entry.get("title") or "",
            "description": entry.get("content") or "",
            "position": len(rows) + 1,
        })
    return rows


def _searxng_search(query: str, limit: int = 5) -> dict:
    """Search using a self-hosted SearXNG instance.

    Requires the SEARXNG_BASE_URL environment variable. SEARXNG_API_KEY is
    optional; when set it is sent as a ``Bearer`` token in the
    ``Authorization`` header.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        limit: Maximum number of results to return. Only the first result
            page is requested, so fewer rows may come back. Values <= 0
            yield an empty result list.

    Returns:
        ``{"success": True, "data": {"web": [...]}}`` on success, where each
        row has ``url``, ``title``, ``description`` and ``position`` keys, or
        ``{"error": <message>, "success": False}`` on failure. Errors are
        reported through the return value rather than raised.
    """
    from tools.interrupt import is_interrupted
    if is_interrupted():
        return {"error": "Interrupted", "success": False}

    base_url = os.getenv("SEARXNG_BASE_URL", "").strip().rstrip("/")
    api_key = os.getenv("SEARXNG_API_KEY", "").strip()

    if not base_url:
        return {"error": "SEARXNG_BASE_URL environment variable not set.", "success": False}

    url = f"{base_url}/search"
    params = {
        "q": query,
        "format": "json",
        "pageno": 1,
        "language": "en",
    }
    headers = {"Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    logger.info("SearXNG search: '%s' (limit=%d)", query, limit)

    try:
        response = httpx.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
    except httpx.HTTPStatusError as e:
        logger.warning("SearXNG HTTP error: %s", e.response.status_code)
        return {"error": f"SearXNG HTTP error: {e.response.status_code}", "success": False}
    except Exception as e:
        # Tool boundary: transport and JSON-decoding failures are reported to
        # the caller instead of propagating.
        logger.warning("SearXNG request failed: %s", e)
        return {"error": f"SearXNG request failed: {str(e)}", "success": False}

    # A valid JSON body is not necessarily an object (e.g. a proxy error
    # page serialised as a string or list); reject it explicitly rather than
    # crashing on ``.get`` outside the try block.
    if not isinstance(data, dict):
        logger.warning("SearXNG returned a non-object JSON payload")
        return {"error": "SearXNG returned an unexpected response payload.", "success": False}

    web_results = _normalize_searxng_results(data, limit)

    return {"success": True, "data": {"web": web_results}}

Edit the `try:` block that begins with `from tools.interrupt import is_interrupted` (in `web_search_tool`):

    try:
        from tools.interrupt import is_interrupted
        if is_interrupted():
            return json.dumps({"error": "Interrupted", "success": False})

        # Dispatch to the configured backend
        backend = _get_backend()
        if backend == "searxng":
            response_data = _searxng_search(query, limit)
            debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
            result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
            debug_call_data["final_response_size"] = len(result_json)
            _debug.log_call("web_search_tool", debug_call_data)
            _debug.save()
            return result_json

        if backend == "parallel": [...continued...]

Seems to be working.

In addition, some changes are needed in the setup code so that SearXNG is offered as an option alongside Firecrawl, Tavily, etc. during setup, and so that the base URL and API key are saved to the environment. Right now the env has to be edited manually.

Bonus would be if I could set a reranker url, api key, and model to process the searxng results.

Alternatives Considered

Messed with skills to do this but it was not reliable enough.

Feature Type

New tool

Scope

None

Contribution

  • I'd like to implement this myself and submit a PR

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions