File size: 1,712 Bytes
5ccf326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
Simple browser tool to fetch and extract textual content from a webpage.

Provides `visit_page(url)` which returns a short cleaned text excerpt.
"""
import requests
import logging
from typing import Optional

logger = logging.getLogger(__name__)

try:
    from bs4 import BeautifulSoup
except Exception:
    BeautifulSoup = None

try:
    from smolagents import tool
except Exception:
    tool = None


def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch ``url`` and return at most ``max_chars`` characters of its text.

    When BeautifulSoup is available, ``<script>``, ``<style>`` and
    ``<noscript>`` elements are dropped and the remaining text is returned
    as stripped, non-empty lines joined by a space followed by a newline.
    Otherwise the first ``max_chars`` characters of the raw response text
    are returned.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        max_chars: Upper bound on the length of the returned excerpt.
            Negative values are treated as 0.

    Returns:
        The excerpt, ``"(no url provided)"`` when ``url`` is empty or blank,
        or a ``"(visit_page error) ..."`` message when fetching or parsing
        fails (the failure is logged, not raised).
    """
    # Callers may pass URLs padded with spaces or newlines; a blank URL is
    # treated the same as a missing one rather than triggering a request.
    if isinstance(url, str):
        url = url.strip()
    if not url:
        return "(no url provided)"
    try:
        # A negative slice bound would cut from the END of the text and
        # return almost the whole page, so clamp it to zero.
        limit = None if max_chars is None else max(max_chars, 0)

        resp = requests.get(url, timeout=10)
        resp.raise_for_status()

        # For text/* responses without an explicit charset, requests falls
        # back to ISO-8859-1, which garbles UTF-8 pages; use the encoding
        # detected from the body instead.
        content_type = resp.headers.get("Content-Type", "").lower()
        if content_type.startswith("text/") and "charset" not in content_type:
            resp.encoding = resp.apparent_encoding
        text = resp.text

        if BeautifulSoup is None:
            # fallback: return first chunk of raw HTML (not ideal)
            return text[:limit]

        soup = BeautifulSoup(text, "html.parser")
        # remove non-visible content before extracting text
        for tag in soup(["script", "style", "noscript"]):
            tag.extract()
        body = soup.get_text(separator=" \n")
        stripped = (line.strip() for line in body.splitlines())
        return " \n".join(line for line in stripped if line)[:limit]
    except Exception as e:
        # Deliberately best-effort: report the failure as text so callers
        # always get a string back.
        logger.warning("visit_page failed for %s: %s", url, e)
        return f"(visit_page error) {e}"


# If smolagents is available, expose a decorated tool usable by CodeAgent.
# The `@tool` decorator builds the tool schema from the type hints and the
# docstring, so the docstring must describe every argument under "Args:";
# otherwise decoration fails and we fall back to the plain function.
if tool is not None:
    try:
        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a webpage and return a text excerpt of its content.

            Returns an error message string if the page cannot be fetched.

            Args:
                url: Address of the webpage to fetch.
                max_chars: Maximum number of characters to return.
            """
            return visit_page(url, max_chars)
    except Exception as exc:
        # Keep the module importable, but make the downgrade visible
        # instead of swallowing it silently.
        logger.warning(
            "could not wrap visit_page as a smolagents tool, "
            "falling back to the plain function: %s",
            exc,
        )
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page

__all__ = ["visit_page", "visit_page_tool"]