# NOTE: residue from the web page this listing was copied from (status banner,
# file size, short hash and a line-number gutter). It is not part of the module
# and is kept only as a comment.
# Spaces: Running
# File size: 1,712 Bytes
# 5ccf326
"""
Simple browser tool to fetch and extract textual content from a webpage.
Provides `visit_page(url)` which returns a short cleaned text excerpt.
"""
import requests
import logging
from typing import Optional
logger = logging.getLogger(__name__)

# BeautifulSoup is optional: when it cannot be imported, visit_page falls back
# to returning raw HTML. The catch is deliberately broad so that any
# import-time failure of the optional package degrades the feature instead of
# breaking this module; the reason is logged so the degraded mode is visible.
try:
    from bs4 import BeautifulSoup
except Exception as exc:
    BeautifulSoup = None
    logger.debug("bs4 unavailable (%s); visit_page will return raw HTML", exc)

# smolagents is optional as well: without it no decorated tool is created and
# visit_page_tool is simply the plain visit_page function.
try:
    from smolagents import tool
except Exception as exc:
    tool = None
    logger.debug(
        "smolagents unavailable (%s); visit_page_tool will be undecorated", exc
    )
def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch ``url`` and return a plain-text excerpt of the page.

    The page is downloaded with ``requests`` using a 10 second timeout. When
    BeautifulSoup is available, ``script``, ``style`` and ``noscript``
    elements are removed, the remaining text is split into lines, blank lines
    are dropped and the rest is re-joined one entry per line. Without
    BeautifulSoup the raw response text is used instead.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        max_chars: Upper bound on the length of the returned excerpt.
            Negative values are treated as 0; ``None`` disables the limit.

    Returns:
        The excerpt, ``"(no url provided)"`` when ``url`` is empty or blank,
        or ``"(visit_page error) <reason>"`` when downloading or parsing
        fails. Such failures are logged and reported through the return
        value rather than raised.
    """
    # Callers may pass URLs padded with spaces or newlines; normalise them so
    # a blank string is reported as missing instead of as a fetch error.
    if isinstance(url, str):
        url = url.strip()
    if not url:
        return "(no url provided)"

    try:
        # A negative slice bound would trim characters off the END of the text
        # instead of capping its length, so clamp it. None keeps "no limit".
        limit = None if max_chars is None else max(max_chars, 0)

        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        text = resp.text

        if BeautifulSoup is None:
            # Fallback: first chunk of raw HTML (not ideal).
            return text[:limit]

        soup = BeautifulSoup(text, "html.parser")
        # Drop elements whose content is never visible page text.
        for tag in soup(["script", "style", "noscript"]):
            tag.extract()

        body = soup.get_text(separator=" \n")
        lines = [line for line in map(str.strip, body.splitlines()) if line]
        return " \n".join(lines)[:limit]
    except Exception as e:
        # Best-effort tool: report the failure as text so a calling agent can
        # read it instead of crashing on an exception.
        logger.warning("visit_page failed for %s: %s", url, e)
        return f"(visit_page error) {e}"
# If smolagents is available, expose a decorated tool usable by CodeAgent.
# The decorator derives the tool's schema from the type hints and from a
# Google-style docstring, and rejects functions whose arguments are not
# described under "Args:", so the docstring below is part of the contract.
if tool is not None:
    try:

        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a web page and return a cleaned plain-text excerpt of it.

            Args:
                url: Address of the web page to fetch.
                max_chars: Maximum number of characters of text to return.

            Returns:
                The text excerpt, or a parenthesised message when the URL is
                missing or the page could not be fetched.
            """
            return visit_page(url, max_chars)

    except Exception as exc:
        # Keep the module importable even if decoration fails, but say so:
        # a silent fallback hides the fact that no real tool was created.
        logger.warning(
            "could not wrap visit_page as a smolagents tool: %s", exc
        )
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page

__all__ = ["visit_page", "visit_page_tool"]
|