Spaces:
Running
Running
Update browser_tools.py
Browse files- browser_tools.py +16 -1
browser_tools.py
CHANGED
|
@@ -9,6 +9,12 @@ from typing import Optional
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
try:
|
| 13 |
from bs4 import BeautifulSoup
|
| 14 |
except Exception:
|
|
@@ -24,7 +30,10 @@ def visit_page(url: str, max_chars: int = 2000) -> str:
|
|
| 24 |
if not url:
|
| 25 |
return "(no url provided)"
|
| 26 |
try:
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
| 28 |
resp.raise_for_status()
|
| 29 |
text = resp.text
|
| 30 |
if BeautifulSoup is not None:
|
|
@@ -39,6 +48,12 @@ def visit_page(url: str, max_chars: int = 2000) -> str:
|
|
| 39 |
# fallback: return first chunk of raw HTML (not ideal)
|
| 40 |
return text[:max_chars]
|
| 41 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
logger.warning("visit_page failed for %s: %s", url, e)
|
| 43 |
return f"(visit_page error) {e}"
|
| 44 |
|
|
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
| 12 |
+
# Default headers to mimic a browser and reduce chance of 403 responses
|
| 13 |
+
DEFAULT_HEADERS = {
|
| 14 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0 Safari/537.36",
|
| 15 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
try:
|
| 19 |
from bs4 import BeautifulSoup
|
| 20 |
except Exception:
|
|
|
|
| 30 |
if not url:
|
| 31 |
return "(no url provided)"
|
| 32 |
try:
|
| 33 |
+
# Some domains have SSL certificate issues; disable verification for those
|
| 34 |
+
verify_ssl = not any(domain in url for domain in ["npb.or.jp", "npb.jp"])
|
| 35 |
+
# include headers to reduce bot-blocking by some sites
|
| 36 |
+
resp = requests.get(url, timeout=10, headers=DEFAULT_HEADERS, verify=verify_ssl)
|
| 37 |
resp.raise_for_status()
|
| 38 |
text = resp.text
|
| 39 |
if BeautifulSoup is not None:
|
|
|
|
| 48 |
# fallback: return first chunk of raw HTML (not ideal)
|
| 49 |
return text[:max_chars]
|
| 50 |
except Exception as e:
|
| 51 |
+
# Improve error message for common network/DNS failures so the agent
|
| 52 |
+
# can fall back to alternative tools instead of crashing.
|
| 53 |
+
msg = str(e)
|
| 54 |
+
if "NameResolutionError" in msg or "Failed to resolve" in msg or "Temporary failure in name resolution" in msg:
|
| 55 |
+
logger.warning("visit_page DNS/network error for %s: %s", url, e)
|
| 56 |
+
return f"(visit_page error) network/DNS failure when fetching {url}"
|
| 57 |
logger.warning("visit_page failed for %s: %s", url, e)
|
| 58 |
return f"(visit_page error) {e}"
|
| 59 |
|