alfulanny committed on
Commit
dbef2be
·
verified ·
1 Parent(s): 60a350c

Update browser_tools.py

Browse files
Files changed (1) hide show
  1. browser_tools.py +16 -1
browser_tools.py CHANGED
@@ -9,6 +9,12 @@ from typing import Optional
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
 
 
 
 
 
12
  try:
13
  from bs4 import BeautifulSoup
14
  except Exception:
@@ -24,7 +30,10 @@ def visit_page(url: str, max_chars: int = 2000) -> str:
24
  if not url:
25
  return "(no url provided)"
26
  try:
27
- resp = requests.get(url, timeout=10)
 
 
 
28
  resp.raise_for_status()
29
  text = resp.text
30
  if BeautifulSoup is not None:
@@ -39,6 +48,12 @@ def visit_page(url: str, max_chars: int = 2000) -> str:
39
  # fallback: return first chunk of raw HTML (not ideal)
40
  return text[:max_chars]
41
  except Exception as e:
 
 
 
 
 
 
42
  logger.warning("visit_page failed for %s: %s", url, e)
43
  return f"(visit_page error) {e}"
44
 
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
+ # Default headers to mimic a browser and reduce chance of 403 responses
13
+ DEFAULT_HEADERS = {
14
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0 Safari/537.36",
15
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
16
+ }
17
+
18
  try:
19
  from bs4 import BeautifulSoup
20
  except Exception:
 
30
  if not url:
31
  return "(no url provided)"
32
  try:
33
+ # Some domains have SSL certificate issues; disable verification for those
34
+ verify_ssl = not any(domain in url for domain in ["npb.or.jp", "npb.jp"])
35
+ # include headers to reduce bot-blocking by some sites
36
+ resp = requests.get(url, timeout=10, headers=DEFAULT_HEADERS, verify=verify_ssl)
37
  resp.raise_for_status()
38
  text = resp.text
39
  if BeautifulSoup is not None:
 
48
  # fallback: return first chunk of raw HTML (not ideal)
49
  return text[:max_chars]
50
  except Exception as e:
51
+ # Improve error message for common network/DNS failures so the agent
52
+ # can fall back to alternative tools instead of crashing.
53
+ msg = str(e)
54
+ if "NameResolutionError" in msg or "Failed to resolve" in msg or "Temporary failure in name resolution" in msg:
55
+ logger.warning("visit_page DNS/network error for %s: %s", url, e)
56
+ return f"(visit_page error) network/DNS failure when fetching {url}"
57
  logger.warning("visit_page failed for %s: %s", url, e)
58
  return f"(visit_page error) {e}"
59