TharaKavin committed on
Commit
78a3b6c
·
verified ·
1 Parent(s): c42a70a

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +13 -3
scraper.py CHANGED
@@ -4,10 +4,20 @@ def scrape_url(url: str) -> str:
4
  try:
5
  page = Fetcher.get(url)
6
 
7
- # Extract text properly
8
- texts = page.css("body *::text").getall()
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Clean text
11
  cleaned = [t.strip() for t in texts if t.strip()]
12
 
13
  return " ".join(cleaned)
 
4
  try:
5
  page = Fetcher.get(url)
6
 
7
+ # Extract raw text safely
8
+ elements = page.css("body *::text")
9
+
10
+ texts = []
11
+
12
+ for el in elements:
13
+ try:
14
+ # Try extracting text
15
+ txt = str(el)
16
+ if txt:
17
+ texts.append(txt)
18
+ except:
19
+ continue
20
 
 
21
  cleaned = [t.strip() for t in texts if t.strip()]
22
 
23
  return " ".join(cleaned)