Spaces:

TharaKavin
/

Web-Rag

Sleeping

TharaKavin committed on Apr 2

Commit

78a3b6c

verified ·

1 Parent(s): c42a70a

Update scraper.py

Files changed (1) hide show

scraper.py CHANGED Viewed

@@ -4,10 +4,20 @@ def scrape_url(url: str) -> str:
     try:
         page = Fetcher.get(url)
-        # ✅ Extract text properly
-        texts = page.css("body *::text").getall()
-        # Clean text
         cleaned = [t.strip() for t in texts if t.strip()]
         return " ".join(cleaned)

     try:
         page = Fetcher.get(url)
+        # Extract raw text safely
+        elements = page.css("body *::text")
+        texts = []
+        for el in elements:
+            try:
+                # Try extracting text
+                txt = str(el)
+                if txt:
+                    texts.append(txt)
+            except:
+                continue
         cleaned = [t.strip() for t in texts if t.strip()]
         return " ".join(cleaned)