"""Streamlit page that runs the ``advanced`` Scrapy spider and shows its JSON feed.

The page asks for a start URL, launches ``scrapy crawl advanced`` as a
subprocess with that URL as the ``start_url`` spider argument, then loads the
JSON feed the crawl wrote and offers it for display and download.
"""

import streamlit as st
import subprocess
import json
import os
import uuid


st.title("Advanced Scrapy Scraper")

# Strip surrounding whitespace so a blank-looking input does not start a crawl.
url = st.text_input("Start URL", "https://example.com").strip()
run = st.button("Run Scrape")

if run and url:
    # uuid4 gives every run its own feed file name.
    output_file = f"output_{uuid.uuid4().hex}.json"

    try:
        with st.spinner("Scraping..."):
            # Passed as an argument list (no shell), so the user-supplied URL
            # reaches Scrapy as a single argument and is never shell-parsed.
            cmd = [
                "scrapy", "crawl", "advanced",
                "-a", f"start_url={url}",
                "-o", output_file,
                "--nolog",
            ]
            # NOTE(review): cwd="." presumably relies on the app being started
            # from inside the Scrapy project directory -- confirm.
            result = subprocess.run(
                cmd,
                cwd=".",
                capture_output=True,
                text=True,
            )
    except OSError as exc:
        # Raised when the ``scrapy`` executable cannot be found or started.
        st.error("Could not start Scrapy")
        st.text(str(exc))
    else:
        if result.returncode != 0:
            st.error("Scrapy failed")
            st.text(result.stderr)
        elif not os.path.exists(output_file):
            st.error("No output file generated")
        else:
            try:
                # Read explicitly as UTF-8 rather than the platform default.
                with open(output_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                st.error("Output file is not valid JSON")
            else:
                st.success("Scrape complete")
                st.json(data)

                st.download_button(
                    "Download JSON",
                    json.dumps(data, indent=2),
                    "results.json",
                    "application/json",
                )
    finally:
        # The feed is only a hand-off between Scrapy and this page; remove it
        # on every path so runs do not accumulate files on disk.
        try:
            os.remove(output_file)
        except FileNotFoundError:
            # Nothing to clean up: the crawl never created the feed.
            pass