"""Streamlit app: fetch a page through the Bright Data request API and show it as text.

The API key is read from the ``APIKEY_BRIGHTDATA`` environment variable. The
user supplies a target URL; the returned HTML is converted to plain text with
``html2text`` and offered for viewing and download.
"""
import os

import html2text
import requests
import streamlit as st

# Endpoint and timeout used for every fetch.
BRIGHTDATA_ENDPOINT = "https://api.brightdata.com/request"
REQUEST_TIMEOUT_SECONDS = 60

api_key = os.getenv("APIKEY_BRIGHTDATA")
st.title("ByPass Capcha & Text Extractor")

zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")


def _fetch_html(target_url, token, zone_name, timeout=REQUEST_TIMEOUT_SECONDS):
    """Return the raw body the request API sends back for *target_url*.

    Args:
        target_url: Page to fetch.
        token: Bearer token placed in the ``Authorization`` header.
        zone_name: Value sent as the ``zone`` field of the JSON payload.
        timeout: Seconds to wait before ``requests`` gives up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx status (via ``raise_for_status``).
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    }
    payload = {
        "zone": zone_name,
        "url": target_url,
        "format": "raw",
    }
    response = requests.post(
        BRIGHTDATA_ENDPOINT,
        json=payload,
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text


if st.button("Extract Text"):
    # Strip so a whitespace-only entry is treated as empty.
    target = url.strip()
    if not api_key:
        # Without this check the request would go out with "Bearer None".
        st.error(
            "API key not configured. "
            "Set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target:
        st.warning("Please enter a target URL.")
    else:
        with st.spinner("Fetching page..."):
            try:
                html = _fetch_html(target, api_key, zone)
            except requests.RequestException as exc:
                # Only network/HTTP failures are reported here; bugs in the
                # conversion or rendering code below are not masked.
                st.error(f"Error: {exc}")
            else:
                text = html2text.html2text(html)
                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button(
                    "Download as .txt", text, file_name="extracted.txt"
                )