Spaces:
Sleeping
Sleeping
Fix pandas file read: download the file with a browser User-Agent header to bypass HTTP 403 errors
Browse files
- api/docs.py +11 -2
api/docs.py
CHANGED
|
@@ -23,6 +23,8 @@ from dependencies import get_http_client, get_llm_router
|
|
| 23 |
from fastapi.responses import StreamingResponse
|
| 24 |
from litellm.router import Router
|
| 25 |
from kreuzberg import ExtractionConfig, extract_bytes
|
|
|
|
|
|
|
| 26 |
|
| 27 |
from schemas import DocInfo, GetMeetingDocsRequest, GetMeetingDocsResponse, DocRequirements, DownloadDocsRequest, GetMeetingsRequest, GetMeetingsResponse, ExtractRequirementsRequest, ExtractRequirementsResponse
|
| 28 |
|
|
@@ -449,8 +451,15 @@ async def get_meeting_docs(req: GetMeetingDocsRequest, http_client: AsyncClient
|
|
| 449 |
|
| 450 |
file_url = f"{url}/{files[0]}"
|
| 451 |
file_url = quote(file_url, safe=":/")
|
| 452 |
-
|
| 453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
filtered_df = df[~(
|
| 455 |
df["Uploaded"].isna())][["TDoc", "Title", "CR category", "For", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
|
| 456 |
filtered_df["URL"] = filtered_df["TDoc"].apply(
|
|
|
|
| 23 |
from fastapi.responses import StreamingResponse
|
| 24 |
from litellm.router import Router
|
| 25 |
from kreuzberg import ExtractionConfig, extract_bytes
|
| 26 |
+
import requests
|
| 27 |
+
from io import BytesIO
|
| 28 |
|
| 29 |
from schemas import DocInfo, GetMeetingDocsRequest, GetMeetingDocsResponse, DocRequirements, DownloadDocsRequest, GetMeetingsRequest, GetMeetingsResponse, ExtractRequirementsRequest, ExtractRequirementsResponse
|
| 30 |
|
|
|
|
| 451 |
|
| 452 |
file_url = f"{url}/{files[0]}"
|
| 453 |
file_url = quote(file_url, safe=":/")
|
| 454 |
+
|
| 455 |
+
headers = {
|
| 456 |
+
"User-Agent": "Mozilla/5.0"
|
| 457 |
+
}
|
| 458 |
+
|
| 459 |
+
resp = requests.get(file_url, headers=headers)
|
| 460 |
+
resp.raise_for_status()
|
| 461 |
+
|
| 462 |
+
df = pd.read_excel(BytesIO(resp.content))
|
| 463 |
filtered_df = df[~(
|
| 464 |
df["Uploaded"].isna())][["TDoc", "Title", "CR category", "For", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
|
| 465 |
filtered_df["URL"] = filtered_df["TDoc"].apply(
|