heymenn committed on
Commit
69a660e
·
verified ·
1 Parent(s): d8b43e2

Fix pandas Excel file read to bypass HTTP 403 error

Browse files
Files changed (1) hide show
  1. api/docs.py +11 -2
api/docs.py CHANGED
@@ -23,6 +23,8 @@ from dependencies import get_http_client, get_llm_router
23
  from fastapi.responses import StreamingResponse
24
  from litellm.router import Router
25
  from kreuzberg import ExtractionConfig, extract_bytes
 
 
26
 
27
  from schemas import DocInfo, GetMeetingDocsRequest, GetMeetingDocsResponse, DocRequirements, DownloadDocsRequest, GetMeetingsRequest, GetMeetingsResponse, ExtractRequirementsRequest, ExtractRequirementsResponse
28
 
@@ -449,8 +451,15 @@ async def get_meeting_docs(req: GetMeetingDocsRequest, http_client: AsyncClient
449
 
450
  file_url = f"{url}/{files[0]}"
451
  file_url = quote(file_url, safe=":/")
452
- print(file_url)
453
- df = pd.read_excel(file_url)
 
 
 
 
 
 
 
454
  filtered_df = df[~(
455
  df["Uploaded"].isna())][["TDoc", "Title", "CR category", "For", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
456
  filtered_df["URL"] = filtered_df["TDoc"].apply(
 
23
  from fastapi.responses import StreamingResponse
24
  from litellm.router import Router
25
  from kreuzberg import ExtractionConfig, extract_bytes
26
+ import requests
27
+ from io import BytesIO
28
 
29
  from schemas import DocInfo, GetMeetingDocsRequest, GetMeetingDocsResponse, DocRequirements, DownloadDocsRequest, GetMeetingsRequest, GetMeetingsResponse, ExtractRequirementsRequest, ExtractRequirementsResponse
30
 
 
451
 
452
  file_url = f"{url}/{files[0]}"
453
  file_url = quote(file_url, safe=":/")
454
+
455
+ headers = {
456
+ "User-Agent": "Mozilla/5.0"
457
+ }
458
+
459
+ resp = requests.get(file_url, headers=headers)
460
+ resp.raise_for_status()
461
+
462
+ df = pd.read_excel(BytesIO(resp.content))
463
  filtered_df = df[~(
464
  df["Uploaded"].isna())][["TDoc", "Title", "CR category", "For", "Source", "Type", "Agenda item", "Agenda item description", "TDoc Status"]]
465
  filtered_df["URL"] = filtered_df["TDoc"].apply(