| import requests |
| import pandas as pd |
| from langchain_core.tools import tool |
| from datetime import datetime |
| from typing import Literal |
| from pydantic import BaseModel, Field |
| import functools |
|
|
|
|
# Identifying User-Agent string sent with every SEC request made in this file.
USER_AGENT = "Dev Goyal devgoyal9031@gmail.com"

# Header dict handed to each requests.get call below.
HEADERS = {
    "User-Agent": USER_AGENT,
}
|
|
@functools.lru_cache(maxsize=1)
def _get_ticker_to_cik_mapping() -> dict[str, str]:
    """Fetch and cache the SEC ticker-to-CIK mapping.

    Downloads ``company_tickers.json`` from sec.gov and indexes it by
    upper-cased ticker. A successful result is memoized by ``lru_cache``, so
    later calls reuse it instead of hitting the network again.

    Returns:
        Mapping of ticker symbol to CIK, left-padded with zeros to 10 digits.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    print("[System: Fetching SEC ticker to CIK mapping...]")
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.raise_for_status()

    # Only each entry's ticker and CIK are needed; the top-level keys of the
    # payload are ignored.
    return {
        entry["ticker"].upper(): str(entry["cik_str"]).zfill(10)
        for entry in response.json().values()
    }
|
|
def get_cik_from_ticker(ticker: str) -> str:
    """Return the CIK registered for *ticker* (matched case-insensitively).

    Raises:
        ValueError: If the upper-cased ticker is not in the SEC mapping.
    """
    symbol = ticker.upper()
    cik = _get_ticker_to_cik_mapping().get(symbol)
    if cik is None:
        raise ValueError(f"Ticker {symbol} not found in SEC database.")
    return cik
|
|
def get_latest_10k_url(ticker: str) -> str:
    """Find the URL of the most recent 10-K filing for a given ticker.

    Resolves the ticker to a CIK, downloads the company's submissions index
    from data.sec.gov and returns the full-submission ``.txt`` URL of the
    first entry whose form type is exactly ``10-K`` (the recent-filings list
    is assumed to be ordered newest first).

    Returns:
        The document URL on success. This function never raises: when no 10-K
        is listed it returns ``"No 10-K found for <ticker>."`` and on any
        failure (unknown ticker, network error, unexpected payload) it returns
        ``"Error: <message>"``.
    """
    try:
        cik = get_cik_from_ticker(ticker)
        url = f"https://data.sec.gov/submissions/CIK{cik}.json"

        print(f"[System: Fetching filing history for CIK {cik}...]")
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()

        recent = response.json()["filings"]["recent"]

        # 'form' and 'accessionNumber' are parallel lists, one entry per filing.
        for form, accession_number in zip(recent["form"], recent["accessionNumber"]):
            if form == "10-K":
                # The archive folder is the accession number without dashes;
                # the CIK in the path is written without its zero padding.
                folder = accession_number.replace("-", "")
                return (
                    "https://www.sec.gov/Archives/edgar/data/"
                    f"{cik.lstrip('0')}/{folder}/{accession_number}.txt"
                )

        return f"No 10-K found for {ticker}."

    except Exception as e:
        # Deliberate catch-all: callers receive a message string, not an exception.
        return f"Error: {e}"
| |
# Closed set of XBRL concept names accepted by the `concept` field below.
_XBRLConcept = Literal[
    "Revenues",
    "NetIncomeLoss",
    "Assets",
    "Liabilities",
    "GrossProfit",
    "OperatingIncomeLoss",
    "AssetsCurrent",
    "LiabilitiesCurrent",
    "NetCashProvidedByUsedInOperatingActivities",
    "PaymentsToAcquirePropertyPlantAndEquipment",
    "EntityCommonStockSharesOutstanding",
]


class XBRLConceptInput(BaseModel):
    # Argument schema for get_company_concept_xbrl (handed to @tool as
    # args_schema). Documented with comments instead of a class docstring so
    # the schema generated from this model is left untouched.

    # Ticker symbol of the company to look up.
    ticker: str = Field(
        ...,
        description="The official uppercase ticker symbol (e.g., AAPL).",
    )

    # One of the concept names enumerated in _XBRLConcept.
    concept: _XBRLConcept = Field(
        ...,
        description="You MUST select the exact SEC XBRL concept from this list that best matches the user's request.",
    )
|
|
| |
# Concepts accepted by XBRLConceptInput that SEC does not publish as us-gaap
# facts measured in USD, mapped to the (taxonomy, unit) they are filed under.
_CONCEPT_TAXONOMY_UNIT = {
    "EntityCommonStockSharesOutstanding": ("dei", "shares"),
}


@tool(args_schema=XBRLConceptInput)
def get_company_concept_xbrl(ticker: str, concept: str) -> str:
    """
    Fetches official SEC accounting metrics for a company across recent quarters.
    CRITICAL INSTRUCTIONS:
    1. 'ticker': Must be the official uppercase ticker symbol (e.g., AAPL).
    2. 'concept': You MUST use one of these exact SEC XBRL concepts (case-sensitive):
       -- Core Size --
       - 'Revenues' (Total Revenue / Sales)
       - 'NetIncomeLoss' (Net Income / Profit)
       - 'Assets' (Total Assets)
       - 'Liabilities' (Total Liabilities)

       -- Margins & Liquidity --
       - 'GrossProfit' (Revenue minus Cost of Goods Sold)
       - 'OperatingIncomeLoss' (Operating Income)
       - 'AssetsCurrent' (Short-term assets like cash/inventory)
       - 'LiabilitiesCurrent' (Short-term debt)

       -- Cash Flow & Valuation --
       - 'NetCashProvidedByUsedInOperatingActivities' (Operating Cash Flow)
       - 'PaymentsToAcquirePropertyPlantAndEquipment' (Capital Expenditures / CapEx)
       - 'EntityCommonStockSharesOutstanding' (Total shares outstanding)

    Do not guess concepts. Only use the exact strings listed above.
    """
    # Maintainer notes (kept out of the docstring, which @tool exposes to the
    # model as the tool description, so its wording is left unchanged):
    #   * Returns a newline-terminated, human-readable summary of up to four
    #     period-end values from the last three calendar years.
    #   * Never raises: every failure is reported as an
    #     "Error fetching XBRL data: ..." string.
    try:
        cik = get_cik_from_ticker(ticker)
        # Most concepts are us-gaap monetary facts; the exceptions live in
        # _CONCEPT_TAXONOMY_UNIT (requesting them under us-gaap/USD cannot work).
        taxonomy, unit = _CONCEPT_TAXONOMY_UNIT.get(concept, ("us-gaap", "USD"))
        url = (
            "https://data.sec.gov/api/xbrl/companyconcept/"
            f"CIK{cik}/{taxonomy}/{concept}.json"
        )

        print(f"[System: Fetching latest {concept} for {ticker}...]")
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        data = response.json()

        records = data.get("units", {}).get(unit)
        if not records:
            return f"No {unit} data found for {concept}."

        df = pd.DataFrame(records)
        df["end"] = pd.to_datetime(df["end"])
        df["filed"] = pd.to_datetime(df["filed"])

        # Keep only quarterly/annual reports and their amendments.
        df = df[df["form"].isin(["10-Q", "10-K", "10-K/A", "10-Q/A"])]

        # Newest period first; when a period end appears more than once, keep
        # the most recently filed value.
        # NOTE(review): facts that share an 'end' date but cover different
        # durations (e.g. quarter vs. year-to-date) collapse into one row
        # here - confirm that this is intended.
        df = df.sort_values(by=["end", "filed"], ascending=[False, False])
        df = df.drop_duplicates(subset=["end"])

        # Restrict to the current year and the two before it, then cap at four rows.
        current_year = datetime.now().year
        first_year = current_year - 2
        df = df[df["end"].dt.year >= first_year].head(4)

        if df.empty:
            # Report the window actually applied instead of a hard-coded range.
            return (
                f"No recent ({first_year}-{current_year}) {concept} data "
                f"available for {ticker}."
            )

        lines = [f"Latest official {concept} data for {ticker}:"]
        for _, row in df.iterrows():
            amount = f"{int(row['val']):,}"
            # Only monetary facts get a dollar sign; other units are spelled out.
            value = f"${amount}" if unit == "USD" else f"{amount} {unit}"
            period_end = row["end"].strftime("%Y-%m-%d")
            filed_on = row["filed"].strftime("%Y-%m-%d")
            lines.append(f"- Period End: {period_end} (Filed: {filed_on}): {value}")

        return "\n".join(lines) + "\n"

    except Exception as e:
        # Deliberate catch-all: the tool reports failures as text to its caller.
        return f"Error fetching XBRL data: {e}"
|
|
| |
if __name__ == "__main__":
    # Manual smoke test: exercises both helpers for one sample ticker.
    sample_ticker = "MSFT"

    # 1) Look up the latest 10-K document URL.
    try:
        filing_url = get_latest_10k_url(sample_ticker)
        print(f"\n10-K URL: {filing_url}")
    except Exception as exc:
        print(f"URL Fetch Failed: {exc}")

    # 2) Call the XBRL tool through its .invoke() entry point.
    sample_concept = "NetIncomeLoss"
    tool_args = {"ticker": sample_ticker, "concept": sample_concept}
    print(get_company_concept_xbrl.invoke(tool_args))