import requests
import pandas as pd
from langchain_core.tools import tool
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
import functools
# Identification string sent as the User-Agent header on the SEC requests
# made by the functions below.
# NOTE(review): this embeds a personal e-mail address in source — consider
# loading it from configuration instead.
USER_AGENT = "Dev Goyal devgoyal9031@gmail.com"
# Header dict passed to each requests.get() call below.
HEADERS = {"User-Agent": USER_AGENT}
@functools.lru_cache(maxsize=1)
def _get_ticker_to_cik_mapping() -> dict[str, str]:
    """Fetch and cache the SEC ticker-to-CIK mapping.

    Downloads ``company_tickers.json`` from sec.gov. The ``lru_cache``
    decorator keeps the resulting dict in memory, so later calls in the same
    process do not hit the network again. A call that raises is not cached,
    so the next call retries the download.

    Returns:
        A dict mapping each upper-cased ticker symbol to its CIK rendered as
        a zero-padded, 10-character string.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    print("[System: Fetching SEC ticker to CIK mapping...]")
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang every caller of this module.
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()
    # Only the per-company records are needed; the payload's top-level keys
    # are ignored.
    return {
        company['ticker'].upper(): str(company['cik_str']).zfill(10)
        for company in response.json().values()
    }
def get_cik_from_ticker(ticker: str) -> str:
    """Look up the CIK string for a ticker symbol, ignoring case.

    Args:
        ticker: Ticker symbol in any letter case.

    Returns:
        The CIK stored for the upper-cased ticker in the cached SEC mapping.

    Raises:
        ValueError: If the upper-cased ticker is not present in the mapping.
    """
    symbol = ticker.upper()
    cik = _get_ticker_to_cik_mapping().get(symbol)
    # Guard clause: an unknown symbol is reported with the normalised ticker.
    if cik is None:
        raise ValueError(f"Ticker {symbol} not found in SEC database.")
    return cik
def get_latest_10k_url(ticker: str) -> str:
    """Find the URL of the most recent 10-K filing for a given ticker.

    The function never raises for ordinary failures: every ``Exception`` is
    caught and reported through the return value, so callers always get a
    string back.

    Args:
        ticker: Ticker symbol of the company to look up.

    Returns:
        One of three strings:
        - the URL of the filing's full-text ``.txt`` document,
        - ``"No 10-K found for <ticker>."`` when no listed filing is a 10-K,
        - ``"Error: <message>"`` when the lookup or the request fails.
    """
    try:
        cik = get_cik_from_ticker(ticker)
        url = f"https://data.sec.gov/submissions/CIK{cik}.json"
        print(f"[System: Fetching filing history for CIK {cik}...]")
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        recent = response.json()['filings']['recent']

        # 'form' and 'accessionNumber' are parallel lists; the first 10-K hit
        # is returned (presumably the list is ordered newest first — confirm
        # against the SEC API documentation).
        for form, accession_number in zip(recent['form'], recent['accessionNumber']):
            if form != '10-K':
                continue
            # The SEC URL format removes dashes from the accession number
            # for the directory segment but keeps them in the file name.
            accession_no_dashes = accession_number.replace('-', '')
            # The archive path uses the CIK without its zero padding.
            return (
                f"https://www.sec.gov/Archives/edgar/data/{cik.lstrip('0')}"
                f"/{accession_no_dashes}/{accession_number}.txt"
            )

        return f"No 10-K found for {ticker}."
    except Exception as e:
        # Deliberate catch-all: failures are surfaced as text, not raised.
        return f"Error: {str(e)}"
# 1. Define the strict Pydantic Schema
# Argument schema for the XBRL concept tool. Typing `concept` as a Literal
# limits it to the names listed here; both Field descriptions are part of
# the schema, so they are runtime text rather than comments.
class XBRLConceptInput(BaseModel):
    # Ticker symbol of the company to look up (required field).
    ticker: str = Field(
        ...,
        description="The official uppercase ticker symbol (e.g., AAPL)."
    )
    # Closed, case-sensitive set of supported XBRL concept names (required).
    # NOTE(review): EntityCommonStockSharesOutstanding appears to be a `dei`
    # taxonomy element reported in shares, unlike the other entries, which
    # look like us-gaap monetary facts — confirm consumers account for that.
    concept: Literal[
        "Revenues",
        "NetIncomeLoss",
        "Assets",
        "Liabilities",
        "GrossProfit",
        "OperatingIncomeLoss",
        "AssetsCurrent",
        "LiabilitiesCurrent",
        "NetCashProvidedByUsedInOperatingActivities",
        "PaymentsToAcquirePropertyPlantAndEquipment",
        "EntityCommonStockSharesOutstanding"
    ] = Field(
        ...,
        description="You MUST select the exact SEC XBRL concept from this list that best matches the user's request."
    )
# Concepts that are not us-gaap monetary facts, mapped to (taxonomy, unit).
# EntityCommonStockSharesOutstanding is a `dei` cover-page element reported
# in shares, so a us-gaap/USD lookup can never return data for it.
_CONCEPT_OVERRIDES = {
    "EntityCommonStockSharesOutstanding": ("dei", "shares"),
}


# 2. Bind the schema to the tool
# NOTE: the docstring below doubles as the tool description handed to the
# model, so its wording is intentionally left as-is; maintainer-facing notes
# live in `#` comments instead.
@tool(args_schema=XBRLConceptInput)
def get_company_concept_xbrl(ticker: str, concept: str) -> str:
    """
    Fetches official SEC accounting metrics for a company across recent quarters.
    CRITICAL INSTRUCTIONS:
    1. 'ticker': Must be the official uppercase ticker symbol (e.g., AAPL).
    2. 'concept': You MUST use one of these exact SEC XBRL concepts (case-sensitive):
    -- Core Size --
    - 'Revenues' (Total Revenue / Sales)
    - 'NetIncomeLoss' (Net Income / Profit)
    - 'Assets' (Total Assets)
    - 'Liabilities' (Total Liabilities)
    -- Margins & Liquidity --
    - 'GrossProfit' (Revenue minus Cost of Goods Sold)
    - 'OperatingIncomeLoss' (Operating Income)
    - 'AssetsCurrent' (Short-term assets like cash/inventory)
    - 'LiabilitiesCurrent' (Short-term debt)
    -- Cash Flow & Valuation --
    - 'NetCashProvidedByUsedInOperatingActivities' (Operating Cash Flow)
    - 'PaymentsToAcquirePropertyPlantAndEquipment' (Capital Expenditures / CapEx)
    - 'EntityCommonStockSharesOutstanding' (Total shares outstanding)
    Do not guess concepts. Only use the exact strings listed above.
    """
    # Contract: always returns a string. Success yields a header line followed
    # by up to four "- Period End: ..." lines; every failure (unknown ticker,
    # HTTP error, unexpected payload) is reported as text instead of raised.
    try:
        cik = get_cik_from_ticker(ticker)
        taxonomy, unit = _CONCEPT_OVERRIDES.get(concept, ("us-gaap", "USD"))
        url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{concept}.json"
        print(f"[System: Fetching latest {concept} for {ticker}...]")
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        data = response.json()

        if unit not in data.get("units", {}):
            return f"No {unit} data found for {concept}."

        # 1. Convert to DataFrame
        df = pd.DataFrame(data["units"][unit])

        # 2. Convert date strings to datetime objects
        df['end'] = pd.to_datetime(df['end'])
        df['filed'] = pd.to_datetime(df['filed'])

        # 3. Filter for standard filings to avoid "preliminary" noise
        df = df[df['form'].isin(['10-Q', '10-K', '10-K/A', '10-Q/A'])]

        # 4. CRITICAL: Deduplicate.
        # If the same period ('end') is reported multiple times, take the
        # most recently filed one.
        # NOTE(review): for duration-type concepts the same period end can
        # presumably carry several spans (quarter vs. year-to-date); keying
        # on 'end' alone keeps whichever was filed last — confirm that is
        # the intended figure.
        df = df.sort_values(by=['end', 'filed'], ascending=[False, False])
        df = df.drop_duplicates(subset=['end'])

        # 5. Filter for the last 2 years
        current_year = datetime.now().year
        first_year = current_year - 2
        df = df[df['end'].dt.year >= first_year]

        # 6. Take top 4 most recent periods
        df = df.head(4)

        if df.empty:
            # The window is derived from the filter above so the message
            # cannot drift out of date.
            return f"No recent ({first_year}-{current_year}) {concept} data available for {ticker}."

        lines = [f"Latest official {concept} data for {ticker}:"]
        for end, filed, val in zip(df['end'], df['filed'], df['val']):
            amount = f"{int(val):,}"
            # Only monetary facts get a currency sign; other units are named.
            formatted_val = f"${amount}" if unit == "USD" else f"{amount} {unit}"
            lines.append(
                f"- Period End: {end.strftime('%Y-%m-%d')} "
                f"(Filed: {filed.strftime('%Y-%m-%d')}): {formatted_val}"
            )
        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"

    except Exception as e:
        # Deliberate catch-all: the tool reports failures as text.
        return f"Error fetching XBRL data: {str(e)}"
# Quick test block for the new function
if __name__ == "__main__":
    test_ticker = "MSFT"

    # Test 1: URL fetcher.
    # get_latest_10k_url() catches its own exceptions and returns an
    # "Error: ..." string, so wrapping this call in try/except would be dead
    # code; any failure shows up in the printed value.
    url = get_latest_10k_url(test_ticker)
    print(f"\n10-K URL: {url}")

    # Test 2: XBRL fetcher. The decorated function is called through
    # .invoke() with a dict of arguments.
    test_concept = "NetIncomeLoss"
    print(get_company_concept_xbrl.invoke({"ticker": test_ticker, "concept": test_concept}))