FinAgent / core /sec_tools.py
Dev Goyal
Initial deployment of FinAgent
c6d67ac
import requests
import pandas as pd
from langchain_core.tools import tool
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
import functools
USER_AGENT = "Dev Goyal devgoyal9031@gmail.com"
HEADERS = {"User-Agent": USER_AGENT}
@functools.lru_cache(maxsize=1)
def _get_ticker_to_cik_mapping() -> dict[str, str]:
"""Fetches and caches the SEC ticker to CIK mapping."""
url = "https://www.sec.gov/files/company_tickers.json"
print("[System: Fetching SEC ticker to CIK mapping...]")
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
data = response.json()
mapping = {}
for _, company_info in data.items():
mapping[company_info['ticker'].upper()] = str(company_info['cik_str']).zfill(10)
return mapping
def get_cik_from_ticker(ticker: str) -> str:
ticker = ticker.upper()
mapping = _get_ticker_to_cik_mapping()
if ticker in mapping:
return mapping[ticker]
raise ValueError(f"Ticker {ticker} not found in SEC database.")
def get_latest_10k_url(ticker: str) -> str:
"""Finds the URL for the most recent 10-K filing for a given ticker."""
try:
cik = get_cik_from_ticker(ticker)
url = f"https://data.sec.gov/submissions/CIK{cik}.json"
print(f"[System: Fetching filing history for CIK {cik}...]")
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
filings = response.json()['filings']['recent']
# Search for the most recent 10-K
for i, form in enumerate(filings['form']):
if form == '10-K':
accession_number = filings['accessionNumber'][i]
# The SEC URL format removes dashes from the accession number
accession_no_dashes = accession_number.replace('-', '')
# Construct the final document URL
document_url = f"https://www.sec.gov/Archives/edgar/data/{cik.lstrip('0')}/{accession_no_dashes}/{accession_number}.txt"
return document_url
return f"No 10-K found for {ticker}."
except Exception as e:
return f"Error: {str(e)}"
# 1. Define the strict Pydantic Schema
class XBRLConceptInput(BaseModel):
ticker: str = Field(
...,
description="The official uppercase ticker symbol (e.g., AAPL)."
)
concept: Literal[
"Revenues",
"NetIncomeLoss",
"Assets",
"Liabilities",
"GrossProfit",
"OperatingIncomeLoss",
"AssetsCurrent",
"LiabilitiesCurrent",
"NetCashProvidedByUsedInOperatingActivities",
"PaymentsToAcquirePropertyPlantAndEquipment",
"EntityCommonStockSharesOutstanding"
] = Field(
...,
description="You MUST select the exact SEC XBRL concept from this list that best matches the user's request."
)
# 2. Bind the schema to the tool
@tool(args_schema=XBRLConceptInput)
def get_company_concept_xbrl(ticker: str, concept: str) -> str:
"""
Fetches official SEC accounting metrics for a company across recent quarters.
CRITICAL INSTRUCTIONS:
1. 'ticker': Must be the official uppercase ticker symbol (e.g., AAPL).
2. 'concept': You MUST use one of these exact SEC XBRL concepts (case-sensitive):
-- Core Size --
- 'Revenues' (Total Revenue / Sales)
- 'NetIncomeLoss' (Net Income / Profit)
- 'Assets' (Total Assets)
- 'Liabilities' (Total Liabilities)
-- Margins & Liquidity --
- 'GrossProfit' (Revenue minus Cost of Goods Sold)
- 'OperatingIncomeLoss' (Operating Income)
- 'AssetsCurrent' (Short-term assets like cash/inventory)
- 'LiabilitiesCurrent' (Short-term debt)
-- Cash Flow & Valuation --
- 'NetCashProvidedByUsedInOperatingActivities' (Operating Cash Flow)
- 'PaymentsToAcquirePropertyPlantAndEquipment' (Capital Expenditures / CapEx)
- 'EntityCommonStockSharesOutstanding' (Total shares outstanding)
Do not guess concepts. Only use the exact strings listed above.
"""
try:
cik = get_cik_from_ticker(ticker)
url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/us-gaap/{concept}.json"
print(f"[System: Fetching latest {concept} for {ticker}...]")
response = requests.get(url, headers=HEADERS)
response.raise_for_status()
data = response.json()
if "USD" not in data.get("units", {}):
return f"No USD data found for {concept}."
# 1. Convert to DataFrame
df = pd.DataFrame(data["units"]["USD"])
# 2. Convert date strings to datetime objects
df['end'] = pd.to_datetime(df['end'])
df['filed'] = pd.to_datetime(df['filed'])
# 3. Filter for standard filings to avoid "preliminary" noise
df = df[df['form'].isin(['10-Q', '10-K', '10-K/A', '10-Q/A'])]
# 4. CRITICAL: Deduplicate.
# If the same period ('end') is reported multiple times, take the most recently filed one.
df = df.sort_values(by=['end', 'filed'], ascending=[False, False])
df = df.drop_duplicates(subset=['end'])
# 5. Filter for the last 2 years
current_year = datetime.now().year
df = df[df['end'].dt.year >= (current_year - 2)]
# 6. Take top 4 most recent periods
df = df.head(4)
if df.empty:
return f"No recent (2024-2026) {concept} data available for {ticker}."
summary = f"Latest official {concept} data for {ticker}:\n"
for _, row in df.iterrows():
formatted_val = f"${int(row['val']):,}"
date_str = row['end'].strftime('%Y-%m-%d')
summary += f"- Period End: {date_str} (Filed: {row['filed'].strftime('%Y-%m-%d')}): {formatted_val}\n"
return summary
except Exception as e:
return f"Error fetching XBRL data: {str(e)}"
# Quick test block for the new function
if __name__ == "__main__":
test_ticker = "MSFT"
# Test 1: URL fetcher
try:
url = get_latest_10k_url(test_ticker)
print(f"\n10-K URL: {url}")
except Exception as e:
print(f"URL Fetch Failed: {e}")
# Test 2: XBRL fetcher
test_concept = "NetIncomeLoss"
print(get_company_concept_xbrl.invoke({"ticker": test_ticker, "concept": test_concept}))