Spaces:

devg24
/

FinAgent

Sleeping

File size: 6,560 Bytes

c6d67ac

import requests
import pandas as pd
from langchain_core.tools import tool
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
import functools


# Identifying string sent as the User-Agent on every outbound request in this module.
# NOTE(review): presumably required because SEC EDGAR asks automated clients to
# declare a contact in the User-Agent header — confirm against SEC's access policy.
USER_AGENT = "Dev Goyal devgoyal9031@gmail.com"
# Header dict passed as `headers=` to each requests.get call in this file.
HEADERS = {"User-Agent": USER_AGENT}

@functools.lru_cache(maxsize=1)
def _get_ticker_to_cik_mapping() -> dict[str, str]:
    """Fetch and cache the SEC ticker -> CIK mapping.

    The result is memoized by ``lru_cache``, so after one successful call the
    download is not repeated for the life of the process. A call that raises
    is not cached and will be retried on the next invocation.

    Returns:
        Mapping from upper-cased ticker symbol to the CIK rendered as a
        string left-padded with zeros to 10 characters. The same dict object
        is returned to every caller, so callers must not mutate it.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    print("[System: Fetching SEC ticker to CIK mapping...]")
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection; bound it so a hung endpoint surfaces as an exception.
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()

    # The payload is a dict of records; only the record values are used.
    return {
        company_info['ticker'].upper(): str(company_info['cik_str']).zfill(10)
        for company_info in response.json().values()
    }

def get_cik_from_ticker(ticker: str) -> str:
    """Return the zero-padded CIK string for *ticker* (case-insensitive).

    Raises:
        ValueError: If the upper-cased ticker is absent from the cached
            ticker-to-CIK mapping.
    """
    symbol = ticker.upper()
    cik = _get_ticker_to_cik_mapping().get(symbol)
    if cik is None:
        raise ValueError(f"Ticker {symbol} not found in SEC database.")
    return cik

def get_latest_10k_url(ticker: str) -> str:
    """Find the URL of the first 10-K listed in a company's recent filings.

    This function is best-effort and never raises for ordinary failures:
    every ``Exception`` is caught and reported in the returned string.

    Args:
        ticker: Ticker symbol; resolved to a CIK via ``get_cik_from_ticker``.

    Returns:
        One of:
        - the URL of the filing's ``.txt`` document under
          ``https://www.sec.gov/Archives/edgar/data/``;
        - ``"No 10-K found for <ticker>."`` when no entry has form ``10-K``;
        - ``"Error: <message>"`` when the lookup, request, or parsing fails.
    """
    try:
        cik = get_cik_from_ticker(ticker)
        url = f"https://data.sec.gov/submissions/CIK{cik}.json"

        print(f"[System: Fetching filing history for CIK {cik}...]")
        # Bound the request: requests has no default timeout and would
        # otherwise block forever on a stalled connection.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()

        filings = response.json()['filings']['recent']

        # 'form' and 'accessionNumber' are parallel lists; take the accession
        # number of the first entry whose form is exactly '10-K'.
        # NOTE(review): "most recent" relies on the list being ordered
        # newest-first — confirm against the EDGAR submissions API docs.
        accession_number = next(
            (
                accession
                for form, accession in zip(filings['form'], filings['accessionNumber'])
                if form == '10-K'
            ),
            None,
        )
        if accession_number is None:
            return f"No 10-K found for {ticker}."

        # The directory segment uses the accession number without dashes and
        # the CIK without its zero padding; the file name keeps the dashes.
        accession_no_dashes = accession_number.replace('-', '')
        return (
            f"https://www.sec.gov/Archives/edgar/data/{cik.lstrip('0')}"
            f"/{accession_no_dashes}/{accession_number}.txt"
        )

    except Exception as e:
        return f"Error: {str(e)}"
# 1. Define the strict Pydantic Schema
class XBRLConceptInput(BaseModel):
    # Argument schema for the XBRL lookup tool; it is passed as `args_schema`
    # to the @tool decorator on get_company_concept_xbrl.
    # Documented with comments rather than a class docstring so the schema
    # pydantic generates from this model is left exactly as it was.

    # Ticker symbol of the company to look up. `...` marks the field required.
    ticker: str = Field(
        ..., 
        description="The official uppercase ticker symbol (e.g., AAPL)."
    )
    # Closed set of accepted concept names: Literal rejects any other string
    # at validation time, so the tool body only ever sees one of these values.
    concept: Literal[
        "Revenues", 
        "NetIncomeLoss", 
        "Assets", 
        "Liabilities",
        "GrossProfit",
        "OperatingIncomeLoss",
        "AssetsCurrent",
        "LiabilitiesCurrent",
        "NetCashProvidedByUsedInOperatingActivities",
        "PaymentsToAcquirePropertyPlantAndEquipment",
        "EntityCommonStockSharesOutstanding"
    ] = Field(
        ..., 
        description="You MUST select the exact SEC XBRL concept from this list that best matches the user's request."
    )

# 2. Bind the schema to the tool
@tool(args_schema=XBRLConceptInput)
def get_company_concept_xbrl(ticker: str, concept: str) -> str:
    """
    Fetches official SEC accounting metrics for a company across recent quarters.
    CRITICAL INSTRUCTIONS:
    1. 'ticker': Must be the official uppercase ticker symbol (e.g., AAPL).
    2. 'concept': You MUST use one of these exact SEC XBRL concepts (case-sensitive):
       -- Core Size --
       - 'Revenues' (Total Revenue / Sales)
       - 'NetIncomeLoss' (Net Income / Profit)
       - 'Assets' (Total Assets)
       - 'Liabilities' (Total Liabilities)
       
       -- Margins & Liquidity --
       - 'GrossProfit' (Revenue minus Cost of Goods Sold)
       - 'OperatingIncomeLoss' (Operating Income)
       - 'AssetsCurrent' (Short-term assets like cash/inventory)
       - 'LiabilitiesCurrent' (Short-term debt)
       
       -- Cash Flow & Valuation --
       - 'NetCashProvidedByUsedInOperatingActivities' (Operating Cash Flow)
       - 'PaymentsToAcquirePropertyPlantAndEquipment' (Capital Expenditures / CapEx)
       - 'EntityCommonStockSharesOutstanding' (Total shares outstanding)
       
    Do not guess concepts. Only use the exact strings listed above.
    """
    # Maintainer notes (kept out of the docstring, which is left verbatim
    # because the @tool decorator exposes it to the model as the tool
    # description):
    #   - Returns a human-readable summary string of up to four period-end
    #     values, or a "No ... data" / "Error fetching XBRL data: ..." string.
    #   - Best-effort: every Exception is caught and reported in the return
    #     value instead of propagating to the agent.
    try:
        cik = get_cik_from_ticker(ticker)

        # EntityCommonStockSharesOutstanding belongs to the 'dei' taxonomy and
        # is reported in 'shares'; the other allowed concepts are us-gaap
        # monetary facts reported in 'USD'. Requesting it under us-gaap/USD
        # can never succeed.
        if concept == "EntityCommonStockSharesOutstanding":
            taxonomy, unit, value_prefix = "dei", "shares", ""
        else:
            taxonomy, unit, value_prefix = "us-gaap", "USD", "$"
        url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{concept}.json"

        print(f"[System: Fetching latest {concept} for {ticker}...]")
        # Bound the request: requests has no default timeout and would
        # otherwise block forever on a stalled connection.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        data = response.json()

        # A missing or empty unit list both mean "nothing to report"; an empty
        # list would otherwise fail later with a confusing KeyError on 'end'.
        facts = data.get("units", {}).get(unit)
        if not facts:
            return f"No {unit} data found for {concept}."

        # 1. Convert to DataFrame
        df = pd.DataFrame(facts)

        # 2. Convert date strings to datetime objects
        df['end'] = pd.to_datetime(df['end'])
        df['filed'] = pd.to_datetime(df['filed'])

        # 3. Keep only quarterly/annual reports and their amendments
        df = df[df['form'].isin(['10-Q', '10-K', '10-K/A', '10-Q/A'])]

        # 4. Deduplicate: when the same period end is reported several times,
        #    keep the most recently filed row.
        # NOTE(review): facts covering different durations (e.g. quarterly vs
        # year-to-date) may share an 'end' date; deduplicating on 'end' alone
        # keeps whichever sorts first — confirm this is the intended figure.
        df = df.sort_values(by=['end', 'filed'], ascending=[False, False])
        df = df.drop_duplicates(subset=['end'])

        # 5. Keep periods ending in the current year or the two before it,
        # 6. then the four most recent of those.
        current_year = datetime.now().year
        earliest_year = current_year - 2
        df = df[df['end'].dt.year >= earliest_year].head(4)

        if df.empty:
            # Report the window actually applied rather than hard-coded years.
            return f"No recent ({earliest_year}-{current_year}) {concept} data available for {ticker}."

        lines = [f"Latest official {concept} data for {ticker}:"]
        lines.extend(
            f"- Period End: {end.strftime('%Y-%m-%d')} "
            f"(Filed: {filed.strftime('%Y-%m-%d')}): {value_prefix}{int(val):,}"
            for end, filed, val in zip(df['end'], df['filed'], df['val'])
        )
        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"

    except Exception as e:
        return f"Error fetching XBRL data: {str(e)}"

# Quick test block for the new function
if __name__ == "__main__":
    # Manual smoke test; runs only when the file is executed as a script.
    demo_ticker = "MSFT"

    # Check 1: resolve the 10-K document URL and print it.
    try:
        print(f"\n10-K URL: {get_latest_10k_url(demo_ticker)}")
    except Exception as err:
        print(f"URL Fetch Failed: {err}")

    # Check 2: call the XBRL tool through its .invoke interface.
    tool_args = dict(ticker=demo_ticker, concept="NetIncomeLoss")
    print(get_company_concept_xbrl.invoke(tool_args))