File size: 6,560 Bytes
c6d67ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import requests
import pandas as pd
from langchain_core.tools import tool
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, Field
import functools


USER_AGENT = "Dev Goyal devgoyal9031@gmail.com"
HEADERS = {"User-Agent": USER_AGENT}

@functools.lru_cache(maxsize=1)
def _get_ticker_to_cik_mapping() -> dict[str, str]:
    """Fetch and cache the SEC ticker -> CIK mapping.

    Downloads ``company_tickers.json`` from sec.gov and builds a lookup
    table. The result is memoized by ``lru_cache``, so the download happens
    once per process; a call that raises is not cached and will be retried
    on the next call.

    Returns:
        A dict mapping each upper-cased ticker symbol to its CIK, rendered
        as a string zero-padded to 10 digits.

    Raises:
        requests.HTTPError: If the SEC endpoint answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = "https://www.sec.gov/files/company_tickers.json"
    print("[System: Fetching SEC ticker to CIK mapping...]")
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection; bound the wait so callers get an error instead of hanging.
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()
    data = response.json()

    # The top-level keys of the payload are ignored; only each entry's
    # 'ticker' and 'cik_str' fields are used.
    return {
        company_info['ticker'].upper(): str(company_info['cik_str']).zfill(10)
        for company_info in data.values()
    }

def get_cik_from_ticker(ticker: str) -> str:
    """Resolve a ticker symbol to its zero-padded SEC CIK string.

    The symbol is upper-cased before the lookup, so matching is
    case-insensitive.

    Raises:
        ValueError: If the upper-cased symbol is absent from the mapping.
    """
    symbol = ticker.upper()
    cik = _get_ticker_to_cik_mapping().get(symbol)
    if cik is None:
        raise ValueError(f"Ticker {symbol} not found in SEC database.")
    return cik

def get_latest_10k_url(ticker: str) -> str:
    """Find the URL of the most recent 10-K filing for a given ticker.

    Args:
        ticker: Ticker symbol; case-insensitive (it is upper-cased during
            the CIK lookup).

    Returns:
        The URL of the filing's ``.txt`` document under the EDGAR archives
        on success. On failure this function does not raise; it returns a
        human-readable string instead: ``"No 10-K found for <ticker>."``
        when no form in the recent filings equals '10-K', or
        ``"Error: <message>"`` for any exception (unknown ticker, network
        error, unexpected payload shape).
    """
    try:
        cik = get_cik_from_ticker(ticker)
        url = f"https://data.sec.gov/submissions/CIK{cik}.json"

        print(f"[System: Fetching filing history for CIK {cik}...]")
        # Bound the wait so a stalled connection surfaces as an error string
        # instead of blocking the caller forever.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()

        recent = response.json()['filings']['recent']

        # 'form' and 'accessionNumber' are parallel lists; the first '10-K'
        # match is returned. NOTE(review): this assumes the SEC lists recent
        # filings newest-first — confirm against the API documentation.
        for form, accession_number in zip(recent['form'], recent['accessionNumber']):
            if form != '10-K':
                continue
            # The archive folder name is the accession number without dashes,
            # and the CIK in the path carries no leading zeros.
            accession_no_dashes = accession_number.replace('-', '')
            return (
                f"https://www.sec.gov/Archives/edgar/data/{cik.lstrip('0')}"
                f"/{accession_no_dashes}/{accession_number}.txt"
            )

        return f"No 10-K found for {ticker}."

    except Exception as e:
        # Deliberately broad: callers expect a string result, never a raise.
        return f"Error: {str(e)}"
# 1. Define the strict Pydantic Schema
class XBRLConceptInput(BaseModel):
    # Argument schema for the XBRL concept lookup tool: a ticker plus one
    # concept drawn from a closed list of names.
    # Documented with comments rather than a class docstring so that the
    # schema generated from this model is left exactly as it was.

    # Required (the `...` default marks the field as having no default value).
    ticker: str = Field(
        ..., 
        description="The official uppercase ticker symbol (e.g., AAPL)."
    )
    # Required. The Literal type restricts the value to exactly these
    # case-sensitive strings; anything else fails validation.
    concept: Literal[
        "Revenues", 
        "NetIncomeLoss", 
        "Assets", 
        "Liabilities",
        "GrossProfit",
        "OperatingIncomeLoss",
        "AssetsCurrent",
        "LiabilitiesCurrent",
        "NetCashProvidedByUsedInOperatingActivities",
        "PaymentsToAcquirePropertyPlantAndEquipment",
        "EntityCommonStockSharesOutstanding"
    ] = Field(
        ..., 
        description="You MUST select the exact SEC XBRL concept from this list that best matches the user's request."
    )

# 2. Bind the schema to the tool
@tool(args_schema=XBRLConceptInput)
def get_company_concept_xbrl(ticker: str, concept: str) -> str:
    """
    Fetches official SEC accounting metrics for a company across recent quarters.
    CRITICAL INSTRUCTIONS:
    1. 'ticker': Must be the official uppercase ticker symbol (e.g., AAPL).
    2. 'concept': You MUST use one of these exact SEC XBRL concepts (case-sensitive):
       -- Core Size --
       - 'Revenues' (Total Revenue / Sales)
       - 'NetIncomeLoss' (Net Income / Profit)
       - 'Assets' (Total Assets)
       - 'Liabilities' (Total Liabilities)
       
       -- Margins & Liquidity --
       - 'GrossProfit' (Revenue minus Cost of Goods Sold)
       - 'OperatingIncomeLoss' (Operating Income)
       - 'AssetsCurrent' (Short-term assets like cash/inventory)
       - 'LiabilitiesCurrent' (Short-term debt)
       
       -- Cash Flow & Valuation --
       - 'NetCashProvidedByUsedInOperatingActivities' (Operating Cash Flow)
       - 'PaymentsToAcquirePropertyPlantAndEquipment' (Capital Expenditures / CapEx)
       - 'EntityCommonStockSharesOutstanding' (Total shares outstanding)
       
    Do not guess concepts. Only use the exact strings listed above.
    """
    # Maintainer notes (kept out of the docstring, which is left verbatim
    # because the @tool decorator exposes it as the tool description):
    # - Returns a text summary of up to 4 of the most recent reporting
    #   periods, one line per period end date.
    # - Never raises: every failure is reported as a string, either a
    #   "No ... data" message or "Error fetching XBRL data: <message>".
    # - NOTE(review): rows are deduplicated on the period 'end' date only.
    #   If the API returns several durations sharing one end date (e.g.
    #   quarterly and year-to-date figures), only the most recently filed
    #   row survives — confirm this is the intended selection.
    try:
        cik = get_cik_from_ticker(ticker)

        # Shares outstanding is a cover-page ("dei") element measured in
        # shares; the other supported concepts are us-gaap monetary facts.
        # Querying it under us-gaap/USD can never return data.
        if concept == "EntityCommonStockSharesOutstanding":
            taxonomy, unit = "dei", "shares"
        else:
            taxonomy, unit = "us-gaap", "USD"
        url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{concept}.json"

        print(f"[System: Fetching latest {concept} for {ticker}...]")
        # Bound the wait so a stalled connection becomes an error string
        # rather than hanging the tool call.
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        data = response.json()

        facts = data.get("units", {}).get(unit)
        if not facts:
            return f"No {unit} data found for {concept}."

        # 1. Convert to DataFrame
        df = pd.DataFrame(facts)

        # 2. Convert date strings to datetime objects
        df['end'] = pd.to_datetime(df['end'])
        df['filed'] = pd.to_datetime(df['filed'])

        # 3. Keep only quarterly/annual reports and their amendments
        df = df[df['form'].isin(['10-Q', '10-K', '10-K/A', '10-Q/A'])]

        # 4. Deduplicate: when the same period end is reported more than
        #    once, keep the most recently filed row (sorted first).
        df = df.sort_values(by=['end', 'filed'], ascending=[False, False])
        df = df.drop_duplicates(subset=['end'])

        # 5. Keep periods ending in the current year or the two before it
        current_year = datetime.now().year
        earliest_year = current_year - 2
        df = df[df['end'].dt.year >= earliest_year]

        # 6. Take top 4 most recent periods
        df = df.head(4)

        if df.empty:
            # Report the window that was actually applied, not a fixed one.
            return f"No recent ({earliest_year}-{current_year}) {concept} data available for {ticker}."

        lines = [f"Latest official {concept} data for {ticker}:"]
        for _, row in df.iterrows():
            amount = f"{int(row['val']):,}"
            # Only monetary facts get a dollar sign; share counts are labelled.
            formatted_val = f"${amount}" if unit == "USD" else f"{amount} {unit}"
            date_str = row['end'].strftime('%Y-%m-%d')
            filed_str = row['filed'].strftime('%Y-%m-%d')
            lines.append(f"- Period End: {date_str} (Filed: {filed_str}): {formatted_val}")

        # Every line, including the last, ends with a newline.
        return "\n".join(lines) + "\n"

    except Exception as e:
        # Deliberately broad: the tool must hand a string back to the caller.
        return f"Error fetching XBRL data: {str(e)}"

# Quick test block for the new function
if __name__ == "__main__":
    # Manual smoke test; both calls below reach the live SEC endpoints.
    demo_ticker = "MSFT"
    demo_concept = "NetIncomeLoss"

    # Check 1: look up the latest 10-K document URL.
    try:
        filing_url = get_latest_10k_url(demo_ticker)
        print(f"\n10-K URL: {filing_url}")
    except Exception as exc:
        print(f"URL Fetch Failed: {exc}")

    # Check 2: run the XBRL tool through its .invoke() entry point.
    tool_args = {"ticker": demo_ticker, "concept": demo_concept}
    print(get_company_concept_xbrl.invoke(tool_args))