Dev Goyal committed on
Commit
25d293a
·
1 Parent(s): 012bcc4

refactor: improve 8-K filing retrieval logic with Item 2.02 validation and update ingestion script error handling

Browse files
Dockerfile CHANGED
@@ -28,7 +28,7 @@ ENV PYTHONPATH=/app
28
  RUN python scripts/ingest.py --tickers AAPL MSFT TSLA GOOGL NVDA
29
 
30
  # Ingest SEC 8-K / earnings call data for demo tickers
31
- RUN python scripts/ingest_earnings_calls.py --tickers AAPL MSFT GOOGL NVDA TSLA --quarters Q2-2025 Q1-2025 Q3-2025 Q4-2025 Q1-2026
32
 
33
  # ── Supervisord config (runs both services) ─────────────────────────────────
34
  COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 
28
  RUN python scripts/ingest.py --tickers AAPL MSFT TSLA GOOGL NVDA
29
 
30
  # Ingest SEC 8-K / earnings call data for demo tickers
31
+ RUN python scripts/ingest_earnings_calls.py --tickers AAPL MSFT GOOGL NVDA TSLA --quarters Q3-2025 Q4-2025 Q1-2026
32
 
33
  # ── Supervisord config (runs both services) ─────────────────────────────────
34
  COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
core/earnings_tools.py CHANGED
@@ -73,9 +73,21 @@ def parse_quarter(quarter_str: str) -> tuple[int, int]:
73
  return q, y
74
 
75
 
76
- def _quarter_to_month(q: int) -> str:
77
- """Map fiscal quarter to approximate month — used by the SEC 8-K fallback."""
78
- return {1: "03", 2: "06", 3: "09", 4: "12"}[q]
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
  # ---------------------------------------------------------------------------
@@ -151,28 +163,29 @@ def fetch_transcript_sec_8k(ticker: str, quarter: int, year: int) -> Optional[st
151
  resp.raise_for_status()
152
  filings = resp.json()["filings"]["recent"]
153
 
154
- target_month = int(_quarter_to_month(quarter))
155
  best_doc_url = None
156
 
157
  for i, form in enumerate(filings["form"]):
158
  if form != "8-K":
159
  continue
 
 
 
 
 
 
 
 
 
160
  filed = filings["filingDate"][i] # "2025-01-30"
161
  filed_year, filed_month = int(filed[:4]), int(filed[5:7])
162
 
163
- # Build a set of acceptable (year, month) pairs:
164
- # Accept filings from the quarter-end month through 3 months after,
165
- # handling year rollover (e.g., Q4 target_month=12 → Dec, Jan, Feb, Mar)
166
- acceptable = set()
167
- for offset in range(4): # 0, 1, 2, 3 months after quarter end
168
- m = target_month + offset
169
- y = year
170
- if m > 12:
171
- m -= 12
172
- y += 1
173
- acceptable.add((y, m))
174
-
175
- if (filed_year, filed_month) in acceptable:
176
  accession = filings["accessionNumber"][i]
177
  acc_clean = accession.replace("-", "")
178
  primary_doc = filings["primaryDocument"][i]
@@ -181,7 +194,7 @@ def fetch_transcript_sec_8k(ticker: str, quarter: int, year: int) -> Optional[st
181
  f"{cik.lstrip('0')}/{acc_clean}/{primary_doc}"
182
  )
183
  best_doc_url = doc_url
184
- break # Take the first matching 8-K
185
 
186
  if not best_doc_url:
187
  print(f"[Earnings Ingest] No matching SEC 8-K found for {ticker} Q{quarter}-{year}.")
 
73
  return q, y
74
 
75
 
76
+ def _get_quarter_month_range(q: int) -> list[int]:
77
+ """
78
+ Get the month range (quarter end month + 3 months after) for a given quarter.
79
+ This is used as a heuristic to find the relevant 8-K filing.
80
+ """
81
+ start_month = {1: 3, 2: 6, 3: 9, 4: 12}[q]
82
+ # We allow a very wide range: 2 months before the standard month to 4 months after.
83
+ # This covers most fiscal year offsets (e.g. AAPL Q1 ends in Dec, reported in Jan/Feb).
84
+ months = []
85
+ for i in range(-2, 5):
86
+ m = start_month + i
87
+ if m < 1: m += 12
88
+ if m > 12: m -= 12
89
+ months.append(m)
90
+ return months
91
 
92
 
93
  # ---------------------------------------------------------------------------
 
163
  resp.raise_for_status()
164
  filings = resp.json()["filings"]["recent"]
165
 
166
+ acceptable_months = _get_quarter_month_range(quarter)
167
  best_doc_url = None
168
 
169
  for i, form in enumerate(filings["form"]):
170
  if form != "8-K":
171
  continue
172
+
173
+ # Check for Item 2.02 (Results of Operations and Financial Condition)
174
+ # Some filings have items like '1.01,2.02,9.01', some just '2.02'
175
+ items = str(filings.get("items", [""])[i])
176
+ if "2.02" not in items:
177
+ # If we can't find Item 2.02, we fallback to checking if 'earnings' is in the title (if available)
178
+ # or just continuing to search for other 8-Ks.
179
+ continue
180
+
181
  filed = filings["filingDate"][i] # "2025-01-30"
182
  filed_year, filed_month = int(filed[:4]), int(filed[5:7])
183
 
184
+ # Logic: If the filing is within the target year (or next year if Q4)
185
+ # and the month is in our heuristic range.
186
+ is_valid_year = (filed_year == year) or (quarter == 4 and filed_year == year + 1)
187
+
188
+ if is_valid_year and filed_month in acceptable_months:
 
 
 
 
 
 
 
 
189
  accession = filings["accessionNumber"][i]
190
  acc_clean = accession.replace("-", "")
191
  primary_doc = filings["primaryDocument"][i]
 
194
  f"{cik.lstrip('0')}/{acc_clean}/{primary_doc}"
195
  )
196
  best_doc_url = doc_url
197
+ break # Take the first matching 8-K (most recent)
198
 
199
  if not best_doc_url:
200
  print(f"[Earnings Ingest] No matching SEC 8-K found for {ticker} Q{quarter}-{year}.")
scripts/ingest_earnings_calls.py CHANGED
@@ -98,12 +98,19 @@ def main():
98
  }.get(r["status"], "❓")
99
  print(f" {icon} {r['ticker']} {r['quarter']}: {r['status']}")
100
 
101
- failed = [r for r in results if r["status"] in ("failed", "error")]
102
- if failed:
103
- print(f"\n{len(failed)} ingest(s) failed. Check logs above.")
 
 
104
  sys.exit(1)
105
- else:
106
- print(f"\nAll {len(results)} ingest(s) completed.")
 
 
 
 
 
107
 
108
 
109
  if __name__ == "__main__":
 
98
  }.get(r["status"], "❓")
99
  print(f" {icon} {r['ticker']} {r['quarter']}: {r['status']}")
100
 
101
+ errors = [r for r in results if r["status"] == "error"]
102
+ failed = [r for r in results if r["status"] == "failed"]
103
+
104
+ if errors:
105
+ print(f"\n[CRITICAL] {len(errors)} ingest(s) hit technical errors. Check logs.")
106
  sys.exit(1)
107
+
108
+ if failed:
109
+ print(f"\n[INFO] {len(failed)} transcript(s) could not be found (likely not yet reported).")
110
+ print("This is not treated as a build failure.")
111
+
112
+ print("\nIngestion process completed successfully.")
113
+ sys.exit(0)
114
 
115
 
116
  if __name__ == "__main__":