Spaces:
Sleeping
Sleeping
| import pandas as pd # type: ignore | |
| import numpy as np # type: ignore | |
| import torch # type: ignore | |
| import os | |
# Resolve all paths relative to the project root, i.e. the parent of the
# directory that contains this file.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
data_dir = os.path.join(project_root, "data")

# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test: the call succeeds if the directory is already there.
os.makedirs(data_dir, exist_ok=True)

# CSV written by the fetcher script and read by get_processed_tensors().
CACHE_FILE = os.path.join(data_dir, "vfv_market_data.csv")

# Number of consecutive log returns in each normalized window.
WINDOW_SIZE = 15
def get_processed_tensors(cache_file=None, window_size=None):
    """
    Read the yfinance CSV and turn Close prices into normalized windows.

    The CSV is expected to carry the two header rows yfinance writes
    (Price / Ticker). The header is flattened to its first level, the
    'Close' column is converted to floats, log returns are computed, and
    every run of ``window_size`` consecutive returns is z-score normalized.

    Args:
        cache_file: Path of the CSV to read. Defaults to ``CACHE_FILE``.
        window_size: Number of log returns per window. Defaults to
            ``WINDOW_SIZE``.

    Returns:
        A float32 ``torch.Tensor`` of shape ``[N, window_size]``, or ``None``
        (after printing an error) when the file is missing, has no 'Close'
        column, or holds fewer than ``window_size + 1`` usable prices.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    # Resolve defaults at call time so the module constants stay the single
    # source of truth for callers that pass nothing.
    if cache_file is None:
        cache_file = CACHE_FILE
    if window_size is None:
        window_size = WINDOW_SIZE
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    if not os.path.exists(cache_file):
        print(f"Error: {cache_file} not found. Run your fetcher script first.")
        return None

    # yfinance saves two header rows; header=[0, 1] captures both as a
    # MultiIndex, which is then flattened: (Close, VFV.TO) -> Close.
    df = pd.read_csv(cache_file, header=[0, 1], index_col=0, parse_dates=True)
    df.columns = df.columns.get_level_values(0)

    if "Close" not in df.columns:
        print(f"Error: no 'Close' column found in {cache_file}.")
        return None

    close = df["Close"]
    # With several tickers the flattened header has duplicate 'Close' labels
    # and the selection is a DataFrame; use the first ticker's column.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]

    # errors='coerce' turns any non-numeric strings into NaN, dropped next.
    prices = pd.to_numeric(close, errors="coerce").dropna().to_numpy(dtype=float)
    # The log of a zero/negative/infinite price ratio is inf or NaN and would
    # poison every window that contains it, so discard such prices as well.
    prices = prices[np.isfinite(prices) & (prices > 0)]

    if len(prices) < window_size + 1:
        print(f"Error: Not enough data. Need at least {window_size + 1} points.")
        return None

    # Log returns r_t = ln(P_t / P_{t-1}); length is len(prices) - 1.
    log_returns = np.log(prices[1:] / prices[:-1])

    # All overlapping windows at once: shape [len(log_returns) - window_size + 1, window_size].
    windows = np.lib.stride_tricks.sliding_window_view(log_returns, window_size)

    # Z-score normalization per window: (x - mean) / std_dev.
    # Essential for Quantum Angle Embedding to avoid saturation.
    mu = windows.mean(axis=1, keepdims=True)
    std = windows.std(axis=1, keepdims=True)
    # Zero-variance windows are only centered: dividing by 1.0 leaves x - mean.
    safe_std = np.where(std > 1e-9, std, 1.0)
    normalized = (windows - mu) / safe_std

    return torch.tensor(normalized, dtype=torch.float32)
if __name__ == "__main__":
    # Script entry point: build the tensors and print a short summary.
    tensors = get_processed_tensors()
    if tensors is not None:
        print("--- Processing Complete ---")
        print(f"Tensor Shape: {tensors.shape}")  # Should be [N, 15]
        # Show the oldest and the most recent window under their headings.
        for heading, position in (
            ("\nFirst Window Example:", 0),
            ("\nLatest Window Example (Last 15 minutes):", -1),
        ):
            print(heading)
            print(tensors[position])