Spaces:

Astocoder
/

quant-gym

Sleeping

App Files Files Community

Astocoder committed on Apr 5

Commit

8be84ff

1 Parent(s): 717bee1

update the files

Browse files

Files changed (3) hide show

server/Dockerfile → Dockerfile +0 -0
inference.py +239 -41
server/app.py +119 -45

server/Dockerfile → Dockerfile RENAMED Viewed

File without changes

inference.py CHANGED Viewed

@@ -1,44 +1,242 @@
 import requests
-import time
-BASE_URL = "http://localhost:8000"
-def test_task1():
-    """Test GET_PRICE"""
-    response = requests.post(f"{BASE_URL}/reset")
-    action = {"type": "GET_PRICE", "symbol": "AAPL"}
-    response = requests.post(f"{BASE_URL}/step", json=action)
-    data = response.json()
-    if data.get("observation", {}).get("price"):
-        return 1.0
-    return 0.0
-def test_task2():
-    """Test News Analysis"""
-    response = requests.post(f"{BASE_URL}/reset")
-    action = {"type": "GET_NEWS", "explanation": "Based on positive sentiment, BUY"}
-    response = requests.post(f"{BASE_URL}/step", json=action)
-    return 1.0  # Simplified for now
-def test_task3():
-    """Test Backtest"""
-    response = requests.post(f"{BASE_URL}/reset")
-    action = {"type": "BACKTEST", "strategy": "momentum"}
-    response = requests.post(f"{BASE_URL}/step", json=action)
-    data = response.json()
-    if data.get("observation", {}).get("backtest_results"):
-        return 1.0
-    return 0.0
 if __name__ == "__main__":
-    print("Running inference tests...")
-    score1 = test_task1()
-    score2 = test_task2()
-    score3 = test_task3()
-    print(f"Task 1 Score: {score1}")
-    print(f"Task 2 Score: {score2}")
-    print(f"Task 3 Score: {score3}")
-    print(f"Total Score: {(score1 + score2 + score3) / 3:.2f}")

+import asyncio
+import os
+import textwrap
+from typing import List, Optional
+from openai import OpenAI
 import requests
+# Try to load from .env file if it exists
+try:
+    from dotenv import load_dotenv
+    # NOTE(review): the return value of load_dotenv() is not checked, so the
+    # message below is printed whenever python-dotenv is importable —
+    # presumably even when no .env file is present; confirm.
+    load_dotenv()
+    print("[INFO] Loaded .env file", flush=True)
+except ImportError:
+    print("[INFO] python-dotenv not installed, using system env only", flush=True)
+# Environment variables (set by the judge or .env)
+# API_BASE_URL is not referenced by the code visible in this diff.
+API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
+# MODEL_NAME is only used as the model label in the [START] log line.
+MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.2-3B-Instruct")
+# HF_TOKEN is only tested for presence (and its length printed); it is never sent anywhere in the code shown.
+HF_TOKEN = os.getenv("HF_TOKEN")
+# Quant-Gym specific configuration
+# BASE_URL is the root URL of the environment server used by QuantGymClient.
+BASE_URL = os.getenv("BASE_URL", "http://localhost:8000")
+# TASK_NAME and BENCHMARK are echoed as task= and env= in the [START] log line.
+TASK_NAME = os.getenv("TASK_NAME", "quant-gym")
+BENCHMARK = os.getenv("BENCHMARK", "quant-gym")
+# Upper bound of the episode loop in main().
+MAX_STEPS = 10
+# TEMPERATURE and MAX_TOKENS are not referenced by the code visible in this diff.
+TEMPERATURE = 0.7
+MAX_TOKENS = 200
+# Minimum mean reward for main() to report success=true.
+SUCCESS_SCORE_THRESHOLD = 0.7
+# System prompt for financial analysis
+# dedent() + strip() remove the literal's source indentation and its blank first/last lines.
+# Not referenced by the code visible in this diff: get_model_action() uses the rule-based fallback.
+SYSTEM_PROMPT = textwrap.dedent(
+    """
+    You are a financial analyst AI agent. Your goal is to analyze market data and make trading decisions.
+    Available actions:
+    - GET_PRICE: Get current stock price
+    - BUY [amount]: Buy number of shares
+    - SELL [amount]: Sell number of shares
+    - BACKTEST [strategy]: Backtest a strategy (momentum or mean_reversion)
+    - GET_NEWS: Get latest news headline
+    Strategy tips:
+    - Positive news sentiment suggests BUY
+    - Negative news sentiment suggests SELL
+    - Momentum strategy: Buy when price is rising
+    - Mean reversion: Buy when price is low relative to recent average
+    Respond with EXACTLY one action in format: ACTION [parameter]
+    Example: BUY 10
+    Example: GET_PRICE
+    Example: BACKTEST momentum
+    """
+).strip()
+def log_start(task: str, env: str, model: str) -> None:
+    """Print the ``[START]`` line naming the task, environment and model."""
+    start_line = f"[START] task={task} env={env} model={model}"
+    print(start_line, flush=True)
+def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
+    """Print one ``[STEP]`` line.
+
+    The reward is shown with two decimals, ``done`` as lower-case text, and a
+    missing or empty ``error`` as the literal ``null``.
+    """
+    shown_error = error or "null"
+    shown_done = str(done).lower()
+    step_line = f"[STEP] step={step} action={action} reward={reward:.2f} done={shown_done} error={shown_error}"
+    print(step_line, flush=True)
+def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
+    """Print the closing ``[END]`` line with the outcome, step count, score and per-step rewards."""
+    formatted_rewards = [format(value, ".2f") for value in rewards]
+    rewards_str = ",".join(formatted_rewards)
+    success_text = str(success).lower()
+    print(f"[END] success={success_text} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
+class QuantGymClient:
+    """Client for interacting with Quant-Gym environment.
+
+    Both ``reset`` and ``step`` are best-effort: when the HTTP call or the
+    JSON decoding fails, the error is printed and a placeholder observation
+    is returned instead of raising.
+    """
+    # Observation handed back when the server cannot be reached.
+    _FALLBACK_OBSERVATION = {"price": 150, "balance": 10000, "holdings": 0, "portfolio_value": 10000}
+    # Share count used when a BUY/SELL command carries an unparsable amount.
+    _DEFAULT_TRADE_AMOUNT = 5
+    def __init__(self, base_url: str, timeout: float = 10.0):
+        """Remember the server URL and open a reusable HTTP session.
+
+        Args:
+            base_url: Root URL of the environment server.
+            timeout: Seconds to wait for each HTTP response, so an
+                unresponsive server cannot hang the run forever.
+        """
+        self.base_url = base_url
+        self.timeout = timeout
+        self.session = requests.Session()
+    def _post(self, path: str, label: str, payload=None):
+        """POST ``payload`` to ``path``; return the decoded JSON or a placeholder on failure."""
+        try:
+            response = self.session.post(f"{self.base_url}{path}", json=payload, timeout=self.timeout)
+            return response.json()
+        except Exception as e:
+            # Deliberately broad: callers rely on never seeing an exception here.
+            print(f"[ERROR] {label} failed: {e}", flush=True)
+            # Copy so callers cannot mutate the shared class-level template.
+            return {"observation": dict(self._FALLBACK_OBSERVATION)}
+    def _build_payload(self, action: str, amount: int, explanation: str, strategy: str) -> dict:
+        """Translate an action string such as ``"BUY 10"`` into the JSON body for ``/step``."""
+        tokens = action.split()
+        verb = tokens[0].upper() if tokens else ""
+        argument = tokens[1] if len(tokens) > 1 else None
+        if verb == "GET_NEWS":
+            return {"type": "GET_NEWS", "explanation": explanation}
+        for trade in ("BUY", "SELL"):
+            if verb.startswith(trade):
+                if argument is not None:
+                    try:
+                        amount = int(argument)
+                    except ValueError:
+                        amount = self._DEFAULT_TRADE_AMOUNT
+                return {"type": trade, "amount": amount}
+        if verb.startswith("BACKTEST"):
+            if argument is not None:
+                # SYSTEM_PROMPT spells strategy names in lower case, so do not
+                # send the upper-cased form.
+                strategy = argument.lower()
+            return {"type": "BACKTEST", "strategy": strategy}
+        # GET_PRICE itself and anything unrecognised both map to a price query.
+        return {"type": "GET_PRICE"}
+    def reset(self):
+        """Reset environment and return the server's JSON reply (or the placeholder)."""
+        return self._post("/reset", "Reset")
+    def step(self, action: str, amount: int = 0, explanation: str = "", strategy: str = ""):
+        """Execute an action.
+
+        Args:
+            action: Command text, e.g. ``"GET_PRICE"``, ``"BUY 10"`` or
+                ``"BACKTEST momentum"``; the verb is matched case-insensitively.
+            amount: Share count used for BUY/SELL when ``action`` has no argument.
+            explanation: Text forwarded with GET_NEWS.
+            strategy: Strategy used for BACKTEST when ``action`` has no argument.
+
+        Returns:
+            The server's JSON reply, or the placeholder observation on failure.
+        """
+        payload = self._build_payload(action, amount, explanation, strategy)
+        return self._post("/step", "Step", payload)
+    def close(self):
+        """Close the session"""
+        self.session.close()
+def parse_action_from_response(text: str) -> str:
+    """Parse LLM response into action string.
+
+    Only the first two whitespace-separated tokens are considered; the verb
+    is matched case-insensitively.
+
+    Returns:
+        ``"BUY <n>"`` / ``"SELL <n>"`` (``n`` defaults to 5 when missing or not
+        all digits), ``"BACKTEST <strategy>"`` (``momentum`` unless
+        ``mean_reversion`` was requested), ``"GET_NEWS"``, or ``"GET_PRICE"``
+        for anything else, including empty or ``None`` input.
+    """
+    tokens = (text or "").split()
+    if not tokens:
+        return "GET_PRICE"
+    verb = tokens[0].upper()
+    argument = tokens[1] if len(tokens) > 1 else ""
+    for trade in ("BUY", "SELL"):
+        if verb.startswith(trade):
+            amount = argument if argument.isdigit() else "5"
+            return f"{trade} {amount}"
+    if verb.startswith("BACKTEST"):
+        # Keep the requested strategy rather than always forcing momentum;
+        # anything not advertised in SYSTEM_PROMPT falls back to momentum.
+        strategy = argument.lower()
+        if strategy not in ("momentum", "mean_reversion"):
+            strategy = "momentum"
+        return f"BACKTEST {strategy}"
+    if verb.startswith("GET_NEWS"):
+        return "GET_NEWS"
+    return "GET_PRICE"
+def fallback_strategy(observation: dict) -> str:
+    """Rule-based strategy when LLM is unavailable.
+
+    Reads ``observation["last_news"]["sentiment"]`` and returns ``"BUY 5"``
+    for ``positive``, ``"SELL 5"`` for ``negative`` and ``"GET_PRICE"``
+    otherwise (including when there is no news at all).
+    """
+    # `or {}` also covers an explicit null: the server-side observation model
+    # declares last_news as Optional with a None default.
+    news = observation.get('last_news') or {}
+    sentiment = news.get('sentiment', 'neutral')
+    if sentiment == 'positive':
+        return "BUY 5"
+    if sentiment == 'negative':
+        return "SELL 5"
+    return "GET_PRICE"
+def get_model_action(step: int, observation: dict, history: List[str]) -> str:
+    """Choose the next action; currently always delegates to the rule-based fallback.
+
+    ``step`` and ``history`` are accepted but not consulted.
+    """
+    chosen_action = fallback_strategy(observation)
+    return chosen_action
+def _step_reward(observation: dict) -> float:
+    """Score one observation in [0, 1]: fractional portfolio gain plus a news-sentiment bonus."""
+    portfolio_value = observation.get('portfolio_value', 10000)
+    # `or {}` also covers an explicit null: the server-side observation model
+    # declares last_news as Optional with a None default.
+    sentiment = (observation.get('last_news') or {}).get('sentiment', 'neutral')
+    # Gain relative to the 10000 starting balance; losses count as zero.
+    profit_reward = max(0, (portfolio_value - 10000) / 10000)
+    sentiment_bonus = 0.2 if sentiment == 'positive' else (-0.1 if sentiment == 'negative' else 0)
+    return min(1.0, max(0.0, profit_reward + sentiment_bonus))
+async def main() -> None:
+    """Run one episode against the environment and print the [START]/[STEP]/[END] log.
+
+    The final score is the mean per-step reward; success means it reaches
+    ``SUCCESS_SCORE_THRESHOLD``. Any exception during the episode is printed
+    and reported as ``success=false`` with a score of 0; the ``[END]`` line is
+    always emitted.
+    """
+    print("[INFO] Starting Quant-Gym Inference", flush=True)
+    # Check token status
+    if HF_TOKEN:
+        print(f"[INFO] HF_TOKEN found (length: {len(HF_TOKEN)} chars)", flush=True)
+    else:
+        print("[INFO] No HF_TOKEN found, using rule-based fallback strategy", flush=True)
+    # Initialize environment client
+    env = QuantGymClient(BASE_URL)
+    history: List[str] = []
+    rewards: List[float] = []
+    steps_taken = 0
+    success = False
+    final_score = 0.0
+    # NOTE(review): get_model_action() always uses the rule-based fallback, so
+    # the label reports MODEL_NAME even though no model is queried — confirm
+    # whether the log consumer expects this.
+    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME if HF_TOKEN else "fallback-rule-based")
+    try:
+        # Reset environment
+        result = env.reset()
+        observation = result.get('observation') or {}
+        print(f"[INFO] Reset complete. Initial observation: {observation}", flush=True)
+        for step in range(1, MAX_STEPS + 1):
+            # Get action
+            action_str = get_model_action(step, observation, history)
+            # Execute action
+            result = env.step(action_str)
+            observation = result.get('observation') or {}
+            # Calculate reward
+            reward = _step_reward(observation)
+            # NOTE(review): this ends the episode after MAX_STEPS - 1 steps,
+            # one fewer than the loop bound allows. Left unchanged because it
+            # happens to match the server's 10-entry price series (9 steps
+            # reach the last price) — confirm which is intended.
+            done = step >= MAX_STEPS - 1
+            rewards.append(reward)
+            steps_taken = step
+            log_step(step=step, action=action_str, reward=reward, done=done, error=None)
+            history.append(f"Step {step}: {action_str}")
+            if done:
+                break
+        final_score = sum(rewards) / len(rewards) if rewards else 0.0
+        success = final_score >= SUCCESS_SCORE_THRESHOLD
+    except Exception as e:
+        print(f"[ERROR] {e}", flush=True)
+        success = False
+        final_score = 0.0
+    finally:
+        try:
+            env.close()
+        except Exception as e:
+            # Cleanup stays best-effort, but the failure is no longer hidden.
+            print(f"[ERROR] Failed to close environment client: {e}", flush=True)
+        log_end(success=success, steps=steps_taken, score=final_score, rewards=rewards)
 if __name__ == "__main__":
+    # main() is declared async, so it is driven through asyncio.run().
+    asyncio.run(main())

server/app.py CHANGED Viewed

@@ -1,63 +1,137 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-from typing import Optional
-app = FastAPI()
-prices = [150.00, 152.50, 151.75, 153.25, 155.00]
-cash = 10000.00
-shares = 0
-step_num = 0
-class Action(BaseModel):
-    action: str
     amount: Optional[int] = 0
-@app.get("/health")
-def health():
-    return {"status": "healthy"}
 @app.get("/")
 def root():
-    return {"message": "Trading API Running"}
 @app.post("/reset")
 def reset():
-    global cash, shares, step_num
-    cash = 10000.00
-    shares = 0
-    step_num = 0
-    return {"cash": cash, "shares": shares, "price": prices[0]}
 @app.post("/step")
-def step(action: Action):
-    global cash, shares, step_num
-    step_num = min(step_num + 1, len(prices) - 1)
-    price = prices[step_num]
-    if action.action == "BUY" and action.amount:
-        cost = price * action.amount
-        if cost <= cash:
-            cash -= cost
-            shares += action.amount
-    elif action.action == "SELL" and action.amount:
-        if action.amount <= shares:
-            cash += price * action.amount
-            shares -= action.amount
-    return {
-        "price": price,
-        "cash": cash,
-        "shares": shares,
-        "portfolio_value": cash + (shares * price)
-    }
 @app.get("/tasks")
-def tasks():
     return {
         "tasks": [
-            {"id": 1, "name": "Get Price"},
-            {"id": 2, "name": "Buy Stock"},
-            {"id": 3, "name": "Sell Stock"}
         ]
-    }

+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Optional, Dict, Any, List
+from enum import Enum
+# Simple models for the API
+class ActionType(str, Enum):
+    """Action kinds accepted in the ``type`` field of an ``AgentAction``.
+
+    The ``str`` mix-in makes each member compare equal to its plain string
+    value, which is what ``SimpleEnv.step`` relies on.
+    """
+    GET_PRICE = "GET_PRICE"
+    GET_NEWS = "GET_NEWS"
+    BUY = "BUY"
+    SELL = "SELL"
+    BACKTEST = "BACKTEST"
+class AgentAction(BaseModel):
+    """Request body of ``POST /step``: one action plus its optional parameters."""
+    type: ActionType
+    # Ticker symbol; not read by SimpleEnv in the code shown here.
+    symbol: Optional[str] = "AAPL"
+    # Share count for BUY/SELL; SimpleEnv.step skips the trade when it is 0 or None.
     amount: Optional[int] = 0
+    # Free-text rationale; not read by SimpleEnv in the code shown here.
+    explanation: Optional[str] = None
+    # Strategy name for BACKTEST; not read by SimpleEnv in the code shown here.
+    strategy: Optional[str] = None
+class MarketObservation(BaseModel):
+    """Snapshot of the simulated market and portfolio returned to the agent."""
+    # SimpleEnv fills this with "step_<index>", not a wall-clock time.
+    timestamp: str = ""
+    price: float = 150.0
+    # Cash on hand.
+    balance: float = 10000.0
+    # Number of shares held.
+    holdings: int = 0
+    # SimpleEnv computes this as cash + shares * current price.
+    portfolio_value: float = 10000.0
+    # SimpleEnv supplies a dict with "headline" and "sentiment" keys.
+    last_news: Optional[Dict[str, Any]] = None
+    # Never populated by the code shown here.
+    backtest_results: Optional[Dict[str, float]] = None
+# Application object; every route below is registered on it.
+app = FastAPI(title="Quant-Gym", description="Financial Analysis Environment")
+# NOTE(review): wildcard origins combined with allow_credentials=True is a
+# very permissive CORS policy — confirm it is intended beyond local/demo use.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Simple environment state
+class SimpleEnv:
+    """In-memory, single-asset trading simulator behind the API.
+
+    State is a cursor into a fixed price series plus cash and share counts.
+    Every ``step`` advances the cursor by one (stopping at the last price)
+    whatever the action; only BUY and SELL change the portfolio.
+
+    NOTE(review): BACKTEST actions are accepted but nothing here computes
+    ``backtest_results`` — confirm whether that is still to be implemented.
+    """
+    def __init__(self):
+        """Load the fixed price series and news items, then start a fresh episode."""
+        self.prices = [150, 152, 151, 153, 155, 154, 156, 158, 157, 159]
+        self.news = [
+            {"headline": "Apple announces new AI chip", "sentiment": "positive"},
+            {"headline": "Supply chain delays expected", "sentiment": "negative"},
+            {"headline": "Analysts raise price target", "sentiment": "positive"},
+            {"headline": "Market shows strong growth", "sentiment": "positive"},
+        ]
+        self.reset()
+    def reset(self):
+        """Rewind to the first price with 10000.0 cash and no shares; return that observation."""
+        self.idx = 0
+        self.cash = 10000.0
+        self.shares = 0
+        return self._get_observation()
+    def step(self, action: AgentAction):
+        """Advance one tick, apply ``action`` if it is a valid trade, and return the new observation.
+
+        A BUY is executed only if cash covers the cost and a SELL only if
+        enough shares are held; otherwise the trade is silently skipped.
+        """
+        # Move time forward
+        self.idx = min(self.idx + 1, len(self.prices) - 1)
+        price = self.prices[self.idx]
+        amount = action.amount or 0
+        # Only strictly positive amounts trade. A negative amount would
+        # otherwise pass the affordability checks: BUY -n would add cash and
+        # SELL -n would acquire shares without checking cash.
+        if amount > 0:
+            if action.type == "BUY":
+                cost = price * amount
+                if cost <= self.cash:
+                    self.cash -= cost
+                    self.shares += amount
+            elif action.type == "SELL":
+                if amount <= self.shares:
+                    self.cash += price * amount
+                    self.shares -= amount
+        return self._get_observation()
+    def _get_observation(self):
+        """Build the observation for the current tick; news items cycle with the tick index."""
+        price = self.prices[self.idx]
+        news_idx = self.idx % len(self.news)
+        return MarketObservation(
+            timestamp=f"step_{self.idx}",
+            price=float(price),
+            balance=round(self.cash, 2),
+            holdings=self.shares,
+            portfolio_value=round(self.cash + self.shares * price, 2),
+            last_news=self.news[news_idx]
+        )
+    def get_state(self):
+        """Return the tick index, series length and current observation as a plain dict."""
+        obs = self._get_observation()
+        return {
+            "current_step": self.idx,
+            "total_steps": len(self.prices),
+            "observation": obs.dict(),
+            # Always empty: task completion is not tracked here.
+            "tasks_completed": []
+        }
+# Single module-level environment instance used by every request handler below.
+env = SimpleEnv()
 @app.get("/")
 def root():
+    """Return a static message saying the API is running."""
+    return {"message": "Quant-Gym API is running"}
+@app.get("/health")
+def health():
+    """Return a constant ``{"status": "healthy"}`` payload; no state is inspected."""
+    return {"status": "healthy"}
 @app.post("/reset")
 def reset():
+    """Reset the shared environment and return its initial observation."""
+    obs = env.reset()
+    return {"status": "reset", "observation": obs.dict()}
 @app.post("/step")
+def step(action: AgentAction):
+    """Apply one action to the shared environment and return the resulting observation.
+
+    Raises:
+        HTTPException: status 400 carrying the message of any exception
+            raised while stepping the environment.
+    """
+    try:
+        observation = env.step(action)
+        return {"observation": observation.dict()}
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=str(e))
+@app.get("/state")
+def get_state():
+    """Return the shared environment's current state without advancing it."""
+    return env.get_state()
 @app.get("/tasks")
+def get_tasks():
+    """Return the static list of the three tasks (id, name, description)."""
     return {
         "tasks": [
+            {"id": "1", "name": "Fetch Market Data", "description": "Get current price for AAPL"},
+            {"id": "2", "name": "News Analysis", "description": "Analyze news and recommend action with explanation"},
+            {"id": "3", "name": "Backtest Strategy", "description": "Backtest a trading strategy and return risk metrics"}
         ]
+    }