"""
Pydantic models for API data validation.

Defines request and response schemas with validation rules.
"""
|
|
from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator
|
|
|
|
class IssueInput(BaseModel):
    """Request payload describing one GitHub issue or pull request to classify.

    Only ``issue_text`` is required; every other field defaults to ``None``.
    ``issue_text`` and ``issue_description`` are stripped of surrounding
    whitespace during validation, and a value that is blank after stripping
    is rejected.
    """

    # Sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issue_text": "Add support for OAuth authentication",
                "issue_description": "Implement OAuth 2.0 flow for third-party providers",
                "repo_name": "myorg/myproject",
                "pr_number": 456,
                "author_name": "developer123",
            }
        }
    )

    # Required title; must contain at least one character.
    issue_text: str = Field(
        min_length=1,
        description="Issue title text",
        examples=["Fix bug in authentication module"],
    )
    # Optional body text.
    issue_description: Optional[str] = Field(
        default=None,
        description="Issue body text",
        examples=["The authentication module fails when handling expired tokens"],
    )
    repo_name: Optional[str] = Field(
        default=None,
        description="Repository name",
        examples=["user/repo-name"],
    )
    # When given, must be >= 1.
    pr_number: Optional[int] = Field(
        default=None,
        ge=1,
        description="Pull request number",
        examples=[123],
    )
    created_at: Optional[datetime] = Field(
        default=None,
        description="Issue creation timestamp",
        examples=["2024-01-15T10:30:00Z"],
    )
    author_name: Optional[str] = Field(
        default=None,
        description="Issue author username",
        examples=["johndoe"],
    )

    @field_validator("issue_text", "issue_description")
    @classmethod
    def clean_text(cls, v: Optional[str]) -> Optional[str]:
        """Strip surrounding whitespace and reject text that is blank afterwards.

        Args:
            v: Raw field value; ``None`` is passed through untouched.

        Returns:
            The stripped text, or ``None`` when no value was supplied.

        Raises:
            ValueError: If the text is empty or consists only of whitespace.
        """
        if v is None:
            return None
        stripped = v.strip()
        if stripped:
            return stripped
        raise ValueError("Text cannot be empty or whitespace only")
|
|
|
|
class SkillPrediction(BaseModel):
    """One predicted skill paired with the confidence assigned to it."""

    # Sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "skill_name": "Language/Java",
                "confidence": 0.92,
            }
        }
    )

    # Required skill label.
    skill_name: str = Field(
        description="Name of the predicted skill (domain/subdomain)",
        examples=["Language/Java", "DevOps/CI-CD"],
    )
    # Required score, validated to lie within [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence score (0.0 to 1.0)",
        examples=[0.85],
    )
|
|
|
|
class PredictionResponse(BaseModel):
    """Response model for skill classification predictions.

    ``num_predictions`` is required and must be non-negative; it is supplied
    by the caller and is not derived from ``predictions`` by this model.
    """

    # Predicted skills; defaults to an empty list.
    predictions: list[SkillPrediction] = Field(
        default_factory=list, description="List of predicted skills with confidence scores"
    )
    num_predictions: int = Field(
        ..., ge=0, description="Total number of predicted skills", examples=[5]
    )
    model_version: str = Field(default="1.0.0", description="Model version", examples=["1.0.0"])
    # Optional timing information; must be non-negative when present.
    processing_time_ms: Optional[float] = Field(
        default=None, ge=0.0, description="Processing time in milliseconds", examples=[125.5]
    )

    model_config = ConfigDict(
        # ``model_version`` begins with "model_", a prefix that Pydantic v2
        # releases reserving the whole "model_" namespace warn about at class
        # creation. Clearing the protected namespaces keeps the public field
        # name while silencing that warning.
        protected_namespaces=(),
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
            }
        },
    )
|
|
|
|
class BatchIssueInput(BaseModel):
    """Request payload carrying several issues to classify in one call.

    The ``issues`` list is required and must hold between 1 and 100 entries.
    """

    # Sample payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "issues": [
                    {
                        "issue_text": "Fix authentication bug",
                        "issue_description": "Users cannot login with OAuth",
                    },
                    {
                        "issue_text": "Add database migration",
                        "issue_description": "Create migration for new user table",
                    },
                ]
            }
        }
    )

    issues: list[IssueInput] = Field(
        min_length=1,
        max_length=100,
        description="Issues to classify (max 100)",
    )
|
|
|
|
class BatchPredictionResponse(BaseModel):
    """Response model for batch predictions.

    ``total_issues`` is required and must be non-negative; it is supplied by
    the caller and is not derived from ``results`` by this model.
    """

    # One prediction result per submitted issue; defaults to an empty list.
    results: list[PredictionResponse] = Field(
        default_factory=list, description="Prediction results, one per issue"
    )
    total_issues: int = Field(..., ge=0, description="Number of issues processed", examples=[2])
    # Optional timing information; must be non-negative when present.
    total_processing_time_ms: Optional[float] = Field(
        default=None, ge=0.0, description="Processing time in milliseconds", examples=[250.0]
    )

    model_config = ConfigDict(
        json_schema_extra={
            # The example lists two results so that it agrees with
            # ``total_issues`` and with the "one per issue" contract of
            # ``results``.
            "example": {
                "results": [
                    {
                        "predictions": [{"skill_name": "Language/Java", "confidence": 0.92}],
                        "num_predictions": 1,
                        "model_version": "1.0.0",
                    },
                    {
                        "predictions": [{"skill_name": "DevOps/CI-CD", "confidence": 0.78}],
                        "num_predictions": 1,
                        "model_version": "1.0.0",
                    },
                ],
                "total_issues": 2,
                "total_processing_time_ms": 250.0,
            }
        }
    )
|
|
|
|
class ErrorResponse(BaseModel):
    """Error response model.

    ``error`` is required; ``detail`` is optional. ``timestamp`` defaults to
    the current time in UTC and is serialized as an ISO 8601 string.
    """

    error: str = Field(..., description="Error message", examples=["Invalid input"])
    detail: Optional[str] = Field(
        default=None, description="Detailed error", examples=["Field 'issue_text' is required"]
    )
    # Timezone-aware UTC default: a naive ``datetime.now()`` would emit local
    # time with no offset, which is ambiguous for API clients and disagrees
    # with the UTC example in the schema.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Error timestamp",
    )

    @field_serializer("timestamp")
    def serialize_timestamp(self, value: datetime) -> str:
        """Return ``value`` formatted as an ISO 8601 string."""
        return value.isoformat()

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "error": "Validation Error",
                "detail": "issue_text: field required",
                "timestamp": "2024-01-15T10:30:00Z",
            }
        }
    )
|
|
|
|
class HealthCheckResponse(BaseModel):
    """Health check response model.

    ``model_loaded`` is required; ``status`` and ``version`` have defaults,
    and ``timestamp`` defaults to the current time in UTC.
    """

    # ``model_loaded`` begins with "model_", a prefix that Pydantic v2 releases
    # reserving the whole "model_" namespace warn about at class creation.
    # Clearing the protected namespaces keeps the public field name while
    # silencing that warning.
    model_config = ConfigDict(protected_namespaces=())

    status: str = Field(default="healthy", description="Service status", examples=["healthy"])
    model_loaded: bool = Field(..., description="Model ready status", examples=[True])
    version: str = Field(default="1.0.0", description="API version", examples=["1.0.0"])
    # Timezone-aware UTC default: a naive ``datetime.now()`` would emit local
    # time with no offset, which is ambiguous for API clients.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Timestamp",
    )
|
|
|
|
class PredictionRecord(PredictionResponse):
    """Extended prediction model with metadata from MLflow.

    Adds a required ``run_id`` and ``timestamp`` plus an ``input_text`` field
    (defaulting to an empty string) on top of the ``PredictionResponse``
    fields.
    """

    run_id: str = Field(..., description="MLflow Run ID")
    timestamp: datetime = Field(..., description="Prediction timestamp")
    input_text: Optional[str] = Field(default="", description="Input text classified")

    model_config = ConfigDict(
        # The inherited ``model_version`` field begins with "model_", a prefix
        # that Pydantic v2 releases reserving the whole "model_" namespace
        # warn about when this subclass is created. Clearing the protected
        # namespaces here silences that warning for this class.
        protected_namespaces=(),
        json_schema_extra={
            "example": {
                "predictions": [
                    {"skill_name": "Language/Java", "confidence": 0.92},
                    {"skill_name": "DevOps/CI-CD", "confidence": 0.78},
                ],
                "num_predictions": 2,
                "model_version": "1.0.0",
                "processing_time_ms": 125.5,
                "run_id": "a1b2c3d4e5f6",
                "timestamp": "2024-01-15T10:30:00Z",
                "input_text": "Fix bug in authentication module",
            }
        },
    )
|
|