DouDou
Upload data2/instruction_generation/schemas.py with huggingface_hub
2116622 verified
"""
Pydantic data structure definitions for LLM structured output
"""
from pydantic import BaseModel, Field
from typing import List, Optional
class READMESummary(BaseModel):
    """Structured output for README summary"""

    # NOTE: the class docstring and every `description` below are emitted in
    # the model's JSON schema, so they are runtime text rather than comments.

    # --- Required sections (no default, so validation fails if one is missing) ---
    project_overview: str = Field(description="One-sentence project overview")
    main_features: str = Field(description="Main features list (bullet points)")
    dependencies: str = Field(description="Dependencies and environment requirements")
    how_to_run: str = Field(description="How to run/install")
    directory_structure: str = Field(description="Directory structure highlights")

    # --- Optional sections (fall back to None when not supplied) ---
    scientific_computing_related: Optional[str] = Field(
        None, description="Scientific computing/chemistry related features (if any)"
    )
    typical_input_output: Optional[str] = Field(
        None, description="Typical input/output examples (if any)"
    )
    notes: Optional[str] = Field(None, description="Notes or special instructions")
class FunctionInfo(BaseModel):
    """Function information"""

    # NOTE: the class docstring and every `description` below are emitted in
    # the model's JSON schema, so they are runtime text rather than comments.

    # --- Identity and location of the function (all required) ---
    function_name: str = Field(description="Function name")
    # Both bounds are described as 1-indexed and inclusive. Nothing in this
    # class checks that start <= end; consumers must handle that themselves.
    function_start_line: int = Field(
        description="Function start line number (1-indexed, inclusive)"
    )
    function_end_line: int = Field(
        description="Function end line number (1-indexed, inclusive)"
    )
    function_body: str = Field(description="Complete function code body")

    # --- Span of the attached documentation comment (None when not supplied) ---
    doc_start_line: Optional[int] = Field(
        None,
        description="Documentation comment start line number (if any, 1-indexed, inclusive)",
    )
    doc_end_line: Optional[int] = Field(
        None,
        description="Documentation comment end line number (if any, 1-indexed, inclusive)",
    )
class FileParseResult(BaseModel):
    """Parse result for a single code file"""

    # NOTE: the class docstring and every `description` below are emitted in
    # the model's JSON schema, so they are runtime text rather than comments.

    # --- Required file identification ---
    language: str = Field(description="Programming language (e.g., python, cpp, java)")
    file_path: str = Field(description="Relative file path")

    # --- Collections: `default_factory=list` gives each instance its own
    # fresh empty list when the field is omitted ---
    dependencies: List[str] = Field(
        description="File-level dependency list (import/include/use/require, etc.)",
        default_factory=list,
    )
    functions: List[FunctionInfo] = Field(
        description="List of all functions in the file",
        default_factory=list,
    )