| | |
| | """Schema Detector Plugin""" |
| | import pandas as pd |
| | from typing import Dict, Any |
| |
|
class SchemaDetector:
    """Detects and reports data schema."""

    def get_schema(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Build a per-column summary of ``df``.

        Each column is assigned one inferred type, checked in this order:

        * ``"Numeric"`` -- ``pd.api.types.is_numeric_dtype`` accepts it.
        * ``"Datetime"`` -- ``pd.api.types.is_datetime64_any_dtype`` accepts it.
        * ``"Categorical"`` -- its number of distinct non-null values is
          strictly below ``min(10, len(df) / 5)``.
        * ``"Text/Object"`` -- everything else.

        Args:
            df: The frame to inspect.

        Returns:
            A dict keyed by column label. Each value is a dict with the keys
            ``"inferred_type"``, ``"pandas_dtype"`` (``str`` of the dtype),
            ``"non_null_count"`` and ``"unique_values"`` (both plain ``int``).
            A frame without columns yields ``{}``. If a label occurs more
            than once, the right-most column with that label is reported,
            because the result is keyed by label.
        """
        # The threshold depends only on the row count, so compute it once
        # instead of once per column.
        categorical_limit = min(10, len(df) / 5)

        schema: Dict[str, Any] = {}
        # ``items()`` yields exactly one Series per column, whereas ``df[col]``
        # returns a DataFrame (which has no ``.dtype``) for duplicated labels.
        for col, series in df.items():
            # ``nunique`` scans the whole column; reuse the result for both
            # the categorical check and the reported count.
            unique_count = int(series.nunique())

            if pd.api.types.is_numeric_dtype(series):
                base_type = "Numeric"
            elif pd.api.types.is_datetime64_any_dtype(series):
                base_type = "Datetime"
            elif unique_count < categorical_limit:
                base_type = "Categorical"
            else:
                base_type = "Text/Object"

            schema[col] = {
                "inferred_type": base_type,
                "pandas_dtype": str(series.dtype),
                "non_null_count": int(series.count()),
                "unique_values": unique_count,
            }
        return schema
| |
|