Text Classification
Joblib
Scikit-learn
English
scikit-learn
sklearn-logistic-regression
document-classification
binary-classification
legal-documents
hoa
property-management
ccr
declaration-of-covenants
logistic-regression
Eval Results (legacy)
Instructions to use GoverningDocs/ccr-binary-logreg with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use GoverningDocs/ccr-binary-logreg with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

# NOTE: the config below lists the shipped model file as
# "ccr_binary_logreg_tuned.joblib"; confirm the filename in the repo.
model = joblib.load(
    hf_hub_download("GoverningDocs/ccr-binary-logreg", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model_type": "sklearn-logistic-regression", | |
| "embedding_model": "openai-text-embedding-3-small", | |
| "embedding_dim": 1536, | |
| "max_pages_per_doc": 20, | |
| "skip_boilerplate": true, | |
| "operating_threshold": 0.436, | |
| "decision_band": { | |
| "reject_below": 0.25, | |
| "fast_pass_at_or_above": 0.55, | |
| "escalate_between": "[0.25, 0.55)", | |
| "note": "Recalibrated empirically on production score distribution. Plan-time (0.30, 0.85) left FAST_PASS empty." | |
| }, | |
| "training_data": { | |
| "source": "setfit_experiments PostgreSQL DB + multi-signal Phase 0 relabeling", | |
| "n_pages": 7129, | |
| "n_documents": 465, | |
| "binary_class_balance": { | |
| "positive": 3014, | |
| "negative": 4115 | |
| }, | |
| "split": { | |
| "train": 298, | |
| "val": 64, | |
| "test": 65, | |
| "train_pos": 201, | |
| "val_pos": 39, | |
| "test_pos": 47 | |
| } | |
| }, | |
| "test_metrics": { | |
| "name": "logreg_tuned (TEST set)", | |
| "threshold": 0.4359872072086175, | |
| "accuracy": 0.9076923076923077, | |
| "f1": 0.94, | |
| "roc_auc": 0.9550827423167849, | |
| "brier_score": 0.13433461274707392, | |
| "ece": 0.27835753511850964, | |
| "confusion_matrix": [ | |
| [ | |
| 12, | |
| 6 | |
| ], | |
| [ | |
| 0, | |
| 47 | |
| ] | |
| ] | |
| }, | |
| "validation_metrics": { | |
| "name": "LogReg @ best-threshold", | |
| "threshold": 0.4359872072086175, | |
| "accuracy": 0.859375, | |
| "f1": 0.8941176470588236, | |
| "roc_auc": 0.8748717948717949, | |
| "brier_score": 0.15576505514468417, | |
| "ece": 0.19068488965598734, | |
| "confusion_matrix": [ | |
| [ | |
| 17, | |
| 8 | |
| ], | |
| [ | |
| 1, | |
| 38 | |
| ] | |
| ] | |
| }, | |
| "candidates_compared": [ | |
| "logreg_05", | |
| "logreg_tuned", | |
| "logreg_platt_05", | |
| "logreg_platt_tuned", | |
| "mlp_05", | |
| "mlp_tuned" | |
| ], | |
| "winner_selection_rule": "max F1 across LogReg, LogReg+Platt, MLP at best threshold; simplicity tiebreak to LogReg", | |
| "calibrator": { | |
| "filename": "ccr_binary_isotonic_calibrator.joblib", | |
| "method": "isotonic_prefit", | |
| "fit_split_seed": 42, | |
| "fit_split_size": 64, | |
| "test_ece_before": 0.27835753511850964, | |
| "test_ece_after": 0.08663491157117499, | |
| "shipped_model_filename": "ccr_binary_logreg_tuned.joblib", | |
| "note": "Optional artifact. Produces approximately 3 plateau outputs (0.737, 0.833, 1.000) due to the small (64-doc) calibration set. Treat as 3-level confidence rather than fine-grained probability. See ISOTONIC_CALIBRATION_FINDINGS.md." | |
| } | |
| } |