| from sklearn import model_selection, preprocessing |
| from traning_zone.traitement_data.feature_engeneering.data_clearning import * |
|
|
| import os |
| import joblib |
|
|
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn import preprocessing |
|
|
|
|
def engineering(data, classe):
    """Build TF-IDF features and encoded labels for one class-specific model.

    Uses ``data.DESCRIPTION`` as the text input and ``data.BEM_CLASS_DESC_FR``
    as the target. The target is label-encoded, the samples are split into
    train and test sets (stratified on the encoded target), and the text is
    vectorized with TF-IDF.

    Side effects:
        * Creates ``traning_zone/mini_modèles/<classe>/transformers``
          (including missing parent directories).
        * Writes the fitted vectorizer to ``tv_transform.pkl`` and the fitted
          label encoder to ``object.pkl`` in that directory.
        * Prints the set of encoded labels found in the train and test splits.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame -- confirm with callers).
        classe: Name of the sub-directory under ``traning_zone/mini_modèles``
            in which the transformers are stored.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)`` holding the TF-IDF
        matrices of the train and test descriptions followed by the encoded
        train and test labels.
    """
    # makedirs creates the class directory and its "transformers" child in
    # one call and is a no-op when they already exist.
    transformers_dir = f'traning_zone/mini_modèles/{classe}/transformers'
    os.makedirs(transformers_dir, exist_ok=True)

    descriptions = data.DESCRIPTION
    raw_labels = data.BEM_CLASS_DESC_FR

    label_encoder = preprocessing.LabelEncoder()
    labels = label_encoder.fit_transform(raw_labels)

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        descriptions, labels, stratify=labels
    )

    # Shows which encoded classes ended up in each split.
    print(set(y_train))
    print(set(y_test))

    # NOTE(review): the vectorizer is fitted on the full corpus (train + test),
    # so its vocabulary/IDF statistics include the test documents. Kept as-is
    # so the persisted artifact does not change; fit on the training split
    # only if the test score must be an unbiased estimate.
    tv = TfidfVectorizer()
    tv.fit(descriptions)
    tv_xtrain = tv.transform(x_train)
    tv_xtest = tv.transform(x_test)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as handle:
        joblib.dump(tv, handle)
    with open(f"{transformers_dir}/object.pkl", 'wb') as handle:
        joblib.dump(label_encoder, handle)

    return tv_xtrain, tv_xtest, y_train, y_test
|
|
|
|
|
|
def engineering_modele(data):
    """Build TF-IDF features and encoded labels for the top-level model.

    Uses ``data.DESCRIPTION`` as the text input and
    ``data.Regroupement_de_Class`` as the target. The target is label-encoded,
    the samples are split into train and test sets (stratified on the encoded
    target), and the text is vectorized with TF-IDF.

    Side effects:
        * Creates ``traning_zone/modèles/transformers`` (including missing
          parent directories).
        * Writes the fitted vectorizer to ``tv_transform.pkl`` and the fitted
          label encoder to ``object.pkl`` in that directory.
        * Prints the set of encoded labels found in the train and test splits.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame -- confirm with callers).

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)`` holding the TF-IDF
        matrices of the train and test descriptions followed by the encoded
        train and test labels.
    """
    # makedirs creates the model directory and its "transformers" child in
    # one call and is a no-op when they already exist.
    transformers_dir = 'traning_zone/modèles/transformers'
    os.makedirs(transformers_dir, exist_ok=True)

    descriptions = data.DESCRIPTION
    raw_labels = data.Regroupement_de_Class

    label_encoder = preprocessing.LabelEncoder()
    labels = label_encoder.fit_transform(raw_labels)

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        descriptions, labels, stratify=labels
    )

    # Shows which encoded classes ended up in each split.
    print(set(y_train))
    print(set(y_test))

    # NOTE(review): the vectorizer is fitted on the full corpus (train + test),
    # so its vocabulary/IDF statistics include the test documents. Kept as-is
    # so the persisted artifact does not change; fit on the training split
    # only if the test score must be an unbiased estimate.
    tv = TfidfVectorizer()
    tv.fit(descriptions)
    tv_xtrain = tv.transform(x_train)
    tv_xtest = tv.transform(x_test)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as handle:
        joblib.dump(tv, handle)
    with open(f"{transformers_dir}/object.pkl", 'wb') as handle:
        joblib.dump(label_encoder, handle)

    return tv_xtrain, tv_xtest, y_train, y_test