| ''' |
| Author : Rupesh Garsondiya |
| github : @Rupeshgarsondiya |
| Organization : L.J university |
| |
| ''' |
|
|
| |
|
|
| |
|
|
| import pandas as pd |
| import numpy as np |
| import streamlit as st |
| from sklearn.preprocessing import OneHotEncoder,StandardScaler |
| from sklearn.model_selection import train_test_split |
| from sklearn.pipeline import Pipeline,make_pipeline |
| from sklearn.compose import ColumnTransformer |
|
|
|
|
class FeatureEngineering:
    """Load the user-behaviour dataset and prepare it for model training.

    ``cleandata`` reads the raw CSV, drops the identifier column, shortens
    the column names and converts two columns to larger units.
    ``get_clean_data`` builds on that: it splits the cleaned frame into
    train/test parts and one-hot encodes the categorical columns.
    """

    # Default CSV location, resolved relative to the working directory.
    _DATA_PATH = 'Data/user_behavior_dataset.csv'

    # Raw CSV header -> short name used by the rest of this class.
    # NOTE(review): '(hours/Screen_timeday)' looks like a garbled
    # 'Screen_time(hours/day)'. It is kept unchanged because code outside
    # this class may look the column up by this exact name - confirm and fix
    # everywhere at once.
    _COLUMN_RENAMES = {
        'Device Model': 'P_Model',
        'Operating System': 'OS',
        'App Usage Time (min/day)': 'App_Time(hours/day)',
        'Screen On Time (hours/day)': '(hours/Screen_timeday)',
        'Battery Drain (mAh/day)': 'Battery_Drain(mAh/day)',
        'Number of Apps Installed': 'Installed_app',
        'Data Usage (MB/day)': 'Data_Usage(GB/day)',
    }

    _TARGET_COL = 'User Behavior Class'
    _CATEGORICAL_COLS = ['P_Model', 'OS', 'Gender']

    def __init__(self):
        pass

    def cleandata(self, path=_DATA_PATH):
        """Read the raw CSV and return a cleaned DataFrame.

        Args:
            path: Location of the CSV file. Defaults to
                ``'Data/user_behavior_dataset.csv'``.

        Returns:
            A ``pandas.DataFrame`` without the ``'User ID'`` column, with the
            columns renamed according to ``_COLUMN_RENAMES``, app usage time
            divided by 60 (minutes -> hours) and data usage divided by 1024
            (MB -> GB).

        Raises:
            FileNotFoundError: If ``path`` does not exist (from pandas).
            KeyError: If an expected column is missing from the file.
        """
        data = pd.read_csv(path)

        # The identifier carries no predictive information.
        data = data.drop(columns='User ID')
        data = data.rename(columns=self._COLUMN_RENAMES)

        # Bring the values in line with the units named in the new headers.
        data['App_Time(hours/day)'] = data['App_Time(hours/day)'] / 60
        data['Data_Usage(GB/day)'] = data['Data_Usage(GB/day)'] / 1024

        return data

    def get_clean_data(self, test_size=0.2, random_state=None,
                       path=_DATA_PATH):
        """Split the cleaned data and one-hot encode its categorical columns.

        Args:
            test_size: Share of rows held out for testing (default ``0.2``).
            random_state: Seed forwarded to ``train_test_split``. ``None``
                (the default) keeps the split random on every call.
            path: CSV location forwarded to ``cleandata``.

        Returns:
            A tuple ``(x_train_t, x_test_t, y_train, y_test, pipeline)``:
            the transformed train and test features, the matching target
            series, and the fitted preprocessing ``Pipeline``.
        """
        df = self.cleandata(path)
        # Preview of the cleaned frame, printed to stdout.
        print(df.head())

        features = df.drop(columns=self._TARGET_COL)
        target = df[self._TARGET_COL]

        x_train, x_test, y_train, y_test = train_test_split(
            features, target, test_size=test_size, random_state=random_state)

        # Only the categorical columns are transformed; every other column is
        # passed through unchanged (no scaling is applied).
        # handle_unknown='ignore' encodes a category that was not seen while
        # fitting as all zeros instead of raising.
        preprocessor = ColumnTransformer(
            transformers=[
                ('cat', OneHotEncoder(handle_unknown='ignore'),
                 self._CATEGORICAL_COLS),
            ],
            remainder='passthrough')

        pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

        # Fit on the training split ONLY. Re-fitting on the test split would
        # discard what was learned from the training data, leak test
        # information into preprocessing, and could fail on categories that
        # appear only in the training rows.
        pipeline.fit(x_train)
        x_train_t = pipeline.transform(x_train)
        x_test_t = pipeline.transform(x_test)

        return x_train_t, x_test_t, y_train, y_test, pipeline
| |
|
|
|
|
|
|
|
|