AutomaticDatavisualization / feature_select.py
RahulParajuli12's picture
Added application file
2735152
import pandas as pd
import streamlit as st
import plotly.express as px
import os
def feature_importance_plot(model,names):
importance = {}
for i,j in zip(names, list(model.feature_importances_)):
importance[i] = j
feature_importance = dict(sorted(importance.items(), key=lambda item: item[1], reverse=True))
plot_df = pd.DataFrame(feature_importance.items(), columns=["Features", "Importance"])
fig = px.bar(plot_df,
x = "Features",
y = "Importance")
st.plotly_chart(fig)
# Feature Importance with Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
def random_forest_classifier(X,Y,col_names):
model = RandomForestClassifier(n_estimators=100)
model.fit(X, Y)
feature_importance_plot(model,col_names)
# Feature Importance with Extra Trees Classifier
from sklearn.ensemble import ExtraTreesClassifier
def extra_tree_classifier(X,Y,col_names):
model = ExtraTreesClassifier(n_estimators=100)
model.fit(X, Y)
feature_importance_plot(model,col_names)
from xgboost import XGBClassifier
def xgboost(X,Y,col_names):
model = XGBClassifier(random_state = 0)
model.fit(X, Y)
feature_importance_plot(model,col_names)
# primary interface for the App
def st_feature_selection():
df = pd.read_csv("temp_data/test.csv")
# drop object/string containing columns
df_without_obj = df.select_dtypes(exclude=['object'])
# add the label column once again
# df = pd.concat([df_without_obj, df["os"]], axis=1)
consider_features = st.sidebar.selectbox(
'Choose No. of Target Features', ["All", "Select Features"])
if consider_features == "All":
col_names = list(df.columns)
if consider_features == "Select Features":
col_names = []
feature_list = list(df.columns)
for col_name in feature_list:
check_box = st.sidebar.checkbox(col_name)
if check_box:
col_names.append(col_name)
df = df[col_names]
st.write(df)
# considering the last column as class labels
array = df.values
X = array[:,0:len(col_names)-1]
Y = array[:,len(col_names)-1]
select_method = st.sidebar.selectbox(
'Select Feature Selection Method', ["Random Forest", "ExtraTree", "XGBoost"])
if select_method == "Random Forest":
try:
random_forest_classifier(X,Y,col_names)
except Exception as e:
st.write(e)
if select_method == "ExtraTree":
try:
extra_tree_classifier(X,Y,col_names)
except Exception as e:
st.write(e)
if select_method == "XGBoost":
try:
xgboost(X,Y,col_names)
except Exception as e:
st.write(e)