File size: 2,919 Bytes
2735152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pandas as pd
import streamlit as st
import plotly.express as px
import os

def feature_importance_plot(model, names):
    """Draw a Streamlit bar chart of a fitted model's feature importances.

    Each entry of ``names`` is paired positionally with the matching entry of
    ``model.feature_importances_``; pairing stops at the shorter of the two.
    Bars are ordered from the highest importance to the lowest.
    """
    # Going through a dict means a repeated name keeps only its last score.
    scores = dict(zip(names, list(model.feature_importances_)))

    # sorted() is stable, so tied scores keep their original relative order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)

    plot_df = pd.DataFrame(ranked, columns=["Features", "Importance"])
    st.plotly_chart(px.bar(plot_df, x="Features", y="Importance"))


# Feature Importance with Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

def random_forest_classifier(X, Y, col_names):
    """Fit a 100-tree random forest on (X, Y) and chart its feature importances.

    ``col_names`` is handed unchanged to ``feature_importance_plot`` to label
    the bars.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    fitted_forest = RandomForestClassifier(n_estimators=100).fit(X, Y)
    feature_importance_plot(fitted_forest, col_names)

    
    
# Feature Importance with Extra Trees Classifier
from sklearn.ensemble import ExtraTreesClassifier

def extra_tree_classifier(X, Y, col_names):
    """Fit a 100-tree extra-trees ensemble on (X, Y) and chart its feature importances.

    ``col_names`` is handed unchanged to ``feature_importance_plot`` to label
    the bars.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    fitted_trees = ExtraTreesClassifier(n_estimators=100).fit(X, Y)
    feature_importance_plot(fitted_trees, col_names)
    


from xgboost import XGBClassifier

def xgboost(X, Y, col_names):
    """Fit an XGBoost classifier (seeded with 0) on (X, Y) and chart its feature importances.

    ``col_names`` is handed unchanged to ``feature_importance_plot`` to label
    the bars.
    """
    booster = XGBClassifier(random_state=0)
    booster.fit(X, Y)
    feature_importance_plot(model=booster, names=col_names)

    


# primary interface for the App
def st_feature_selection(data_path="temp_data/test.csv"):
    """Streamlit page: choose columns and a tree model, then chart feature importances.

    Loads the CSV at ``data_path``, lets the user keep every column or tick
    individual ones in the sidebar, displays the resulting table, and then
    treats the last kept column as the class label and all preceding kept
    columns as features. The model picked in the sidebar is fitted on that
    split and its feature importances are drawn as a bar chart.

    Columns are not filtered by dtype here; if a model cannot handle the
    selected data, the exception it raises is written to the page.

    Args:
        data_path: CSV file to load. Defaults to the path this page has
            always read from.
    """
    df = pd.read_csv(data_path)

    consider_features = st.sidebar.selectbox(
        'Choose No. of Target Features', ["All", "Select Features"])

    all_columns = list(df.columns)
    if consider_features == "All":
        col_names = all_columns
    else:
        # One sidebar checkbox per column; keep the ticked ones in file order.
        col_names = [name for name in all_columns if st.sidebar.checkbox(name)]

    df = df[col_names]
    st.write(df)

    select_method = st.sidebar.selectbox(
        'Select Feature Selection Method', ["Random Forest", "ExtraTree", "XGBoost"])

    # Streamlit reruns this function on every widget change, so right after
    # switching to "Select Features" nothing is ticked yet. Slicing a label
    # column out of a zero-column table would raise IndexError, and a single
    # column leaves no features to rank, so wait for a usable selection.
    if len(col_names) < 2:
        st.warning(
            "Select at least two columns: one or more features, "
            "with the last selected column used as the class label.")
        return

    # considering the last column as class labels
    array = df.values
    X = array[:, :-1]
    Y = array[:, -1]
    feature_names = col_names[:-1]

    methods = {
        "Random Forest": random_forest_classifier,
        "ExtraTree": extra_tree_classifier,
        "XGBoost": xgboost,
    }

    # Deliberately broad: this is the page-level boundary, and any fitting or
    # plotting failure should be shown to the user instead of crashing the app.
    try:
        methods[select_method](X, Y, feature_names)
    except Exception as e:
        st.write(e)