# NOTE: file-viewer residue captured together with this script (not part of the program):
#   Spaces: Sleeping | File size: 5,749 Bytes | 6039e0a e78a19b
#   (line-number gutter 1-158 omitted)
import pandas as pd
import streamlit as st
import joblib
from pathlib import Path
# Page-level settings: browser tab title, tab icon, and a centered layout.
st.set_page_config(
    page_title='Star System Classification (LightGBM)',
    page_icon='🪐',
    layout='centered',
)
# All artifacts are looked up next to this script, independent of the
# process's current working directory.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR.joinpath('lightgbm_model.pkl')
FEATURES_PATH = BASE_DIR.joinpath('featurer.pkl')  # file name as it was saved
PLANET_ENCODER_PATH = BASE_DIR.joinpath('planet_encoder.pkl')
STAR_ENCODER_PATH = BASE_DIR.joinpath('star_encoder.pkl')

# --- Fixed ordinal mapping used in training ---
# Position in the tuple is the encoded value: Low -> 0, Medium -> 1, High -> 2.
ACTIVITY_MAP = {
    level: rank for rank, level in enumerate(('Low', 'Medium', 'High'))
}

# Optional: display names keyed by class id (edit the tuple if the
# competition uses different names); position in the tuple is the class id.
LABEL_NAMES = dict(enumerate(('Habitable', 'Young', 'Old', 'Exotic')))
@st.cache_resource
def load_artifacts():
    """Load the model, feature list, and both encoders from disk.

    The four artifact paths are checked up front so that a single error can
    name every file that is absent, instead of failing on the first one.

    Returns:
        A ``(model, features, le_planet, le_star)`` tuple, in that order.

    Raises:
        FileNotFoundError: If one or more artifact files do not exist.
    """
    # Order matters: it is both the load order and the order of the result.
    artifact_paths = (
        MODEL_PATH,
        FEATURES_PATH,
        PLANET_ENCODER_PATH,
        STAR_ENCODER_PATH,
    )

    missing = [path.name for path in artifact_paths if not path.exists()]
    if missing:
        hint = (
            '\n\nMake sure these files are in the same folder as app.py:\n'
            '- lightgbm_model.pkl\n- featurer.pkl\n- planet_encoder.pkl\n- star_encoder.pkl'
        )
        raise FileNotFoundError(
            'Missing files in repo root: ' + ', '.join(missing) + hint
        )

    model, features, le_planet, le_star = (
        joblib.load(path) for path in artifact_paths
    )
    return model, features, le_planet, le_star
def safe_transform(le, value: str, col_name: str) -> int:
    """Encode a single category value with a saved LabelEncoder.

    Args:
        le: Fitted encoder exposing ``transform`` (``classes_`` is read, when
            present, to list the accepted values in the error message).
        value: Raw category value to encode.
        col_name: Column name; used only in the error message.

    Returns:
        The integer code ``le.transform`` assigns to ``value``.

    If the encoder rejects the value, an error listing the known categories
    is shown with ``st.error`` and the script run is halted with ``st.stop``.
    Any other failure (for example an object that is not an encoder at all)
    is deliberately left to propagate instead of being reported as an
    "unknown category".
    """
    try:
        return int(le.transform([value])[0])
    except (ValueError, KeyError, TypeError):
        # Only lookup-style failures mean "value not known to the encoder".
        # NOTE(review): scikit-learn's LabelEncoder is expected to raise
        # ValueError for unseen labels; KeyError/TypeError are kept for
        # dict-like or mixed-type encoders - confirm against the saved object.
        known = list(getattr(le, 'classes_', []))
        st.error(f'Unknown category for {col_name}: {value}. Known values: {known}')
        st.stop()
# Model, ordered feature list, and the two saved encoders used below.
model, FEATURES, le_planet, le_star = load_artifacts()

st.title('🪐 Star System Classification (LightGBM)')
st.write('Predict the star system type using 10 astrophysical measurements (multiclass).')

# Reminder of the files expected alongside this script; collapsed by default.
required_files_listing = (
    'app.py\n'
    'lightgbm_model.pkl\n'
    'featurer.pkl\n'
    'planet_encoder.pkl\n'
    'star_encoder.pkl\n'
    'requirements.txt'
)
files_panel = st.expander('ℹ️ Required files in this folder', expanded=False)
files_panel.code(required_files_listing)
st.subheader('Enter feature values')

# --- Inputs ---
# Numeric measurements. All but metallicity are bounded below at 0.0.
star_size = st.number_input(
    label='star_size',
    min_value=0.0,
    value=1.0,
    step=0.01,
)
star_brightness = st.number_input(
    label='star_brightness',
    min_value=0.0,
    value=1.2,
    step=0.01,
)
distance_from_earth = st.number_input(
    label='distance_from_earth',
    min_value=0.0,
    value=90.0,
    step=1.0,
)
star_mass = st.number_input(
    label='star_mass',
    min_value=0.0,
    value=1.3,
    step=0.01,
)
# No lower bound here; displayed with four decimals.
metallicity = st.number_input(
    label='metallicity',
    value=0.02,
    step=0.001,
    format='%.4f',
)

# Discrete, already-numeric codes.
galaxy_region = st.selectbox(label='galaxy_region', options=[0, 1, 2], index=1)
galaxy_type = st.selectbox(label='galaxy_type', options=[0, 1, 2], index=0)

# Categorical inputs shown as their original strings. The first two take
# their choices from the saved encoders, so only values the encoders know
# can be selected.
star_spectral_class = st.selectbox(
    label='star_spectral_class',
    options=list(le_star.classes_),
    index=0,
)
planet_configuration = st.selectbox(
    label='planet_configuration',
    options=list(le_planet.classes_),
    index=0,
)
stellar_activity_class = st.selectbox(
    label='stellar_activity_class',
    options=['Low', 'Medium', 'High'],
    index=0,
)
# --- Apply the same preprocessing as training ---
# Activity uses the fixed ordinal mapping; the other two categorical columns
# go through their saved LabelEncoders (planet first, then star).
activity_code = ACTIVITY_MAP[stellar_activity_class]
planet_code = safe_transform(le_planet, planet_configuration, 'planet_configuration')
star_code = safe_transform(le_star, star_spectral_class, 'star_spectral_class')

# --- Single-row record holding the already-encoded values ---
row = {
    'star_size': float(star_size),
    'star_brightness': float(star_brightness),
    'galaxy_region': int(galaxy_region),
    'distance_from_earth': float(distance_from_earth),
    'galaxy_type': int(galaxy_type),
    'star_spectral_class': star_code,
    'planet_configuration': planet_code,
    'stellar_activity_class': activity_code,
    'star_mass': float(star_mass),
    'metallicity': float(metallicity),
}
X_input = pd.DataFrame([row])

# Every feature the model expects must be present; otherwise stop the run.
missing_cols = [name for name in FEATURES if name not in X_input.columns]
if missing_cols:
    st.error(f'Missing columns for model: {missing_cols}')
    st.stop()

# Columns the model does not know are not an error; they are simply dropped.
extra_cols = [name for name in X_input.columns if name not in FEATURES]
if extra_cols:
    X_input = X_input.drop(columns=extra_cols)

# Enforce the saved feature order.
X_input = X_input[FEATURES]
st.divider()

# Two side-by-side actions: the predicted class on the left, the per-class
# probability table on the right.
col1, col2 = st.columns(2)

with col1:
    if st.button('🔮 Predict', use_container_width=True):
        pred = model.predict(X_input)[0]
        pred_int = int(pred)
        # Fall back to the numeric id when no display name is configured.
        label = LABEL_NAMES.get(pred_int, str(pred_int))
        st.success(f'Prediction: **{label}** (class {pred_int})')

with col2:
    if st.button('📊 Predict probabilities', use_container_width=True):
        if hasattr(model, 'predict_proba'):
            proba = model.predict_proba(X_input)[0]
            # Label each probability with the model's own class ids rather
            # than assuming they are 0..n-1. Positional ids are used only
            # when the model exposes no matching ``classes_``.
            # NOTE(review): relies on predict_proba columns being ordered
            # like ``classes_`` (scikit-learn convention) - confirm for the
            # saved model.
            class_ids = getattr(model, 'classes_', None)
            if class_ids is None or len(class_ids) != len(proba):
                class_ids = range(len(proba))
            proba_df = pd.DataFrame(
                {'class': list(class_ids), 'probability': proba}
            ).sort_values('probability', ascending=False)
            # Ids without a configured display name are shown as their string form.
            proba_df['label'] = proba_df['class'].map(LABEL_NAMES).fillna(
                proba_df['class'].astype(str)
            )
            st.dataframe(
                proba_df[['label', 'class', 'probability']],
                use_container_width=True,
            )
        else:
            st.warning('This model does not support predict_proba().')

st.caption('Tip: If predictions look wrong, ensure the same encoders and feature order are used as during training.')