Commit ·
ebbb600
1
Parent(s): c3c3195
Update Streamlit app to support deployed backend with fallback to localhost
Browse files
app.py
CHANGED
|
@@ -54,48 +54,67 @@ st.markdown("""
|
|
| 54 |
</style>
|
| 55 |
""", unsafe_allow_html=True)
|
| 56 |
|
| 57 |
-
# API base
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
def check_backend_health():
|
| 61 |
-
"""Check if backend is running"""
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
def upload_dataset(uploaded_file):
|
| 69 |
"""Upload dataset to backend"""
|
|
|
|
|
|
|
|
|
|
| 70 |
try:
|
| 71 |
files = {'file': uploaded_file}
|
| 72 |
-
response = requests.post(f"{
|
| 73 |
if response.status_code == 200:
|
| 74 |
-
return response.json()
|
| 75 |
else:
|
| 76 |
-
return None
|
| 77 |
except Exception as e:
|
| 78 |
-
|
| 79 |
-
return None
|
| 80 |
|
| 81 |
-
def process_pipeline(
|
| 82 |
-
"""Process dataset
|
|
|
|
|
|
|
|
|
|
| 83 |
try:
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
response = requests.post(f"{API_BASE}/process-pipeline", files=files, data=data, timeout=120)
|
| 87 |
if response.status_code == 200:
|
| 88 |
-
return response.json()
|
| 89 |
else:
|
| 90 |
-
|
| 91 |
-
return None
|
| 92 |
except Exception as e:
|
| 93 |
-
|
| 94 |
-
return None
|
| 95 |
|
| 96 |
def download_file(filename):
|
| 97 |
-
"""
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
def main():
|
| 101 |
# Main header
|
|
@@ -103,16 +122,21 @@ def main():
|
|
| 103 |
st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)
|
| 104 |
|
| 105 |
# Check backend health
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
st.code("""
|
| 109 |
cd promptprepml/backend
|
| 110 |
venv\\Scripts\\activate
|
| 111 |
python app/main.py
|
|
|
|
|
|
|
| 112 |
""")
|
|
|
|
| 113 |
return
|
| 114 |
|
| 115 |
-
st.success("β
Backend
|
| 116 |
|
| 117 |
# Sidebar for navigation
|
| 118 |
st.sidebar.title("π Processing Steps")
|
|
@@ -120,248 +144,179 @@ python app/main.py
|
|
| 120 |
# Initialize session state
|
| 121 |
if 'step' not in st.session_state:
|
| 122 |
st.session_state.step = 'upload'
|
| 123 |
-
if '
|
| 124 |
-
st.session_state.
|
| 125 |
-
if '
|
| 126 |
-
st.session_state.
|
| 127 |
-
if 'uploaded_file' not in st.session_state:
|
| 128 |
-
st.session_state.uploaded_file = None
|
| 129 |
|
| 130 |
# Step indicators
|
| 131 |
-
steps = ['π€ Upload
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
| 139 |
|
|
|
|
| 140 |
for i, step in enumerate(steps):
|
| 141 |
-
if i <
|
| 142 |
-
st.sidebar.
|
|
|
|
|
|
|
| 143 |
else:
|
| 144 |
-
st.sidebar.
|
| 145 |
|
| 146 |
-
#
|
| 147 |
if st.session_state.step == 'upload':
|
| 148 |
-
st.markdown('<h2 class="step-header">π€
|
| 149 |
|
| 150 |
-
# File upload
|
| 151 |
uploaded_file = st.file_uploader(
|
| 152 |
"Choose a CSV file",
|
| 153 |
type=['csv'],
|
| 154 |
-
help="Upload your dataset
|
| 155 |
)
|
| 156 |
|
| 157 |
if uploaded_file is not None:
|
| 158 |
-
|
| 159 |
-
st.markdown('<div class="info-box">', unsafe_allow_html=True)
|
| 160 |
-
st.write(f"**Filename:** {uploaded_file.name}")
|
| 161 |
-
st.write(f"**Size:** {uploaded_file.size / 1024 / 1024:.2f} MB")
|
| 162 |
-
st.markdown('</div>', unsafe_allow_html=True)
|
| 163 |
|
| 164 |
-
#
|
| 165 |
try:
|
| 166 |
df = pd.read_csv(uploaded_file)
|
| 167 |
-
st.
|
| 168 |
-
st.
|
| 169 |
-
st.
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
with st.spinner("Uploading dataset..."):
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
if result:
|
| 179 |
-
st.session_state.dataset_info = result
|
| 180 |
-
st.session_state.uploaded_file = uploaded_file # Store the file
|
| 181 |
-
st.session_state.step = 'prompt'
|
| 182 |
-
st.rerun()
|
| 183 |
else:
|
| 184 |
-
st.
|
| 185 |
-
|
|
|
|
| 186 |
except Exception as e:
|
| 187 |
-
st.error(f"Error reading
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
|
|
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
info = st.session_state.dataset_info['dataset_info']
|
| 195 |
st.markdown('<div class="info-box">', unsafe_allow_html=True)
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
st.metric("Rows", info['shape'][0])
|
| 199 |
-
with col2:
|
| 200 |
-
st.metric("Columns", info['shape'][1])
|
| 201 |
-
with col3:
|
| 202 |
-
st.metric("Missing Values", sum(info['missing_values'].values()))
|
| 203 |
-
with col4:
|
| 204 |
-
st.metric("Duplicates", info['duplicates'])
|
| 205 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 206 |
-
|
| 207 |
-
# Show file info
|
| 208 |
-
if st.session_state.uploaded_file:
|
| 209 |
-
st.info(f"π File loaded: {st.session_state.uploaded_file.name} ({st.session_state.uploaded_file.size / 1024 / 1024:.2f} MB)")
|
| 210 |
-
|
| 211 |
-
# Prompt input
|
| 212 |
-
st.write("**Enter your preprocessing instructions in natural language:**")
|
| 213 |
|
| 214 |
-
#
|
| 215 |
-
example_prompts = [
|
| 216 |
-
"Prepare this dataset for fraud classification, handle missing values, encode categorical variables, remove outliers, and scale numeric features.",
|
| 217 |
-
"Clean this dataset for customer churn prediction, fill missing values with median, one-hot encode categories, and apply standard scaling.",
|
| 218 |
-
"Preprocess data for regression analysis, handle null values, remove duplicates, and normalize numerical features.",
|
| 219 |
-
"Get this dataset ready for machine learning, handle missing data, encode categorical variables, and scale features.",
|
| 220 |
-
"Analyze this customer dataset and prepare it for machine learning. Remove duplicate rows and unnecessary identifier columns. Handle missing values appropriately. Encode categorical variables such as country, city, and company. Extract useful features from the subscription date. Scale any numerical features if present. Remove low-variance features and prepare the dataset for clustering or classification."
|
| 221 |
-
]
|
| 222 |
-
|
| 223 |
-
# Prompt text area
|
| 224 |
prompt = st.text_area(
|
| 225 |
-
"
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
help="Describe
|
| 229 |
)
|
| 230 |
|
| 231 |
-
|
| 232 |
-
with
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
st.rerun()
|
| 237 |
-
st.write(f"{i}. {example}")
|
| 238 |
-
|
| 239 |
-
# Supported operations info
|
| 240 |
-
with st.expander("π§ Supported Operations"):
|
| 241 |
-
st.write("""
|
| 242 |
-
**Missing Values:**
|
| 243 |
-
- Mean/median/mode imputation
|
| 244 |
-
- Constant value filling
|
| 245 |
-
- Row deletion
|
| 246 |
-
|
| 247 |
-
**Categorical Encoding:**
|
| 248 |
-
- One-hot encoding
|
| 249 |
-
- Label encoding
|
| 250 |
-
|
| 251 |
-
**Feature Scaling:**
|
| 252 |
-
- Standard scaling (Z-score)
|
| 253 |
-
- Min-max scaling
|
| 254 |
-
- Robust scaling
|
| 255 |
-
|
| 256 |
-
**Outlier Detection:**
|
| 257 |
-
- Isolation Forest
|
| 258 |
-
- IQR method
|
| 259 |
-
- Z-score method
|
| 260 |
-
|
| 261 |
-
**Feature Engineering:**
|
| 262 |
-
- Variance threshold selection
|
| 263 |
-
- Correlation filtering
|
| 264 |
-
- Interaction features
|
| 265 |
-
""")
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
else:
|
| 280 |
-
st.warning("No file found. Please upload your dataset again.")
|
| 281 |
|
|
|
|
| 282 |
elif st.session_state.step == 'results':
|
| 283 |
-
st.markdown('<h2 class="step-header">
|
| 284 |
|
| 285 |
-
if st.session_state.
|
| 286 |
-
|
| 287 |
|
| 288 |
# Success message
|
| 289 |
st.markdown('<div class="success-box">', unsafe_allow_html=True)
|
| 290 |
-
st.success("β
|
| 291 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 292 |
|
| 293 |
-
#
|
| 294 |
-
st.
|
| 295 |
-
info = results['dataset_info']['basic_info']
|
| 296 |
-
col1, col2, col3, col4 = st.columns(4)
|
| 297 |
-
with col1:
|
| 298 |
-
st.metric("Original Shape", f"{info['shape'][0]} Γ {info['shape'][1]}")
|
| 299 |
-
with col2:
|
| 300 |
-
st.metric("Numeric Columns", len(info['numeric_columns']))
|
| 301 |
-
with col3:
|
| 302 |
-
st.metric("Categorical Columns", len(info['categorical_columns']))
|
| 303 |
-
with col4:
|
| 304 |
-
missing_total = sum(results['dataset_info']['missing_values']['counts'].values())
|
| 305 |
-
st.metric("Missing Values", missing_total)
|
| 306 |
-
|
| 307 |
-
# Applied preprocessing steps
|
| 308 |
-
st.write("### π§ Applied Preprocessing Steps")
|
| 309 |
-
for i, step in enumerate(results['preprocessing_steps'], 1):
|
| 310 |
-
st.markdown(f"""
|
| 311 |
-
<div style="padding: 1rem; margin: 0.5rem 0; background-color: #f8fafc; border-left: 4px solid #3b82f6; border-radius: 0.25rem;">
|
| 312 |
-
<strong>Step {i}:</strong> {step['description']}<br>
|
| 313 |
-
<small>Method: {step.get('method', 'N/A')}</small>
|
| 314 |
-
</div>
|
| 315 |
-
""", unsafe_allow_html=True)
|
| 316 |
-
|
| 317 |
-
# Download files
|
| 318 |
-
st.write("### π Download Files")
|
| 319 |
-
|
| 320 |
-
files_to_download = [
|
| 321 |
-
("processed_dataset.csv", "π Processed Dataset", "Fully preprocessed dataset ready for ML"),
|
| 322 |
-
("train.csv", "π Training Set", "80% of data for model training"),
|
| 323 |
-
("test.csv", "π§ͺ Test Set", "20% of data for model testing"),
|
| 324 |
-
("pipeline.pkl", "βοΈ Pipeline", "Scikit-learn pipeline for reuse"),
|
| 325 |
-
("eda_report.html", "π EDA Report", "Exploratory Data Analysis report")
|
| 326 |
-
]
|
| 327 |
-
|
| 328 |
-
col1, col2 = st.columns(2)
|
| 329 |
-
for i, (filename, title, description) in enumerate(files_to_download):
|
| 330 |
-
with col1 if i % 2 == 0 else col2:
|
| 331 |
-
st.markdown(f"""
|
| 332 |
-
<div style="padding: 1rem; margin: 0.5rem 0; border: 1px solid #e5e7eb; border-radius: 0.5rem;">
|
| 333 |
-
<h4>{title}</h4>
|
| 334 |
-
<p><small>{description}</small></p>
|
| 335 |
-
<a href="{download_file(filename)}" download="{filename}" style="text-decoration: none;">
|
| 336 |
-
<button style="background-color: #3b82f6; color: white; padding: 0.5rem 1rem; border: none; border-radius: 0.25rem; cursor: pointer;">
|
| 337 |
-
π₯ Download {filename}
|
| 338 |
-
</button>
|
| 339 |
-
</a>
|
| 340 |
-
</div>
|
| 341 |
-
""", unsafe_allow_html=True)
|
| 342 |
-
|
| 343 |
-
# Quick actions
|
| 344 |
-
st.write("### β‘ Quick Actions")
|
| 345 |
-
col1, col2, col3 = st.columns(3)
|
| 346 |
|
| 347 |
with col1:
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
with col2:
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# Footer
|
| 367 |
st.markdown("---")
|
|
|
|
| 54 |
</style>
|
| 55 |
""", unsafe_allow_html=True)
|
| 56 |
|
| 57 |
+
# API base URLs - try deployed backend first, fallback to localhost
|
| 58 |
+
DEPLOYED_BACKEND = "https://promptprepml-backend.railway.app"
|
| 59 |
+
LOCAL_BACKEND = "http://localhost:8000"
|
| 60 |
|
| 61 |
def check_backend_health():
    """Find the first reachable backend, preferring the deployed one.

    Probes ``<backend>/health`` on ``DEPLOYED_BACKEND`` and then
    ``LOCAL_BACKEND``, in that order, with a 5-second timeout each. The first
    URL that answers HTTP 200 is stored in ``st.session_state.backend_url``.

    Returns:
        tuple: ``(True, backend_url)`` for the first healthy backend, or
        ``(False, None)`` when no candidate answered with HTTP 200.
    """
    for backend_url in (DEPLOYED_BACKEND, LOCAL_BACKEND):
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        except requests.RequestException:
            # Unreachable, timed out, or otherwise failed at the HTTP layer:
            # fall through to the next candidate. Only request failures are
            # swallowed so that Ctrl-C and programming errors still surface
            # (a bare ``except`` would hide them).
            continue
        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url
    return False, None
|
| 74 |
|
| 75 |
def upload_dataset(uploaded_file):
    """Upload a dataset file to the connected backend.

    Args:
        uploaded_file: File-like object sent as the multipart ``file`` field
            (in this app, the object returned by ``st.file_uploader``).

    Returns:
        tuple: ``(payload, None)`` with the decoded JSON body on HTTP 200,
        otherwise ``(None, message)`` describing why the upload failed.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # The caller previews the file with pandas.read_csv before uploading,
        # which typically leaves the stream at EOF; rewind so the whole file
        # is sent instead of an empty body.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        files = {'file': uploaded_file}
        response = requests.post(
            f"{st.session_state.backend_url}/api/upload",
            files=files,
            timeout=60,  # never hang the UI forever on an unresponsive backend
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Upload failed: {response.text}"
    except (requests.RequestException, OSError, ValueError) as e:
        # RequestException: network/timeout errors; OSError/ValueError: stream
        # problems or a 200 response whose body is not valid JSON.
        return None, f"Upload error: {str(e)}"
|
|
|
|
| 89 |
|
| 90 |
+
def process_pipeline(file_path, prompt):
    """Ask the backend to run the preprocessing pipeline on an uploaded file.

    Args:
        file_path: Value sent as ``file_path`` in the JSON body (the caller
            takes it from the upload response).
        prompt: Natural-language preprocessing instructions, sent as ``prompt``.

    Returns:
        tuple: ``(payload, None)`` with the decoded JSON body on HTTP 200,
        otherwise ``(None, message)`` describing why processing failed.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        data = {'file_path': file_path, 'prompt': prompt}
        # NOTE(review): upload and download use an "/api/..." prefix but this
        # endpoint does not - confirm the route against the backend.
        response = requests.post(
            f"{st.session_state.backend_url}/process-pipeline",
            json=data,
            timeout=120,  # processing is slow, but must not block indefinitely
        )
        if response.status_code == 200:
            return response.json(), None
        return None, f"Processing failed: {response.text}"
    except (requests.RequestException, ValueError) as e:
        # RequestException: network/timeout errors; ValueError: a 200 response
        # whose body is not valid JSON.
        return None, f"Processing error: {str(e)}"
|
|
|
|
| 104 |
|
| 105 |
def download_file(filename):
    """Fetch a processed artifact from the connected backend.

    Args:
        filename: Name of the file to request from ``/api/download/<filename>``.

    Returns:
        tuple: ``(content, None)`` with the raw response bytes on HTTP 200,
        otherwise ``(None, message)`` describing why the download failed.
    """
    from urllib.parse import quote

    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # Percent-encode the name (including "/") so it always stays a single
        # path segment of the download route.
        safe_name = quote(str(filename), safe="")
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{safe_name}",
            timeout=60,  # never hang the UI forever on an unresponsive backend
        )
        if response.status_code == 200:
            return response.content, None
        return None, f"Download failed: {response.text}"
    except requests.RequestException as e:
        return None, f"Download error: {str(e)}"
|
| 118 |
|
| 119 |
def main():
|
| 120 |
# Main header
|
|
|
|
| 122 |
st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)
|
| 123 |
|
| 124 |
# Check backend health
|
| 125 |
+
backend_healthy, backend_url = check_backend_health()
|
| 126 |
+
|
| 127 |
+
if not backend_healthy:
|
| 128 |
+
st.error("β Backend is not running! Please start the backend:")
|
| 129 |
st.code("""
|
| 130 |
cd promptprepml/backend
|
| 131 |
venv\\Scripts\\activate
|
| 132 |
python app/main.py
|
| 133 |
+
|
| 134 |
+
# OR wait for deployed backend to be ready
|
| 135 |
""")
|
| 136 |
+
st.info("π **Deploying backend to cloud...** This will make the app work standalone!")
|
| 137 |
return
|
| 138 |
|
| 139 |
+
st.success(f"β
Backend connected at: {backend_url}")
|
| 140 |
|
| 141 |
# Sidebar for navigation
|
| 142 |
st.sidebar.title("π Processing Steps")
|
|
|
|
| 144 |
# Initialize session state
|
| 145 |
if 'step' not in st.session_state:
|
| 146 |
st.session_state.step = 'upload'
|
| 147 |
+
if 'upload_result' not in st.session_state:
|
| 148 |
+
st.session_state.upload_result = None
|
| 149 |
+
if 'processing_result' not in st.session_state:
|
| 150 |
+
st.session_state.processing_result = None
|
|
|
|
|
|
|
| 151 |
|
| 152 |
# Step indicators
|
| 153 |
+
steps = ['π€ Upload', 'βοΈ Configure', 'π Process', 'π Results']
|
| 154 |
+
current_step_index = 0
|
| 155 |
+
|
| 156 |
+
if st.session_state.step == 'upload':
|
| 157 |
+
current_step_index = 0
|
| 158 |
+
elif st.session_state.step == 'configure':
|
| 159 |
+
current_step_index = 1
|
| 160 |
+
elif st.session_state.step == 'process':
|
| 161 |
+
current_step_index = 2
|
| 162 |
+
elif st.session_state.step == 'results':
|
| 163 |
+
current_step_index = 3
|
| 164 |
|
| 165 |
+
# Display step indicators
|
| 166 |
for i, step in enumerate(steps):
|
| 167 |
+
if i < current_step_index:
|
| 168 |
+
st.sidebar.success(f"β
{step}")
|
| 169 |
+
elif i == current_step_index:
|
| 170 |
+
st.sidebar.info(f"π {step}")
|
| 171 |
else:
|
| 172 |
+
st.sidebar.write(f"β³ {step}")
|
| 173 |
|
| 174 |
+
# Step 1: Upload Dataset
|
| 175 |
if st.session_state.step == 'upload':
|
| 176 |
+
st.markdown('<h2 class="step-header">π€ Step 1: Upload Dataset</h2>', unsafe_allow_html=True)
|
| 177 |
|
|
|
|
| 178 |
uploaded_file = st.file_uploader(
|
| 179 |
"Choose a CSV file",
|
| 180 |
type=['csv'],
|
| 181 |
+
help="Upload your dataset for preprocessing"
|
| 182 |
)
|
| 183 |
|
| 184 |
if uploaded_file is not None:
|
| 185 |
+
st.info(f"π File uploaded: `{uploaded_file.name}`")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
+
# Show file preview
|
| 188 |
try:
|
| 189 |
df = pd.read_csv(uploaded_file)
|
| 190 |
+
st.markdown('<div class="info-box">', unsafe_allow_html=True)
|
| 191 |
+
st.markdown(f"**Dataset Shape:** {df.shape}")
|
| 192 |
+
st.markdown(f"**Columns:** {', '.join(df.columns)}")
|
| 193 |
+
st.dataframe(df.head())
|
| 194 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 195 |
|
| 196 |
+
if st.button("π Continue to Configuration", type="primary"):
|
| 197 |
+
# Upload to backend
|
| 198 |
with st.spinner("Uploading dataset..."):
|
| 199 |
+
result, error = upload_dataset(uploaded_file)
|
| 200 |
+
if error:
|
| 201 |
+
st.error(f"β Upload failed: {error}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
else:
|
| 203 |
+
st.session_state.upload_result = result
|
| 204 |
+
st.session_state.step = 'configure'
|
| 205 |
+
st.rerun()
|
| 206 |
except Exception as e:
|
| 207 |
+
st.error(f"β Error reading file: {str(e)}")
|
| 208 |
|
| 209 |
+
# Step 2: Configure Processing
|
| 210 |
+
elif st.session_state.step == 'configure':
|
| 211 |
+
st.markdown('<h2 class="step-header">βοΈ Step 2: Configure Processing</h2>', unsafe_allow_html=True)
|
| 212 |
|
| 213 |
+
if st.session_state.upload_result:
|
| 214 |
+
file_info = st.session_state.upload_result
|
|
|
|
| 215 |
st.markdown('<div class="info-box">', unsafe_allow_html=True)
|
| 216 |
+
st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
|
| 217 |
+
st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
st.markdown('</div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
+
# Processing options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
prompt = st.text_area(
|
| 222 |
+
"Describe your preprocessing needs:",
|
| 223 |
+
value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
|
| 224 |
+
height=100,
|
| 225 |
+
help="Describe what you want to do with your dataset in natural language"
|
| 226 |
)
|
| 227 |
|
| 228 |
+
col1, col2 = st.columns([1, 1])
|
| 229 |
+
with col1:
|
| 230 |
+
if st.button("β¬
οΈ Back", type="secondary"):
|
| 231 |
+
st.session_state.step = 'upload'
|
| 232 |
+
st.rerun()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
with col2:
|
| 235 |
+
if st.button("π Start Processing", type="primary"):
|
| 236 |
+
if st.session_state.upload_result:
|
| 237 |
+
file_path = st.session_state.upload_result.get('file_path')
|
| 238 |
+
with st.spinner("Processing dataset... This may take a few minutes."):
|
| 239 |
+
result, error = process_pipeline(file_path, prompt)
|
| 240 |
+
if error:
|
| 241 |
+
st.error(f"β Processing failed: {error}")
|
| 242 |
+
else:
|
| 243 |
+
st.session_state.processing_result = result
|
| 244 |
+
st.session_state.step = 'results'
|
| 245 |
+
st.rerun()
|
|
|
|
|
|
|
| 246 |
|
| 247 |
+
# Step 3: Results
|
| 248 |
elif st.session_state.step == 'results':
|
| 249 |
+
st.markdown('<h2 class="step-header">π Step 3: Results</h2>', unsafe_allow_html=True)
|
| 250 |
|
| 251 |
+
if st.session_state.processing_result:
|
| 252 |
+
result = st.session_state.processing_result
|
| 253 |
|
| 254 |
# Success message
|
| 255 |
st.markdown('<div class="success-box">', unsafe_allow_html=True)
|
| 256 |
+
st.success("β
Dataset processed successfully!")
|
| 257 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 258 |
|
| 259 |
+
# Results summary
|
| 260 |
+
col1, col2 = st.columns([2, 1])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
with col1:
|
| 263 |
+
st.markdown("### π Processing Summary")
|
| 264 |
+
|
| 265 |
+
dataset_info = result.get('dataset_info', {})
|
| 266 |
+
if dataset_info:
|
| 267 |
+
basic_info = dataset_info.get('basic_info', {})
|
| 268 |
+
st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
|
| 269 |
+
st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")
|
| 270 |
+
|
| 271 |
+
preprocessing_info = result.get('preprocessing_info', {})
|
| 272 |
+
if preprocessing_info:
|
| 273 |
+
st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")
|
| 274 |
+
|
| 275 |
+
# Dataset preview
|
| 276 |
+
st.markdown("### π Dataset Preview")
|
| 277 |
+
preview_data = result.get('preview_data', [])
|
| 278 |
+
if preview_data:
|
| 279 |
+
df_preview = pd.DataFrame(preview_data)
|
| 280 |
+
st.dataframe(df_preview)
|
| 281 |
|
| 282 |
with col2:
|
| 283 |
+
st.markdown("### π₯ Download Files")
|
| 284 |
+
|
| 285 |
+
download_links = [
|
| 286 |
+
("Processed Dataset", "processed_dataset.csv"),
|
| 287 |
+
("Training Set", "train.csv"),
|
| 288 |
+
("Test Set", "test.csv"),
|
| 289 |
+
("Pipeline", "pipeline.pkl"),
|
| 290 |
+
("EDA Report", "eda_report.html")
|
| 291 |
+
]
|
| 292 |
+
|
| 293 |
+
for name, filename in download_links:
|
| 294 |
+
if st.button(f"π₯ {name}", key=f"download_{filename}"):
|
| 295 |
+
with st.spinner(f"Downloading {filename}..."):
|
| 296 |
+
file_content, error = download_file(filename)
|
| 297 |
+
if error:
|
| 298 |
+
st.error(f"β Download failed: {error}")
|
| 299 |
+
else:
|
| 300 |
+
st.download_button(
|
| 301 |
+
label=f"πΎ Save {filename}",
|
| 302 |
+
data=file_content,
|
| 303 |
+
file_name=filename,
|
| 304 |
+
mime="application/octet-stream"
|
| 305 |
+
)
|
| 306 |
|
| 307 |
+
# Action buttons
|
| 308 |
+
col1, col2 = st.columns([1, 1])
|
| 309 |
+
with col1:
|
| 310 |
+
if st.button("π Process New Dataset", type="secondary"):
|
| 311 |
+
# Reset session state
|
| 312 |
+
for key in list(st.session_state.keys()):
|
| 313 |
+
del st.session_state[key]
|
| 314 |
+
st.session_state.step = 'upload'
|
| 315 |
+
st.rerun()
|
| 316 |
+
|
| 317 |
+
with col2:
|
| 318 |
+
if st.button("π View EDA Report", type="primary"):
|
| 319 |
+
st.info("π EDA Report feature coming soon!")
|
| 320 |
|
| 321 |
# Footer
|
| 322 |
st.markdown("---")
|