saherPervaiz committed on
Commit
e58ad46
·
verified ·
1 Parent(s): f6cff1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -0
app.py CHANGED
@@ -1,3 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # After generating the metrics (classification or regression)
2
  # Add a button to generate the performance report as an image
3
  generate_report_button = st.button("Generate Performance Report as Image")
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder
5
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
6
+ from sklearn.linear_model import LogisticRegression, LinearRegression
7
+ from sklearn.svm import SVC, SVR
8
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
9
+ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
10
+ from sklearn.naive_bayes import GaussianNB
11
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
12
+ import numpy as np
13
+ import matplotlib.pyplot as plt
14
+ import seaborn as sns
15
+ from io import BytesIO
16
+
17
+ # File uploader
18
+ st.title("Model Training with Metrics and Correlation Heatmap")
19
+ uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
20
+
21
+ if uploaded_file is not None:
22
+ df = pd.read_csv(uploaded_file)
23
+
24
+ # Show the dataset
25
+ st.write("Dataset:")
26
+ st.dataframe(df)
27
+
28
+ # Convert categorical (str) data to numerical
29
+ st.write("Converting Categorical Columns to Numerical Values:")
30
+ label_encoder = LabelEncoder()
31
+
32
+ for col in df.columns:
33
+ if df[col].dtype == 'object' or len(df[col].unique()) <= 10:
34
+ st.write(f"Encoding Column: **{col}**")
35
+ df[col] = label_encoder.fit_transform(df[col])
36
+
37
+ # Display the dataset after conversion
38
+ st.write("Dataset After Conversion:")
39
+ st.dataframe(df)
40
+
41
+ # Handle Null Values (Missing Data)
42
+ st.write("Handling Missing (Null) Values:")
43
+ fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
44
# Resolve missing values according to the strategy picked in the select box.
if fill_method == "Drop rows":
    df = df.dropna()
elif fill_method == "Fill with mean/median":
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            # Assign the filled column back instead of calling
            # fillna(inplace=True) on df[col]: the chained in-place form can
            # operate on a temporary object and leave df unchanged (always the
            # case when pandas Copy-on-Write is active).
            df[col] = df[col].fillna(df[col].mean())
        else:
            modes = df[col].mode()
            # mode() is empty for an all-NaN column; there is no value to
            # fill with, so the column is left as it is.
            if not modes.empty:
                df[col] = df[col].fillna(modes.iloc[0])
52
+
53
+ # Handle Outliers using IQR method
54
+ st.write("Handling Outliers:")
55
def remove_outliers_iqr(dataframe, factor=1.5):
    """Return the rows of *dataframe* that hold no IQR outlier.

    A value is an outlier when it lies below ``Q1 - factor * IQR`` or above
    ``Q3 + factor * IQR`` of its own column, where Q1/Q3 are the 25th/75th
    percentiles. Only numeric columns take part in the check; other columns
    are carried through untouched.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data. It is not modified.
    factor : float, default 1.5
        Multiplier applied to the IQR to build the fences.

    Returns
    -------
    pandas.DataFrame
        The subset of rows without outliers, with all original columns and
        the original index labels. When there is nothing numeric to inspect
        (no numeric columns or no rows) the input is returned as is.
    """
    numeric = dataframe.select_dtypes(include="number")
    if numeric.empty:
        return dataframe

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    is_outlier = (numeric < (q1 - factor * iqr)) | (numeric > (q3 + factor * iqr))
    # Keep a row only if none of its numeric cells was flagged.
    return dataframe[~is_outlier.any(axis=1)]
60
+
61
+ df = remove_outliers_iqr(df)
62
+
63
+ # Cap Extreme Values
64
+ st.write("Handling Extreme Values (Capping):")
65
def cap_extreme_values(dataframe, lower_quantile=0.05, upper_quantile=0.95):
    """Return a copy of *dataframe* with numeric columns clipped to quantiles.

    For every numeric column, values below the ``lower_quantile`` quantile are
    raised to it and values above the ``upper_quantile`` quantile are lowered
    to it. Non-numeric columns are copied unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data. It is not modified; the capping is done on a copy so a
        filtered slice can be passed safely.
    lower_quantile : float, default 0.05
        Quantile used as the lower cap.
    upper_quantile : float, default 0.95
        Quantile used as the upper cap.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and index as the input.
    """
    capped = dataframe.copy()
    for col in capped.select_dtypes(include=[np.number]).columns:
        lower_limit = capped[col].quantile(lower_quantile)
        upper_limit = capped[col].quantile(upper_quantile)
        capped[col] = capped[col].clip(lower=lower_limit, upper=upper_limit)
    return capped
71
+
72
+ df = cap_extreme_values(df)
73
+
74
+ # Show cleaned dataset
75
+ st.write("Cleaned Dataset:")
76
+ st.dataframe(df)
77
+
78
+ # Add clean data download option
79
+ st.subheader("Download Cleaned Dataset")
80
+ st.download_button(
81
+ label="Download Cleaned Dataset (CSV)",
82
+ data=df.to_csv(index=False),
83
+ file_name="cleaned_dataset.csv",
84
+ mime="text/csv"
85
+ )
86
+
87
+ # Correlation Heatmap
88
# Show the pairwise correlation matrix as an annotated heatmap and offer the
# same image as a PNG download.
st.subheader("Correlation Heatmap")
corr = df.corr()
# Draw on an explicit Figure/Axes instead of pyplot's implicit global figure,
# so the object shown and the object saved are unambiguously the same one.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", cbar=True, ax=heatmap_ax)
st.pyplot(heatmap_fig)

# Save heatmap as PNG
buf = BytesIO()
heatmap_fig.savefig(buf, format="png")
buf.seek(0)
# The script is re-executed on every widget interaction; close the figure so
# matplotlib does not keep accumulating open figures across reruns.
plt.close(heatmap_fig)
st.download_button(
    label="Download Correlation Heatmap as PNG",
    data=buf,
    file_name="correlation_heatmap.png",
    mime="image/png",
)
104
+
105
+ # Highlight highly correlated pairs
106
# List every feature pair whose absolute correlation is at least 0.8.
st.subheader("Highly Correlated Features")
abs_corr = corr.abs()
# Keep only the cells strictly above the diagonal: each unordered pair then
# appears exactly once and self-correlations are excluded. De-duplicating on
# the correlation *value* instead would discard distinct pairs that happen to
# share a value, and a perfectly correlated pair could be merged into a
# diagonal 1.0 entry and lost.
upper_mask = np.triu(np.ones(abs_corr.shape, dtype=bool), k=1)
pair_corr = abs_corr.where(upper_mask).stack()
# The comparison is False for NaN, so masked-out cells are dropped here too.
high_corr = pair_corr[pair_corr >= 0.8].sort_values(ascending=False)
high_corr_df = high_corr
st.write(high_corr_df)
111
+
112
+ target = st.selectbox("Select Target Variable", df.columns)
113
+ features = [col for col in df.columns if col != target]
114
+ X = df[features]
115
+ y = df[target]
116
+
117
+ if y.dtype == 'object' or len(y.unique()) <= 10: # Categorical target (classification)
118
+ st.subheader("Classification Model Training")
119
+ classifiers = {
120
+ 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
121
+ 'Decision Tree': DecisionTreeClassifier(),
122
+ 'Random Forest': RandomForestClassifier(),
123
+ 'Support Vector Machine (SVM)': SVC(),
124
+ 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
125
+ 'Naive Bayes': GaussianNB()
126
+ }
127
+
128
+ metrics = []
129
+ train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
130
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
131
+
132
+ for name, classifier in classifiers.items():
133
+ classifier.fit(X_train, y_train)
134
+ y_pred = classifier.predict(X_test)
135
+ metrics.append({
136
+ 'Model': name,
137
+ 'Accuracy': round(accuracy_score(y_test, y_pred), 2),
138
+ 'Precision': round(precision_score(y_test, y_pred, zero_division=1, average='macro'), 2),
139
+ 'Recall': round(recall_score(y_test, y_pred, zero_division=1, average='macro'), 2),
140
+ 'F1-Score': round(f1_score(y_test, y_pred, zero_division=1, average='macro'), 2)
141
+ })
142
+
143
+ metrics_df = pd.DataFrame(metrics)
144
+ st.subheader("Classification Model Performance Metrics")
145
+ st.dataframe(metrics_df)
146
+
147
+ # Save metrics as PNG
148
+ fig, ax = plt.subplots()
149
+ sns.barplot(data=metrics_df, x="Model", y="Accuracy", ax=ax)
150
+ ax.set_title("Classification Model Performance")
151
+ buf = BytesIO()
152
+ fig.savefig(buf, format="png")
153
+ buf.seek(0)
154
+ st.download_button(
155
+ label="Download Classification Report as PNG",
156
+ data=buf,
157
+ file_name="classification_report.png",
158
+ mime="image/png"
159
+ )
160
+
161
+ else: # Continuous target (regression)
162
+ st.subheader("Regression Model Training")
163
+ regressors = {
164
+ 'Linear Regression': LinearRegression(),
165
+ 'Decision Tree Regressor': DecisionTreeRegressor(),
166
+ 'Random Forest Regressor': RandomForestRegressor(),
167
+ 'Support Vector Regressor (SVR)': SVR(),
168
+ 'K-Nearest Neighbors Regressor (k-NN)': KNeighborsRegressor()
169
+ }
170
+
171
+ regression_metrics = []
172
+ train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
173
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
174
+
175
+ for name, regressor in regressors.items():
176
+ regressor.fit(X_train, y_train)
177
+ y_pred = regressor.predict(X_test)
178
+ regression_metrics.append({
179
+ 'Model': name,
180
+ 'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
181
+ 'Mean Absolute Error (MAE)': round(mean_absolute_error(y_test, y_pred), 2),
182
+ 'R² Score': round(r2_score(y_test, y_pred), 2)
183
+ })
184
+
185
+ regression_metrics_df = pd.DataFrame(regression_metrics)
186
+ st.subheader("Regression Model Performance Metrics")
187
+ st.dataframe(regression_metrics_df)
188
+
189
+ # Save metrics as PNG
190
+ fig, ax = plt.subplots()
191
+ sns.barplot(data=regression_metrics_df, x="Model", y="R² Score", ax=ax)
192
+ ax.set_title("Regression Model Performance")
193
+ buf = BytesIO()
194
+ fig.savefig(buf, format="png")
195
+ buf.seek(0)
196
# Offer the regression chart held in ``buf`` as a PNG download.
st.download_button(
    label="Download Regression Report as PNG",
    data=buf,
    file_name="regression_report.png",
    mime="image/png",
)
# TODO: also add the button to generate the report as an image.
# (This note was pasted as bare text after the closing parenthesis, which made
# the file fail to parse; it is kept here as a comment.)
202
+
203
+
204
+
205
+
206
+
207
+
208
  # After generating the metrics (classification or regression)
209
  # Add a button to generate the performance report as an image
210
  generate_report_button = st.button("Generate Performance Report as Image")