saherPervaiz committed on
Commit
f6cff1e
·
verified ·
1 Parent(s): 3d3a6dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -176
app.py CHANGED
@@ -1,198 +1,34 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from sklearn.model_selection import train_test_split
4
- from sklearn.preprocessing import LabelEncoder
5
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
6
- from sklearn.linear_model import LogisticRegression, LinearRegression
7
- from sklearn.svm import SVC, SVR
8
- from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
9
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
10
- from sklearn.naive_bayes import GaussianNB
11
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
12
- import numpy as np
13
- import matplotlib.pyplot as plt
14
- import seaborn as sns
15
- from io import BytesIO
16
 
17
- # File uploader
18
- st.title("Model Training with Metrics and Correlation Heatmap")
19
- uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
20
-
21
- if uploaded_file is not None:
22
- df = pd.read_csv(uploaded_file)
23
-
24
- # Show the dataset
25
- st.write("Dataset:")
26
- st.dataframe(df)
27
-
28
- # Convert categorical (str) data to numerical
29
- st.write("Converting Categorical Columns to Numerical Values:")
30
- label_encoder = LabelEncoder()
31
-
32
- for col in df.columns:
33
- if df[col].dtype == 'object' or len(df[col].unique()) <= 10:
34
- st.write(f"Encoding Column: **{col}**")
35
- df[col] = label_encoder.fit_transform(df[col])
36
-
37
- # Display the dataset after conversion
38
- st.write("Dataset After Conversion:")
39
- st.dataframe(df)
40
-
41
- # Handle Null Values (Missing Data)
42
- st.write("Handling Missing (Null) Values:")
43
- fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
44
- if fill_method == "Drop rows":
45
- df = df.dropna()
46
- elif fill_method == "Fill with mean/median":
47
- for col in df.columns:
48
- if df[col].dtype in ['float64', 'int64']:
49
- df[col].fillna(df[col].mean(), inplace=True)
50
- else:
51
- df[col].fillna(df[col].mode()[0], inplace=True)
52
-
53
- # Handle Outliers using IQR method
54
- st.write("Handling Outliers:")
55
- def remove_outliers_iqr(dataframe):
56
- Q1 = dataframe.quantile(0.25)
57
- Q3 = dataframe.quantile(0.75)
58
- IQR = Q3 - Q1
59
- return dataframe[~((dataframe < (Q1 - 1.5 * IQR)) | (dataframe > (Q3 + 1.5 * IQR))).any(axis=1)]
60
-
61
- df = remove_outliers_iqr(df)
62
-
63
- # Cap Extreme Values
64
- st.write("Handling Extreme Values (Capping):")
65
- def cap_extreme_values(dataframe):
66
- for col in dataframe.select_dtypes(include=[np.number]).columns:
67
- lower_limit = dataframe[col].quantile(0.05)
68
- upper_limit = dataframe[col].quantile(0.95)
69
- dataframe[col] = np.clip(dataframe[col], lower_limit, upper_limit)
70
- return dataframe
71
-
72
- df = cap_extreme_values(df)
73
-
74
- # Show cleaned dataset
75
- st.write("Cleaned Dataset:")
76
- st.dataframe(df)
77
-
78
- # Add clean data download option
79
- st.subheader("Download Cleaned Dataset")
80
- st.download_button(
81
- label="Download Cleaned Dataset (CSV)",
82
- data=df.to_csv(index=False),
83
- file_name="cleaned_dataset.csv",
84
- mime="text/csv"
85
- )
86
-
87
- # Correlation Heatmap
88
- st.subheader("Correlation Heatmap")
89
- corr = df.corr()
90
- plt.figure(figsize=(10, 8))
91
- sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", cbar=True)
92
- st.pyplot(plt)
93
-
94
- # Save heatmap as PNG
95
- buf = BytesIO()
96
- plt.savefig(buf, format="png")
97
- buf.seek(0)
98
- st.download_button(
99
- label="Download Correlation Heatmap as PNG",
100
- data=buf,
101
- file_name="correlation_heatmap.png",
102
- mime="image/png"
103
- )
104
-
105
- # Highlight highly correlated pairs
106
- st.subheader("Highly Correlated Features")
107
- high_corr = corr.abs().unstack().sort_values(ascending=False).drop_duplicates()
108
- high_corr = high_corr[high_corr >= 0.8]
109
- high_corr_df = high_corr[high_corr.index.get_level_values(0) != high_corr.index.get_level_values(1)]
110
- st.write(high_corr_df)
111
-
112
- target = st.selectbox("Select Target Variable", df.columns)
113
- features = [col for col in df.columns if col != target]
114
- X = df[features]
115
- y = df[target]
116
-
117
  if y.dtype == 'object' or len(y.unique()) <= 10: # Categorical target (classification)
118
- st.subheader("Classification Model Training")
119
- classifiers = {
120
- 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
121
- 'Decision Tree': DecisionTreeClassifier(),
122
- 'Random Forest': RandomForestClassifier(),
123
- 'Support Vector Machine (SVM)': SVC(),
124
- 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
125
- 'Naive Bayes': GaussianNB()
126
- }
127
-
128
- metrics = []
129
- train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
130
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
131
-
132
- for name, classifier in classifiers.items():
133
- classifier.fit(X_train, y_train)
134
- y_pred = classifier.predict(X_test)
135
- metrics.append({
136
- 'Model': name,
137
- 'Accuracy': round(accuracy_score(y_test, y_pred), 2),
138
- 'Precision': round(precision_score(y_test, y_pred, zero_division=1, average='macro'), 2),
139
- 'Recall': round(recall_score(y_test, y_pred, zero_division=1, average='macro'), 2),
140
- 'F1-Score': round(f1_score(y_test, y_pred, zero_division=1, average='macro'), 2)
141
- })
142
-
143
- metrics_df = pd.DataFrame(metrics)
144
- st.subheader("Classification Model Performance Metrics")
145
- st.dataframe(metrics_df)
146
-
147
- # Save metrics as PNG
148
- fig, ax = plt.subplots()
149
  sns.barplot(data=metrics_df, x="Model", y="Accuracy", ax=ax)
150
  ax.set_title("Classification Model Performance")
 
 
151
  buf = BytesIO()
152
  fig.savefig(buf, format="png")
153
  buf.seek(0)
 
154
  st.download_button(
155
  label="Download Classification Report as PNG",
156
  data=buf,
157
  file_name="classification_report.png",
158
  mime="image/png"
159
  )
160
-
161
  else: # Continuous target (regression)
162
- st.subheader("Regression Model Training")
163
- regressors = {
164
- 'Linear Regression': LinearRegression(),
165
- 'Decision Tree Regressor': DecisionTreeRegressor(),
166
- 'Random Forest Regressor': RandomForestRegressor(),
167
- 'Support Vector Regressor (SVR)': SVR(),
168
- 'K-Nearest Neighbors Regressor (k-NN)': KNeighborsRegressor()
169
- }
170
-
171
- regression_metrics = []
172
- train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
173
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
174
-
175
- for name, regressor in regressors.items():
176
- regressor.fit(X_train, y_train)
177
- y_pred = regressor.predict(X_test)
178
- regression_metrics.append({
179
- 'Model': name,
180
- 'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
181
- 'Mean Absolute Error (MAE)': round(mean_absolute_error(y_test, y_pred), 2),
182
- 'R² Score': round(r2_score(y_test, y_pred), 2)
183
- })
184
-
185
- regression_metrics_df = pd.DataFrame(regression_metrics)
186
- st.subheader("Regression Model Performance Metrics")
187
- st.dataframe(regression_metrics_df)
188
-
189
- # Save metrics as PNG
190
- fig, ax = plt.subplots()
191
  sns.barplot(data=regression_metrics_df, x="Model", y="R² Score", ax=ax)
192
  ax.set_title("Regression Model Performance")
 
 
193
  buf = BytesIO()
194
  fig.savefig(buf, format="png")
195
  buf.seek(0)
 
196
  st.download_button(
197
  label="Download Regression Report as PNG",
198
  data=buf,
 
1
+ # After generating the metrics (classification or regression)
2
+ # Add a button to generate the performance report as an image
3
+ generate_report_button = st.button("Generate Performance Report as Image")
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ if generate_report_button:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  if y.dtype == 'object' or len(y.unique()) <= 10: # Categorical target (classification)
7
+ fig, ax = plt.subplots(figsize=(10, 6))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  sns.barplot(data=metrics_df, x="Model", y="Accuracy", ax=ax)
9
  ax.set_title("Classification Model Performance")
10
+
11
+ # Save the classification report as PNG
12
  buf = BytesIO()
13
  fig.savefig(buf, format="png")
14
  buf.seek(0)
15
+
16
  st.download_button(
17
  label="Download Classification Report as PNG",
18
  data=buf,
19
  file_name="classification_report.png",
20
  mime="image/png"
21
  )
 
22
  else: # Continuous target (regression)
23
+ fig, ax = plt.subplots(figsize=(10, 6))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  sns.barplot(data=regression_metrics_df, x="Model", y="R² Score", ax=ax)
25
  ax.set_title("Regression Model Performance")
26
+
27
+ # Save the regression report as PNG
28
  buf = BytesIO()
29
  fig.savefig(buf, format="png")
30
  buf.seek(0)
31
+
32
  st.download_button(
33
  label="Download Regression Report as PNG",
34
  data=buf,