YoneSlapWind80085 committed on
Commit
a14426e
·
verified ·
1 Parent(s): eaf0a9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -6
app.py CHANGED
@@ -15,13 +15,156 @@ df['MedHouseVal'] = california.target
15
  X = df[['MedInc']]
16
  y = df['MedHouseVal']
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Split the data into training and testing sets
19
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Train the model
22
  model = LinearRegression()
 
 
23
  model.fit(X_train, y_train)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Save the model
26
  with open("linear_regression_model.pkl", "wb") as file:
27
  pickle.dump(model, file)
@@ -30,14 +173,40 @@ with open("linear_regression_model.pkl", "wb") as file:
30
  with open("linear_regression_model.pkl", "rb") as file:
31
  model = pickle.load(file)
32
 
33
- # Streamlit app
34
- st.title('California Housing Price Prediction')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- med_inc = st.number_input('Enter Median Income:', min_value=0.0, step=0.01)
 
 
 
37
 
38
  if st.button('Predict'):
39
- X_new = np.array([[med_inc]])
40
- prediction = model.predict(X_new)
41
  st.write(f'Predicted Median House Value: {prediction[0]}')
42
 
43
  # Display data
 
15
  X = df[['MedInc']]
16
  y = df['MedHouseVal']
17
 
18
+ # Pairplot to visualize relationships between features and the target
19
+ plt.show()
20
+
21
+
22
+ plt.figure(figsize=(10, 8))
23
+ plt.show()
24
+
25
+ # Scatter plot for specific features against the target variable
26
+ features = ['MedInc', 'AveRooms', 'AveOccup', 'HouseAge']
27
+ for feature in features:
28
+ plt.figure(figsize=(6, 4))
29
+ plt.scatter(df[feature], df['MedHouseVal'], alpha=0.3)
30
+ plt.title(f'MedHouseVal vs {feature}')
31
+ plt.xlabel(feature)
32
+ plt.ylabel('MedHouseVal')
33
+ plt.show()
34
+ #5
35
+ # Select the predictor and target variable
36
+ X = df[['MedInc']]
37
+ y = df['MedHouseVal']
38
+
39
  # Split the data into training and testing sets
40
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
41
+ print("Training and testing data split done.")
42
+
43
+ #6 7 and 8
44
+ # Linear regression model
45
+ model = LinearRegression()
46
+
47
+ # Fitting the model on the training data
48
+ model.fit(X_train, y_train)
49
+
50
+ # Making predictions on the test data
51
+ y_pred = model.predict(X_test)
52
+
53
+ # Evaluating the model
54
+ mse = mean_squared_error(y_test, y_pred)
55
+ r2 = r2_score(y_test, y_pred)
56
+
57
+ print(f"Mean Squared Error: {mse}")
58
+ print(f"R-squared: {r2}")
59
+
60
+ # Plot the regression line
61
+ plt.figure(figsize=(8, 6))
62
+ plt.scatter(X_test, y_test, color='blue', alpha=0.3, label='Actual')
63
+ plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted')
64
+ plt.title('Simple Linear Regression: MedInc vs MedHouseVal')
65
+ plt.xlabel('MedInc')
66
+ plt.ylabel('MedHouseVal')
67
+ plt.legend()
68
+ plt.show()
69
+
70
+ # Split the data into training (80%) and testing (20%) sets
71
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
72
+
73
+ # Print the sizes of the training and testing sets
74
+ print(f"Training set size: {X_train.shape[0]} samples")
75
+ print(f"Testing set size: {X_test.shape[0]} samples")
76
 
77
+ # Create the linear regression model
78
  model = LinearRegression()
79
+
80
+ # Fit the model on the training data
81
  model.fit(X_train, y_train)
82
 
83
+ # Print the coefficients
84
+ print(f"Coefficients: {model.coef_}")
85
+ print(f"Intercept: {model.intercept_}")
86
+
87
+ # Make predictions on the test data
88
+ y_pred = model.predict(X_test)
89
+
90
+ # Calculate RMSE and R-squared
91
+ mse = mean_squared_error(y_test, y_pred)
92
+ rmse = np.sqrt(mse)
93
+ r2 = r2_score(y_test, y_pred)
94
+
95
+ print(f"Root Mean Squared Error (RMSE): {rmse}")
96
+ print(f"R-squared: {r2}")
97
+
98
+ # Scatter plot of actual vs. predicted values
99
+ plt.figure(figsize=(8, 6))
100
+ plt.scatter(y_test, y_pred, color='blue', alpha=0.3)
101
+ plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, color='green')
102
+ plt.title('Multilinear Regression: Actual vs. Predicted MedHouseVal')
103
+ plt.xlabel('Actual MedHouseVal')
104
+ plt.ylabel('Predicted MedHouseVal')
105
+ plt.show()
106
+
107
+ # Compare the performance of simple and multilinear regression using RMSE and R-squared values
108
+ # Simple Linear Regression
109
+ # Select a single predictor
110
+ X_single = df[['MedInc']]
111
+ y = df['MedHouseVal']
112
+
113
+ # Split the data into training and testing sets
114
+ X_train_single, X_test_single, y_train_single, y_test_single = train_test_split(X_single, y, test_size=0.2, random_state=42)
115
+
116
+ # Create the linear regression model
117
+ model_single = LinearRegression()
118
+
119
+ # Fit the model on the training data
120
+ model_single.fit(X_train_single, y_train_single)
121
+
122
+ # Make predictions on the test data
123
+ y_pred_single = model_single.predict(X_test_single)
124
+
125
+ # Evaluate the model
126
+ mse_single = mean_squared_error(y_test_single, y_pred_single)
127
+ rmse_single = np.sqrt(mse_single)
128
+ r2_single = r2_score(y_test_single, y_pred_single)
129
+
130
+ print(f"Simple Linear Regression - RMSE: {rmse_single}")
131
+ print(f"Simple Linear Regression - R-squared: {r2_single}")
132
+
133
+ # Multilinear Regression
134
+ # Select multiple predictors
135
+ X_multi = df[['MedInc', 'AveRooms', 'HouseAge', 'AveOccup']]
136
+ y = df['MedHouseVal']
137
+
138
+ # Split the data into training and testing sets
139
+ X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(X_multi, y, test_size=0.2, random_state=42)
140
+
141
+ # Create the linear regression model
142
+ model_multi = LinearRegression()
143
+
144
+ # Fit the model on the training data
145
+ model_multi.fit(X_train_multi, y_train_multi)
146
+
147
+ # Make predictions on the test data
148
+ y_pred_multi = model_multi.predict(X_test_multi)
149
+
150
+ # Evaluate the model
151
+ mse_multi = mean_squared_error(y_test_multi, y_pred_multi)
152
+ rmse_multi = np.sqrt(mse_multi)
153
+ r2_multi = r2_score(y_test_multi, y_pred_multi)
154
+
155
+ print(f"Multilinear Regression - RMSE: {rmse_multi}")
156
+ print(f"Multilinear Regression - R-squared: {r2_multi}")
157
+
158
+ # Residual plot for multilinear regression
159
+ residuals = y_test_multi - y_pred_multi
160
+ plt.figure(figsize=(8, 6))
161
+ plt.scatter(y_pred_multi, residuals, color='blue', alpha=0.3)
162
+ plt.hlines(y=0, xmin=y_pred_multi.min(), xmax=y_pred_multi.max(), colors='red', linestyles='--', lw=2)
163
+ plt.title('Residual Plot: Multilinear Regression')
164
+ plt.xlabel('Predicted MedHouseVal')
165
+ plt.ylabel('Residuals')
166
+ plt.show()
167
+
168
  # Save the model
169
  with open("linear_regression_model.pkl", "wb") as file:
170
  pickle.dump(model, file)
 
173
  with open("linear_regression_model.pkl", "rb") as file:
174
  model = pickle.load(file)
175
 
176
+ # Sidebar for user input features
177
+ st.sidebar.header('User Input Features')
178
+ selected_feature = st.sidebar.selectbox('Select feature for visualization', df.columns)
179
+ selected_target = st.sidebar.selectbox('Select target variable', df.columns)
180
+
181
+ # Display the raw data if checkbox is selected
182
+ if st.checkbox('Show raw data'):
183
+ st.write(df)
184
+
185
+ # Visualization of selected feature
186
+ st.subheader(f'Distribution of {selected_feature}')
187
+ plt.figure(figsize=(10, 6))
188
+ plt.hist(df[selected_feature], bins=30, edgecolor='black')
189
+ st.pyplot(plt)
190
+
191
+ # Scatter plot of selected feature vs target
192
+ st.subheader(f'Scatter plot of {selected_feature} vs {selected_target}')
193
+ plt.figure(figsize=(10, 6))
194
+ plt.scatter(df[selected_feature], df[selected_target], alpha=0.3)
195
+ plt.xlabel(selected_feature)
196
+ plt.ylabel(selected_target)
197
+ st.pyplot(plt)
198
+
199
+ # Prediction
200
+ st.subheader('Predict Median House Value')
201
 
202
+ # Input values for prediction
203
+ input_values = {}
204
+ for feature in X.columns:
205
+ input_values[feature] = st.number_input(f'Enter {feature}', value=float(df[feature].mean()))
206
 
207
  if st.button('Predict'):
208
+ input_data = np.array([list(input_values.values())])
209
+ prediction = model.predict(input_data)
210
  st.write(f'Predicted Median House Value: {prediction[0]}')
211
 
212
  # Display data