Spaces:

YoneSlapWind80085
/

streamlittt

Sleeping

App Files Files Community

YoneSlapWind80085 committed on Jun 5, 2024

Commit

a14426e

verified ·

1 Parent(s): eaf0a9d

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -6

app.py CHANGED Viewed

@@ -15,13 +15,156 @@ df['MedHouseVal'] = california.target
 X = df[['MedInc']]
 y = df['MedHouseVal']
 # Split the data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Train the model
 model = LinearRegression()
 model.fit(X_train, y_train)
 # Save the model
 with open("linear_regression_model.pkl", "wb") as file:
     pickle.dump(model, file)
@@ -30,14 +173,40 @@ with open("linear_regression_model.pkl", "wb") as file:
 with open("linear_regression_model.pkl", "rb") as file:
     model = pickle.load(file)
-# Streamlit app
-st.title('California Housing Price Prediction')
-med_inc = st.number_input('Enter Median Income:', min_value=0.0, step=0.01)
 if st.button('Predict'):
-    X_new = np.array([[med_inc]])
-    prediction = model.predict(X_new)
     st.write(f'Predicted Median House Value: {prediction[0]}')
 # Display data

 X = df[['MedInc']]
 y = df['MedHouseVal']
+# Pairplot to visualize relationships between features and the target
+plt.show()
+plt.figure(figsize=(10, 8))
+plt.show()
+# Scatter plot for specific features against the target variable
+features = ['MedInc', 'AveRooms', 'AveOccup', 'HouseAge']
+for feature in features:
+    plt.figure(figsize=(6, 4))
+    plt.scatter(df[feature], df['MedHouseVal'], alpha=0.3)
+    plt.title(f'MedHouseVal vs {feature}')
+    plt.xlabel(feature)
+    plt.ylabel('MedHouseVal')
+    plt.show()
+#5
+# Select the predictor and target variable
+X = df[['MedInc']]
+y = df['MedHouseVal']
 # Split the data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+print("Training and testing data split done.")
+#6 7 and 8
+# Linear regression model
+model = LinearRegression()
+# Fitting the model on the training data
+model.fit(X_train, y_train)
+# Making predictions on the test data
+y_pred = model.predict(X_test)
+# Evaluating the model
+mse = mean_squared_error(y_test, y_pred)
+r2 = r2_score(y_test, y_pred)
+print(f"Mean Squared Error: {mse}")
+print(f"R-squared: {r2}")
+# Plot the regression line
+plt.figure(figsize=(8, 6))
+plt.scatter(X_test, y_test, color='blue', alpha=0.3, label='Actual')
+plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted')
+plt.title('Simple Linear Regression: MedInc vs MedHouseVal')
+plt.xlabel('MedInc')
+plt.ylabel('MedHouseVal')
+plt.legend()
+plt.show()
+# Split the data into training (80%) and testing (20%) sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Print the sizes of the training and testing sets
+print(f"Training set size: {X_train.shape[0]} samples")
+print(f"Testing set size: {X_test.shape[0]} samples")
+# Create the linear regression model
 model = LinearRegression()
+# Fit the model on the training data
 model.fit(X_train, y_train)
+# Print the coefficients
+print(f"Coefficients: {model.coef_}")
+print(f"Intercept: {model.intercept_}")
+# Make predictions on the test data
+y_pred = model.predict(X_test)
+# Calculate RMSE and R-squared
+mse = mean_squared_error(y_test, y_pred)
+rmse = np.sqrt(mse)
+r2 = r2_score(y_test, y_pred)
+print(f"Root Mean Squared Error (RMSE): {rmse}")
+print(f"R-squared: {r2}")
+# Scatter plot of actual vs. predicted values
+plt.figure(figsize=(8, 6))
+plt.scatter(y_test, y_pred, color='blue', alpha=0.3)
+plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, color='green')
+plt.title('Multilinear Regression: Actual vs. Predicted MedHouseVal')
+plt.xlabel('Actual MedHouseVal')
+plt.ylabel('Predicted MedHouseVal')
+plt.show()
+# Compare the simple and multilinear regression models using their RMSE and R-squared values
+# Simple Linear Regression
+# Select a single predictor
+X_single = df[['MedInc']]
+y = df['MedHouseVal']
+# Split the data into training and testing sets
+X_train_single, X_test_single, y_train_single, y_test_single = train_test_split(X_single, y, test_size=0.2, random_state=42)
+# Create the linear regression model
+model_single = LinearRegression()
+# Fit the model on the training data
+model_single.fit(X_train_single, y_train_single)
+# Make predictions on the test data
+y_pred_single = model_single.predict(X_test_single)
+# Evaluate the model
+mse_single = mean_squared_error(y_test_single, y_pred_single)
+rmse_single = np.sqrt(mse_single)
+r2_single = r2_score(y_test_single, y_pred_single)
+print(f"Simple Linear Regression - RMSE: {rmse_single}")
+print(f"Simple Linear Regression - R-squared: {r2_single}")
+# Multilinear Regression
+# Select multiple predictors
+X_multi = df[['MedInc', 'AveRooms', 'HouseAge', 'AveOccup']]
+y = df['MedHouseVal']
+# Split the data into training and testing sets
+X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(X_multi, y, test_size=0.2, random_state=42)
+# Create the linear regression model
+model_multi = LinearRegression()
+# Fit the model on the training data
+model_multi.fit(X_train_multi, y_train_multi)
+# Make predictions on the test data
+y_pred_multi = model_multi.predict(X_test_multi)
+# Evaluate the model
+mse_multi = mean_squared_error(y_test_multi, y_pred_multi)
+rmse_multi = np.sqrt(mse_multi)
+r2_multi = r2_score(y_test_multi, y_pred_multi)
+print(f"Multilinear Regression - RMSE: {rmse_multi}")
+print(f"Multilinear Regression - R-squared: {r2_multi}")
+#Residual Plot for Multilinear Regression
+residuals = y_test_multi - y_pred_multi
+plt.figure(figsize=(8, 6))
+plt.scatter(y_pred_multi, residuals, color='blue', alpha=0.3)
+plt.hlines(y=0, xmin=y_pred_multi.min(), xmax=y_pred_multi.max(), colors='red', linestyles='--', lw=2)
+plt.title('Residual Plot: Multilinear Regression')
+plt.xlabel('Predicted MedHouseVal')
+plt.ylabel('Residuals')
+plt.show()
 # Save the model
 with open("linear_regression_model.pkl", "wb") as file:
     pickle.dump(model, file)
 with open("linear_regression_model.pkl", "rb") as file:
     model = pickle.load(file)
+# Sidebar for user input features
+st.sidebar.header('User Input Features')
+selected_feature = st.sidebar.selectbox('Select feature for visualization', df.columns)
+selected_target = st.sidebar.selectbox('Select target variable', df.columns)
+# Display the raw data if checkbox is selected
+if st.checkbox('Show raw data'):
+    st.write(df)
+# Visualization of selected feature
+st.subheader(f'Distribution of {selected_feature}')
+plt.figure(figsize=(10, 6))
+plt.hist(df[selected_feature], bins=30, edgecolor='black')
+st.pyplot(plt)
+# Scatter plot of selected feature vs target
+st.subheader(f'Scatter plot of {selected_feature} vs {selected_target}')
+plt.figure(figsize=(10, 6))
+plt.scatter(df[selected_feature], df[selected_target], alpha=0.3)
+plt.xlabel(selected_feature)
+plt.ylabel(selected_target)
+st.pyplot(plt)
+# Prediction
+st.subheader('Predict Median House Value')
+# Input values for prediction
+input_values = {}
+for feature in X.columns:
+    input_values[feature] = st.number_input(f'Enter {feature}', value=float(df[feature].mean()))
 if st.button('Predict'):
+    input_data = np.array([list(input_values.values())])
+    prediction = model.predict(input_data)
     st.write(f'Predicted Median House Value: {prediction[0]}')
 # Display data