"""Support-vector regression on the California housing dataset.

The script loads the dataset, prints a short exploratory summary, plots the
target distribution, fits a baseline RBF-kernel SVR, and then tunes the SVR
hyperparameters with a cross-validated grid search. Both models are scored
on the same held-out test split.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# --- Load data -------------------------------------------------------------
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# --- Exploratory summary ---------------------------------------------------
print(X.describe())
print(X.head())
print("Missing values:", X.isnull().sum())

# Visualize the target distribution. `sns.distplot` is deprecated, so use
# `histplot` with a KDE overlay; stat="density" keeps the y-axis consistent
# with the "Density" label below.
sns.histplot(y, kde=True, stat="density")
plt.xlabel("Median House Value")
plt.ylabel("Density")
plt.title("Distribution of median house values")
plt.show()

# --- Preprocessing ---------------------------------------------------------
# Split BEFORE scaling: fitting the scaler on the full dataset would leak
# test-set statistics (mean/variance) into the training features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # learn scaling from training rows only
X_test = scaler.transform(X_test)        # reuse the training statistics

# --- Baseline model --------------------------------------------------------
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

y_pred = svr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

# --- Hyperparameter search -------------------------------------------------
# One sub-grid per kernel: `gamma` is ignored by the linear kernel, so
# crossing it with 'linear' would only repeat identical (and slow) fits.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.01, 0.1, 1, 10, 100],  # Regularization parameter
    },
    {
        'kernel': ['rbf'],
        'C': [0.01, 0.1, 1, 10, 100],  # Regularization parameter
        'gamma': [0.001, 0.01, 0.1, 1],  # RBF kernel coefficient
    },
]

# 5-fold cross-validation; n_jobs=-1 runs the independent fits in parallel.
grid_search = GridSearchCV(SVR(), param_grid, cv=5, n_jobs=-1)

# Fit the grid search to the training data only; the test split stays unseen.
grid_search.fit(X_train, y_train)

# Get the best model and its parameters.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Evaluate the tuned model on the same held-out test split as the baseline.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)
print("Best Hyperparameters:", best_params)