"""Train and tune a support-vector classifier on the Iris dataset.

The script runs top to bottom:

1. Load Iris from scikit-learn into a DataFrame and print summary output.
2. Show a seaborn pairplot of all features, coloured by class label.
3. Hold out 20% of the rows as a test set (``random_state=42``).
4. Fit a linear-kernel ``SVC`` and report its train/test accuracy.
5. Grid-search ``kernel`` and ``C`` with 5-fold cross-validation on the
   training split and report the best estimator's test performance.
"""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# ---------------------------------------------------------------------------
# Load the data
# ---------------------------------------------------------------------------
iris = datasets.load_iris()

# One column per feature, plus the integer class label in 'target'.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

# ---------------------------------------------------------------------------
# Exploratory data analysis
# ---------------------------------------------------------------------------
print(iris_df.describe())  # Summary statistics
print(iris_df.head())  # First few rows

# Pairwise feature relationships, coloured by class label.
sns.pairplot(iris_df, hue='target', palette='viridis', diag_kind='hist')
# y=1.02 positions the title just above the top edge of the figure.
plt.suptitle("Pairplot of Iris Dataset", y=1.02)
plt.show()

# ---------------------------------------------------------------------------
# Train/test split
# ---------------------------------------------------------------------------
X = iris_df.drop('target', axis=1)  # Features
y = iris_df['target']  # Labels

# The fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---------------------------------------------------------------------------
# Baseline model: linear-kernel SVM
# ---------------------------------------------------------------------------
clf = SVC(kernel='linear')  # Other kernels (e.g. 'rbf') can be tried here
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))
print("Training Accuracy:", accuracy_score(y_train, clf.predict(X_train)))
print("Testing Accuracy:", accuracy_score(y_test, y_pred))

# ---------------------------------------------------------------------------
# Hyperparameter search
# ---------------------------------------------------------------------------
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.01, 0.1, 1, 10, 100],
}

# The search only ever sees the training split; the test split stays held
# out for the final evaluation below.
grid_search = GridSearchCV(SVC(), param_grid, cv=5)  # 5-fold cross-validation
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(best_params)

# ---------------------------------------------------------------------------
# Evaluate the tuned model on the held-out test set
# ---------------------------------------------------------------------------
y_pred = best_model.predict(X_test)
print(classification_report(y_test, y_pred))
print("Testing Accuracy:", accuracy_score(y_test, y_pred))