# 5) Train and fine-tune a Decision Tree for the Moons dataset.
#
# Flat script: generates a noisy two-moons dataset, prints a short EDA,
# tunes a DecisionTreeClassifier with a cross-validated grid search, reports
# accuracy on a held-out test split, and plots the selected tree.

# 1. Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# One seed shared by data generation, the train/test split and the tree, so
# the best hyperparameters and the reported accuracy are the same on every
# run instead of drifting with each execution.
RANDOM_STATE = 42

# Generate moons data
X, y = make_moons(n_samples=1000, noise=0.3, random_state=RANDOM_STATE)

# 2. Exploratory Data Analysis (EDA)

# 2.1 Data shape and description
print("Data Shape:", X.shape)
print("Description of first 5 rows:")
print(X[:5])
print("Description of target variable:")
# y is a NumPy array; wrap it in a pandas Series to get value_counts().
print(pd.Series(y).value_counts())

# 2.2 Check for missing values (NaN count per feature column)
print("Missing values in features:", np.isnan(X).sum(axis=0))

# 2.3 Visualize the moons data, colouring each point by its class label
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.title("Moons Dataset")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# 3. Train-test split: hold out 20% of the samples for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# 4. Define the decision tree classifier
# Seeded so that tie-breaking between equally good splits is deterministic.
clf = DecisionTreeClassifier(random_state=RANDOM_STATE)

# 5. Hyperparameter tuning with GridSearchCV
param_grid = {
    "max_depth": [2, 3, 4, 5],
    "min_samples_split": [2, 5, 10],
}

# Evaluate every grid combination with 5-fold cross-validation on the
# training split only; the test split stays untouched until step 6.
grid_clf = GridSearchCV(clf, param_grid, scoring="accuracy", cv=5)
grid_clf.fit(X_train, y_train)

# best_estimator_ is the winning configuration refit on the full training set.
best_model = grid_clf.best_estimator_
print("Best Hyperparameters:", grid_clf.best_params_)

# 6. Evaluate the model on the held-out test set
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# 7. Visualize the decision tree
plot_tree(best_model)
plt.show()