# SMS spam classifier: TF-IDF features + Multinomial Naive Bayes.
#
# Install required libraries first. The line below is notebook (IPython/Colab)
# shell magic and is not valid syntax in a plain .py file, so it is kept as a
# comment:
#   !pip install pandas scikit-learn nltk

import string

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Download stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Colab-only: opens a file-upload prompt in the notebook.
from google.colab import files
uploaded = files.upload()
# Link kept from the original notebook:
# https://drive.google.com/file/d/1MknPltQIslIYtGC_kKPSec9fHHgMPz2q/view?usp=sharing

# Load CSV and rename columns
df = pd.read_csv('/content/spam.csv', encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'text']
df['label'] = df['label'].map({'ham': 0, 'spam': 1})  # ham=0, spam=1
df.head()  # only rendered when this is the last expression of a notebook cell

# Translation table that deletes every ASCII punctuation character; built once
# so each preprocess() call is a single C-level pass over the text.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# Preprocess the text
def preprocess(text):
    """Normalize a message for vectorization.

    Lowercases ``text``, removes ASCII punctuation, splits on whitespace,
    drops English stop words and stems the remaining tokens.

    Args:
        text: The raw message string.

    Returns:
        The stemmed, stop-word-free tokens joined by single spaces.
    """
    text = text.lower().translate(_PUNCTUATION_TABLE)
    tokens = [
        stemmer.stem(word)
        for word in text.split()
        if word not in stop_words
    ]
    return ' '.join(tokens)


df['clean_text'] = df['text'].apply(preprocess)

X = df['clean_text']  # Features (cleaned text; vectorized after the split)
y = df['label']  # Labels

# Train-test split. Split the raw text BEFORE fitting TF-IDF so the vocabulary
# and IDF weights are learned from the training portion only; fitting on the
# full corpus would leak test-set statistics into the model's features.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert to TF-IDF vectors
tfidf = TfidfVectorizer()
X_train = tfidf.fit_transform(text_train)
X_test = tfidf.transform(text_test)

# Train Naive Bayes
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict & evaluate
y_pred = model.predict(X_test)

print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred))

print("\n--- Confusion Matrix ---")
print(confusion_matrix(y_test, y_pred))

# Predict on a new message (must go through the same preprocessing and the
# already-fitted vectorizer).
msg = "You have won $1000 cash prize!!! Click here"
msg_clean = preprocess(msg)
msg_vector = tfidf.transform([msg_clean])
print("Prediction (1=spam, 0=ham):", model.predict(msg_vector)[0])