# Install the required libraries.
# NOTE: the leading "!" is IPython/Colab shell-escape syntax, so this line is
# only valid inside a notebook cell; it is a syntax error under plain `python`.
!pip install pandas scikit-learn nltk

import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix

# Shared text-normalization resources used by the preprocessing step.
stemmer = PorterStemmer()

# The stop-word corpus has to be fetched before it can be read; the word list
# is stored as a set so membership checks are constant-time.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Open Colab's file-upload widget (only available inside Google Colab).
# NOTE(review): `uploaded` is not read again in the visible code; presumably
# the upload exists to place spam.csv in the working directory - confirm.
from google.colab import files
uploaded = files.upload()

#https://drive.google.com/file/d/1MknPltQIslIYtGC_kKPSec9fHHgMPz2q/view?usp=sharing

# Read the CSV, keep only the label/message columns and give them
# descriptive names in a single chained expression.
df = (
    pd.read_csv('/content/spam.csv', encoding='latin-1')
    .loc[:, ['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'text'})
)

# Encode the class labels numerically: ham -> 0, spam -> 1.
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

df.head()

#Preprocess the Text
# Translation table that deletes every ASCII punctuation character in one pass.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def preprocess(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The text is lowercased, stripped of ASCII punctuation, split on
    whitespace, filtered against ``stop_words`` and stemmed with ``stemmer``.

    Args:
        text: The message to clean. A missing value (``None``/NaN, e.g. an
            empty CSV cell) yields an empty string; any other non-string
            value is converted with ``str()`` before cleaning.

    Returns:
        The surviving stemmed tokens joined by single spaces.
    """
    if not isinstance(text, str):
        # Previously a non-string cell crashed on ``.lower()``.
        if pd.isna(text):
            return ''
        text = str(text)

    # str.translate removes all punctuation in one C-level pass instead of
    # testing each character against ``string.punctuation`` in Python.
    text = text.lower().translate(_PUNCT_TABLE)
    return ' '.join(
        stemmer.stem(word) for word in text.split() if word not in stop_words
    )

# Clean every message element-wise and store the result next to the raw text.
df['clean_text'] = df['text'].map(preprocess)

#Train-Test Split
# Split the cleaned text BEFORE vectorizing: the TF-IDF vocabulary and IDF
# weights must be learned from the training messages only. Fitting on the
# whole corpus leaks test-set statistics into the features and makes any
# evaluation on X_test optimistic.
y = df['label']                            # Labels
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_text'], y, test_size=0.2, random_state=42
)

#Convert to TF-IDF Vectors
tfidf = TfidfVectorizer()
X_train = tfidf.fit_transform(text_train)  # learn vocabulary/IDF from training text only
X_test = tfidf.transform(text_test)        # project test text onto the training vocabulary
X = tfidf.transform(df['clean_text'])      # full feature matrix, kept for code that still reads X

#Train Naive Bayes
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict on the held-out split, then print the per-class metrics followed by
# the raw confusion matrix.
y_pred = model.predict(X_test)

print("\n--- Classification Report ---", classification_report(y_test, y_pred), sep="\n")

print("\n--- Confusion Matrix ---", confusion_matrix(y_test, y_pred), sep="\n")

# Predict on a new message (nothing is saved to disk here).
# The message goes through the same preprocess -> TF-IDF transform steps as
# the training data before being passed to the trained model.
msg = "You have won $1000 cash prize!!! Click here"
msg_clean = preprocess(msg)
# transform() expects an iterable of documents, hence the one-element list.
msg_vector = tfidf.transform([msg_clean])
# predict() returns one label per input row; [0] picks the single result.
print("Prediction (1=spam, 0=ham):", model.predict(msg_vector)[0])