"""Train and evaluate a linear SVM text classifier on the BBC News dataset.

The script reads a CSV file with an ``Article`` text column and a
``Category`` label column, holds out part of the rows for testing, turns the
text into TF-IDF vectors, fits a ``LinearSVC`` and prints a per-class
classification report for the held-out rows.
"""

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

# Location of the dataset, relative to the current working directory.
DATA_PATH = 'bbc-news.csv'
# Fraction of the rows held out for evaluation.
TEST_SIZE = 0.2
# Fixed seed so the train/test split is reproducible between runs.
RANDOM_STATE = 42

# Load the BBC News dataset.
data = pd.read_csv(DATA_PATH)

# Separate the features (article text) from the labels (category).
X = data['Article']
y = data['Category']

# Split the dataset into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# The vectorizer is fitted on the training text only; the test text is then
# transformed with that already-fitted vectorizer, so nothing is learned
# from the held-out rows.
vectorizer = TfidfVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)

# Initialize and train a linear support vector classifier.
classifier = LinearSVC()
classifier.fit(X_train_vectors, y_train)

# Predict categories for the held-out articles.
y_pred = classifier.predict(X_test_vectors)

# Report the model's performance on the test set.
print(classification_report(y_test, y_pred))