"""Assignment answers: NLTK text pre-processing and TF-IDF vectorization.

Question 1
    Write Python code using NLP to pre-process the text data and convert the
    text into numeric vectors.
    I.  Use tokenization, stopword removal, and stemming/lemmatization as the
        text pre-processing logic, using NLTK.
    II. Use scikit-learn to convert the text into numeric vectors with the
        TF-IDF model.
    Use novel.txt as the text document for Question 1.

Question 2
    The question statement is not included in this file; the answer below
    feeds the raw text of novel.txt straight into TfidfVectorizer.
"""

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# Text document shared by both questions.
NOVEL_PATH = 'novel.txt'

# NLTK data packages used by the pre-processing steps.
# 'punkt_tab' is what recent NLTK releases look up for word_tokenize; on
# older releases that download simply reports the package as unknown.
NLTK_RESOURCES = ('punkt', 'punkt_tab', 'stopwords', 'wordnet')


def download_nltk_resources() -> None:
    """Download every NLTK data package listed in NLTK_RESOURCES."""
    for resource in NLTK_RESOURCES:
        nltk.download(resource)


def load_text(path: str = NOVEL_PATH) -> str:
    """Return the full contents of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='utf-8') as file:
        return file.read()


def preprocess(text: str, *, use_stemming: bool = False) -> list:
    """Tokenize *text*, drop English stopwords, and normalize each token.

    Args:
        text: Raw document text.
        use_stemming: When True, normalize tokens with PorterStemmer;
            otherwise (default) with WordNetLemmatizer.

    Returns:
        The normalized tokens, in document order.
    """
    stop_words = set(stopwords.words('english'))

    # Lowercase before normalizing: the lemmatizer leaves capitalized forms
    # such as "Dogs" untouched, which would later become a separate TF-IDF
    # feature from "dog".
    tokens = [token.lower() for token in word_tokenize(text)]
    filtered_tokens = [token for token in tokens if token not in stop_words]

    # Punctuation tokens are left in place here; TfidfVectorizer's default
    # token pattern discards them when the text is vectorized.
    if use_stemming:
        normalize = PorterStemmer().stem
    else:
        normalize = WordNetLemmatizer().lemmatize
    return [normalize(token) for token in filtered_tokens]


def tfidf_vectors(documents):
    """Fit a default TfidfVectorizer on *documents* and return the matrix.

    Args:
        documents: Iterable of document strings forming the corpus.

    Returns:
        The sparse TF-IDF matrix produced by fit_transform, one row per
        document.
    """
    vectorizer = TfidfVectorizer()
    return vectorizer.fit_transform(documents)


def main() -> None:
    """Run the Question 1 and Question 2 answers and print both matrices."""
    download_nltk_resources()
    text = load_text()

    # NOTE: both answers treat the whole novel as a one-document corpus, so
    # every term receives the same IDF weight and each printed row is just
    # the normalized term frequencies. Split the text into several documents
    # (e.g. chapters) if distinguishing IDF weights are wanted.

    # ---------------- Question 1 ----------------
    # NLTK pre-processing followed by TF-IDF vectorization.
    preprocessed_text = ' '.join(preprocess(text))
    vectors = tfidf_vectors([preprocessed_text])
    print(vectors.toarray())

    # ---------------- Question 2 ----------------
    # TF-IDF on the raw text, relying on the vectorizer's own tokenization.
    vectors = tfidf_vectors([text])
    print(vectors.toarray())


if __name__ == '__main__':
    main()