1. Write Python code that uses NLP techniques to preprocess text data and convert the text into numeric vectors.
   I. Use NLTK for the preprocessing logic: tokenization, stopword removal, and stemming/lemmatization.
   II. Use scikit-learn to convert the text into numeric vectors with the TF-IDF model.

Consider novel.txt as the text document for implementing Question 1.

------------------------------------------------------------------------------------
Question 1
--------------
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Download the NLTK resources used below.
# Recent NLTK releases load the tokenizer models from 'punkt_tab' instead of
# 'punkt', so both are requested to keep the script working on old and new
# installations (an unknown resource name is reported, not raised).
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Load the text document
with open('novel.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Tokenization
tokens = word_tokenize(text)

# Normalise case first: the WordNet lemmatizer only recognises lowercase
# forms, so a capitalised word at the start of a sentence would otherwise be
# left unreduced and end up as a separate TF-IDF feature.
lowered_tokens = [token.lower() for token in tokens]

# Remove stopwords and tokens that contain no letter or digit at all
# (word_tokenize emits punctuation marks as tokens of their own).
stop_words = set(stopwords.words('english'))
filtered_tokens = [
    token
    for token in lowered_tokens
    if token not in stop_words and any(ch.isalnum() for ch in token)
]

# Stemming and Lemmatization
# Both are computed so the two techniques can be compared; only the
# lemmatized tokens are fed to the vectorizer below.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]

# Text preprocessing logic
# Swap in stemmed_tokens here to vectorize stems instead of lemmas.
preprocessed_text = ' '.join(lemmatized_tokens)

# Convert to text-numeric vectors using TF-IDF
# NOTE: the corpus holds a single document, so every term receives the same
# IDF weight and the resulting row is effectively normalised term frequency.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform([preprocessed_text])

# Print the text-numeric vectors
print(vectors.toarray())

-----------------------------------------------------------------------------------
Question 2
----------------
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF model with scikit-learn's default settings
vectorizer = TfidfVectorizer()

# Read the entire novel into one string
with open('novel.txt', 'r', encoding='utf-8') as source:
    text = source.read()

# The vectorizer expects an iterable of documents, so the novel is wrapped
# in a one-element corpus before fitting and transforming in a single call.
corpus = [text]
vectors = vectorizer.fit_transform(corpus)

# Show the sparse result as a dense array
dense_vectors = vectors.toarray()
print(dense_vectors)