import nltk
from nltk.tokenize import word_tokenize

# NOTE(review): `text` is not defined in this fragment; it is assumed to be a
# str bound earlier in the file -- confirm against the surrounding code.

# Replace '+' with spaces *before* tokenizing, so the tokenizer operates on
# the cleaned text (doing this after tokenization would not affect `tokens`).
text = text.replace("+", " ")

# Tokenize the cleaned text.
tokens = word_tokenize(text)