[Kaggle] 네이버 영화 리뷰 분류(2)
·
🗣️ Natural Language Processing
# 전처리 함수 작성 후 적용 def preprocessing(data,stopword): rm = re.compile('[:;\\'\\"\\[\\]\\(\\)\\.,@]') rm_data = data.astype(str).apply(lambda x: re.sub(rm, '', x)) word_token = [word_tokenize(x) for x in rm_data] remove_stopwords_tokens = [] for sentence in word_token: temp = [] for word in sentence: if word not in stopword: temp.append(word) remove_stopwords_tokens.append(temp) return remove_stopwo..