📍개요
Take Me Home, Country Roads 노래 가사를 워드 클라우드로 만들어 어떤 단어가 자주 등장하는지 알아보기
📍전체 코드
from collections import Counter

import wordcloud
from matplotlib import pyplot as plt
# Lyrics of "Take Me Home, Country Roads", used as the input text for the word cloud.
sample_text = """Almost heaven, West Virginia
Blue Ridge Mountains, Shenandoah River
Life is old there, older than the trees
Younger than the mountains, growin' like a breeze
Country roads, take me home
To the place I belong
West Virginia, mountain mama
Take me home, country roads
All my memories gather 'round her
Miner's lady, stranger to blue water
Dark and dusty, painted on the sky
Misty taste of moonshine, teardrop in my eye
Country roads, take me home
To the place I belong
West Virginia, mountain mama
Take me home, country roads
I hear her voice in the mornin' hour, she calls me
The radio reminds me of my home far away
Drivin' down the road, I get a feelin'
That I should've been home yesterday, yesterday
Country roads, take me home
To the place I belong
West Virginia, mountain mama
Take me home, country roads
Country roads, take me home
To the place I belong
West Virginia, mountain mama
Take me home, country roads
Take me home, (down) country roads
Take me home, (down) country roads
"""
def _count_words(text):
    """Count the interesting words in *text*, case-insensitively.

    Each whitespace-separated token is lower-cased and stripped of the
    punctuation characters listed below. Stop words, and tokens that are
    empty once the punctuation is removed, are not counted.

    Args:
        text: The text to analyse.

    Returns:
        A ``Counter`` mapping each remaining word to its number of
        occurrences.
    """
    # Punctuation characters removed from every token.
    punctuations = '''!()-[]{};:'",<>./?@#$%^&*_~\\'''
    # Common filler words that should not appear in the cloud.
    uninteresting_words = {
        "the", "a", "to", "if", "is", "it", "of", "and", "or", "an", "as",
        "i", "me", "my", "we", "our", "ours", "you", "your", "yours", "he",
        "she", "him", "his", "her", "hers", "its", "they", "them", "their",
        "what", "which", "who", "whom", "this", "that", "am", "are", "was",
        "were", "be", "been", "being", "have", "has", "had", "do", "does",
        "did", "but", "at", "by", "with", "from", "here", "when", "where",
        "how", "all", "any", "both", "each", "few", "more", "some", "such",
        "no", "nor", "too", "very", "can", "will", "just",
    }
    # One translation table deletes every punctuation character in one pass.
    strip_punctuation = str.maketrans("", "", punctuations)

    frequencies = Counter()
    for token in text.split():
        word = token.lower().translate(strip_punctuation)
        # A token made only of punctuation (e.g. "--") collapses to "" and
        # must not be counted as a word.
        if word and word not in uninteresting_words:
            frequencies[word] += 1
    return frequencies


def calculate_frequencies(text, *, output_path=None):
    """Display a word cloud of the word frequencies found in *text*.

    Words are counted case-insensitively after punctuation is removed and
    common filler words are dropped; the resulting frequencies are rendered
    with ``wordcloud`` and shown in a matplotlib window.

    Args:
        text: The text to analyse.
        output_path: Optional file name. When given, the rendered cloud is
            also written there via ``WordCloud.to_file``.

    Returns:
        The ``Counter`` of word frequencies that was plotted.

    Raises:
        ValueError: If *text* contains no countable words after filtering.
    """
    frequencies = _count_words(text)
    if not frequencies:
        # Fail early with a clear message instead of handing an empty
        # mapping to the renderer.
        raise ValueError("text contains no words to plot after filtering")

    cloud = wordcloud.WordCloud()
    cloud.generate_from_frequencies(frequencies)
    if output_path is not None:
        cloud.to_file(output_path)

    plt.imshow(cloud.to_array(), interpolation='nearest')
    plt.axis('off')
    plt.show()
    return frequencies
# Build and display the word cloud for the sample lyrics.
calculate_frequencies(sample_text)