描述:连接数据库,查询某一字段,根据该字段内容关键字频次制作词云图
1.导包
from os import path
import matplotlib.pyplot as plt
import pymysql
from wordcloud import WordCloud
import numpy as np
from PIL import Image
import jieba
2.数据库连接类
class sqlHelper(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    The connection and cursor are opened in ``__init__`` and closed when the
    instance is finalized (``__del__``).
    """

    def __init__(self, host, user, password, database):
        """Open the database connection and create a cursor.

        Args:
            host: Host name passed to ``pymysql.connect``.
            user: User name passed to ``pymysql.connect``.
            password: Password passed to ``pymysql.connect``.
            database: Database name passed to ``pymysql.connect``.
        """
        # Pre-set both attributes so __del__ stays safe if connect() raises.
        self.conn = None
        self.cursor = None
        # Keyword arguments are required: PyMySQL 1.0+ made the connection
        # parameters keyword-only, so the positional form raises TypeError.
        self.conn = pymysql.connect(
            host=host,
            user=user,
            password=password,
            database=database,
            charset="utf8",
        )
        self.cursor = self.conn.cursor()

    def __del__(self):
        """Close the cursor and the connection if they were opened."""
        # getattr() guards against instances whose __init__ never completed.
        cursor = getattr(self, "cursor", None)
        conn = getattr(self, "conn", None)
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even when closing the cursor fails.
            if conn is not None:
                conn.close()

    def getAll(self, table_name):
        """Fetch every row of ``table_name``.

        Args:
            table_name: Table to read. It is interpolated directly into the
                SQL text (identifiers cannot be bound as query parameters),
                so it must come from trusted code, never from user input.

        Returns:
            Whatever ``cursor.fetchall()`` returns on success, or ``None``
            when the query raises (the error is printed, not re-raised).
        """
        sql = "SELECT * FROM %s" % table_name
        try:
            self.cursor.execute(sql)
            return self.cursor.fetchall()
        except Exception as e:
            # Best-effort by design: report the failure and signal it with None.
            print("Error:", e)
            return None
3.使用jieba+WordCloud制作词云图
def create_wordcloud(table_name, column_index=8):
    """Build, save and display a word cloud from one column of a table.

    All rows of ``table_name`` are read, the text found at ``column_index``
    in each row is concatenated, segmented with jieba, and rendered with
    WordCloud using ``logo.png`` (next to this file) as the mask. The image
    is written to ``词云图.png`` and shown with matplotlib.

    Args:
        table_name: Table to read the rows from.
        column_index: Position of the text column inside each row. The
            default of 8 is the job-requirement description field of the
            recruitment table this script was written for.

    Raises:
        ValueError: If the query failed or no row holds any text, so there
            is nothing to draw.
    """
    # NOTE(review): connection settings are hard-coded; consider moving them
    # to configuration.
    sql = sqlHelper("localhost", "root", "123456", "mydb")
    datas = sql.getAll(table_name)
    print('开始加载文本')
    # getAll() yields None after a failed query; treat that as "no rows".
    rows = datas or ()
    # Skip rows whose text field is empty or NULL, then join in one pass.
    text = "".join(row[column_index] for row in rows if row[column_index])
    text = text.replace("'", "")
    if not text:
        raise ValueError(
            "no text found in column %d of table %r" % (column_index, table_name)
        )
    # Segment the text; WordCloud expects whitespace-separated tokens.
    text = " ".join(jieba.cut(text))
    # Font and mask image are looked up in the directory of this file.
    d = path.dirname(__file__)
    # NOTE(review): consola.ttf (Consolas) presumably has no CJK glyphs, so
    # Chinese words may render as empty boxes; confirm the font choice.
    font = path.join(d, "consola.ttf")
    # The mask is loaded as an array; the context manager releases the file
    # handle as soon as the pixel data has been copied.
    with Image.open(path.join(d, "logo.png")) as image:
        background = np.array(image)
    print('加载图片成功!')
    wc = WordCloud(
        background_color="white",
        max_words=200,
        font_path=font,
        width=300,
        height=150,
        mask=background,
        max_font_size=300,
        min_font_size=20,
        margin=2,
    ).generate(text)
    # Save the generated word cloud, then display it without axes.
    wc.to_file('词云图.png')
    plt.figure()
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
# Script entry point: build the word cloud from the job_info table when this
# file is executed directly (not when it is imported).
if __name__ == "__main__":
    create_wordcloud("job_info")
4.jieba的简单使用介绍,参考:《jieba的简单使用》