实现词云图

This commit is contained in:
shuaikangzhou 2023-12-03 21:25:50 +08:00
parent e281c08622
commit 8abd38d4bc
9 changed files with 210 additions and 12 deletions

View File

@ -13,9 +13,9 @@ from .micro_msg import MicroMsg
# from . import output # from . import output
from .misc import Misc from .misc import Misc
from .msg import Msg from .msg import Msg
from .msg import MsgType
misc_db = Misc() misc_db = Misc()
msg_db = Msg() msg_db = Msg()
micro_msg_db = MicroMsg() micro_msg_db = MicroMsg()
hard_link_db = HardLink() hard_link_db = HardLink()
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db'] __all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType']

View File

@ -1,4 +1,5 @@
import os.path import os.path
import random
import sqlite3 import sqlite3
import threading import threading
import traceback import traceback
@ -27,7 +28,12 @@ def singleton(cls):
return inner return inner
@singleton class MsgType:
TEXT = 1
IMAGE = 3
EMOJI = 47
class Msg: class Msg:
def __init__(self): def __init__(self):
self.DB = None self.DB = None
@ -35,8 +41,11 @@ class Msg:
self.open_flag = False self.open_flag = False
self.init_database() self.init_database()
def init_database(self): def init_database(self, path=None):
global db_path
if not self.open_flag: if not self.open_flag:
if path:
db_path = path
if os.path.exists(db_path): if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False) self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标''' # '''创建游标'''
@ -102,6 +111,67 @@ class Msg:
# result.sort(key=lambda x: x[5]) # result.sort(key=lambda x: x[5])
return result return result
def get_messages_by_type(self, username_, type_):
    """Fetch every message of one type exchanged with a contact.

    Args:
        username_: Value matched against the ``StrTalker`` column.
        type_: Value matched against the ``Type`` column (for example
            ``MsgType.TEXT``).

    Returns:
        ``None`` when the database has not been opened; otherwise the list
        returned by ``fetchall()``, ordered by ``CreateTime`` ascending.
        Each row holds, in order: localId, TalkerId, Type, SubType,
        IsSender, CreateTime, Status, StrContent, StrTime (CreateTime
        formatted as a local-time string) and MsgSvrID.
    """
    if self.open_flag:
        query = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
from MSG
where StrTalker=? and Type=?
order by CreateTime
'''
        params = [username_, type_]
        try:
            # The module-level lock guards the cursor for the duration of the query.
            lock.acquire(True)
            self.cursor.execute(query, params)
            rows = self.cursor.fetchall()
        finally:
            lock.release()
        return rows
    return None
def get_messages_by_keyword(self, username_, keyword, num=5):
    """Find text messages containing a keyword, each paired with a reply.

    Text messages (``Type=1``) of the conversation whose content contains
    ``keyword`` are looked up; when more than ``num`` match, ``num`` of them
    are picked at random. For every picked message the next text message
    with a larger ``localId`` sent by the other side (``IsSender`` flipped)
    is fetched as its reply. Matches that have no such reply are skipped.

    Args:
        username_: Value matched against the ``StrTalker`` column.
        keyword: Substring searched for in ``StrContent``.
        num: Maximum number of dialogs to return. Defaults to 5.

    Returns:
        ``None`` when the database has not been opened; otherwise a list of
        ``(match, reply)`` pairs where
        ``match = (IsSender, CreateTime, StrContent.split(keyword), StrTime)``
        and ``reply = (IsSender, CreateTime, StrContent, StrTime)``.
        The list is empty when ``keyword`` is empty or nothing matches.
        Because of the random sampling the result is not deterministic.
    """
    if not self.open_flag:
        return None
    if not keyword:
        # An empty keyword would match every message and make
        # str.split('') raise ValueError below.
        return []
    sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
from MSG
where StrTalker=? and Type=1 and StrContent like ?
order by CreateTime desc
'''
    # Built once instead of on every loop iteration. The explicit
    # "order by localId" makes "limit 1" pick the nearest following message
    # rather than whichever row the engine happens to return first.
    reply_sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
from MSG
where localId > ? and StrTalker=? and Type=1 and IsSender=?
order by localId
limit 1
'''
    lock.acquire(True)
    try:
        self.cursor.execute(sql, [username_, f'%{keyword}%'])
        messages = self.cursor.fetchall()
    finally:
        lock.release()

    # Honour the caller's limit (the threshold used to be hard-coded to 5,
    # which made random.sample raise when num exceeded the match count).
    limit = max(num, 0)
    if len(messages) > limit:
        messages = random.sample(messages, limit)

    dialogs = []
    lock.acquire(True)
    try:
        for msg in messages:
            local_id = msg[0]
            is_send = msg[4]
            self.cursor.execute(reply_sql, [local_id, username_, 1 - is_send])
            reply = self.cursor.fetchone()
            # fetchone() yields None when nobody answered after this message.
            if reply is not None:
                dialogs.append((msg, reply))
    finally:
        lock.release()

    return [
        (
            (msg1[4], msg1[5], msg1[7].split(keyword), msg1[8]),
            (msg2[4], msg2[5], msg2[7], msg2[8]),
        )
        for msg1, msg2 in dialogs
    ]
def close(self): def close(self):
if self.open_flag: if self.open_flag:
try: try:
@ -123,4 +193,7 @@ if __name__ == '__main__':
print(result) print(result)
print(result[-1][0]) print(result[-1][0])
local_id = result[-1][0] local_id = result[-1][0]
wxid = 'wxid_0o18ef858vnu22'
pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id)) pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
print(msg.get_messages_by_keyword(wxid, '干嘛'))
pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0])

4
app/analysis/__init__.py Normal file
View File

@ -0,0 +1,4 @@
from .analysis import Analysis
__all__=['Analysis']

66
app/analysis/analysis.py Normal file
View File

@ -0,0 +1,66 @@
from collections import Counter
from app.DataBase import msg_db, MsgType
from app.person_pc import ContactPC
import jieba
from pyecharts import options as opts
from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid
# Default chart canvas size; not referenced by the code visible in this
# module (presumably pixels, like the word-cloud size below — TODO confirm).
charts_width = 800
charts_height = 450
# Word-cloud canvas size in pixels; wordcloud() formats these as "<n>px".
wordcloud_width = 780
wordcloud_height = 720
def wordcloud(wxid):
    """Build word-cloud chart options and sample dialogs for one contact.

    All text messages of the conversation are concatenated, segmented with
    jieba and counted. Words of length 1 and words listed in the stop-word
    file are dropped, and the 100 most frequent remaining words feed a
    pyecharts ``WordCloud``.

    Args:
        wxid: Contact identifier passed to the message database queries.

    Returns:
        A dict with the keys:
        ``chart_data`` – the chart options from ``dump_options_with_quotes()``;
        ``keyword`` – the most frequent word (``''`` when there is none);
        ``max_num`` – that word's count as a string (``'0'`` when there is none);
        ``dialogs`` – result of ``msg_db.get_messages_by_keyword`` for the
        keyword (``[]`` when there is no keyword).

    Raises:
        OSError: If the stop-word file cannot be opened. Its path is relative
            to the current working directory.
    """
    # get_messages_by_type returns None when the database is not open;
    # treat that the same as "no messages" instead of crashing.
    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) or []
    # Index 7 is StrContent; skip empty/NULL contents so join() cannot fail.
    text = ''.join(row[7] for row in txt_messages if row[7])
    total_msg_len = len(text)
    # Segment the text with jieba and count word frequencies.
    word_count = Counter(jieba.cut(text))
    # Load the stop words, one per line.
    stopwords_file = '../data/stopwords.txt'
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())
    # Keep multi-character words that are not stop words, most frequent first.
    data = [
        (word, count)
        for word, count in word_count.items()
        if len(word) > 1 and word not in stopwords
    ]
    data.sort(key=lambda x: x[1], reverse=True)
    text_data = data[:100]
    if text_data:
        keyword, max_num = text_data[0]
        dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=5)
    else:
        # Nothing survived filtering: return an empty chart instead of
        # raising IndexError on text_data[0].
        keyword, max_num, dialogs = '', 0, []
    # Build the word-cloud chart.
    w = (
        WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px"))
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="词云图", subtitle=f"总计{total_msg_len}",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    return {
        'chart_data': w.dump_options_with_quotes(),
        'keyword': keyword,
        'max_num': str(max_num),
        'dialogs': dialogs
    }
class Analysis:
    """Placeholder class with no members yet; it is the name exported by the package ``__init__``."""
    pass
if __name__ == '__main__':
    # Manual smoke test: open the message database (the path is relative to
    # the current working directory) and print the word-cloud payload for
    # one hard-coded contact.
    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    w = wordcloud('wxid_0o18ef858vnu22')
    print(w)

View File

@ -1,4 +1,17 @@
wxid wxid
乡村 乡村
炸弹 炸弹
腹肌 腹肌
@ -2518,3 +2531,11 @@ sup
🙄
旺柴

View File

@ -29,9 +29,9 @@ def get_code(file_path):
code = dat_read[0] ^ pic_head[head_index] code = dat_read[0] ^ pic_head[head_index]
idf_code = dat_read[1] ^ code idf_code = dat_read[1] ^ code
head_index = head_index + 1 head_index = head_index + 1
# if idf_code == pic_head[head_index]: if idf_code == pic_head[head_index]:
# dat_file.close() dat_file.close()
return head_index, code return head_index, code
head_index = head_index + 1 head_index = head_index + 1
dat_file.close() dat_file.close()
print("not jpg, png, gif") print("not jpg, png, gif")
@ -64,9 +64,8 @@ def decode_dat(file_path, out_path):
with open(file_path, 'rb') as file_in: with open(file_path, 'rb') as file_in:
data = file_in.read() data = file_in.read()
# 对数据进行异或加密/解密 # 对数据进行异或加密/解密
encrypted_data = bytes([byte ^ decode_code for byte in data])
with open(file_outpath, 'wb') as file_out: with open(file_outpath, 'wb') as file_out:
file_out.write(encrypted_data) file_out.write(bytes([byte ^ decode_code for byte in data]))
print(file_path, '->', file_outpath) print(file_path, '->', file_outpath)
return file_outpath return file_outpath

View File

@ -1,3 +1,13 @@
# -*- coding: utf-8 -*-
"""
emoji.py
!!!声明
由于表情包并不属于个人,并且其可能具有版权风险,你只有浏览权,没有拥有权。
另外,访问腾讯API可能会给腾讯服务器造成压力,
所以禁止任何人以任何方式修改或间接修改该文件,违者后果自负。
"""
import os import os
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET

View File

@ -1,8 +1,13 @@
import json
from flask import Flask, render_template from flask import Flask, render_template
from pyecharts import options as opts from pyecharts import options as opts
from pyecharts.charts import Bar from pyecharts.charts import Bar
from pyecharts.globals import ThemeType from pyecharts.globals import ThemeType
from app.DataBase import msg_db
from app.analysis import analysis
app = Flask(__name__) app = Flask(__name__)
@ -25,7 +30,7 @@ def index():
@app.route("/index") @app.route("/index")
def index0(): def index0():
return render_template("index.html") return render_template("index1.html")
@app.route('/home') @app.route('/home')
@ -41,7 +46,26 @@ def home():
@app.route('/message_num') @app.route('/message_num')
def one(): def one():
return "1hello world" msg_db.init_database(path='../DataBase/Msg/MSG.db')
wxid = 'wxid_0o18ef858vnu22'
# wxid = 'wxid_8piw6sb4hvfm22'
wxid = 'wxid_lltzaezg38so22'
world_cloud_data = analysis.wordcloud(wxid)
# 创建一个简单的柱状图
with open('message_num_test.html','w',encoding='utf-8') as f:
f.write(render_template('message_num.html', **world_cloud_data))
return render_template('message_num.html', **world_cloud_data)
@app.route('/test')
def test():
    """Return the options of a fixed demo bar chart for the ``/test`` route."""
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(["A", "B", "C", "D", "E"])
    chart.add_yaxis("Series", [5, 20, 36, 10, 75])
    chart.set_global_opts(title_opts=opts.TitleOpts(title="Flask and Pyecharts Interaction"))
    return chart.dump_options_with_quotes()
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -288,10 +288,11 @@ python main.py
# 🏆致谢 # 🏆致谢
* PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump) * PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
* 我的得力助手:[ChatGPT](https://chat.openai.com/)
--- ---
> 说明:该项目仅可用于交流学习,禁止任何非法用途,创作者不承担任何责任🙄 > 声明:该项目有且仅有一个目的:留痕——我的数据我做主,前提是“我的数据”其次才是“我做主”,禁止任何人以任何形式将其用于任何非法用途,对于使用该程序所造成的任何后果,创作者不承担任何责任🙄
[![Star History Chart](https://api.star-history.com/svg?repos=LC044/WeChatMsg&type=Date)](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date) [![Star History Chart](https://api.star-history.com/svg?repos=LC044/WeChatMsg&type=Date)](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)