mirror of
https://github.com/LC044/WeChatMsg
synced 2025-02-22 02:22:17 +08:00
实现词云图
This commit is contained in:
parent
e281c08622
commit
8abd38d4bc
@ -13,9 +13,9 @@ from .micro_msg import MicroMsg
|
|||||||
# from . import output
|
# from . import output
|
||||||
from .misc import Misc
|
from .misc import Misc
|
||||||
from .msg import Msg
|
from .msg import Msg
|
||||||
|
from .msg import MsgType
|
||||||
misc_db = Misc()
|
misc_db = Misc()
|
||||||
msg_db = Msg()
|
msg_db = Msg()
|
||||||
micro_msg_db = MicroMsg()
|
micro_msg_db = MicroMsg()
|
||||||
hard_link_db = HardLink()
|
hard_link_db = HardLink()
|
||||||
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db']
|
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType']
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import os.path
|
import os.path
|
||||||
|
import random
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import threading
|
import threading
|
||||||
import traceback
|
import traceback
|
||||||
@ -27,7 +28,12 @@ def singleton(cls):
|
|||||||
return inner
|
return inner
|
||||||
|
|
||||||
|
|
||||||
@singleton
|
class MsgType:
    """Named integer codes for message types.

    The values are passed as the ``Type`` query parameter of the MSG table
    (see ``Msg.get_messages_by_type``).
    """

    # Plain text message (the same value is hard-coded as ``Type=1`` in the
    # keyword search query).
    TEXT = 1
    # Image message.
    IMAGE = 3
    # Emoji / sticker message.
    EMOJI = 47
|
||||||
|
|
||||||
|
|
||||||
class Msg:
|
class Msg:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.DB = None
|
self.DB = None
|
||||||
@ -35,8 +41,11 @@ class Msg:
|
|||||||
self.open_flag = False
|
self.open_flag = False
|
||||||
self.init_database()
|
self.init_database()
|
||||||
|
|
||||||
def init_database(self):
|
def init_database(self, path=None):
|
||||||
|
global db_path
|
||||||
if not self.open_flag:
|
if not self.open_flag:
|
||||||
|
if path:
|
||||||
|
db_path = path
|
||||||
if os.path.exists(db_path):
|
if os.path.exists(db_path):
|
||||||
self.DB = sqlite3.connect(db_path, check_same_thread=False)
|
self.DB = sqlite3.connect(db_path, check_same_thread=False)
|
||||||
# '''创建游标'''
|
# '''创建游标'''
|
||||||
@ -102,6 +111,67 @@ class Msg:
|
|||||||
# result.sort(key=lambda x: x[5])
|
# result.sort(key=lambda x: x[5])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def get_messages_by_type(self, username_, type_):
    """Return all messages of one type exchanged with one contact.

    Args:
        username_: Value matched against the ``StrTalker`` column.
        type_: Value matched against the ``Type`` column
            (for example ``MsgType.TEXT``).

    Returns:
        ``None`` when the database is not open. Otherwise a list of row
        tuples ordered by ``CreateTime`` ascending, each laid out as
        ``(localId, TalkerId, Type, SubType, IsSender, CreateTime, Status,
        StrContent, StrTime, MsgSvrID)`` where ``StrTime`` is ``CreateTime``
        formatted as a local ``YYYY-MM-DD HH:MM:SS`` string.
    """
    if not self.open_flag:
        return None
    query = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where StrTalker=? and Type=?
        order by CreateTime
    '''
    # Acquire before entering the try block: if acquiring fails, the
    # finally clause must not release a lock that was never taken.
    lock.acquire(True)
    try:
        self.cursor.execute(query, [username_, type_])
        return self.cursor.fetchall()
    finally:
        lock.release()
|
||||||
|
|
||||||
|
def get_messages_by_keyword(self, username_, keyword, num=5):
    """Return sample dialogs in which a text message contains ``keyword``.

    Text messages (``Type=1``) of the contact whose ``StrContent`` contains
    ``keyword`` are looked up, newest first. If more than ``num`` match, a
    random sample of ``num`` of them is kept. Each kept message is paired
    with the next text message (smallest larger ``localId``) sent by the
    opposite side (``IsSender`` flipped).

    Args:
        username_: Value matched against the ``StrTalker`` column.
        keyword: Substring searched for with SQL ``LIKE``.
        num: Maximum number of matching messages to keep.

    Returns:
        ``None`` when the database is not open. Otherwise a list of
        ``(message, reply)`` pairs where ``message`` is
        ``(IsSender, CreateTime, StrContent.split(keyword), StrTime)`` and
        ``reply`` is ``(IsSender, CreateTime, StrContent, StrTime)``.
        Matching messages that have no reply are skipped, so the list may
        hold fewer than ``num`` pairs. An empty ``keyword`` yields ``[]``.
    """
    if not self.open_flag:
        return None
    if not keyword:
        # An empty keyword would match every message and str.split('')
        # raises ValueError, so there is nothing sensible to return.
        return []
    match_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where StrTalker=? and Type=1 and StrContent like ?
        order by CreateTime desc
    '''
    # "order by localId" makes "limit 1" deterministically pick the closest
    # following message instead of relying on an unspecified row order.
    reply_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where localId > ? and StrTalker=? and Type=1 and IsSender=?
        order by localId
        limit 1
    '''
    # Acquire outside the try blocks so a failed acquire never reaches
    # release() on a lock that is not held.
    lock.acquire(True)
    try:
        self.cursor.execute(match_sql, [username_, f'%{keyword}%'])
        messages = self.cursor.fetchall()
    finally:
        lock.release()

    # Compare against ``num`` (not a fixed 5) so random.sample is never asked
    # for more items than exist and a smaller ``num`` is honoured.
    if len(messages) > num:
        messages = random.sample(messages, max(num, 0))

    dialogs = []
    lock.acquire(True)
    try:
        for message in messages:
            self.cursor.execute(reply_sql, [message[0], username_, 1 - message[4]])
            reply = self.cursor.fetchone()
            if reply is None:
                # No later message from the other side: not a dialog.
                continue
            dialogs.append((
                (message[4], message[5], message[7].split(keyword), message[8]),
                (reply[4], reply[5], reply[7], reply[8]),
            ))
    finally:
        lock.release()
    return dialogs
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.open_flag:
|
if self.open_flag:
|
||||||
try:
|
try:
|
||||||
@ -123,4 +193,7 @@ if __name__ == '__main__':
|
|||||||
print(result)
|
print(result)
|
||||||
print(result[-1][0])
|
print(result[-1][0])
|
||||||
local_id = result[-1][0]
|
local_id = result[-1][0]
|
||||||
|
wxid = 'wxid_0o18ef858vnu22'
|
||||||
pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
|
pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
|
||||||
|
print(msg.get_messages_by_keyword(wxid, '干嘛'))
|
||||||
|
pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0])
|
||||||
|
4
app/analysis/__init__.py
Normal file
4
app/analysis/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
from .analysis import Analysis
|
||||||
|
|
||||||
|
__all__=['Analysis']
|
66
app/analysis/analysis.py
Normal file
66
app/analysis/analysis.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from app.DataBase import msg_db, MsgType
|
||||||
|
from app.person_pc import ContactPC
|
||||||
|
import jieba
|
||||||
|
from pyecharts import options as opts
|
||||||
|
from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid
|
||||||
|
|
||||||
|
# Pixel size for general charts (not referenced by the code visible here).
charts_width = 800
charts_height = 450
# Pixel size handed to the WordCloud chart's InitOpts in wordcloud().
wordcloud_width = 780
wordcloud_height = 720
|
||||||
|
|
||||||
|
|
||||||
|
def wordcloud(wxid):
    """Build word-cloud chart data for one contact's text messages.

    All text messages of ``wxid`` are segmented with jieba, single-character
    tokens and stopwords are dropped, and the 100 most frequent words are
    fed into a pyecharts ``WordCloud``.

    Args:
        wxid: Contact identifier passed to the message database queries.

    Returns:
        A dict with the keys
        ``chart_data`` (chart options from ``dump_options_with_quotes()``),
        ``keyword`` (the most frequent word, ``''`` if there is none),
        ``max_num`` (its count as a string, ``'0'`` if there is none) and
        ``dialogs`` (result of ``msg_db.get_messages_by_keyword`` for that
        word, ``[]`` if there is none).
    """
    # get_messages_by_type returns None while the database is closed.
    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) or []
    contents = [row[7] or '' for row in txt_messages]
    total_msg_len = sum(len(content) for content in contents)

    # Segment message by message: concatenating everything first would let
    # jieba build words that span the end of one message and the start of
    # the next.
    word_count = Counter()
    for content in contents:
        word_count.update(jieba.cut(content))

    # Stopword list, one word per line; the path is resolved relative to the
    # current working directory.
    stopwords_file = '../data/stopwords.txt'
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())

    # Most frequent first; the sort is stable, so ties keep first-seen order.
    ranked = sorted(
        (
            (word, count)
            for word, count in word_count.items()
            if len(word) > 1 and word not in stopwords
        ),
        key=lambda item: item[1],
        reverse=True,
    )
    text_data = ranked[:100]

    if text_data:
        keyword, max_num = text_data[0]
        dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=5)
    else:
        # No usable words (no messages, or everything filtered out).
        keyword, max_num, dialogs = '', 0, []

    chart = (
        WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px"))
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="词云图", subtitle=f"总计{total_msg_len}字",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    return {
        'chart_data': chart.dump_options_with_quotes(),
        'keyword': keyword,
        'max_num': str(max_num),
        'dialogs': dialogs
    }
|
||||||
|
|
||||||
|
|
||||||
|
class Analysis:
    """Empty placeholder class; it defines no attributes or methods yet."""
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke run: open the message database at a path relative to the
    # current working directory, then print the word-cloud payload for one
    # hard-coded contact.
    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    print(wordcloud('wxid_0o18ef858vnu22'))
|
@ -1,4 +1,17 @@
|
|||||||
wxid
|
wxid
|
||||||
|
就
|
||||||
|
说
|
||||||
|
啥
|
||||||
|
好
|
||||||
|
干
|
||||||
|
哦
|
||||||
|
好
|
||||||
|
嗯
|
||||||
|
恩
|
||||||
|
噢
|
||||||
|
喔
|
||||||
|
行
|
||||||
|
拿
|
||||||
乡村
|
乡村
|
||||||
炸弹
|
炸弹
|
||||||
腹肌
|
腹肌
|
||||||
@ -2518,3 +2531,11 @@ sup
|
|||||||
他
|
他
|
||||||
她
|
她
|
||||||
它
|
它
|
||||||
|
听
|
||||||
|
哪
|
||||||
|
想
|
||||||
|
打
|
||||||
|
🙄
|
||||||
|
奥
|
||||||
|
真
|
||||||
|
旺柴
|
@ -29,9 +29,9 @@ def get_code(file_path):
|
|||||||
code = dat_read[0] ^ pic_head[head_index]
|
code = dat_read[0] ^ pic_head[head_index]
|
||||||
idf_code = dat_read[1] ^ code
|
idf_code = dat_read[1] ^ code
|
||||||
head_index = head_index + 1
|
head_index = head_index + 1
|
||||||
# if idf_code == pic_head[head_index]:
|
if idf_code == pic_head[head_index]:
|
||||||
# dat_file.close()
|
dat_file.close()
|
||||||
return head_index, code
|
return head_index, code
|
||||||
head_index = head_index + 1
|
head_index = head_index + 1
|
||||||
dat_file.close()
|
dat_file.close()
|
||||||
print("not jpg, png, gif")
|
print("not jpg, png, gif")
|
||||||
@ -64,9 +64,8 @@ def decode_dat(file_path, out_path):
|
|||||||
with open(file_path, 'rb') as file_in:
|
with open(file_path, 'rb') as file_in:
|
||||||
data = file_in.read()
|
data = file_in.read()
|
||||||
# 对数据进行异或加密/解密
|
# 对数据进行异或加密/解密
|
||||||
encrypted_data = bytes([byte ^ decode_code for byte in data])
|
|
||||||
with open(file_outpath, 'wb') as file_out:
|
with open(file_outpath, 'wb') as file_out:
|
||||||
file_out.write(encrypted_data)
|
file_out.write(bytes([byte ^ decode_code for byte in data]))
|
||||||
print(file_path, '->', file_outpath)
|
print(file_path, '->', file_outpath)
|
||||||
return file_outpath
|
return file_outpath
|
||||||
|
|
||||||
|
@ -1,3 +1,13 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
emoji.py
|
||||||
|
|
||||||
|
!!!声明:
|
||||||
|
由于表情包并不属于个人,并且其可能具有版权风险,你只有浏览权没有拥有权
|
||||||
|
另外访问腾讯API可能会给腾讯服务器造成压力
|
||||||
|
所以禁止任何人以任何方式修改或间接修改该文件,违者后果自负
|
||||||
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
@ -1,8 +1,13 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
from flask import Flask, render_template
|
from flask import Flask, render_template
|
||||||
from pyecharts import options as opts
|
from pyecharts import options as opts
|
||||||
from pyecharts.charts import Bar
|
from pyecharts.charts import Bar
|
||||||
from pyecharts.globals import ThemeType
|
from pyecharts.globals import ThemeType
|
||||||
|
|
||||||
|
from app.DataBase import msg_db
|
||||||
|
from app.analysis import analysis
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
@ -25,7 +30,7 @@ def index():
|
|||||||
|
|
||||||
@app.route("/index")
|
@app.route("/index")
|
||||||
def index0():
|
def index0():
|
||||||
return render_template("index.html")
|
return render_template("index1.html")
|
||||||
|
|
||||||
|
|
||||||
@app.route('/home')
|
@app.route('/home')
|
||||||
@ -41,7 +46,26 @@ def home():
|
|||||||
|
|
||||||
@app.route('/message_num')
|
@app.route('/message_num')
|
||||||
def one():
|
def one():
|
||||||
return "1hello world"
|
msg_db.init_database(path='../DataBase/Msg/MSG.db')
|
||||||
|
wxid = 'wxid_0o18ef858vnu22'
|
||||||
|
# wxid = 'wxid_8piw6sb4hvfm22'
|
||||||
|
wxid = 'wxid_lltzaezg38so22'
|
||||||
|
world_cloud_data = analysis.wordcloud(wxid)
|
||||||
|
# 创建一个简单的柱状图
|
||||||
|
with open('message_num_test.html','w',encoding='utf-8') as f:
|
||||||
|
f.write(render_template('message_num.html', **world_cloud_data))
|
||||||
|
return render_template('message_num.html', **world_cloud_data)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/test')
def test():
    """Return the options of a fixed demo bar chart.

    The chart has five categories ("A" to "E") and one series named
    "Series"; the response body is whatever
    ``Bar.dump_options_with_quotes()`` produces.
    """
    categories = ["A", "B", "C", "D", "E"]
    values = [5, 20, 36, 10, 75]
    heading = opts.TitleOpts(title="Flask and Pyecharts Interaction")

    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    # Each call's return value is re-bound, exactly as the chained form did.
    chart = chart.add_xaxis(categories)
    chart = chart.add_yaxis("Series", values)
    chart = chart.set_global_opts(title_opts=heading)
    return chart.dump_options_with_quotes()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -288,10 +288,11 @@ python main.py
|
|||||||
# 🏆致谢
|
# 🏆致谢
|
||||||
|
|
||||||
* PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
|
* PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
|
||||||
|
* 我的得力助手:[ChatGPT](https://chat.openai.com/)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
> 说明:该项目仅可用于交流学习,禁止任何非法用途,创作者不承担任何责任🙄
|
> 声明:该项目有且仅有一个目的:留痕——我的数据我做主,前提是“我的数据”其次才是“我做主”,禁止任何人以任何形式将其用于任何非法用途,对于使用该程序所造成的任何后果,创作者不承担任何责任🙄
|
||||||
|
|
||||||
[](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)
|
[](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user