mirror of
https://github.com/LC044/WeChatMsg
synced 2025-02-21 01:52:35 +08:00
实现词云图
This commit is contained in:
parent
e281c08622
commit
8abd38d4bc
@ -13,9 +13,9 @@ from .micro_msg import MicroMsg
|
||||
# from . import output
|
||||
from .misc import Misc
|
||||
from .msg import Msg
|
||||
|
||||
from .msg import MsgType
|
||||
misc_db = Misc()
|
||||
msg_db = Msg()
|
||||
micro_msg_db = MicroMsg()
|
||||
hard_link_db = HardLink()
|
||||
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db']
|
||||
__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType']
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os.path
|
||||
import random
|
||||
import sqlite3
|
||||
import threading
|
||||
import traceback
|
||||
@ -27,7 +28,12 @@ def singleton(cls):
|
||||
return inner
|
||||
|
||||
|
||||
@singleton
|
||||
class MsgType:
    """Numeric message-type codes used when querying the ``MSG`` table.

    The values are passed as the ``Type`` filter of message queries
    (e.g. ``get_messages_by_type(wxid, MsgType.TEXT)``).
    """

    TEXT = 1  # text messages; the keyword search filters on the same value (Type=1)
    IMAGE = 3  # presumably picture messages, going by the name -- TODO confirm
    EMOJI = 47  # presumably sticker/emoji messages, going by the name -- TODO confirm
|
||||
|
||||
|
||||
class Msg:
|
||||
def __init__(self):
|
||||
self.DB = None
|
||||
@ -35,8 +41,11 @@ class Msg:
|
||||
self.open_flag = False
|
||||
self.init_database()
|
||||
|
||||
def init_database(self):
|
||||
def init_database(self, path=None):
|
||||
global db_path
|
||||
if not self.open_flag:
|
||||
if path:
|
||||
db_path = path
|
||||
if os.path.exists(db_path):
|
||||
self.DB = sqlite3.connect(db_path, check_same_thread=False)
|
||||
# '''创建游标'''
|
||||
@ -102,6 +111,67 @@ class Msg:
|
||||
# result.sort(key=lambda x: x[5])
|
||||
return result
|
||||
|
||||
def get_messages_by_type(self, username_, type_):
    """Return every message of one type exchanged with a contact.

    Args:
        username_: Value matched against the ``StrTalker`` column.
        type_: Value matched against the ``Type`` column
            (for example ``MsgType.TEXT``).

    Returns:
        ``None`` when the database is not open. Otherwise a list of row
        tuples ordered by ``CreateTime`` ascending, each laid out as
        ``(localId, TalkerId, Type, SubType, IsSender, CreateTime, Status,
        StrContent, StrTime, MsgSvrID)`` where ``StrTime`` is ``CreateTime``
        formatted as a local ``YYYY-mm-dd HH:MM:SS`` string.
    """
    if not self.open_flag:
        return None
    sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where StrTalker=? and Type=?
        order by CreateTime
    '''
    # The context manager releases the lock only if it was really acquired,
    # unlike an acquire() placed inside a try/finally block.
    with lock:
        self.cursor.execute(sql, [username_, type_])
        return self.cursor.fetchall()
|
||||
|
||||
def get_messages_by_keyword(self, username_, keyword, num=5):
    """Pick text messages containing a keyword, each paired with its reply.

    Text messages (``Type=1``) of the contact whose content matches
    ``%keyword%`` are looked up; when more than ``num`` match, ``num`` of
    them are chosen at random. For each chosen message the reply is the
    text message with the smallest larger ``localId`` in the same
    conversation whose ``IsSender`` flag is the opposite one.

    Args:
        username_: Value matched against the ``StrTalker`` column.
        keyword: Substring searched for in ``StrContent``.
        num: Maximum number of dialogs to return.

    Returns:
        ``None`` when the database is not open. Otherwise a list of
        ``(message, reply)`` pairs where ``message`` is
        ``(IsSender, CreateTime, StrContent.split(keyword), StrTime)`` and
        ``reply`` is ``(IsSender, CreateTime, StrContent, StrTime)``.
        Messages that have no reply are left out, and an empty keyword
        yields an empty list.
    """
    if not self.open_flag:
        return None
    if not keyword:
        # '%%' would match every row and str.split('') raises ValueError.
        return []
    search_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where StrTalker=? and Type=1 and StrContent like ?
        order by CreateTime desc
    '''
    # "order by localId" makes "limit 1" deterministically pick the next
    # message instead of whichever row the query plan happens to visit first.
    reply_sql = '''
        select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
        from MSG
        where localId > ? and StrTalker=? and Type=1 and IsSender=?
        order by localId
        limit 1
    '''
    with lock:
        self.cursor.execute(search_sql, [username_, f'%{keyword}%'])
        messages = self.cursor.fetchall()
    # Compare against num (not a fixed 5) so random.sample never asks for
    # more items than are available and smaller limits are honoured.
    if len(messages) > num:
        messages = random.sample(messages, num)
    pairs = []
    with lock:
        for msg in messages:
            local_id, is_send = msg[0], msg[4]
            self.cursor.execute(reply_sql, [local_id, username_, 1 - is_send])
            pairs.append((msg, self.cursor.fetchone()))
    res = []
    for msg1, msg2 in pairs:
        if msg2 is None:
            # No later message from the other side: nothing to show as a reply.
            continue
        res.append((
            (msg1[4], msg1[5], msg1[7].split(keyword), msg1[8]),
            (msg2[4], msg2[5], msg2[7], msg2[8]),
        ))
    return res
|
||||
|
||||
def close(self):
|
||||
if self.open_flag:
|
||||
try:
|
||||
@ -123,4 +193,7 @@ if __name__ == '__main__':
|
||||
print(result)
|
||||
print(result[-1][0])
|
||||
local_id = result[-1][0]
|
||||
wxid = 'wxid_0o18ef858vnu22'
|
||||
pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
|
||||
print(msg.get_messages_by_keyword(wxid, '干嘛'))
|
||||
pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0])
|
||||
|
4
app/analysis/__init__.py
Normal file
4
app/analysis/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
from .analysis import Analysis
|
||||
|
||||
__all__=['Analysis']
|
66
app/analysis/analysis.py
Normal file
66
app/analysis/analysis.py
Normal file
@ -0,0 +1,66 @@
|
||||
from collections import Counter
|
||||
|
||||
from app.DataBase import msg_db, MsgType
|
||||
from app.person_pc import ContactPC
|
||||
import jieba
|
||||
from pyecharts import options as opts
|
||||
from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid
|
||||
|
||||
charts_width = 800
|
||||
charts_height = 450
|
||||
wordcloud_width = 780
|
||||
wordcloud_height = 720
|
||||
|
||||
|
||||
def wordcloud(wxid):
    """Build word-cloud chart data from the text messages of one contact.

    All text messages of ``wxid`` are concatenated, segmented with jieba and
    counted. Single-character tokens and stop words are dropped, and the 100
    most frequent remaining words feed a pyecharts ``WordCloud``.

    Args:
        wxid: Contact identifier passed to the message database queries.

    Returns:
        A dict with the keys ``chart_data`` (the chart options dumped by
        ``dump_options_with_quotes()``), ``keyword`` (the most frequent
        word, ``''`` when there is none), ``max_num`` (its count as a
        string) and ``dialogs`` (sample dialogs containing the keyword,
        ``[]`` when there is no keyword).
    """
    # get_messages_by_type returns None while the database is closed.
    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) or []
    # Column 7 is StrContent; rows without content are skipped so join cannot fail.
    text = ''.join(row[7] for row in txt_messages if row[7])
    total_msg_len = len(text)
    # Segment with jieba and count word frequencies.
    word_count = Counter(jieba.cut(text))
    # Load the stop words (the path is resolved against the current working directory).
    stopwords_file = '../data/stopwords.txt'
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())
    # Keep multi-character words that are not stop words, most frequent first.
    data = [
        (word, count)
        for word, count in word_count.items()
        if len(word) > 1 and word not in stopwords
    ]
    data.sort(key=lambda item: item[1], reverse=True)
    text_data = data[:100]

    if text_data:
        keyword, max_num = text_data[0]
        dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=5)
    else:
        # Nothing survived filtering: return an empty chart instead of raising IndexError.
        keyword, max_num, dialogs = '', 0, []

    # Build the word-cloud chart.
    w = (
        WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px"))
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="词云图", subtitle=f"总计{total_msg_len}字",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    return {
        'chart_data': w.dump_options_with_quotes(),
        'keyword': keyword,
        'max_num': str(max_num),
        'dialogs': dialogs
    }
|
||||
|
||||
|
||||
class Analysis:
    """Empty placeholder class; it defines no attributes or methods yet."""
    pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual run: open the message database from a path relative to the
    # working directory, build the word-cloud data for one hard-coded
    # contact and print the resulting dict.
    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    w = wordcloud('wxid_0o18ef858vnu22')
    print(w)
|
@ -1,4 +1,17 @@
|
||||
wxid
|
||||
就
|
||||
说
|
||||
啥
|
||||
好
|
||||
干
|
||||
哦
|
||||
好
|
||||
嗯
|
||||
恩
|
||||
噢
|
||||
喔
|
||||
行
|
||||
拿
|
||||
乡村
|
||||
炸弹
|
||||
腹肌
|
||||
@ -2518,3 +2531,11 @@ sup
|
||||
他
|
||||
她
|
||||
它
|
||||
听
|
||||
哪
|
||||
想
|
||||
打
|
||||
🙄
|
||||
奥
|
||||
真
|
||||
旺柴
|
@ -29,9 +29,9 @@ def get_code(file_path):
|
||||
code = dat_read[0] ^ pic_head[head_index]
|
||||
idf_code = dat_read[1] ^ code
|
||||
head_index = head_index + 1
|
||||
# if idf_code == pic_head[head_index]:
|
||||
# dat_file.close()
|
||||
return head_index, code
|
||||
if idf_code == pic_head[head_index]:
|
||||
dat_file.close()
|
||||
return head_index, code
|
||||
head_index = head_index + 1
|
||||
dat_file.close()
|
||||
print("not jpg, png, gif")
|
||||
@ -64,9 +64,8 @@ def decode_dat(file_path, out_path):
|
||||
with open(file_path, 'rb') as file_in:
|
||||
data = file_in.read()
|
||||
# 对数据进行异或加密/解密
|
||||
encrypted_data = bytes([byte ^ decode_code for byte in data])
|
||||
with open(file_outpath, 'wb') as file_out:
|
||||
file_out.write(encrypted_data)
|
||||
file_out.write(bytes([byte ^ decode_code for byte in data]))
|
||||
print(file_path, '->', file_outpath)
|
||||
return file_outpath
|
||||
|
||||
|
@ -1,3 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
emoji.py
|
||||
|
||||
!!!声明:
|
||||
由于表情包并不属于个人,并且其可能具有版权风险,你只有浏览权没有拥有权
|
||||
另外访问腾讯API可能会给腾讯服务器造成压力
|
||||
所以禁止任何人以任何方式修改或间接修改该文件,违者后果自负
|
||||
"""
|
||||
|
||||
import os
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
@ -1,8 +1,13 @@
|
||||
import json
|
||||
|
||||
from flask import Flask, render_template
|
||||
from pyecharts import options as opts
|
||||
from pyecharts.charts import Bar
|
||||
from pyecharts.globals import ThemeType
|
||||
|
||||
from app.DataBase import msg_db
|
||||
from app.analysis import analysis
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@ -25,7 +30,7 @@ def index():
|
||||
|
||||
@app.route("/index")
|
||||
def index0():
|
||||
return render_template("index.html")
|
||||
return render_template("index1.html")
|
||||
|
||||
|
||||
@app.route('/home')
|
||||
@ -41,7 +46,26 @@ def home():
|
||||
|
||||
@app.route('/message_num')
|
||||
def one():
|
||||
return "1hello world"
|
||||
msg_db.init_database(path='../DataBase/Msg/MSG.db')
|
||||
wxid = 'wxid_0o18ef858vnu22'
|
||||
# wxid = 'wxid_8piw6sb4hvfm22'
|
||||
wxid = 'wxid_lltzaezg38so22'
|
||||
world_cloud_data = analysis.wordcloud(wxid)
|
||||
# 创建一个简单的柱状图
|
||||
with open('message_num_test.html','w',encoding='utf-8') as f:
|
||||
f.write(render_template('message_num.html', **world_cloud_data))
|
||||
return render_template('message_num.html', **world_cloud_data)
|
||||
|
||||
|
||||
@app.route('/test')
def test():
    """Return the dumped options of a fixed five-bar sample chart."""
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(["A", "B", "C", "D", "E"])
    chart.add_yaxis("Series", [5, 20, 36, 10, 75])
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="Flask and Pyecharts Interaction")
    )
    return chart.dump_options_with_quotes()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -288,10 +288,11 @@ python main.py
|
||||
# 🏆致谢
|
||||
|
||||
* PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
|
||||
* 我的得力助手:[ChatGPT](https://chat.openai.com/)
|
||||
|
||||
---
|
||||
|
||||
> 说明:该项目仅可用于交流学习,禁止任何非法用途,创作者不承担任何责任🙄
|
||||
> 声明:该项目有且仅有一个目的:留痕——我的数据我做主,前提是“我的数据”其次才是“我做主”,禁止任何人以任何形式将其用于任何非法用途,对于使用该程序所造成的任何后果,创作者不承担任何责任🙄
|
||||
|
||||
[data:image/s3,"s3://crabby-images/cc172/cc172ac22cf3c28ef13617be9d5ce6867a917253" alt="Star History Chart"](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user