实现词云图

2025-02-22 02:22:17 +08:00 · 2023-12-03 21:25:50 +08:00 · 2023-12-03 21:25:50 +08:00 · 8abd38d4bc
commit 8abd38d4bc
parent e281c08622
9 changed files with 210 additions and 12 deletions
--- a/app/DataBase/init.py
+++ b/app/DataBase/init.py
@ -13,9 +13,9 @@ from .micro_msg import MicroMsg
 # from . import output
 from .misc import Misc
 from .msg import Msg
-
+from .msg import MsgType
 misc_db = Misc()
 msg_db = Msg()
 micro_msg_db = MicroMsg()
 hard_link_db = HardLink()
-__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db']
+__all__ = ["data", 'output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db','MsgType']
--- a/app/DataBase/msg.py
+++ b/app/DataBase/msg.py
@ -1,4 +1,5 @@
 import os.path
 import random
 import sqlite3
 import threading
 import traceback
@ -27,7 +28,12 @@ def singleton(cls):
    return inner
-@singleton
+class MsgType:
    TEXT = 1
    IMAGE = 3
    EMOJI = 47
 class Msg:
    def __init__(self):
        self.DB = None
@ -35,8 +41,11 @@ class Msg:
        self.open_flag = False
        self.init_database()
-    def init_database(self):
+    def init_database(self, path=None):
        global db_path
        if not self.open_flag:
            if path:
                db_path = path
            if os.path.exists(db_path):
                self.DB = sqlite3.connect(db_path, check_same_thread=False)
                # '''创建游标'''
@ -102,6 +111,67 @@ class Msg:
        # result.sort(key=lambda x: x[5])
        return result
    def get_messages_by_type(self, username_, type_):
        """Fetch every message of one type exchanged with one contact.

        Args:
            username_: Value matched against the ``StrTalker`` column.
            type_: Value matched against the ``Type`` column.

        Returns:
            ``None`` when ``self.open_flag`` is falsy; otherwise the list of
            rows returned by the cursor, ordered by ``CreateTime``. Each row
            holds (localId, TalkerId, Type, SubType, IsSender, CreateTime,
            Status, StrContent, StrTime, MsgSvrID).
        """
        if not self.open_flag:
            return None
        query = '''
            select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
            from MSG
            where StrTalker=? and Type=?
            order by CreateTime
        '''
        params = [username_, type_]
        try:
            # The module-level lock serialises access to the shared cursor.
            lock.acquire(True)
            self.cursor.execute(query, params)
            rows = self.cursor.fetchall()
        finally:
            lock.release()
        return rows
    def get_messages_by_keyword(self, username_, keyword, num=5):
        """Sample text messages containing ``keyword`` and pair each with a reply.

        At most ``num`` matching text messages (``Type=1``) are picked at
        random. For each one, the first later text message (by ``localId``)
        sent by the opposite side is looked up as its reply.

        Args:
            username_: Value matched against the ``StrTalker`` column.
            keyword: Substring searched for in ``StrContent``.
            num: Maximum number of dialogs to return.

        Returns:
            ``None`` when ``self.open_flag`` is falsy. Otherwise a list of
            ``(message, reply)`` pairs where ``message`` is
            ``(IsSender, CreateTime, StrContent.split(keyword), StrTime)`` and
            ``reply`` is ``(IsSender, CreateTime, StrContent, StrTime)``.
            Matches that have no later reply are left out. An empty keyword
            yields an empty list.
        """
        if not self.open_flag:
            return None
        if not keyword:
            # ``str.split('')`` raises ValueError and ``like '%%'`` would match
            # every message, so an empty keyword cannot produce a dialog.
            return []
        sql = '''
            select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
            from MSG
            where StrTalker=? and Type=1 and StrContent like ?
            order by CreateTime desc
        '''
        # ``order by localId`` makes "the next reply" deterministic; without it
        # ``limit 1`` may return any later row.
        reply_sql = '''
            select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID
            from MSG
            where localId > ? and StrTalker=? and Type=1 and IsSender=?
            order by localId
            limit 1
        '''
        try:
            lock.acquire(True)
            self.cursor.execute(sql, [username_, f'%{keyword}%'])
            messages = self.cursor.fetchall()
        finally:
            lock.release()
        # Compare against ``num`` (not a hard-coded 5) so random.sample is never
        # asked for more items than exist and smaller limits are honoured.
        sample_size = max(num, 0)
        if len(messages) > sample_size:
            messages = random.sample(messages, sample_size)
        temp = []
        try:
            lock.acquire(True)
            for msg in messages:
                local_id = msg[0]
                is_send = msg[4]
                # ``1 - is_send`` selects the other side of the conversation.
                self.cursor.execute(reply_sql, [local_id, username_, 1 - is_send])
                temp.append((msg, self.cursor.fetchone()))
        finally:
            lock.release()
        res = []
        for msg1, msg2 in temp:
            if msg2 is None:
                # fetchone() returns None when nobody replied afterwards.
                continue
            res.append((
                (msg1[4], msg1[5], msg1[7].split(keyword), msg1[8]),
                (msg2[4], msg2[5], msg2[7], msg2[8])
            ))
        return res
    def close(self):
        if self.open_flag:
            try:
@ -123,4 +193,7 @@ if __name__ == '__main__':
    print(result)
    print(result[-1][0])
    local_id = result[-1][0]
    wxid = 'wxid_0o18ef858vnu22'
    pprint(msg.get_message_by_num('wxid_0o18ef858vnu22', local_id))
    print(msg.get_messages_by_keyword(wxid, '干嘛'))
    pprint(msg.get_messages_by_keyword(wxid, '干嘛')[0])
--- a/app/analysis/init.py
+++ b/app/analysis/init.py
@ -0,0 +1,4 @@
 from .analysis import Analysis
 __all__=['Analysis']
--- a/app/analysis/analysis.py
+++ b/app/analysis/analysis.py
@ -0,0 +1,66 @@
 from collections import Counter
 from app.DataBase import msg_db, MsgType
 from app.person_pc import ContactPC
 import jieba
 from pyecharts import options as opts
 from pyecharts.charts import Pie, WordCloud, Calendar, Bar, Line, Timeline, Grid
 charts_width = 800
 charts_height = 450
 wordcloud_width = 780
 wordcloud_height = 720
 def wordcloud(wxid):
    """Build the word-cloud chart data for the text messages of one contact.

    Text messages are concatenated, segmented with jieba, counted, filtered
    (single-character words and stop words are dropped) and the 100 most
    frequent words are plotted.

    Args:
        wxid: Contact identifier passed to the message database queries.

    Returns:
        A dict with:
            'chart_data': the chart options from ``dump_options_with_quotes()``,
            'keyword': the most frequent word ('' when there is none),
            'max_num': that word's count as a string ('0' when there is none),
            'dialogs': result of ``msg_db.get_messages_by_keyword`` for the
                keyword (``[]`` when there is no keyword).
    """
    # get_messages_by_type returns None while the database is not open.
    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT) or []
    # Skip rows whose content is missing so the join cannot hit a None.
    text = ''.join(row[7] for row in txt_messages if row[7])
    total_msg_len = len(text)
    # Segment the text with jieba and count word frequencies.
    word_count = Counter(jieba.cut(text))
    # Load the stop-word list.
    # NOTE(review): this path is relative to the current working directory,
    # not to this module - confirm the process is always started from a
    # sibling directory of ``data``.
    stopwords_file = '../data/stopwords.txt'
    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
        stopwords = set(stopword_file.read().splitlines())
    # Keep multi-character words that are not stop words, as (word, count).
    data = [
        (word, count)
        for word, count in word_count.items()
        if len(word) > 1 and word not in stopwords
    ]
    data.sort(key=lambda x: x[1], reverse=True)
    text_data = data[:100]
    if text_data:
        keyword, max_num = text_data[0]
        dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=5)
    else:
        # No usable words (no messages, or everything filtered out): return
        # an empty chart instead of failing on ``text_data[0]``.
        keyword, max_num, dialogs = '', 0, []
    # Build the word-cloud chart.
    w = (
        WordCloud(init_opts=opts.InitOpts(width=f"{wordcloud_width}px", height=f"{wordcloud_height}px"))
        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[20, 100])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="词云图", subtitle=f"总计{total_msg_len}字",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23)
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=False)
        )
    )
    return {
        'chart_data': w.dump_options_with_quotes(),
        'keyword': keyword,
        'max_num': str(max_num),
        'dialogs': dialogs
    }
 class Analysis:
    # Empty placeholder; this is the name re-exported by the package __init__.
    pass
 if __name__ == '__main__':
    # Manual smoke test: open the message database at a path relative to the
    # current working directory, then print the word-cloud payload for one
    # hard-coded contact id.
    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    w = wordcloud('wxid_0o18ef858vnu22')
    print(w)
--- a/app/data/stopwords.txt
+++ b/app/data/stopwords.txt
@ -1,4 +1,17 @@
 wxid
 就
 说
 啥
 好
 干
 哦
 好
 嗯
 恩
 噢
 喔
 行
 拿
 乡村
 炸弹
 腹肌
@ -2518,3 +2531,11 @@ sup
 他
 她
 它
 听
 哪
 想
 打
 🙄
 奥
 真
 旺柴
--- a/app/util/dat2pic.py
+++ b/app/util/dat2pic.py
@ -29,9 +29,9 @@ def get_code(file_path):
        code = dat_read[0] ^ pic_head[head_index]
        idf_code = dat_read[1] ^ code
        head_index = head_index + 1
-        # if idf_code == pic_head[head_index]:
+        if idf_code == pic_head[head_index]:
-        #     dat_file.close()
+            dat_file.close()
-        return head_index, code
+            return head_index, code
        head_index = head_index + 1
    dat_file.close()
    print("not jpg, png, gif")
@ -64,9 +64,8 @@ def decode_dat(file_path, out_path):
    with open(file_path, 'rb') as file_in:
        data = file_in.read()
    # 对数据进行异或加密/解密
    encrypted_data = bytes([byte ^ decode_code for byte in data])
    with open(file_outpath, 'wb') as file_out:
-        file_out.write(encrypted_data)
+        file_out.write(bytes([byte ^ decode_code for byte in data]))
    print(file_path, '->', file_outpath)
    return file_outpath
--- a/app/util/emoji.py
+++ b/app/util/emoji.py
@ -1,3 +1,13 @@
 # -*- coding: utf-8 -*-
 """
 emoji.py
 !!!声明：
 由于表情包并不属于个人，并且其可能具有版权风险，你只有浏览权没有拥有权
 另外访问腾讯API可能会给腾讯服务器造成压力
 所以禁止任何人以任何方式修改或间接修改该文件，违者后果自负
 """
 import os
 import xml.etree.ElementTree as ET
--- a/app/web_ui/web.py
+++ b/app/web_ui/web.py
@ -1,8 +1,13 @@
 import json
 from flask import Flask, render_template
 from pyecharts import options as opts
 from pyecharts.charts import Bar
 from pyecharts.globals import ThemeType
 from app.DataBase import msg_db
 from app.analysis import analysis
 app = Flask(__name__)
@ -25,7 +30,7 @@ def index():
@app.route("/index")
 def index0():
-    return render_template("index.html")
+    return render_template("index1.html")
@app.route('/home')
@ -41,7 +46,26 @@ def home():
@app.route('/message_num')
 def one():
-    return "1hello world"
+    msg_db.init_database(path='../DataBase/Msg/MSG.db')
    wxid = 'wxid_0o18ef858vnu22'
    # wxid = 'wxid_8piw6sb4hvfm22'
    wxid = 'wxid_lltzaezg38so22'
    world_cloud_data = analysis.wordcloud(wxid)
    # 创建一个简单的柱状图
    with open('message_num_test.html','w',encoding='utf-8') as f:
        f.write(render_template('message_num.html', **world_cloud_data))
    return render_template('message_num.html', **world_cloud_data)
@app.route('/test')
 def test():
    """Build a fixed demo bar chart and return its dumped chart options."""
    categories = ["A", "B", "C", "D", "E"]
    values = [5, 20, 36, 10, 75]
    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(categories)
    chart.add_yaxis("Series", values)
    chart.set_global_opts(title_opts=opts.TitleOpts(title="Flask and Pyecharts Interaction"))
    return chart.dump_options_with_quotes()
 if __name__ == "__main__":
--- a/readme.md
+++ b/readme.md
@ -288,10 +288,11 @@ python main.py
 # 🏆致谢
 * PC微信解密工具:[https://github.com/xaoyaoo/PyWxDump](https://github.com/xaoyaoo/PyWxDump)
 * 我的得力助手:[ChatGPT](https://chat.openai.com/)
 ---
-> 说明：该项目仅可用于交流学习，禁止任何非法用途，创作者不承担任何责任🙄
+> 声明：该项目有且仅有一个目的：留痕——我的数据我做主，前提是“我的数据”其次才是“我做主”，禁止任何人以任何形式将其用于任何非法用途，对于使用该程序所造成的任何后果，创作者不承担任何责任🙄
 [![Star History Chart](https://api.star-history.com/svg?repos=LC044/WeChatMsg&type=Date)](https://star-history.com/?utm_source=bestxtools.com#LC044/WeChatMsg&Date)
		`@ -0,0 +1,4 @@`

							`from .analysis import Analysis`

							`__all__=['Analysis']`
 wxid
+就
+说
+啥
+好
+干
+哦
+好
+嗯
+恩
+噢
+喔
+行
+拿
 乡村
 炸弹
 腹肌
 他
 她
 它
+听
+哪
+想
+打
+🙄
+奥
+真
+旺柴