From 9e82782ed166144e95c072cc8e93412af0378c14 Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Wed, 20 Dec 2023 22:23:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B9=B4=E5=BA=A6=E6=8A=A5=E5=91=8A=E5=9C=A3?= =?UTF-8?q?=E8=AF=9E=E7=89=B9=E5=88=AB=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/__init__.py | 2 +- app/DataBase/msg.py | 183 ++++++++++------ app/analysis/analysis.py | 75 ++++++- app/person.py | 12 ++ app/ui/contact/contactInfo.py | 5 +- app/util/emoji.py | 30 +++ app/web_ui/templates/christmas.html | 324 ++++++++++++++++++++++++++++ app/web_ui/web.py | 97 ++++++--- main.py | 9 +- 9 files changed, 633 insertions(+), 104 deletions(-) create mode 100644 app/web_ui/templates/christmas.html diff --git a/app/DataBase/__init__.py b/app/DataBase/__init__.py index d805bc6..0207b6f 100644 --- a/app/DataBase/__init__.py +++ b/app/DataBase/__init__.py @@ -37,4 +37,4 @@ def init_db(): media_msg_db.init_database() -__all__ = ['output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db', 'MsgType', "media_msg_db"] +__all__ = ['output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db', 'MsgType', "media_msg_db","close_db"] diff --git a/app/DataBase/msg.py b/app/DataBase/msg.py index 7ae8daa..db3f09b 100644 --- a/app/DataBase/msg.py +++ b/app/DataBase/msg.py @@ -147,47 +147,53 @@ class Msg: # result.sort(key=lambda x: x[5]) return result - def get_messages_by_type(self, username_, type_, is_Annual_report_=False, year_='2023'): + def get_messages_by_type(self, username_, type_, year_='all'): if not self.open_flag: return None - if is_Annual_report_: + if year_ == 'all': sql = ''' - select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent - from MSG - where StrTalker=? and Type=? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? - order by CreateTime - ''' + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent + from MSG + where StrTalker=? and Type=? + order by CreateTime + ''' + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, type_]) + finally: + lock.release() + result = self.cursor.fetchall() else: sql = ''' - select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent - from MSG - where StrTalker=? and Type=? - order by CreateTime - ''' - try: - lock.acquire(True) - if is_Annual_report_: + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent + from MSG + where StrTalker=? and Type=? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? + order by CreateTime + ''' + try: + lock.acquire(True) self.cursor.execute(sql, [username_, type_, year_]) - else: - self.cursor.execute(sql, [username_, type_]) - result = self.cursor.fetchall() - finally: - lock.release() + finally: + lock.release() + result = self.cursor.fetchall() return result - def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10): + def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, year_='all'): if not self.open_flag: return None - sql = ''' + sql = f''' select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra from MSG where StrTalker=? and Type=1 and LENGTH(StrContent) list: """ 统计自己发的各类型消息条数,按条数降序,精确到subtype\n @@ -401,6 +448,28 @@ class Msg: lock.release() return result + def get_messages_number(self, username_, year_="all") -> int: + sql = f""" + SELECT Count(MsgSvrID) + from MSG + where StrTalker = ? + {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""} + group by type, subtype + order by Count(MsgSvrID) desc + """ + result = None + if not self.open_flag: + return None + try: + lock.acquire(True) + self.cursor.execute(sql, [username_,year_] if year_ != "all" else [username_]) + result = self.cursor.fetchone() + except sqlite3.DatabaseError: + logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') + finally: + lock.release() + return result[0] if result else 0 + def get_chatted_top_contacts(self, year_="all", contain_chatroom=False, top_n=10) -> list: """ 统计聊天最多的 n 个联系人(默认不包含群组),按条数降序\n @@ -429,7 +498,7 @@ class Msg: finally: lock.release() return result - + def get_send_messages_length(self, year_="all") -> int: """ 统计自己总共发消息的字数,包含type=1的文本和type=49,subtype=57里面自己发的文本 @@ -539,15 +608,5 @@ if __name__ == '__main__': db_path = "./app/database/Msg/MSG.db" msg = Msg() msg.init_database() - print(msg.get_chatted_top_contacts(year_="2023")) - print(msg.get_chatted_top_contacts(year_="2023", contain_chatroom=True, top_n=20)) - # result = msg.get_message_by_num("wxid_vtz9jk9ulzjt22", 9999999) - # print(result) - # result = msg.get_messages_by_type("wxid_vtz9jk9ulzjt22", 49) - # for r in result: - # type_ = r[2] - # sub_type = r[3] - # if type_ == 49 and sub_type == 57: - # print(r) - # print(r[-1]) - # break + print(msg.get_latest_time_of_message('wxid_0o18ef858vnu22', year_='2023')) + print(msg.get_messages_number('wxid_0o18ef858vnu22', year_='2023')) diff --git a/app/analysis/analysis.py b/app/analysis/analysis.py index 4de8f2d..71e68ea 100644 --- a/app/analysis/analysis.py +++ b/app/analysis/analysis.py @@ -20,7 +20,7 @@ wordcloud_height = 720 def wordcloud(wxid, is_Annual_report=False, year='2023', who='1'): import jieba - txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, is_Annual_report, year) + txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year) if not txt_messages: return { 'chart_data': None, @@ -72,9 +72,70 @@ def wordcloud(wxid, is_Annual_report=False, year='2023', who='1'): } -def calendar_chart(wxid, is_Annual_report=False, year='2023'): - calendar_data = msg_db.get_messages_by_days(wxid, is_Annual_report, year) +def wordcloud_christmas(wxid, year='2023'): + import jieba + txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year) + if not txt_messages: + return { + 'chart_data': None, + 'keyword': "没有聊天你想分析啥", + 'max_num': '0', + 'dialogs': [], + 'total_num': 0, + } + text = ''.join(map(lambda x: x[7], txt_messages)) + total_msg_len = len(text) + # 使用jieba进行分词,并加入停用词 + words = jieba.cut(text) + # 统计词频 + word_count = Counter(words) + # 过滤停用词 + stopwords_file = './app/data/stopwords.txt' + with open(stopwords_file, "r", encoding="utf-8") as stopword_file: + stopwords1 = set(stopword_file.read().splitlines()) + file = QFile(':/data/stopwords.txt') + stopwords = set() + if file.open(QIODevice.ReadOnly | QIODevice.Text): + stream = QTextStream(file) + stream.setCodec('utf-8') + content = stream.readAll() + file.close() + stopwords = set(content.splitlines()) + stopwords = stopwords.union(stopwords1) + filtered_word_count = {word: count for word, count in word_count.items() if len(word) > 1 and word not in stopwords} + # 转换为词云数据格式 + data = [(word, count) for word, count in filtered_word_count.items()] + # text_data = data + data.sort(key=lambda x: x[1], reverse=True) + + text_data = data[:100] if len(data) > 100 else data + # 创建词云图 + keyword, max_num = text_data[0] + w = ( + WordCloud() + .add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40]) + ) + # return w.render_embed() + dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12,year_=year) + + return { + 'wordcloud_chart_data': w.dump_options_with_quotes(), + 'keyword': keyword, + 'keyword_max_num': str(max_num), + 'dialogs':dialogs , + 'total_num': total_msg_len, + } + + +def calendar_chart(wxid, is_Annual_report=False, year='2023'): + try: + calendar_data = msg_db.get_messages_by_days(wxid, is_Annual_report, year) + except: + return { + 'calendar_chart_data': None, + 'chat_days':0, + } if not calendar_data: return False min_ = min(map(lambda x: x[1], calendar_data)) @@ -89,20 +150,17 @@ def calendar_chart(wxid, is_Annual_report=False, year='2023'): calendar_days = (start_date_, end_date_) calendar_title = '和Ta的聊天情况' c = ( - Calendar(init_opts=opts.InitOpts(width=f"{charts_width}px", height=f"{charts_height}px")) + Calendar() .add( "", calendar_data, calendar_opts=opts.CalendarOpts(range_=calendar_days) ) .set_global_opts( - title_opts=opts.TitleOpts(title=calendar_title), visualmap_opts=opts.VisualMapOpts( max_=max_, min_=min_, orient="horizontal", - # is_piecewise=True, - # pos_top="200px", pos_bottom="0px", pos_left="0px", ), @@ -110,7 +168,8 @@ def calendar_chart(wxid, is_Annual_report=False, year='2023'): ) ) return { - 'chart_data': c + 'calendar_chart_data': c.dump_options_with_quotes(), + 'chat_days':len(calendar_data), } diff --git a/app/person.py b/app/person.py index 1b735e7..e1a49d2 100644 --- a/app/person.py +++ b/app/person.py @@ -38,6 +38,18 @@ class MePC: else: self.avatar.loadFromData(img_bytes, format='jfif') + def save_avatar(self, path=None): + if not self.avatar: + return + if path: + save_path = path + else: + os.makedirs('./data/avatar', exist_ok=True) + save_path = os.path.join(f'data/avatar/', self.wxid + '.png') + self.avatar_path = save_path + self.avatar.save(save_path) + print('保存头像', save_path) + class ContactPC: def __init__(self, contact_info: Dict): diff --git a/app/ui/contact/contactInfo.py b/app/ui/contact/contactInfo.py index d9651b6..ed17f49 100644 --- a/app/ui/contact/contactInfo.py +++ b/app/ui/contact/contactInfo.py @@ -6,7 +6,7 @@ from app.DataBase.output_pc import Output from app.ui.Icon import Icon from .contactInfoUi import Ui_Form from .userinfo import userinfo -from ...person import ContactPC +from ...person import ContactPC, MePC from .export_dialog import ExportDialog @@ -78,10 +78,11 @@ class ContactInfo(QWidget, Ui_Form): ) return self.contact.save_avatar() + MePC().save_avatar() self.report_thread = ReportThread(self.contact) self.report_thread.okSignal.connect(lambda x: QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314"))) self.report_thread.start() - QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314/")) + QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314/christmas")) def emotionale_Analysis(self): if 'room' in self.contact.wxid: diff --git a/app/util/emoji.py b/app/util/emoji.py index 31b6c03..bbe618e 100644 --- a/app/util/emoji.py +++ b/app/util/emoji.py @@ -24,6 +24,7 @@ if not os.path.exists('./data'): if not os.path.exists(root_path): os.mkdir(root_path) + @log def get_image_format(header): # 定义图片格式的 magic numbers @@ -41,6 +42,7 @@ def get_image_format(header): # 如果无法识别格式,返回 None return None + @log def parser_xml(xml_string): assert type(xml_string) == str @@ -69,9 +71,11 @@ def parser_xml(xml_string): 'md5': (md5 if md5 else androidmd5).lower(), } + lock = threading.Lock() db_path = "./app/Database/Msg/Emotion.db" + class Emotion: def __init__(self): self.DB = None @@ -137,6 +141,7 @@ class Emotion: def __del__(self): self.close() + @log def download(url, output_dir, name, thumb=False): if not url: @@ -156,6 +161,31 @@ def download(url, output_dir, name, thumb=False): return output_path +def get_most_emoji(messages): + dic = {} + for msg in messages: + str_content = msg[7] + emoji_info = parser_xml(str_content) + md5 = emoji_info['md5'] + if not md5: + continue + try: + dic[md5][0] += 1 + except: + dic[md5] = [1, emoji_info] + md5_nums = [(num[0], key, num[1]) for key, num in dic.items()] + md5_nums.sort(key=lambda x: x[0],reverse=True) + if not md5_nums: + return '' + md5 = md5_nums[0][1] + num = md5_nums[0][0] + emoji_info = md5_nums[0][2] + url = emoji_info['cdnurl'] + if not url or url == "": + url = Emotion().get_emoji_url(md5, False) + return url, num + + def get_emoji(xml_string, thumb=True, output_path=root_path) -> str: try: emoji_info = parser_xml(xml_string) diff --git a/app/web_ui/templates/christmas.html b/app/web_ui/templates/christmas.html new file mode 100644 index 0000000..2cad95f --- /dev/null +++ b/app/web_ui/templates/christmas.html @@ -0,0 +1,324 @@ + + + + + + 微信年度聊天报告 + + + + + + + + +
+ +
+
+
+ Mountain Image + Top Left Light + Bottom Right Light + Left Bottom Image + Right Top Image + +
+
+ +
+
+
+ + {{my_nickname}} +
+ + Right Top Image +
+ + {{ta_nickname}} +
+
+
+

我们第一次聊天在

+

{{first_time}}

+

距今已有

+
+ + + + +
+
+ +
+
+
+
+
+ Mountain Image + Top Left Light + Bottom Right Light + 礼袋 + 圣诞老人 +
+
+
+
+
+
二〇二三
+
你们说的最多的是
+
“{{keyword}}”{{keyword_max_num}}
+ +
+
+ {% for dialog in dialogs %} +
{{dialog[0][3]}}
+ {% if dialog[0][0]==0: %} +
+ +
+ {% for p in dialog[0][2][:-1] %} + {{p}}{{keyword}} + {% endfor %} + {{dialog[0][2][-1]}} +
+
+
+
+ {{dialog[1][2]}} +
+ +
+ {% endif %} + {% if dialog[0][0]==1: %} +
+
+ {% for p in dialog[0][2][:-1] %} + {{p}}{{keyword}} + {% endfor %} + {{dialog[0][2][-1]}} +
+ +
+
+ +
+ {{dialog[1][2]}} +
+
+ {% endif %} + {% endfor %} +
+
+
+
+ Mountain Image + Top Left Light + Bottom Right Light + Left Bottom Image + Right Top Image + 圣诞老人 + Right Top Image +
+
{{latest_time}}
+
这么晚了你们还在聊天
+
那天一定有你们难忘的回忆
+

+
你们都是{{chat_time_label}}
+
{{chat_time}}
+
你们一共发送了{{chat_time_num}}条消息
+
+
+
+
{{latest_time}}
+ {% for dialog in latest_time_dialog %} + {% if dialog[0]==0: %} +
+ +
+ {{dialog[1]}} +
+
+ {% endif %} + {% if dialog[0]==1: %} +
+
+ {{dialog[1]}} +
+ +
+ {% endif %} + {% endfor %} +
+
+
+
+
+
+ Mountain Image + Top Left Light + Bottom Right Light + Left Bottom Image + 滑雪的小女孩 + Right Top Image + Right Top Image +
+
过去的一年里
+
你们一共发送了{{total_msg_num}}条消息
+
总计{{total_num}}
+

+
你们的聊天似乎没有规律
+
+ {{max_month}} + 一共发送了 + {{max_month_num}} + 条消息 +
+
对你的话说不完
+

+
+ {{min_month}} + 只有 + {{min_month_num}} + 条消息 +
+
有时候你们也想静静
+
+
+
+
+
+ Mountain Image + Top Left Light + Bottom Right Light + 包饺子 + Right Top Image +
+
{{year}}年
+
+ 我们有 + {{chat_days}} + 天在聊天 +
+
有你在的日子里
+
+ 都很有 + 意义! +
+

+
这一年
+
+ 一共发送了 + {{emoji_total_num}} + 个表情包 +
+
Ta最常用的表情包是
+ +
+ 一共 + {{emoji_num}}次 +
+
+
+
+
+
+

敬请期待

+
+ +
+ + + + + + + + + diff --git a/app/web_ui/web.py b/app/web_ui/web.py index 03211ac..9fe327f 100644 --- a/app/web_ui/web.py +++ b/app/web_ui/web.py @@ -6,6 +6,7 @@ from flask import Flask, render_template, send_file from app.DataBase import msg_db from app.analysis import analysis from app.person import ContactPC, MePC +from app.util.emoji import get_most_emoji app = Flask(__name__) @@ -19,6 +20,73 @@ def index(): return render_template("index.html") +@app.route("/christmas") +def christmas(): + # 渲染模板,并传递图表的 HTML 到模板中 + try: + first_message, first_time = msg_db.get_first_time_of_message(contact.wxid) + except TypeError: + first_time = '2023-01-01 00:00:00' + data = { + 'ta_avatar_path': contact.avatar_path, + 'my_avatar_path': MePC().avatar_path, + 'ta_nickname': contact.remark, + 'my_nickname': MePC().name, + 'first_time': first_time, + } + wordcloud_cloud_data = analysis.wordcloud_christmas(contact.wxid) + msg_data = msg_db.get_messages_by_hour(contact.wxid, year_="2023") + msg_data.sort(key=lambda x: x[1], reverse=True) + desc = { + '夜猫子': {'22:00', '23:00', '00:00', '01:00', '02:00', '03:00', '04:00', '05:00'}, + '正常作息': {'06:00', "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", + "17:00", "18:00", "19:00", "20:00", "21:00"}, + } + time_, num = msg_data[0] + chat_time = f"凌晨{time_}" if time_ in {'00:00', '01:00', '02:00', '03:00', '04:00', '05:00'} else time_ + label = '夜猫子' + for key, item in desc.items(): + if time_ in item: + label = key + latest_dialog = msg_db.get_latest_time_of_message(contact.wxid, year_='2023') + latest_time = latest_dialog[0][2] if latest_dialog else '' + time_data = { + 'latest_time': latest_time, + 'latest_time_dialog': latest_dialog, + 'chat_time_label': label, + 'chat_time': chat_time, + 'chat_time_num': num, + } + month_data = msg_db.get_messages_by_month(contact.wxid, True, year_='2023') + + if month_data: + month_data.sort(key=lambda x: x[1]) + max_month, max_num = month_data[-1] + min_month, min_num = month_data[0] + min_month = min_month[-2:].lstrip('0')+'月' + max_month = max_month[-2:].lstrip('0')+'月' + else: + max_month, max_num = '月份', 0 + min_month, min_num = '月份', 0 + month_data = { + 'year':'2023', + 'total_msg_num':msg_db.get_messages_number(contact.wxid,'2023'), + 'max_month':max_month, + 'min_month':min_month, + 'max_month_num':max_num, + 'min_month_num':min_num, + } + calendar_data = analysis.calendar_chart(contact.wxid,True,year='2023') + emoji_msgs = msg_db.get_messages_by_type(contact.wxid,47,year_='2023') + url,num = get_most_emoji(emoji_msgs) + emoji_data = { + 'emoji_total_num':len(emoji_msgs), + 'emoji_url':url, + 'emoji_num':num, + } + return render_template("christmas.html", **data, **wordcloud_cloud_data, **time_data,**month_data,**calendar_data,**emoji_data) + + # @app.route("/") # def cindex(): # # 渲染模板,并传递图表的 HTML 到模板中 @@ -37,24 +105,10 @@ def home(): 'nickname': contact.remark, 'first_time': first_time, } + return render_template('home.html', **data) -@app.route('/home') -def chome(): - try: - first_message, first_time = msg_db.get_first_time_of_message(contact.wxid) - except TypeError: - return set_text('咱就是说,一次都没聊过就别分析了') - data = { - 'sub_title': '二零二三年度报告', - 'avatar_path': contact.avatar_path, - 'nickname': contact.remark, - 'first_time': first_time, - } - return render_template('chome.html', **data) - - @app.route('/wordcloud//') def one(who): wxid = contact.wxid @@ -68,19 +122,6 @@ def one(who): return render_template('wordcloud.html', **world_cloud_data, who=who) -@app.route('/wordcloud') -def cone(): - # wxid = contact.wxid - # # wxid = 'wxid_lltzaezg38so22' - # # print('wxid:'+wxid) - # world_cloud_data = analysis.wordcloud(wxid, who=who) # 获取与Ta的对话数据 - # # print(world_cloud_data) - # who = "你" if who == '1' else "TA" - # with open('wordcloud.html', 'w', encoding='utf-8') as f: - # f.write(render_template('wordcloud.html', **world_cloud_data)) - return render_template('cwordcloud.html') - - def set_text(text): html = ''' diff --git a/main.py b/main.py index cdf200e..fcd605e 100644 --- a/main.py +++ b/main.py @@ -12,9 +12,11 @@ from app.log import logger from app.ui import mainview from app.ui.tool.pc_decrypt import pc_decrypt from app.config import version + ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID("WeChatReport") -QApplication.setAttribute(Qt.AA_EnableHighDpiScaling,True) -QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps,True) +QApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True) +QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps, True) + class ViewController(QWidget): def __init__(self): @@ -57,7 +59,8 @@ class ViewController(QWidget): def close(self) -> bool: close_db() super().close() - + + if __name__ == '__main__': app = QApplication(sys.argv) font = QFont('微软雅黑', 12) # 使用 Times New Roman 字体,字体大小为 14