From 9e82782ed166144e95c072cc8e93412af0378c14 Mon Sep 17 00:00:00 2001 From: shuaikangzhou <863909694@qq.com> Date: Wed, 20 Dec 2023 22:23:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B9=B4=E5=BA=A6=E6=8A=A5=E5=91=8A=E5=9C=A3?= =?UTF-8?q?=E8=AF=9E=E7=89=B9=E5=88=AB=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/__init__.py | 2 +- app/DataBase/msg.py | 183 ++++++++++------ app/analysis/analysis.py | 75 ++++++- app/person.py | 12 ++ app/ui/contact/contactInfo.py | 5 +- app/util/emoji.py | 30 +++ app/web_ui/templates/christmas.html | 324 ++++++++++++++++++++++++++++ app/web_ui/web.py | 97 ++++++--- main.py | 9 +- 9 files changed, 633 insertions(+), 104 deletions(-) create mode 100644 app/web_ui/templates/christmas.html diff --git a/app/DataBase/__init__.py b/app/DataBase/__init__.py index d805bc6..0207b6f 100644 --- a/app/DataBase/__init__.py +++ b/app/DataBase/__init__.py @@ -37,4 +37,4 @@ def init_db(): media_msg_db.init_database() -__all__ = ['output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db', 'MsgType', "media_msg_db"] +__all__ = ['output', 'misc_db', 'micro_msg_db', 'msg_db', 'hard_link_db', 'MsgType', "media_msg_db","close_db"] diff --git a/app/DataBase/msg.py b/app/DataBase/msg.py index 7ae8daa..db3f09b 100644 --- a/app/DataBase/msg.py +++ b/app/DataBase/msg.py @@ -147,47 +147,53 @@ class Msg: # result.sort(key=lambda x: x[5]) return result - def get_messages_by_type(self, username_, type_, is_Annual_report_=False, year_='2023'): + def get_messages_by_type(self, username_, type_, year_='all'): if not self.open_flag: return None - if is_Annual_report_: + if year_ == 'all': sql = ''' - select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent - from MSG - where StrTalker=? and Type=? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? - order by CreateTime - ''' + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent + from MSG + where StrTalker=? and Type=? + order by CreateTime + ''' + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, type_]) + finally: + lock.release() + result = self.cursor.fetchall() else: sql = ''' - select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent - from MSG - where StrTalker=? and Type=? - order by CreateTime - ''' - try: - lock.acquire(True) - if is_Annual_report_: + select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent + from MSG + where StrTalker=? and Type=? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? + order by CreateTime + ''' + try: + lock.acquire(True) self.cursor.execute(sql, [username_, type_, year_]) - else: - self.cursor.execute(sql, [username_, type_]) - result = self.cursor.fetchall() - finally: - lock.release() + finally: + lock.release() + result = self.cursor.fetchall() return result - def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10): + def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, year_='all'): if not self.open_flag: return None - sql = ''' + sql = f''' select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra from MSG where StrTalker=? and Type=1 and LENGTH(StrContent) and StrContent like ? + {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""} order by CreateTime desc ''' temp = [] try: lock.acquire(True) - self.cursor.execute(sql, [username_, max_len, f'%{keyword}%']) + self.cursor.execute(sql, [username_, max_len, f'%{keyword}%'] if year_ == "all" else [username_, max_len, + f'%{keyword}%', + year_]) messages = self.cursor.fetchall() finally: lock.release() @@ -222,6 +228,11 @@ class Msg: ('', '', ['', ''], ''), ('', '', '', '') )) + """ + 返回值为一个列表,每个列表元素是一个对话 + 每个对话是一个元组数据 + ('is_send','时间戳','以关键词为分割符的消息内容','格式化时间') + """ return res def get_contact(self, contacts): @@ -317,18 +328,11 @@ class Msg: # result.sort(key=lambda x: x[5]) return result - def get_messages_by_hour(self, username_, is_Annual_report_=False, year_='2023'): - if is_Annual_report_: - sql = ''' - SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID) - from ( - SELECT MsgSvrID, CreateTime - FROM MSG - where StrTalker = ? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? - ) - group by hours - ''' - else: + def get_messages_by_hour(self, username_, year_='all'): + result = [] + if not self.open_flag: + return result + if year_ == 'all': sql = ''' SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID) from ( @@ -338,21 +342,32 @@ class Msg: ) group by hours ''' - result = None - if not self.open_flag: - return None - try: - lock.acquire(True) - if is_Annual_report_: - self.cursor.execute(sql, [username_, year_]) - else: + try: + lock.acquire(True) self.cursor.execute(sql, [username_]) - result = self.cursor.fetchall() - except sqlite3.DatabaseError: - logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') - finally: - lock.release() - # result.sort(key=lambda x: x[5]) + except sqlite3.DatabaseError: + logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') + finally: + lock.release() + result = self.cursor.fetchall() + else: + sql = ''' + SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID) + from ( + SELECT MsgSvrID, CreateTime + FROM MSG + where StrTalker = ? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ? + ) + group by hours + ''' + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, year_]) + except sqlite3.DatabaseError: + logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') + finally: + lock.release() + result = self.cursor.fetchall() return result def get_first_time_of_message(self, username_): @@ -373,6 +388,38 @@ class Msg: lock.release() return result + def get_latest_time_of_message(self, username_, year_='all'): + if not self.open_flag: + return None + sql = f''' + SELECT isSender,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime, + strftime('%H:%M:%S', CreateTime,'unixepoch','localtime') as hour + FROM MSG + WHERE StrTalker = ? AND Type=1 AND + hour BETWEEN '00:00:00' AND '05:00:00' + {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""} + ORDER BY hour DESC + LIMIT 20; + ''' + try: + lock.acquire(True) + self.cursor.execute(sql, [username_, year_] if year_ != "all" else [username_]) + except sqlite3.DatabaseError: + logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') + finally: + lock.release() + result = self.cursor.fetchall() + if not result: + return [] + res = [] + is_sender = result[0][0] + res.append(result[0]) + for msg in result[1:]: + if msg[0] != is_sender: + res.append(msg) + break + return res + def get_send_messages_type_number(self, year_="all") -> list: """ 统计自己发的各类型消息条数,按条数降序,精确到subtype\n @@ -401,6 +448,28 @@ class Msg: lock.release() return result + def get_messages_number(self, username_, year_="all") -> int: + sql = f""" + SELECT Count(MsgSvrID) + from MSG + where StrTalker = ? + {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""} + group by type, subtype + order by Count(MsgSvrID) desc + """ + result = None + if not self.open_flag: + return None + try: + lock.acquire(True) + self.cursor.execute(sql, [username_,year_] if year_ != "all" else [username_]) + result = self.cursor.fetchone() + except sqlite3.DatabaseError: + logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试') + finally: + lock.release() + return result[0] if result else 0 + def get_chatted_top_contacts(self, year_="all", contain_chatroom=False, top_n=10) -> list: """ 统计聊天最多的 n 个联系人(默认不包含群组),按条数降序\n @@ -429,7 +498,7 @@ class Msg: finally: lock.release() return result - + def get_send_messages_length(self, year_="all") -> int: """ 统计自己总共发消息的字数,包含type=1的文本和type=49,subtype=57里面自己发的文本 @@ -539,15 +608,5 @@ if __name__ == '__main__': db_path = "./app/database/Msg/MSG.db" msg = Msg() msg.init_database() - print(msg.get_chatted_top_contacts(year_="2023")) - print(msg.get_chatted_top_contacts(year_="2023", contain_chatroom=True, top_n=20)) - # result = msg.get_message_by_num("wxid_vtz9jk9ulzjt22", 9999999) - # print(result) - # result = msg.get_messages_by_type("wxid_vtz9jk9ulzjt22", 49) - # for r in result: - # type_ = r[2] - # sub_type = r[3] - # if type_ == 49 and sub_type == 57: - # print(r) - # print(r[-1]) - # break + print(msg.get_latest_time_of_message('wxid_0o18ef858vnu22', year_='2023')) + print(msg.get_messages_number('wxid_0o18ef858vnu22', year_='2023')) diff --git a/app/analysis/analysis.py b/app/analysis/analysis.py index 4de8f2d..71e68ea 100644 --- a/app/analysis/analysis.py +++ b/app/analysis/analysis.py @@ -20,7 +20,7 @@ wordcloud_height = 720 def wordcloud(wxid, is_Annual_report=False, year='2023', who='1'): import jieba - txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, is_Annual_report, year) + txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year) if not txt_messages: return { 'chart_data': None, @@ -72,9 +72,70 @@ def wordcloud(wxid, is_Annual_report=False, year='2023', who='1'): } -def calendar_chart(wxid, is_Annual_report=False, year='2023'): - calendar_data = msg_db.get_messages_by_days(wxid, is_Annual_report, year) +def wordcloud_christmas(wxid, year='2023'): + import jieba + txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year) + if not txt_messages: + return { + 'chart_data': None, + 'keyword': "没有聊天你想分析啥", + 'max_num': '0', + 'dialogs': [], + 'total_num': 0, + } + text = ''.join(map(lambda x: x[7], txt_messages)) + total_msg_len = len(text) + # 使用jieba进行分词,并加入停用词 + words = jieba.cut(text) + # 统计词频 + word_count = Counter(words) + # 过滤停用词 + stopwords_file = './app/data/stopwords.txt' + with open(stopwords_file, "r", encoding="utf-8") as stopword_file: + stopwords1 = set(stopword_file.read().splitlines()) + file = QFile(':/data/stopwords.txt') + stopwords = set() + if file.open(QIODevice.ReadOnly | QIODevice.Text): + stream = QTextStream(file) + stream.setCodec('utf-8') + content = stream.readAll() + file.close() + stopwords = set(content.splitlines()) + stopwords = stopwords.union(stopwords1) + filtered_word_count = {word: count for word, count in word_count.items() if len(word) > 1 and word not in stopwords} + # 转换为词云数据格式 + data = [(word, count) for word, count in filtered_word_count.items()] + # text_data = data + data.sort(key=lambda x: x[1], reverse=True) + + text_data = data[:100] if len(data) > 100 else data + # 创建词云图 + keyword, max_num = text_data[0] + w = ( + WordCloud() + .add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40]) + ) + # return w.render_embed() + dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12,year_=year) + + return { + 'wordcloud_chart_data': w.dump_options_with_quotes(), + 'keyword': keyword, + 'keyword_max_num': str(max_num), + 'dialogs':dialogs , + 'total_num': total_msg_len, + } + + +def calendar_chart(wxid, is_Annual_report=False, year='2023'): + try: + calendar_data = msg_db.get_messages_by_days(wxid, is_Annual_report, year) + except: + return { + 'calendar_chart_data': None, + 'chat_days':0, + } if not calendar_data: return False min_ = min(map(lambda x: x[1], calendar_data)) @@ -89,20 +150,17 @@ def calendar_chart(wxid, is_Annual_report=False, year='2023'): calendar_days = (start_date_, end_date_) calendar_title = '和Ta的聊天情况' c = ( - Calendar(init_opts=opts.InitOpts(width=f"{charts_width}px", height=f"{charts_height}px")) + Calendar() .add( "", calendar_data, calendar_opts=opts.CalendarOpts(range_=calendar_days) ) .set_global_opts( - title_opts=opts.TitleOpts(title=calendar_title), visualmap_opts=opts.VisualMapOpts( max_=max_, min_=min_, orient="horizontal", - # is_piecewise=True, - # pos_top="200px", pos_bottom="0px", pos_left="0px", ), @@ -110,7 +168,8 @@ def calendar_chart(wxid, is_Annual_report=False, year='2023'): ) ) return { - 'chart_data': c + 'calendar_chart_data': c.dump_options_with_quotes(), + 'chat_days':len(calendar_data), } diff --git a/app/person.py b/app/person.py index 1b735e7..e1a49d2 100644 --- a/app/person.py +++ b/app/person.py @@ -38,6 +38,18 @@ class MePC: else: self.avatar.loadFromData(img_bytes, format='jfif') + def save_avatar(self, path=None): + if not self.avatar: + return + if path: + save_path = path + else: + os.makedirs('./data/avatar', exist_ok=True) + save_path = os.path.join(f'data/avatar/', self.wxid + '.png') + self.avatar_path = save_path + self.avatar.save(save_path) + print('保存头像', save_path) + class ContactPC: def __init__(self, contact_info: Dict): diff --git a/app/ui/contact/contactInfo.py b/app/ui/contact/contactInfo.py index d9651b6..ed17f49 100644 --- a/app/ui/contact/contactInfo.py +++ b/app/ui/contact/contactInfo.py @@ -6,7 +6,7 @@ from app.DataBase.output_pc import Output from app.ui.Icon import Icon from .contactInfoUi import Ui_Form from .userinfo import userinfo -from ...person import ContactPC +from ...person import ContactPC, MePC from .export_dialog import ExportDialog @@ -78,10 +78,11 @@ class ContactInfo(QWidget, Ui_Form): ) return self.contact.save_avatar() + MePC().save_avatar() self.report_thread = ReportThread(self.contact) self.report_thread.okSignal.connect(lambda x: QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314"))) self.report_thread.start() - QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314/")) + QDesktopServices.openUrl(QUrl("http://127.0.0.1:21314/christmas")) def emotionale_Analysis(self): if 'room' in self.contact.wxid: diff --git a/app/util/emoji.py b/app/util/emoji.py index 31b6c03..bbe618e 100644 --- a/app/util/emoji.py +++ b/app/util/emoji.py @@ -24,6 +24,7 @@ if not os.path.exists('./data'): if not os.path.exists(root_path): os.mkdir(root_path) + @log def get_image_format(header): # 定义图片格式的 magic numbers @@ -41,6 +42,7 @@ def get_image_format(header): # 如果无法识别格式,返回 None return None + @log def parser_xml(xml_string): assert type(xml_string) == str @@ -69,9 +71,11 @@ def parser_xml(xml_string): 'md5': (md5 if md5 else androidmd5).lower(), } + lock = threading.Lock() db_path = "./app/Database/Msg/Emotion.db" + class Emotion: def __init__(self): self.DB = None @@ -137,6 +141,7 @@ class Emotion: def __del__(self): self.close() + @log def download(url, output_dir, name, thumb=False): if not url: @@ -156,6 +161,31 @@ def download(url, output_dir, name, thumb=False): return output_path +def get_most_emoji(messages): + dic = {} + for msg in messages: + str_content = msg[7] + emoji_info = parser_xml(str_content) + md5 = emoji_info['md5'] + if not md5: + continue + try: + dic[md5][0] += 1 + except: + dic[md5] = [1, emoji_info] + md5_nums = [(num[0], key, num[1]) for key, num in dic.items()] + md5_nums.sort(key=lambda x: x[0],reverse=True) + if not md5_nums: + return '' + md5 = md5_nums[0][1] + num = md5_nums[0][0] + emoji_info = md5_nums[0][2] + url = emoji_info['cdnurl'] + if not url or url == "": + url = Emotion().get_emoji_url(md5, False) + return url, num + + def get_emoji(xml_string, thumb=True, output_path=root_path) -> str: try: emoji_info = parser_xml(xml_string) diff --git a/app/web_ui/templates/christmas.html b/app/web_ui/templates/christmas.html new file mode 100644 index 0000000..2cad95f --- /dev/null +++ b/app/web_ui/templates/christmas.html @@ -0,0 +1,324 @@ + + +
+ + +我们第一次聊天在
+{{first_time}}
+距今已有
+