修复年度报告部分只有2023年数据的问题#374

2025-05-20 22:58:39 +08:00 · 2024-02-18 14:15:19 +08:00 · 2024-02-18 14:15:19 +08:00 · c40b85e631
commit c40b85e631
parent c76f5dde80
3 changed files with 40 additions and 79 deletions
--- a/app/DataBase/msg.py
+++ b/app/DataBase/msg.py
@@ -329,14 +329,16 @@ class Msg:
                result = self.cursor.fetchall()
        return result

-    def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, year_='all'):
+    def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10,time_range=None, year_='all'):
        if not self.open_flag:
            return None
+        if time_range:
+            start_time, end_time = convert_to_timestamp(time_range)
        sql = f'''
            select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
            from MSG
            where StrTalker=? and Type=1 and LENGTH(StrContent)<? and StrContent like ?
-            {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""}
+            {'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
            order by CreateTime desc
        '''
        temp = []
@@ -489,46 +491,30 @@ class Msg:
            lock.release()
        return result

-    def get_messages_by_hour(self, username_, year_='all'):
+    def get_messages_by_hour(self, username_, time_range=None,year_='all'):
        result = []
        if not self.open_flag:
            return result
-        if year_ == 'all':
-            sql = '''
-                SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
-                from (
-                    SELECT MsgSvrID, CreateTime
-                    FROM MSG
-                    where StrTalker = ?
-                )
-                group by hours
-            '''
-            try:
-                lock.acquire(True)
-                self.cursor.execute(sql, [username_])
-            except sqlite3.DatabaseError:
-                logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
-            finally:
-                lock.release()
-                result = self.cursor.fetchall()
-        else:
-            sql = '''
-                SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
-                from (
-                    SELECT MsgSvrID, CreateTime
-                    FROM MSG
-                    where StrTalker = ? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?
-                )
-                group by hours
-                '''
-            try:
-                lock.acquire(True)
-                self.cursor.execute(sql, [username_, year_])
-            except sqlite3.DatabaseError:
-                logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
-            finally:
-                lock.release()
-                result = self.cursor.fetchall()
+        if time_range:
+            start_time, end_time = convert_to_timestamp(time_range)
+        sql = f'''
+            SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
+            from (
+                SELECT MsgSvrID, CreateTime
+                FROM MSG
+                where StrTalker = ?
+                {'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
+            )
+            group by hours
+        '''
+        try:
+            lock.acquire(True)
+            self.cursor.execute(sql, [username_])
+        except sqlite3.DatabaseError:
+            logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
+        finally:
+            lock.release()
+            result = self.cursor.fetchall()
        return result

    def get_first_time_of_message(self, username_=''):
@@ -549,9 +535,11 @@ class Msg:
            lock.release()
        return result

-    def get_latest_time_of_message(self, username_='', year_='all'):
+    def get_latest_time_of_message(self, username_='', time_range=None,year_='all'):
        if not self.open_flag:
            return None
+        if time_range:
+            start_time, end_time = convert_to_timestamp(time_range)
        sql = f'''
                SELECT isSender,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,
                strftime('%H:%M:%S', CreateTime,'unixepoch','localtime') as hour
@@ -559,7 +547,7 @@ class Msg:
                WHERE Type=1 AND 
                {'StrTalker = ? AND ' if username_ else f"'{username_}'=? AND "} 
                hour BETWEEN '00:00:00' AND '05:00:00'
-                {"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""}
+                {'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
                ORDER BY hour DESC
                LIMIT 20;
            '''
--- a/app/analysis/analysis.py
+++ b/app/analysis/analysis.py
@@ -114,9 +114,10 @@ def get_wordcloud(text):
    }


-def wordcloud_christmas(wxid, year='2023'):
+def wordcloud_christmas(wxid,time_range=None, year='2023'):
    import jieba
-    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year)
+
+    txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, time_range=time_range)
    if not txt_messages:
        return {
            'wordcloud_chart_data': None,
@@ -127,42 +128,14 @@ def wordcloud_christmas(wxid, year='2023'):
        }
    text = ''.join(map(lambda x: x[7], txt_messages))
    total_msg_len = len(text)
-    # 使用jieba进行分词，并加入停用词
-    words = jieba.cut(text)
-    # 统计词频
-    word_count = Counter(words)
-    # 过滤停用词
-    stopwords_file = './app/data/stopwords.txt'
-    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
-        stopwords1 = set(stopword_file.read().splitlines())
-    # 构建 FFmpeg 可执行文件的路径
-    stopwords = set()
-    stopwords_file = './app/resources/data/stopwords.txt'
-    if not os.path.exists(stopwords_file):
-        resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
-        stopwords_file = os.path.join(resource_dir, 'app', 'resources', 'data', 'stopwords.txt')
-    with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
-        stopwords = set(stopword_file.read().splitlines())
-        stopwords = stopwords.union(stopwords1)
-
-    filtered_word_count = {word: count for word, count in word_count.items() if len(word) > 1 and word not in stopwords}
-    # 转换为词云数据格式
-    data = [(word, count) for word, count in filtered_word_count.items()]
-    # text_data = data
-    data.sort(key=lambda x: x[1], reverse=True)
-
-    text_data = data[:100] if len(data) > 100 else data
-    # 创建词云图
-    keyword, max_num = text_data[0]
-    w = (
-        WordCloud()
-        .add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40])
-    )
+    wordcloud_data = get_wordcloud(text)
    # return w.render_embed()
-    dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12, year_=year)
+    keyword = wordcloud_data.get('keyword')
+    max_num = wordcloud_data.get('keyword_max_num')
+    dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12, time_range=time_range)

    return {
-        'wordcloud_chart_data': w.dump_options_with_quotes(),
+        'wordcloud_chart_data': wordcloud_data.get('chart_data_wordcloud'),
        'keyword': keyword,
        'keyword_max_num': str(max_num),
        'dialogs': dialogs,
--- a/app/web_ui/web.py
+++ b/app/web_ui/web.py
@@ -94,8 +94,8 @@ def christmas(wxid):
        'my_nickname': Me().name,
        'first_time': first_time,
    }
-    wordcloud_cloud_data = analysis.wordcloud_christmas(contact.wxid)
-    msg_data = msg_db.get_messages_by_hour(contact.wxid, year_="2023")
+    wordcloud_cloud_data = analysis.wordcloud_christmas(contact.wxid,time_range=time_range)
+    msg_data = msg_db.get_messages_by_hour(contact.wxid, time_range=time_range)
    msg_data.sort(key=lambda x: x[1], reverse=True)
    desc = {
        '夜猫子': {'22:00', '23:00', '00:00', '01:00', '02:00', '03:00', '04:00', '05:00'},
@@ -108,7 +108,7 @@ def christmas(wxid):
    for key, item in desc.items():
        if time_ in item:
            label = key
-    latest_dialog = msg_db.get_latest_time_of_message(contact.wxid, year_='2023')
+    latest_dialog = msg_db.get_latest_time_of_message(contact.wxid, time_range=time_range)
    latest_time = latest_dialog[0][2] if latest_dialog else ''
    time_data = {
        'latest_time': latest_time,