修复年度报告部分只有2023年数据的问题 #374

This commit is contained in:
SiYuan 2024-02-18 14:15:19 +08:00
parent c76f5dde80
commit c40b85e631
3 changed files with 40 additions and 79 deletions

View File

@@ -329,14 +329,16 @@ class Msg:
result = self.cursor.fetchall()
return result
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, year_='all'):
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10,time_range=None, year_='all'):
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
from MSG
where StrTalker=? and Type=1 and LENGTH(StrContent)<? and StrContent like ?
{"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""}
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
order by CreateTime desc
'''
temp = []
@@ -489,46 +491,30 @@ class Msg:
lock.release()
return result
def get_messages_by_hour(self, username_, year_='all'):
def get_messages_by_hour(self, username_, time_range=None,year_='all'):
result = []
if not self.open_flag:
return result
if year_ == 'all':
sql = '''
SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
where StrTalker = ?
)
group by hours
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
result = self.cursor.fetchall()
else:
sql = '''
SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
where StrTalker = ? and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?
)
group by hours
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, year_])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
result = self.cursor.fetchall()
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT strftime('%H:00',CreateTime,'unixepoch','localtime') as hours,count(MsgSvrID)
from (
SELECT MsgSvrID, CreateTime
FROM MSG
where StrTalker = ?
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
)
group by hours
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_])
except sqlite3.DatabaseError:
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
finally:
lock.release()
result = self.cursor.fetchall()
return result
def get_first_time_of_message(self, username_=''):
@@ -549,9 +535,11 @@ class Msg:
lock.release()
return result
def get_latest_time_of_message(self, username_='', year_='all'):
def get_latest_time_of_message(self, username_='', time_range=None,year_='all'):
if not self.open_flag:
return None
if time_range:
start_time, end_time = convert_to_timestamp(time_range)
sql = f'''
SELECT isSender,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,
strftime('%H:%M:%S', CreateTime,'unixepoch','localtime') as hour
@@ -559,7 +547,7 @@ class Msg:
WHERE Type=1 AND
{'StrTalker = ? AND ' if username_ else f"'{username_}'=? AND "}
hour BETWEEN '00:00:00' AND '05:00:00'
{"and strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?" if year_ != "all" else ""}
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
ORDER BY hour DESC
LIMIT 20;
'''

View File

@@ -114,9 +114,10 @@ def get_wordcloud(text):
}
def wordcloud_christmas(wxid, year='2023'):
def wordcloud_christmas(wxid,time_range=None, year='2023'):
import jieba
txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, year)
txt_messages = msg_db.get_messages_by_type(wxid, MsgType.TEXT, time_range=time_range)
if not txt_messages:
return {
'wordcloud_chart_data': None,
@@ -127,42 +128,14 @@ def wordcloud_christmas(wxid, year='2023'):
}
text = ''.join(map(lambda x: x[7], txt_messages))
total_msg_len = len(text)
# 使用jieba进行分词并加入停用词
words = jieba.cut(text)
# 统计词频
word_count = Counter(words)
# 过滤停用词
stopwords_file = './app/data/stopwords.txt'
with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
stopwords1 = set(stopword_file.read().splitlines())
# 构建 FFmpeg 可执行文件的路径
stopwords = set()
stopwords_file = './app/resources/data/stopwords.txt'
if not os.path.exists(stopwords_file):
resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
stopwords_file = os.path.join(resource_dir, 'app', 'resources', 'data', 'stopwords.txt')
with open(stopwords_file, "r", encoding="utf-8") as stopword_file:
stopwords = set(stopword_file.read().splitlines())
stopwords = stopwords.union(stopwords1)
filtered_word_count = {word: count for word, count in word_count.items() if len(word) > 1 and word not in stopwords}
# 转换为词云数据格式
data = [(word, count) for word, count in filtered_word_count.items()]
# text_data = data
data.sort(key=lambda x: x[1], reverse=True)
text_data = data[:100] if len(data) > 100 else data
# 创建词云图
keyword, max_num = text_data[0]
w = (
WordCloud()
.add(series_name="聊天文字", data_pair=text_data, word_size_range=[5, 40])
)
wordcloud_data = get_wordcloud(text)
# return w.render_embed()
dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12, year_=year)
keyword = wordcloud_data.get('keyword')
max_num = wordcloud_data.get('keyword_max_num')
dialogs = msg_db.get_messages_by_keyword(wxid, keyword, num=3, max_len=12, time_range=time_range)
return {
'wordcloud_chart_data': w.dump_options_with_quotes(),
'wordcloud_chart_data': wordcloud_data.get('chart_data_wordcloud'),
'keyword': keyword,
'keyword_max_num': str(max_num),
'dialogs': dialogs,

View File

@@ -94,8 +94,8 @@ def christmas(wxid):
'my_nickname': Me().name,
'first_time': first_time,
}
wordcloud_cloud_data = analysis.wordcloud_christmas(contact.wxid)
msg_data = msg_db.get_messages_by_hour(contact.wxid, year_="2023")
wordcloud_cloud_data = analysis.wordcloud_christmas(contact.wxid,time_range=time_range)
msg_data = msg_db.get_messages_by_hour(contact.wxid, time_range=time_range)
msg_data.sort(key=lambda x: x[1], reverse=True)
desc = {
'夜猫子': {'22:00', '23:00', '00:00', '01:00', '02:00', '03:00', '04:00', '05:00'},
@@ -108,7 +108,7 @@ def christmas(wxid):
for key, item in desc.items():
if time_ in item:
label = key
latest_dialog = msg_db.get_latest_time_of_message(contact.wxid, year_='2023')
latest_dialog = msg_db.get_latest_time_of_message(contact.wxid, time_range=time_range)
latest_time = latest_dialog[0][2] if latest_dialog else ''
time_data = {
'latest_time': latest_time,