From c3c604c6fb95198d532c784c525261f8326b721c Mon Sep 17 00:00:00 2001
From: Sawyer <35255629+shaoyie@users.noreply.github.com>
Date: Mon, 11 Dec 2023 21:17:29 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E5=8E=8B=E7=BC=A9?=
=?UTF-8?q?=E5=86=85=E5=AE=B9=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
app/DataBase/msg.py | 77 ++++++++++++++++++++++++++++++++++-----
requirements.txt | Bin 936 -> 946 bytes
requirements_decrypt.txt | 3 +-
requirements_pc.txt | 1 +
4 files changed, 71 insertions(+), 10 deletions(-)
diff --git a/app/DataBase/msg.py b/app/DataBase/msg.py
index d6d616b..a02d564 100644
--- a/app/DataBase/msg.py
+++ b/app/DataBase/msg.py
@@ -1,9 +1,13 @@
import os.path
import random
+import html
import sqlite3
import threading
import traceback
from pprint import pprint
+import lz4.block
+import html
+import re
from app.log import logger
@@ -25,6 +29,61 @@ def singleton(cls):
return inner
+def decompress_CompressContent(data):
+    """
+    Decompress an LZ4 block read from the MSG.CompressContent column and return it as text.
+
+    :param data: compressed payload; anything that is not ``bytes`` (including None) is rejected
+    :return: the decompressed text with NUL characters removed and HTML entities
+             unescaped twice, or None when ``data`` is not bytes or LZ4 decompression fails
+    """
+    if data is None or not isinstance(data, bytes):
+        return None
+
+    try:
+        # The true uncompressed size is not available here, so 1024x the
+        # compressed length is passed as the output size.
+        dst = lz4.block.decompress(data, uncompressed_size=len(data) << 10)
+    except lz4.block.LZ4BlockError:
+        print("Decompression failed: potentially corrupt input or insufficient buffer size.")
+        return None
+
+    # Decode leniently: a stray non-UTF-8 byte must not raise UnicodeDecodeError and
+    # abort the caller's whole query. NUL characters are dropped afterwards.
+    decoded_string = dst.decode("utf-8", errors="replace").replace('\x00', '')
+
+    # Resolve HTML escapes such as &gt;. They can be nested once per quoting level;
+    # only the message itself and its first-level quote are considered, so the
+    # text is unescaped exactly twice.
+    return html.unescape(html.unescape(decoded_string))
+
+def transferMessages(messages, compress_content_column=-1, content_column=7):
+    """
+    Replace the content of rows with Type 49 / SubType 57 by a short text summary.
+
+    For such rows the compressed column is decompressed with
+    decompress_CompressContent and the text of the first one or two <title>
+    elements is written into the content column. Every other row, and any row
+    whose payload cannot be decompressed, is passed through unchanged.
+
+    :param messages: iterable of row sequences; index 2 is read as Type and index 3 as SubType
+    :param compress_content_column: index of the compressed column, -1 means the last column
+    :param content_column: index of the column that receives the extracted text
+    :return: a new list; rewritten rows are tuples, untouched rows are the original objects
+    """
+    # NOTE(review): the tag text of this pattern was missing from the patch as received
+    # (it read r'(.*?)'); restored as <title>...</title> based on the variable name and
+    # the accompanying comment -- confirm against the upstream file.
+    title_regex = r'<title>(.*?)</title>'
+
+    new_messages = []
+    for row in messages:
+        msg_type, sub_type = row[2], row[3]
+        addition_idx = len(row) - 1 if compress_content_column == -1 else compress_content_column
+
+        if msg_type == 49 and sub_type == 57 and row[addition_idx] is not None:
+            decoded_string = decompress_CompressContent(row[addition_idx])
+
+            # None means the payload could not be decompressed: keep the stored
+            # content instead of passing None to re.findall or overwriting the column.
+            if decoded_string is not None:
+                titles = re.findall(title_regex, decoded_string)
+
+                if len(titles) >= 2:
+                    # Two or more titles: the message text followed by the quoted text.
+                    decoded_string = titles[0] + '\n引用:' + titles[1]
+                elif len(titles) == 1:
+                    # A single title: keep just that one.
+                    decoded_string = titles[0]
+
+                mutable_row = list(row)
+                mutable_row[content_column] = decoded_string
+                row = tuple(mutable_row)
+        new_messages.append(row)
+    return new_messages
+
class MsgType:
TEXT = 1
@@ -56,7 +115,7 @@ class Msg:
if not self.open_flag:
return None
sql = '''
- select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
+ select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
from MSG
where StrTalker=?
order by CreateTime
@@ -68,11 +127,11 @@ class Msg:
finally:
lock.release()
result.sort(key=lambda x: x[5])
- return result
+ return transferMessages(result)
def get_messages_all(self):
sql = '''
- select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,StrTalker,Reserved1
+ select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,StrTalker,Reserved1,CompressContent
from MSG
order by CreateTime
'''
@@ -85,7 +144,7 @@ class Msg:
finally:
lock.release()
result.sort(key=lambda x: x[5])
- return result
+ return transferMessages(result)
def get_messages_length(self):
sql = '''
@@ -106,7 +165,7 @@ class Msg:
def get_message_by_num(self, username_, local_id):
sql = '''
- select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
+ select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
from MSG
where StrTalker = ? and localId < ?
order by CreateTime desc
@@ -124,21 +183,21 @@ class Msg:
finally:
lock.release()
# result.sort(key=lambda x: x[5])
- return result
+ return transferMessages(result)
def get_messages_by_type(self, username_, type_, is_Annual_report_=False, year_='2023'):
if not self.open_flag:
return None
if is_Annual_report_:
sql = '''
- select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
+ select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
from MSG
where StrTalker=? and Type=? and strftime('%Y',CreateTime,'unixepoch','localtime') = ?
order by CreateTime
'''
else:
sql = '''
- select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra
+ select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
from MSG
where StrTalker=? and Type=?
order by CreateTime
@@ -152,7 +211,7 @@ class Msg:
result = self.cursor.fetchall()
finally:
lock.release()
- return result
+ return transferMessages(result)
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10):
if not self.open_flag:
diff --git a/requirements.txt b/requirements.txt
index 917bb83aebb523af977f0a8f8a89970e7f0c1b27..f596530a83ac4e6a61094c33a6165dd1cf27f3dd 100644
GIT binary patch
delta 18
ZcmZ3%zKMOq3T7@|1}=sihAIXV1^_Td1OEU3
delta 7
OcmdnQzJh(j3T6Nc)&j%;
diff --git a/requirements_decrypt.txt b/requirements_decrypt.txt
index 35358e7..57b92af 100644
--- a/requirements_decrypt.txt
+++ b/requirements_decrypt.txt
@@ -6,4 +6,5 @@ pymem
silk-python
pyaudio
fuzzywuzzy
-python-Levenshtein
\ No newline at end of file
+python-Levenshtein
+lz4
\ No newline at end of file
diff --git a/requirements_pc.txt b/requirements_pc.txt
index ed617a2..e54af3a 100644
--- a/requirements_pc.txt
+++ b/requirements_pc.txt
@@ -15,3 +15,4 @@ jieba==0.42.1
google==3.0.0
protobuf==4.25.1
soupsieve==2.5
+lz4
\ No newline at end of file