class tencent_struct:
    """Schema-driven reader for the ``BytesExtra`` blob stored with a message.

    The blob is walked as a sequence of ``(tag, value)`` pairs.  Each tag is a
    ULEB128 varint whose low three bits are discarded (``tag >> 3`` is the
    field number); how the value is decoded is decided by the schema tables
    below, not by those low bits.  The layout appears to follow the protobuf
    wire format -- NOTE(review): confirm against real data.

    A schema maps ``field number -> (field name, op)`` where ``op`` is either
    a key of ``__contenttype__`` (``"s"`` string, ``"I"`` varint, ``"P"`` raw
    bytes) or a nested schema dict for a length-prefixed sub-structure.
    """

    def __init__(self, data=None, off=0):
        """Create a reader over *data* with the cursor placed at *off*."""
        self.__data = data
        self.__off = off
        self.__size = len(data) if data else 0

    def __setVals__(self, data, off):
        """Re-target the reader.

        A falsy *data* keeps whatever buffer is already loaded; the cursor is
        always reset to *off*.
        """
        if data:
            self.__data = data
        if self.__data:
            self.__size = len(self.__data)
        self.__off = off

    def __add(self, value=1):
        """Advance the cursor by *value* bytes.

        Raises:
            IndexError: if the cursor would move past the end of the buffer.
        """
        self.__off += value
        if self.__off > self.__size:
            # Must be a real exception object: raising a plain string is
            # itself a TypeError on Python 3 and hides the actual cause.
            raise IndexError("偏移量超出size")

    def __readUleb(self):
        """Read one ULEB128 varint at the cursor and return it as an int.

        Seven payload bits are taken from every byte, least-significant group
        first; a set high bit means another byte follows.  There is no fixed
        byte limit, so values wider than 35 bits decode correctly.

        Raises:
            IndexError: if the buffer ends before the varint terminates.
        """
        result = 0
        shift = 0
        while True:
            byte = self.__data[self.__off]
            self.__add()
            result |= (byte & 0x7F) << shift
            if not byte & 0x80:
                return result
            shift += 7

    def __readData(self):
        """Read a varint length followed by that many raw bytes."""
        length = self.__readUleb()
        chunk = self.__data[self.__off : self.__off + length]
        # Slicing never fails on a short buffer; __add is what detects
        # truncation.
        self.__add(length)
        return chunk

    def __readString(self):
        """Read a length-prefixed byte run and decode it as UTF-8."""
        return self.__readData().decode("utf-8")

    def readStruct(self, struct_type):
        """Decode fields from the cursor onward according to a schema.

        Args:
            struct_type: a schema dict, or the name of a schema attribute on
                this class (for example ``"__bytesExtra__"``).

        Returns:
            A dict keyed by field number.  Scalar fields map to
            ``(field_name, value)``; nested-structure fields map to a list of
            ``(field_name, decoded_dict)`` tuples, one per occurrence.
            Decoding stops quietly at the end of the buffer, at field number
            0, or at the first field number missing from the schema.

        Raises:
            IndexError: if the data is truncated.
            UnicodeDecodeError: if a string field is not valid UTF-8.
        """
        if isinstance(struct_type, str):
            schema = getattr(self, struct_type)
        else:
            schema = struct_type

        res = {}
        while self.__off < self.__size:
            # Low three bits of the tag are ignored; the schema decides how
            # the value is read.
            key = self.__readUleb() >> 3
            if key == 0 or key not in schema:
                break
            fieldName, op = schema[key]
            if isinstance(op, dict):
                # Nested structure: parse its bytes with a fresh reader so
                # this reader's cursor simply skips over the payload.
                nested = tencent_struct(self.__readData())
                res.setdefault(key, []).append((fieldName, nested.readStruct(op)))
            elif op != "":
                res[key] = (fieldName, self.__contenttype__[op](self))
            else:
                break
        return res

    # Schema tables: field number -> (field name, op).
    __struct1__ = {1: ("", "I"), 2: ("", "I")}

    __msgInfo__ = {1: ("", "I"), 2: ("msg_info", "s")}

    __bytesExtra__ = {
        1: ("", __struct1__),
        3: ("msg_info_struct", __msgInfo__),
    }

    def get_bytesExta_Content(self, data=None, off=0):
        """Load *data* (if given) and decode it with the ``__bytesExtra__`` schema.

        Raises whatever ``readStruct`` raises on malformed input.
        """
        self.__setVals__(data, off)
        return self.readStruct("__bytesExtra__")

    # Op code -> reader.  Defined after the methods it references; entries
    # are plain functions and are called with the instance passed explicitly.
    __contenttype__ = {
        "s": __readString,
        "I": __readUleb,
        "P": __readData,
    }


def parseBytes(content: bytes):
    """Best-effort decode of a ``BytesExtra`` blob.

    Returns:
        The dict produced by ``tencent_struct.get_bytesExta_Content``, an
        empty dict when *content* is empty or ``None``, or ``None`` when the
        blob cannot be decoded.
    """
    try:
        return tencent_struct().get_bytesExta_Content(content)
    except Exception:
        # Malformed or unexpected input is not fatal for callers; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        return None
order by CreateTime diff --git a/app/DataBase/output_pc.py b/app/DataBase/output_pc.py index d41eccb..806d315 100644 --- a/app/DataBase/output_pc.py +++ b/app/DataBase/output_pc.py @@ -637,6 +637,7 @@ const chatMessages = [ str_time = message[8] # print(type_, type(type_)) is_send = message[4] + BytesExtra = message[10] # avatar = MePC().avatar_path if is_send else self.contact.avatar_path # avatar = avatar.replace('\\', '\\\\') avatar = 'myhead.png' if is_send else 'tahead.png' @@ -657,7 +658,7 @@ const chatMessages = [ f'''{{ type:{type_}, text: '{str_content}',is_send:{is_send},avatar_path:'{avatar}'}},''' ) elif type_ == 3: - image_path = hard_link_db.get_image(content=str_content, thumb=False) + image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False) image_path = path.get_relative_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image') image_path = image_path.replace('\\', '/') # print(f"tohtml:---{image_path}") @@ -669,7 +670,7 @@ const chatMessages = [ f'''{{ type:{type_}, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}'}},''' ) elif type_ == 43: - video_path = hard_link_db.get_video(content=str_content, thumb=False) + video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False) video_path = f'{MePC().wx_dir}/{video_path}' if os.path.exists(video_path): new_path = origin_docx_path + '/video/' + os.path.basename(video_path)