From e3efabc3a71b4ef8de1f415caec17a85c7c59123 Mon Sep 17 00:00:00 2001 From: STDquantum <405720329@qq.com> Date: Wed, 3 Jan 2024 14:47:53 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E6=9E=90Contact=E8=A1=A8=E9=87=8C?= =?UTF-8?q?=E7=9A=84ExtraBuf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/DataBase/hard_link.py | 124 +++++++++++++++++++++++++++++++------- 1 file changed, 102 insertions(+), 22 deletions(-) diff --git a/app/DataBase/hard_link.py b/app/DataBase/hard_link.py index 90c77ea..7cb93cb 100644 --- a/app/DataBase/hard_link.py +++ b/app/DataBase/hard_link.py @@ -10,19 +10,19 @@ image_db_lock = threading.Lock() video_db_lock = threading.Lock() image_db_path = "./app/Database/Msg/HardLinkImage.db" video_db_path = "./app/Database/Msg/HardLinkVideo.db" -root_path = 'FileStorage/MsgAttach/' -video_root_path = 'FileStorage/Video/' +root_path = "FileStorage/MsgAttach/" +video_root_path = "FileStorage/Video/" @log -def get_md5_from_xml(content, type_='img'): +def get_md5_from_xml(content, type_="img"): try: # 解析XML root = ET.fromstring(content) - if type_ == 'img': + if type_ == "img": # 提取md5的值 md5_value = root.find(".//img").get("md5") - elif type_ == 'video': + elif type_ == "video": md5_value = root.find(".//videomsg").get("md5") # print(md5_value) return md5_value @@ -30,7 +30,6 @@ def get_md5_from_xml(content, type_='img'): return None - class tencent_struct: def __setVals__(self, data, off): if data: @@ -138,7 +137,13 @@ class tencent_struct: __bytesExtra__ = { 1: ("", __struct1__), - 3: ("msg_info_struct", __msgInfo__), + 3: ("msg_info_struct", "s"), + } + + __struct2__ = {1: ("", "s"), 2: ("", "s")} + + __extraBuf__ = { + 1: ("", __struct2__), } def get_bytesExta_Content(self, data=None, off=0): @@ -148,6 +153,13 @@ class tencent_struct: except: raise + def get_extraBuf_Content(self, data=None, off=0): + self.__setVals__(data, off) + try: + return self.readStruct("__extraBuf__") + except: + raise + __contenttype__ = { "s": __readString, "I": __readUleb, @@ -163,6 +175,74 @@ def parseBytes(content: bytes): pass +def parseExtraBuf(content: bytes): + try: + extraBuf = tencent_struct().get_extraBuf_Content(content) + return extraBuf + except: + pass + + +def decodeExtraBuf(extra_buf_content: bytes): + off = 0 + types = [b"\x04", b"\x18", b"\x17", b"\x02", b"\x05"] + trunkName = { + "46CF10C4": "个性签名", + "A4D9024A": "国家", + "E2EAA8D1": "省份", + "1D025BBF": "市", + "81AE19B4": "朋友圈背景url", + "F917BCC0": "公司名称", + "4EB96D85": "企业微信属性", + "0E719F13": "备注图片", + "759378AD": "手机号", + } + res = {} + while off < len(extra_buf_content): + length = 4 # 块头 + trunk_head = extra_buf_content[off : off + length] + off += length + trunk_head = binascii.hexlify(trunk_head).decode().upper() + if trunk_head in trunkName: + trunk_head = trunkName[trunk_head] + res[trunk_head] = {} + char = extra_buf_content[off : off + 1] + off += 1 + field = binascii.hexlify(char).decode() + if char == b"\x04": # 四个字节的int,小端序 + length = 4 + intContent = extra_buf_content[off : off + length] + off += 4 + intContent = int.from_bytes(intContent, "little") + res[trunk_head][field] = intContent + elif char == b"\x18": # utf-16字符串 + length = 4 + lengthContent = extra_buf_content[off : off + length] + off += 4 + lengthContent = int.from_bytes(lengthContent, "little") + strContent = extra_buf_content[off : off + lengthContent] + off += lengthContent + res[trunk_head][field] = strContent.decode("utf-16").rstrip("\x00") + elif char == b"\x17": # utf-8 protobuf + length = 4 + lengthContent = extra_buf_content[off : off + length] + off += 4 + lengthContent = int.from_bytes(lengthContent, "little") + strContent = extra_buf_content[off : off + lengthContent] + off += lengthContent + res[trunk_head][field] = parseExtraBuf(strContent) + elif char == b"\x02": # 一个字节的int + content = extra_buf_content[off : off + 1] + off += 1 + res[trunk_head][field] = int.from_bytes(content, "little") + elif char == b"\x05": # 暂时不知道有啥用,固定8个字节 + length = 8 + content = extra_buf_content[off : off + length] + off += length + res[trunk_head][field] = int.from_bytes(content, "little") + return res + + def singleton(cls): _instance = {} @@ -206,13 +286,13 @@ class HardLink: return None if not self.open_flag: return None - sql = ''' + sql = """ select Md5Hash,MD5,FileName,HardLinkImageID.Dir as DirName1,HardLinkImageID2.Dir as DirName2 from HardLinkImageAttribute join HardLinkImageID on HardLinkImageAttribute.DirID1 = HardLinkImageID.DirID join HardLinkImageID as HardLinkImageID2 on HardLinkImageAttribute.DirID2 = HardLinkImageID2.DirID where MD5 = ?; - ''' + """ try: image_db_lock.acquire(True) try: @@ -230,12 +310,12 @@ class HardLink: return None if not self.open_flag: return None - sql = ''' + sql = """ select Md5Hash,MD5,FileName,HardLinkVideoID2.Dir as DirName2 from HardLinkVideoAttribute join HardLinkVideoID as HardLinkVideoID2 on HardLinkVideoAttribute.DirID2 = HardLinkVideoID2.DirID where MD5 = ?; - ''' + """ try: video_db_lock.acquire(True) try: @@ -252,8 +332,8 @@ class HardLink: bytesDict = parseBytes(bytesExtra) for msginfo in bytesDict[3]: if msginfo[1][1][1] == (3 if thumb else 4): - pathh = msginfo[1][2][1] # wxid\FileStorage\... - pathh = "\\".join(pathh.split('\\')[1:]) + pathh = msginfo[1][2][1] # wxid\FileStorage\... + pathh = "\\".join(pathh.split("\\")[1:]) return pathh md5 = get_md5_from_xml(content) if not md5: @@ -264,7 +344,7 @@ class HardLink: dir1 = result[3] dir2 = result[4] data_image = result[2] - dir0 = 'Thumb' if thumb else 'Image' + dir0 = "Thumb" if thumb else "Image" dat_image = os.path.join(root_path, dir1, dir0, dir2, data_image) return dat_image @@ -272,16 +352,16 @@ class HardLink: bytesDict = parseBytes(bytesExtra) for msginfo in bytesDict[3]: if msginfo[1][1][1] == (3 if thumb else 4): - pathh = msginfo[1][2][1] # wxid\FileStorage\... - pathh = "\\".join(pathh.split('\\')[1:]) + pathh = msginfo[1][2][1] # wxid\FileStorage\... + pathh = "\\".join(pathh.split("\\")[1:]) return pathh - md5 = get_md5_from_xml(content, type_='video') + md5 = get_md5_from_xml(content, type_="video") if not md5: return None result = self.get_video_by_md5(binascii.unhexlify(md5)) if result: dir2 = result[3] - data_image = result[2].split('.')[0] + '.jpg' if thumb else result[2] + data_image = result[2].split(".")[0] + ".jpg" if thumb else result[2] # dir0 = 'Thumb' if thumb else 'Image' dat_image = os.path.join(video_root_path, dir2, data_image) return dat_image @@ -304,8 +384,8 @@ class HardLink: # 6b02292eecea118f06be3a5b20075afc_t -if __name__ == '__main__': - msg_root_path = './Msg/' +if __name__ == "__main__": + msg_root_path = "./Msg/" image_db_path = "./Msg/HardLinkImage.db" video_db_path = "./Msg/HardLinkVideo.db" hard_link_db = HardLink() @@ -315,10 +395,10 @@ if __name__ == '__main__': # print(hard_link_db.get_image(content, thumb=False)) # result = get_md5_from_xml(content) # print(result) - content = ''' + content = """ -''' +""" print(hard_link_db.get_video(content)) print(hard_link_db.get_video(content, thumb=True))