"""Look up image/video attachment paths via the HardLink SQLite databases.

The module offers three things:

* ``get_md5_from_xml`` - pull the ``md5`` attribute out of a message XML.
* ``tencent_struct`` / ``parseBytes`` - decode the binary ``BytesExtra`` blob
  that may carry the attachment path directly.
* ``HardLink`` - a singleton that maps an md5 to a relative file path using
  ``HardLinkImage.db`` and ``HardLinkVideo.db``.
"""
import binascii
import os.path
import sqlite3
import threading
import xml.etree.ElementTree as ET

from app.log import log

# One lock per database; every query and close() runs under the matching lock.
image_db_lock = threading.Lock()
video_db_lock = threading.Lock()
image_db_path = "./app/Database/Msg/HardLinkImage.db"
video_db_path = "./app/Database/Msg/HardLinkVideo.db"
root_path = 'FileStorage/MsgAttach/'
video_root_path = 'FileStorage/Video/'


@log
def get_md5_from_xml(content, type_='img'):
    """Return the ``md5`` attribute of the media element inside ``content``.

    Args:
        content: XML text to parse.
        type_: ``'img'`` reads the first ``<img>`` descendant, ``'video'``
            reads the first ``<videomsg>`` descendant.

    Returns:
        The attribute value, or ``None`` when ``type_`` is not recognised,
        the XML cannot be parsed, or the element/attribute is missing.
    """
    tag_by_type = {'img': 'img', 'video': 'videomsg'}
    tag = tag_by_type.get(type_)
    if tag is None:
        return None
    try:
        # Parse the XML.
        root = ET.fromstring(content)
    except ET.ParseError:
        return None
    # Extract the md5 value; ``find`` returns None when no element matches.
    node = root.find(".//" + tag)
    if node is None:
        return None
    return node.get("md5")


class tencent_struct:
    """Cursor-based reader for the binary ``BytesExtra`` structure.

    A schema is a dict ``{field_number: (field_name, op)}`` where ``op`` is
    either a key of ``__contenttype__`` (``"s"`` string, ``"I"`` varint,
    ``"P"`` raw bytes) or a nested schema dict for a length-prefixed
    sub-structure.
    NOTE(review): the layout looks protobuf-like (varint key, field number in
    the bits above the low three) - confirm against the producer.
    """

    def __init__(self, data=None, off=0):
        """Start reading ``data`` at offset ``off``; no data means size 0."""
        self.__data = data
        self.__off = off
        self.__size = len(self.__data) if self.__data else 0

    def __setVals__(self, data, off):
        """Reset the reader to ``data`` (kept unchanged if falsy) and ``off``."""
        if data:
            self.__data = data
        if self.__data:
            self.__size = len(self.__data)
        self.__off = off

    def __add(self, value=1):
        """Advance the offset by ``value``.

        Raises:
            ValueError: if the new offset lies beyond the end of the buffer.
        """
        self.__off += value
        if self.__off > self.__size:
            # The original raised a bare string, which is itself a TypeError
            # in Python 3; raise a real exception carrying the same message.
            raise ValueError("偏移量超出size")

    def __readString(self):
        """Read a length-prefixed byte run and decode it as UTF-8."""
        length = self.__readUleb()
        res = self.__data[self.__off: self.__off + length]
        self.__add(length)
        return res.decode("utf-8")

    def __readUleb(self):
        """Read a little-endian base-128 varint of at most five bytes.

        Each byte contributes seven payload bits; a set high bit means another
        byte follows. The continuation bit of byte k lands at bit 7*(k+1) of
        the accumulator, which is what each nested test checks before masking
        it away and merging the next byte.
        """
        i = self.__data[self.__off]
        self.__add()
        if i & 0x80:
            j = self.__data[self.__off]
            i = i & 0x7F
            i = i | (j << 7)
            self.__add()
            if i & 0x4000:
                j = self.__data[self.__off]
                i = i & 0x3FFF
                i = i | (j << 14)
                self.__add()
                if i & 0x200000:
                    j = self.__data[self.__off]
                    i = i & 0x1FFFFF
                    i = i | (j << 21)
                    self.__add()
                    if i & 0x10000000:
                        j = self.__data[self.__off]
                        i = i & 0xFFFFFFF
                        i = i | (j << 28)
                        self.__add()
        return i

    def __readData(self):
        """Read a length-prefixed run of raw bytes."""
        length = self.__readUleb()
        data = self.__data[self.__off: self.__off + length]
        self.__add(length)
        return data

    def readStruct(self, struct_type):
        """Decode fields from the current offset according to a schema.

        Args:
            struct_type: a schema dict, or the name of a schema attribute on
                this object (e.g. ``"__bytesExtra__"``).

        Returns:
            ``{field_number: value}``. For nested schemas the value is a list
            of ``(field_name, decoded_dict)`` tuples (one per occurrence); for
            scalar ops it is a single ``(field_name, decoded_value)`` tuple.
            Decoding stops at the end of the buffer, at key 0, at a field
            number missing from the schema, or at an empty op.
        """
        if isinstance(struct_type, str):
            current_dict = getattr(self, struct_type)
        else:
            current_dict = struct_type

        res = {}
        while self.__off < self.__size:
            # The low three bits of the key are discarded; the rest is the
            # field number.
            key = self.__readUleb() >> 3
            if key == 0 or key not in current_dict:
                break
            fieldName, op = current_dict[key]
            if isinstance(op, dict):
                # Nested structure: decode its payload with a fresh reader.
                if key not in res:
                    res[key] = []
                current_struct = self.__readData()
                recursion = tencent_struct(current_struct)
                res[key].append((fieldName, recursion.readStruct(op)))
            elif op != "":
                res[key] = (fieldName, self.__contenttype__[op](self))
            else:
                break
        return res

    __struct1__ = {1: ("", "I"), 2: ("", "I")}
    __msgInfo__ = {1: ("", "I"), 2: ("msg_info", "s")}
    __bytesExtra__ = {
        1: ("", __struct1__),
        3: ("msg_info_struct", __msgInfo__),
    }

    def get_bytesExta_Content(self, data=None, off=0):
        """Decode ``data`` (starting at ``off``) with the ``__bytesExtra__`` schema."""
        self.__setVals__(data, off)
        return self.readStruct("__bytesExtra__")

    # Dispatch table from schema op code to the reader method implementing it.
    __contenttype__ = {
        "s": __readString,
        "I": __readUleb,
        "P": __readData,
    }


def parseBytes(content: bytes):
    """Decode a ``BytesExtra`` blob, returning ``None`` if decoding fails.

    Decoding is deliberately best-effort: a malformed blob must not break the
    caller, which can fall back to another lookup.
    """
    try:
        return tencent_struct().get_bytesExta_Content(content)
    except Exception:
        return None


def _attachment_path_from_bytes_extra(bytesExtra, thumb):
    """Return the attachment path embedded in ``bytesExtra``, if any.

    Scans the field-3 entries for one whose field 1 equals 3 (thumbnail) or
    4 (full file) and returns its field-2 string with the first
    backslash-separated component removed (``wxid\\FileStorage\\...`` ->
    ``FileStorage\\...``). Returns ``None`` when nothing matches or the blob
    could not be decoded.
    """
    parsed = parseBytes(bytesExtra)
    if not parsed:
        return None
    wanted = 3 if thumb else 4
    for _field_name, info in parsed.get(3, []):
        kind = info.get(1)
        path = info.get(2)
        if kind is None or path is None:
            continue
        if kind[1] == wanted:
            return "\\".join(path[1].split('\\')[1:])
    return None


def singleton(cls):
    """Class decorator: replace ``cls`` with a zero-argument factory that
    always returns the same lazily created instance."""
    _instance = {}

    def inner():
        if cls not in _instance:
            _instance[cls] = cls()
        return _instance[cls]

    return inner


@singleton
class HardLink:
    """Resolves md5 hashes to attachment paths via the HardLink databases."""

    def __init__(self):
        self.imageDB = None
        self.videoDB = None
        self.image_cursor = None
        self.video_cursor = None
        self.open_flag = False
        self.init_database()

    def init_database(self):
        """Open whichever database files exist, unless already marked open.

        ``open_flag`` becomes true as soon as either database is opened, so
        the other cursor may still be ``None`` afterwards.
        """
        if self.open_flag:
            return
        if os.path.exists(image_db_path):
            self.imageDB = sqlite3.connect(image_db_path, check_same_thread=False)
            # Create the cursor.
            self.image_cursor = self.imageDB.cursor()
            self.open_flag = True
            # NOTE(review): this releases the lock even when another thread
            # holds it; kept as in the original - confirm the intent.
            if image_db_lock.locked():
                image_db_lock.release()
        if os.path.exists(video_db_path):
            self.videoDB = sqlite3.connect(video_db_path, check_same_thread=False)
            # Create the cursor.
            self.video_cursor = self.videoDB.cursor()
            self.open_flag = True
            if video_db_lock.locked():
                video_db_lock.release()

    def get_image_by_md5(self, md5: bytes):
        """Fetch the image row for ``md5``.

        Returns:
            ``(Md5Hash, MD5, FileName, DirName1, DirName2)`` or ``None`` when
            ``md5`` is empty, the image database is not open, or no row
            matches.
        """
        if not md5:
            return None
        if not self.open_flag:
            return None
        sql = '''
            select Md5Hash,MD5,FileName,HardLinkImageID.Dir as DirName1,HardLinkImageID2.Dir as DirName2
            from HardLinkImageAttribute
            join HardLinkImageID on HardLinkImageAttribute.DirID1 = HardLinkImageID.DirID
            join HardLinkImageID as HardLinkImageID2 on HardLinkImageAttribute.DirID2 = HardLinkImageID2.DirID
            where MD5 = ?;
            '''
        with image_db_lock:
            # open_flag can be true with only the video database opened.
            if self.image_cursor is None:
                return None
            self.image_cursor.execute(sql, [md5])
            return self.image_cursor.fetchone()

    def get_video_by_md5(self, md5: bytes):
        """Fetch the video row for ``md5``.

        Returns:
            ``(Md5Hash, MD5, FileName, DirName2)`` or ``None`` when ``md5`` is
            empty, the video database is not open, or no row matches.
        """
        if not md5:
            return None
        if not self.open_flag:
            return None
        sql = '''
            select Md5Hash,MD5,FileName,HardLinkVideoID2.Dir as DirName2
            from HardLinkVideoAttribute
            join HardLinkVideoID as HardLinkVideoID2 on HardLinkVideoAttribute.DirID2 = HardLinkVideoID2.DirID
            where MD5 = ?;
            '''
        with video_db_lock:
            # open_flag can be true with only the image database opened.
            if self.video_cursor is None:
                return None
            self.video_cursor.execute(sql, [md5])
            return self.video_cursor.fetchone()

    def get_image(self, content, bytesExtra, thumb=False):
        """Return the relative path of an image attachment, or ``None``.

        The path embedded in ``bytesExtra`` wins; otherwise the md5 found in
        the ``content`` XML is looked up in the image database.

        Args:
            content: message XML containing an ``<img md5="...">`` element.
            bytesExtra: raw ``BytesExtra`` blob (may be ``None``).
            thumb: select the thumbnail instead of the full image.
        """
        embedded = _attachment_path_from_bytes_extra(bytesExtra, thumb)
        if embedded is not None:
            return embedded
        md5 = get_md5_from_xml(content)
        if not md5:
            return None
        try:
            md5_bytes = binascii.unhexlify(md5)
        except (binascii.Error, ValueError):
            # Not valid hex - nothing to look up.
            return None
        result = self.get_image_by_md5(md5_bytes)
        if not result:
            return None
        dir1 = result[3]
        dir2 = result[4]
        data_image = result[2]
        dir0 = 'Thumb' if thumb else 'Image'
        return os.path.join(root_path, dir1, dir0, dir2, data_image)

    def get_video(self, content, bytesExtra, thumb=False):
        """Return the relative path of a video attachment, or ``None``.

        The path embedded in ``bytesExtra`` wins; otherwise the md5 found in
        the ``content`` XML is looked up in the video database. With
        ``thumb`` the stored file name is cut at its first ``.`` and given a
        ``.jpg`` extension.

        Args:
            content: message XML containing a ``<videomsg md5="...">`` element.
            bytesExtra: raw ``BytesExtra`` blob (may be ``None``).
            thumb: select the thumbnail instead of the video file.
        """
        embedded = _attachment_path_from_bytes_extra(bytesExtra, thumb)
        if embedded is not None:
            return embedded
        md5 = get_md5_from_xml(content, type_='video')
        if not md5:
            return None
        try:
            md5_bytes = binascii.unhexlify(md5)
        except (binascii.Error, ValueError):
            # Not valid hex - nothing to look up.
            return None
        result = self.get_video_by_md5(md5_bytes)
        if not result:
            return None
        dir2 = result[3]
        file_name = result[2]
        if thumb:
            file_name = file_name.split('.')[0] + '.jpg'
        return os.path.join(video_root_path, dir2, file_name)

    def close(self):
        """Close the open database connections while holding both locks."""
        if not self.open_flag:
            return
        with image_db_lock, video_db_lock:
            self.open_flag = False
            # Either handle may be None when its database file was missing.
            for connection in (self.imageDB, self.videoDB):
                if connection is not None:
                    connection.close()

    def __del__(self):
        self.close()


# 6b02292eecea118f06be3a5b20075afc_t

if __name__ == '__main__':
    msg_root_path = './Msg/'
    image_db_path = "./Msg/HardLinkImage.db"
    video_db_path = "./Msg/HardLinkVideo.db"
    hard_link_db = HardLink()
    hard_link_db.init_database()
    # content = '''\n\t\n\t\n\t\n\n'''
    # print(hard_link_db.get_image(content))
    # print(hard_link_db.get_image(content, thumb=False))
    # result = get_md5_from_xml(content)
    # print(result)
    content = ''' '''
    # No BytesExtra blob is available here, so pass None and rely on the
    # md5 lookup.
    print(hard_link_db.get_video(content, None))
    print(hard_link_db.get_video(content, None, thumb=True))