WeChatMsg/wxManager/db_v4/hardlink.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
@Time        : 2024/12/8 17:30
@Author      : SiYuan
@Email       : 863909694@qq.com
@File        : MemoTrace-hardlink.py
@Description : Looks up image/video/file rows in the hardlink database by md5 and builds their relative paths
"""
import hashlib
import os
import traceback
from lxml import etree

from wxManager import Me
from wxManager.merge import increase_data
from wxManager.model.db_model import DataBaseBase
from wxManager.log import logger
from wxManager.model.message import Message
from wxManager.parser.util.protocbuf import file_info_pb2
from google.protobuf.json_format import MessageToJson, MessageToDict

# Directory prefixes (written with Windows separators) used as the first
# component of the relative paths built by HardLinkDB. Image paths built from
# image_root_path are joined onto Me().wx_dir when their existence is checked.
image_root_path = "msg\\attach\\"
video_root_path = "msg\\video\\"
file_root_path = "msg\\file\\"


def get_md5_from_xml(content, type_="img"):
    """
    Extract the ``md5`` attribute of an image or video message from its XML.

    @param content: message XML text; a literal ``null:`` prefix is tolerated
    @param type_: "img" reads the first ``<img>`` element, "video" reads the
                  first ``<videomsg>`` element; any other value is unsupported
    @return: the md5 attribute value, or None when content is empty, type_ is
             unsupported, the element/attribute is missing, or parsing fails
    """
    if not content:
        return None
    if type_ == "img":
        tag = "img"
    elif type_ == "video":
        tag = "videomsg"
    else:
        # Nothing to look up for other message types, so skip parsing entirely.
        return None
    try:
        # Remove the literal "null:" prefix. str.strip('null:') would instead
        # strip any of the characters n/u/l/: from BOTH ends of the text.
        content = content.strip()
        if content.startswith('null:'):
            content = content[len('null:'):]
        # Some messages carry the attribute length="0" twice; drop it so the
        # duplicated attribute does not break parsing.
        content = content.strip().replace(' length="0" ', ' ')
        parser = etree.XMLParser(recover=True)
        root = etree.fromstring(content, parser=parser)
        # A recovering parser can yield no root at all for unusable input.
        node = root.find(f".//{tag}") if root is not None else None
        if node is None:
            return None
        return node.get("md5")
    except Exception:
        logger.error(traceback.format_exc())
        logger.error(content)
        return None


class HardLinkDB(DataBaseBase):
    """
    Queries the hardlink tables (image/video/file ``*_hardlink_info_v3`` plus
    ``dir2id``) and turns the rows into relative attachment paths.

    Relies on ``self.DB``, ``self.cursor`` and ``self.commit()`` being provided
    by the base class.
    """

    def get_image_path(self):
        """Placeholder; intentionally does nothing."""
        pass

    def create_index(self):
        """
        Create the md5 index on each of the three hardlink tables if missing.

        Best effort: a failure on one table is logged and the remaining tables
        are still attempted; nothing is raised to the caller.
        """
        if getattr(self, 'DB', None) is None:
            # No open connection: there is nothing to index.
            return
        for table in ('image_hardlink_info_v3', 'video_hardlink_info_v3', 'file_hardlink_info_v3'):
            sql = f"CREATE INDEX IF NOT EXISTS {table}_md5 ON {table}(md5);"
            try:
                cursor = self.DB.cursor()
                try:
                    cursor.execute(sql)
                    self.commit()
                finally:
                    # Release the cursor even when execute/commit fails.
                    cursor.close()
            except Exception as exc:
                logger.error(f'create index on {table} failed: {exc}')

    def _hardlink_first_row(self, sql, md5):
        """Run ``sql`` with ``md5`` as its only parameter; return the first row or None."""
        cursor = self.DB.cursor()
        try:
            cursor.execute(sql, [md5])
            return cursor.fetchone()
        finally:
            cursor.close()

    @staticmethod
    def _hardlink_dir3(extra_buffer):
        """Decode ``extra_buffer`` as FileInfoData and return its ``dir3`` value ('' if absent)."""
        if not extra_buffer:
            # NULL/empty blob: treat it like a message without dir3.
            return ''
        info = file_info_pb2.FileInfoData()
        info.ParseFromString(extra_buffer)
        return MessageToDict(info).get('dir3', '')

    @staticmethod
    def _image_dat_path(message, talker_username, suffix):
        """
        Build ``<image root>/<md5(talker)>/<YYYY-MM>/Img/<stem><suffix>.dat``.

        The stem is ``message.file_name`` when set, otherwise
        ``<local_id>_<timestamp>``.
        """
        dir1 = hashlib.md5(talker_username.encode('utf-8')).hexdigest()
        dir2 = message.str_time[:7]  # e.g. 2024-12
        if message.file_name:
            stem = message.file_name
        else:
            stem = f'{message.local_id}_{message.timestamp}'
        return os.path.join(image_root_path, dir1, dir2, "Img", f'{stem}{suffix}.dat')

    def get_image_by_md5(self, md5: str):
        """
        @param md5: md5 of the image
        @return: first matching row (file_size, type, file_name, dir1 name,
                 dir2 name, _rowid_, modify_time, extra_buffer) or None
        """
        sql = '''
        select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
        from image_hardlink_info_v3
        join dir2id on dir2id.rowid = dir1
        join dir2id as dir2id2 on dir2id2.rowid=dir2
        where md5=?
        '''
        return self._hardlink_first_row(sql, md5)

    def get_video_by_md5(self, md5: str):
        """
        @param md5: md5 of the video
        @return: first matching row in the same column order as
                 get_image_by_md5, or None; the dir2 name is NULL when dir2 is 0
        """
        sql = '''
        SELECT file_size, type, file_name, dir2id.username, dir2id2.username, _rowid_, modify_time, extra_buffer
        FROM video_hardlink_info_v3
        JOIN dir2id ON dir2id.rowid = dir1
        LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
        WHERE md5 = ?
        '''
        return self._hardlink_first_row(sql, md5)

    def get_file_by_md5(self, md5: str):
        """
        @param md5: md5 of the file
        @return: first matching row in the same column order as
                 get_image_by_md5, or None; the dir2 name is NULL when dir2 is 0
        """
        sql = '''
        select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
        from file_hardlink_info_v3
        join dir2id on dir2id.rowid = dir1
        LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
        where md5=?
        '''
        return self._hardlink_first_row(sql, md5)

    def get_video(self, md5, thumb=False):
        """
        @param md5: md5 of the video
        @param thumb: return the ``<name>_thumb.jpg`` path instead of the video
                      itself (ignored for rows of type 5)
        @return: relative path of the video, or '' when the md5 is unknown
        """
        video_info = self.get_video_by_md5(md5)
        if not video_info:
            return ''
        type_, file_name, dir1, dir2 = video_info[1], video_info[2], video_info[3], video_info[4]
        if type_ == 5:
            dir3 = self._hardlink_dir3(video_info[7])
            return os.path.join(video_root_path, dir1, dir2, 'Rec', dir3, 'V', file_name)
        if thumb:
            file_name = file_name.split('.')[0] + '_thumb.jpg'
        return os.path.join(video_root_path, dir1, file_name)

    def get_image_thumb(self, message: Message, talker_username):
        """
        @param message: image message (str_time, file_name, local_id, timestamp are read)
        @param talker_username: wxid of the chat partner
        @return: relative path of the ``_t.dat`` thumbnail
        """
        return self._image_dat_path(message, talker_username, '_t')

    def get_image_by_time(self, message: Message, talker_username):
        """
        @param message: image message (str_time, file_name, local_id, timestamp are read)
        @param talker_username: wxid of the chat partner
        @return: relative path of the ``_W.dat`` variant when that file exists
                 under Me().wx_dir, otherwise of the plain ``.dat`` file
                 (which is not checked for existence)
        """
        preferred = self._image_dat_path(message, talker_username, '_W')
        if os.path.exists(os.path.join(Me().wx_dir, preferred)):
            return preferred
        return self._image_dat_path(message, talker_username, '')

    def get_image(self, content, message, up_dir="", md5=None, thumb=False, talker_username='') -> str:
        """
        @param content: image xml, only parsed when md5 is not supplied
        @param message: image message
        @param up_dir: unused
        @param md5: md5 of the image
        @param thumb: whether the thumbnail is wanted
        @param talker_username: wxid of the chat partner
        @return: relative path of the image
        """
        self.create_index()
        if thumb:
            return self.get_image_thumb(message, talker_username)

        # Fast path: the file derived from the message itself is on disk.
        by_time = self.get_image_by_time(message, talker_username)
        if os.path.exists(os.path.join(Me().wx_dir, by_time)):
            return by_time

        if not md5:
            md5 = get_md5_from_xml(content)
        if not md5:
            # Without an md5 the by-time path is the only candidate.
            return by_time

        imginfo = self.get_image_by_md5(md5)
        if not imginfo:
            # Unknown md5: fall back to the thumbnail path.
            return self.get_image_thumb(message, talker_username)

        type_, file_name, dir1, dir2 = imginfo[1], imginfo[2], imginfo[3], imginfo[4]
        if type_ == 4:
            dir3 = self._hardlink_dir3(imginfo[7])
            return os.path.join(image_root_path, dir1, dir2, 'Rec', dir3, 'Img', file_name)
        return os.path.join(image_root_path, dir1, dir2, "Img", file_name)

    def get_file(self, md5):
        """
        @param md5: md5 of the file
        @return: relative path of the file, or '' when the md5 is unknown
        """
        file_info = self.get_file_by_md5(md5)
        if not file_info:
            return ''
        type_, file_name, dir1, dir2 = file_info[1], file_info[2], file_info[3], file_info[4]
        if type_ == 6:
            dir3 = self._hardlink_dir3(file_info[7])
            # NOTE(review): this branch uses image_root_path and, unlike the
            # image/video type 4/5 branches, no 'Rec' segment - confirm this
            # is the intended layout.
            return os.path.join(image_root_path, dir1, dir2, dir3, file_name)
        return os.path.join(file_root_path, dir1, file_name)

    def merge(self, db_path):
        """
        Merge the hardlink tables of another database file into this one.

        @param db_path: path of the database file to merge from
        @return: None; on any error the traceback is printed and the
                 transaction is rolled back
        """
        # isfile() already implies existence; it also rejects directories.
        if not os.path.isfile(db_path):
            print(f'{db_path} 不存在')
            return
        try:
            for table in ('file_hardlink_info_v3', 'image_hardlink_info_v3', 'video_hardlink_info_v3'):
                increase_data(db_path, self.cursor, self.DB, table, 'md5', exclude_column='_rowid_')
            increase_data(db_path, self.cursor, self.DB, 'dir2id', 'username')
        except Exception:
            print(f"数据库操作错误: {traceback.format_exc()}")
            self.DB.rollback()


# Running this module directly does nothing; the guard is a placeholder.
if __name__ == '__main__':
    pass