# WeChatMsg/wxManager/db_v4/hardlink.py
# 278 lines, 9.9 KiB, Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/12/8 17:30
@Author : SiYuan
@Email : 863909694@qq.com
@File : MemoTrace-hardlink.py
@Description :
"""
import hashlib
import os
import traceback
from lxml import etree
from wxManager import Me
from wxManager.merge import increase_data
from wxManager.model.db_model import DataBaseBase
from wxManager.log import logger
from wxManager.model.message import Message
from wxManager.parser.util.protocbuf import file_info_pb2
from google.protobuf.json_format import MessageToJson, MessageToDict
# Relative directory prefixes used as the first component of every path built in this module.
# They are written with Windows-style backslash separators and end with a separator.
# Image paths built from image_root_path are joined onto Me().wx_dir before existence checks.
image_root_path = "msg\\attach\\"
# Prefix for video paths — presumably relative to the same data directory; TODO confirm.
video_root_path = "msg\\video\\"
# Prefix for file-attachment paths — presumably relative to the same data directory; TODO confirm.
file_root_path = "msg\\file\\"
def get_md5_from_xml(content, type_="img"):
    """Extract the ``md5`` attribute from a message XML payload.

    Args:
        content: XML text of the message. Surrounding whitespace and an
            optional leading ``null:`` marker are removed before parsing.
        type_: ``"img"`` reads the first ``<img>`` descendant, ``"video"``
            reads the first ``<videomsg>`` descendant. Any other value
            yields ``None``.

    Returns:
        The value of the ``md5`` attribute, or ``None`` when ``content`` is
        empty, ``type_`` is unknown, the element or attribute is missing, or
        the XML cannot be parsed (the failure is logged).
    """
    if not content:
        return None
    tag = {"img": "img", "video": "videomsg"}.get(type_)
    if tag is None:
        # Unknown type: nothing to look up, so skip parsing altogether.
        return None
    try:
        content = content.strip()
        # Remove the literal "null:" marker. str.strip('null:') would instead
        # strip any of the characters n/u/l/: from BOTH ends of the text.
        if content.startswith('null:'):
            content = content[len('null:'):]
        # Some payloads carry the attribute length="0" twice, which is invalid
        # XML; dropping the space-delimited occurrence lets the parser proceed.
        content = content.strip().replace(' length="0" ', ' ')
        # recover=True lets lxml tolerate otherwise malformed markup.
        parser = etree.XMLParser(recover=True)
        root = etree.fromstring(content, parser=parser)
        if root is None:
            return None
        element = root.find(f".//{tag}")
        if element is None:
            return None
        return element.get("md5")
    except Exception:
        logger.error(traceback.format_exc())
        logger.error(content)
        return None
class HardLinkDB(DataBaseBase):
    """Resolve relative on-disk paths of images, videos and files.

    Rows are looked up by md5 in the ``image/video/file_hardlink_info_v3``
    tables; their ``dir1``/``dir2`` ids are mapped to directory names through
    the ``dir2id`` table. Every returned path is relative and starts with one
    of the module-level ``*_root_path`` prefixes.
    """

    # Tables that are queried by md5 and therefore get an md5 index.
    _MD5_INDEXED_TABLES = (
        'image_hardlink_info_v3',
        'video_hardlink_info_v3',
        'file_hardlink_info_v3',
    )

    def get_image_path(self):
        """Placeholder; currently does nothing and returns ``None``."""
        pass

    def create_index(self):
        """Create the md5 indexes on the hardlink tables (best effort).

        Each index is created and committed independently, so a failure on
        one table (for example because it does not exist) does not prevent
        the others from being indexed. Failures never propagate to the caller.
        """
        for table in self._MD5_INDEXED_TABLES:
            sql = f"CREATE INDEX IF NOT EXISTS {table}_md5 ON {table}(md5);"
            try:
                cursor = self.DB.cursor()
                try:
                    cursor.execute(sql)
                    self.commit()
                finally:
                    # Release the cursor even when execute/commit fails.
                    cursor.close()
            except Exception:
                # An index only speeds lookups up, so a failure is recorded
                # quietly instead of being raised.
                logger.debug(f"create index on {table} failed: {traceback.format_exc()}")

    def _fetch_first(self, sql, params):
        """Run ``sql`` with ``params`` and return the first row, or ``None``."""
        cursor = self.DB.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()

    @staticmethod
    def _dir3_from_extra_buffer(extra_buffer):
        """Decode ``extra_buffer`` as ``FileInfoData`` and return its ``dir3`` value.

        Returns ``''`` when the buffer is empty/NULL or carries no ``dir3``.
        """
        if not extra_buffer:
            return ''
        info = file_info_pb2.FileInfoData()
        info.ParseFromString(extra_buffer)
        return MessageToDict(info).get('dir3', '')

    def get_image_by_md5(self, md5: str):
        """Return the first ``image_hardlink_info_v3`` row matching ``md5``.

        The row is ``(file_size, type, file_name, dir1 name, dir2 name,
        _rowid_, modify_time, extra_buffer)``; ``None`` when nothing matches.
        Both directory joins are inner joins, so rows whose ``dir1`` or
        ``dir2`` has no ``dir2id`` entry are not returned.
        """
        sql = '''
            select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
            from image_hardlink_info_v3
            join dir2id on dir2id.rowid = dir1
            join dir2id as dir2id2 on dir2id2.rowid=dir2
            where md5=?
            '''
        return self._fetch_first(sql, [md5])

    def get_video_by_md5(self, md5: str):
        """Return the first ``video_hardlink_info_v3`` row matching ``md5``.

        Same column layout as :meth:`get_image_by_md5`. The second directory
        is LEFT JOINed, so its name is ``None`` when ``dir2`` is 0 or unmapped.
        """
        sql = '''
            SELECT file_size, type, file_name, dir2id.username, dir2id2.username, _rowid_, modify_time, extra_buffer
            FROM video_hardlink_info_v3
            JOIN dir2id ON dir2id.rowid = dir1
            LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
            WHERE md5 = ?
            '''
        return self._fetch_first(sql, [md5])

    def get_file_by_md5(self, md5: str):
        """Return the first ``file_hardlink_info_v3`` row matching ``md5``.

        Same column layout as :meth:`get_image_by_md5`. The second directory
        is LEFT JOINed, so its name is ``None`` when ``dir2`` is 0 or unmapped.
        """
        sql = '''
            select file_size,type,file_name,dir2id.username,dir2id2.username,_rowid_,modify_time,extra_buffer
            from file_hardlink_info_v3
            join dir2id on dir2id.rowid = dir1
            LEFT JOIN dir2id AS dir2id2 ON dir2id2.rowid = dir2 AND dir2 != 0
            where md5=?
            '''
        return self._fetch_first(sql, [md5])

    def get_video(self, md5, thumb=False):
        """Return the relative path of the video identified by ``md5``.

        @param md5: md5 of the video
        @param thumb: return the ``<name>_thumb.jpg`` thumbnail instead of the
            video itself; ignored for rows of type 5
        @return: relative path, or ``''`` when the md5 is unknown
        """
        video_info = self.get_video_by_md5(md5)
        if not video_info:
            return ''
        file_name = video_info[2]
        dir1 = video_info[3]
        if video_info[1] == 5:
            # Type 5 rows live under <dir1>/<dir2>/Rec/<dir3>/V.
            # dir2 comes from a LEFT JOIN and may be NULL; os.path.join would
            # raise TypeError on None, so treat it as "no such directory level".
            dir2 = video_info[4] or ''
            dir3 = self._dir3_from_extra_buffer(video_info[7])
            return os.path.join(video_root_path, dir1, dir2, 'Rec', dir3, 'V', file_name)
        if thumb:
            file_name = file_name.split('.')[0] + '_thumb.jpg'
        return os.path.join(video_root_path, dir1, file_name)

    @staticmethod
    def _image_dir(message, talker_username):
        """Return ``<image root>/<md5(talker_username)>/<YYYY-MM>/Img`` for ``message``."""
        dir1 = hashlib.md5(talker_username.encode('utf-8')).hexdigest()
        dir2 = message.str_time[:7]  # e.g. 2024-12
        return os.path.join(image_root_path, dir1, dir2, "Img")

    @staticmethod
    def _image_stem(message):
        """Return the .dat base name: ``file_name`` if set, else ``<local_id>_<timestamp>``."""
        if message.file_name:
            return message.file_name
        return f'{message.local_id}_{message.timestamp}'

    def get_image_thumb(self, message: Message, talker_username):
        """Return the relative path of the thumbnail (``*_t.dat``) of an image message.

        @param message: the image message
        @param talker_username: wxid of the chat partner
        @return: relative path; its existence on disk is not checked
        """
        data_image = f'{self._image_stem(message)}_t.dat'
        return os.path.join(self._image_dir(message, talker_username), data_image)

    def get_image_by_time(self, message: Message, talker_username):
        """Return the relative path of an image derived from the message itself.

        The ``*_W.dat`` variant is returned when it exists under
        ``Me().wx_dir``; otherwise the plain ``*.dat`` name is returned without
        checking that it exists.

        @param message: the image message
        @param talker_username: wxid of the chat partner
        @return: relative path
        """
        folder = self._image_dir(message, talker_username)
        stem = self._image_stem(message)
        preferred = os.path.join(folder, f'{stem}_W.dat')
        if os.path.exists(os.path.join(Me().wx_dir, preferred)):
            return preferred
        return os.path.join(folder, f'{stem}.dat')

    def get_image(self, content, message, up_dir="", md5=None, thumb=False, talker_username='') -> str:
        """Return the relative path of the image belonging to ``message``.

        Resolution order for the full image: the time-derived path if that
        file exists, then the hardlink row for the md5, then the thumbnail
        path when the md5 has no row, and finally the time-derived path when
        no md5 is available at all.

        @param content: image xml; only parsed when ``md5`` is not supplied
        @param message: the image message
        @param up_dir: unused, kept for interface compatibility
        @param md5: md5 of the image
        @param thumb: whether the thumbnail is requested
        @param talker_username: wxid of the chat partner
        @return: relative path
        """
        # Kept at the top so every call still makes sure the md5 indexes exist.
        self.create_index()
        if thumb:
            return self.get_image_thumb(message, talker_username)

        by_time = self.get_image_by_time(message, talker_username)
        if os.path.exists(os.path.join(Me().wx_dir, by_time)):
            return by_time

        if not md5:
            md5 = get_md5_from_xml(content)
        if not md5:
            return by_time

        imginfo = self.get_image_by_md5(md5)
        if not imginfo:
            return self.get_image_thumb(message, talker_username)

        file_name = imginfo[2]
        dir1 = imginfo[3]
        dir2 = imginfo[4]
        if imginfo[1] == 4:
            # Type 4 rows live under <dir1>/<dir2>/Rec/<dir3>/Img.
            dir3 = self._dir3_from_extra_buffer(imginfo[7])
            return os.path.join(image_root_path, dir1, dir2, 'Rec', dir3, 'Img', file_name)
        return os.path.join(image_root_path, dir1, dir2, "Img", file_name)

    def get_file(self, md5):
        """Return the relative path of the file attachment identified by ``md5``.

        @param md5: md5 of the file
        @return: relative path, or ``''`` when the md5 is unknown
        """
        file_info = self.get_file_by_md5(md5)
        if not file_info:
            return ''
        file_name = file_info[2]
        dir1 = file_info[3]
        if file_info[1] == 6:
            # dir2 comes from a LEFT JOIN and may be NULL; os.path.join would
            # raise TypeError on None, so treat it as "no such directory level".
            dir2 = file_info[4] or ''
            dir3 = self._dir3_from_extra_buffer(file_info[7])
            # NOTE(review): this branch is rooted at image_root_path rather than
            # file_root_path and has no fixed sub-folder, unlike the image and
            # video counterparts — kept as-is; confirm against the real layout.
            return os.path.join(image_root_path, dir1, dir2, dir3, file_name)
        return os.path.join(file_root_path, dir1, file_name)

    def merge(self, db_path):
        """Merge the hardlink tables of the database at ``db_path`` into this one.

        Hardlink tables are merged on ``md5`` (without ``_rowid_``) and
        ``dir2id`` on ``username``. On any error the traceback is printed and
        the current transaction is rolled back.

        @param db_path: path of the database file to merge from
        """
        # isfile() also rejects directories, which the previous
        # "exists or isfile" test let through.
        if not os.path.isfile(db_path):
            print(f'{db_path} 不存在')
            return
        try:
            for table in ('file_hardlink_info_v3', 'image_hardlink_info_v3', 'video_hardlink_info_v3'):
                increase_data(db_path, self.cursor, self.DB, table, 'md5', exclude_column='_rowid_')
            increase_data(db_path, self.cursor, self.DB, 'dir2id', 'username')
        except Exception:
            print(f"数据库操作错误: {traceback.format_exc()}")
            self.DB.rollback()
if __name__ == '__main__':
    # Nothing is executed when the module is run as a script.
    pass