From 5689de2ca622e6d0c1847d36d4365b6a59313cc9 Mon Sep 17 00:00:00 2001 From: SiYuan <863909694@qq.com> Date: Wed, 2 Apr 2025 12:35:17 +0800 Subject: [PATCH] =?UTF-8?q?=E9=80=82=E9=85=8D=E5=BE=AE=E4=BF=A14.0.3?= =?UTF-8?q?=E6=AD=A3=E5=BC=8F=E7=89=88=EF=BC=8C=E4=BF=AE=E5=A4=8D=E5=90=88?= =?UTF-8?q?=E5=B9=B6=E6=95=B0=E6=8D=AE=E5=BA=93=E7=9A=84=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- example/1-decrypt.py | 6 ++ wxManager/db_v3/audio2text.py | 66 +++++++++++++++++++ wxManager/db_v3/emotion.py | 8 +-- wxManager/db_v3/msg.py | 2 +- wxManager/db_v3/open_im_msg.py | 5 +- wxManager/db_v3/public_msg.py | 2 +- wxManager/db_v3/sns.py | 14 +--- wxManager/db_v4/audio2text.py | 66 +++++++++++++++++++ wxManager/db_v4/biz_message.py | 11 +++- wxManager/db_v4/hardlink.py | 6 +- wxManager/db_v4/message.py | 12 +++- wxManager/db_v4/session.py | 7 -- wxManager/decrypt/decrypt_dat.py | 39 +++++++---- wxManager/decrypt/decrypt_v3.py | 13 +++- wxManager/decrypt/decrypt_v4.py | 14 +++- wxManager/decrypt/version_list.json | 21 ++++++ wxManager/decrypt/wx_info_v4.py | 4 +- wxManager/manager_v3.py | 27 ++++---- wxManager/manager_v4.py | 17 +++-- wxManager/merge.py | 33 ++++++---- wxManager/model/db_model.py | 11 ++-- wxManager/parser/audio_parser.py | 6 +- .../protocbuf/packed_info_data_img2.proto | 13 ++++ .../protocbuf/packed_info_data_img2_pb2.py | 27 ++++++++ wxManager/parser/wechat_v3.py | 3 + wxManager/parser/wechat_v4.py | 30 +++++++-- 26 files changed, 355 insertions(+), 108 deletions(-) create mode 100644 wxManager/db_v3/audio2text.py create mode 100644 wxManager/db_v4/audio2text.py create mode 100644 wxManager/parser/util/protocbuf/packed_info_data_img2.proto create mode 100644 wxManager/parser/util/protocbuf/packed_info_data_img2_pb2.py diff --git a/example/1-decrypt.py b/example/1-decrypt.py index 0db49df..771763a 100644 --- a/example/1-decrypt.py +++ b/example/1-decrypt.py @@ -36,6 +36,9 @@ def dump_v3(): info_data = me.to_json() output_dir = wx_info.wxid key = wx_info.key + if not key: + print('error! 未找到key,请重启微信后再试') + continue wx_dir = wx_info.wx_dir decrypt_v3.decrypt_db_files(key, src_dir=wx_dir, dest_dir=output_dir) # 导出的数据库在 output_dir/Msg 文件夹下,后面会用到 @@ -59,6 +62,9 @@ def dump_v4(): info_data = me.to_json() output_dir = wx_info.wxid # 数据库输出文件夹 key = wx_info.key + if not key: + print('error! 未找到key,请重启微信后再试') + continue wx_dir = wx_info.wx_dir decrypt_v4.decrypt_db_files(key, src_dir=wx_dir, dest_dir=output_dir) # 导出的数据库在 output_dir/db_storage 文件夹下,后面会用到 diff --git a/wxManager/db_v3/audio2text.py b/wxManager/db_v3/audio2text.py new file mode 100644 index 0000000..3c8a5fb --- /dev/null +++ b/wxManager/db_v3/audio2text.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@Time : 2025/4/1 20:31 +@Author : SiYuan +@Email : 863909694@qq.com +@File : wxManager-audio2text.py +@Description : +""" + +import os +import sqlite3 +import traceback + +from wxManager.merge import increase_update_data, increase_data +from wxManager.model.db_model import DataBaseBase + + +class Audio2TextDB(DataBaseBase): + def create(self): + sql = ''' + CREATE TABLE IF NOT EXISTS Audio2Text ( + ID INTEGER PRIMARY KEY, + msgSvrId INTEGER UNIQUE, + Text TEXT NOT NULL + ); + ''' + cursor = self.DB.cursor() + cursor.execute(sql) + # 创建索引 + cursor.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_msg_id ON Audio2Text (msgSvrId);''') + self.commit() + + def get_audio_text(self, server_id): + sql = '''select text from Audio2Text where msgSvrId=?''' + cursor = self.DB.cursor() + cursor.execute(sql, [server_id]) + result = cursor.fetchone() + if result: + return result[0] + else: + return '' + + def add_text(self, server_id, text): + try: + cursor = self.DB.cursor() + sql = '''INSERT INTO Audio2Text (msgSvrId, Text) VALUES (?, ?)''' + cursor.execute(sql, [server_id, text]) + self.commit() + return True + except sqlite3.IntegrityError: + return False + except: + return False + + def merge(self, db_path): + if not (os.path.exists(db_path) or os.path.isfile(db_path)): + print(f'{db_path} 不存在') + return + try: + # 获取列名 + increase_data(db_path, self.cursor, self.DB, 'Audio2Text', 'msgSvrId') + except: + print(f"数据库操作错误: {traceback.format_exc()}") + self.DB.rollback() diff --git a/wxManager/db_v3/emotion.py b/wxManager/db_v3/emotion.py index 6d6e05f..58537d3 100644 --- a/wxManager/db_v3/emotion.py +++ b/wxManager/db_v3/emotion.py @@ -126,10 +126,10 @@ class Emotion(DataBaseBase): cursor = self.DB.cursor() # 获取列名 increase_data(db_path, cursor, self.DB, 'CustomEmotion', 'MD5', 0) - increase_data(db_path, cursor, self.DB, 'EmotionDes1', 'MD5', 1, True) - increase_data(db_path, cursor, self.DB, 'EmotionItem', 'MD5', 1, True) - increase_data(db_path, cursor, self.DB, 'EmotionPackageItem', 'ProductId', 0, False) - increase_data(db_path, cursor, self.DB, 'EmotionOrderInfo', 'MD5', 0, False) + increase_data(db_path, cursor, self.DB, 'EmotionDes1', 'MD5', 1, 'localId') + increase_data(db_path, cursor, self.DB, 'EmotionItem', 'MD5', 1, 'localId') + increase_data(db_path, cursor, self.DB, 'EmotionPackageItem', 'ProductId', 0, 'localId') + increase_data(db_path, cursor, self.DB, 'EmotionOrderInfo', 'MD5', 0, 'localId') except: print(f"数据库操作错误: {traceback.format_exc()}") self.DB.rollback() diff --git a/wxManager/db_v3/msg.py b/wxManager/db_v3/msg.py index 577ac41..8bf28f6 100644 --- a/wxManager/db_v3/msg.py +++ b/wxManager/db_v3/msg.py @@ -277,7 +277,7 @@ class Msg(DataBaseBase): """ increase_data(db_path, cursor, db, 'Name2Id', 'UsrName') increase_update_data(db_path, cursor, db, 'DBInfo', 'tableIndex') - increase_data(db_path, cursor, db, 'MSG', 'MsgSvrID', exclude_first_column=True) + increase_data(db_path, cursor, db, 'MSG', 'MsgSvrID', exclude_column='localId') tasks = [] for i in range(100): diff --git a/wxManager/db_v3/open_im_msg.py b/wxManager/db_v3/open_im_msg.py index 9d7a888..a6e6e45 100644 --- a/wxManager/db_v3/open_im_msg.py +++ b/wxManager/db_v3/open_im_msg.py @@ -111,8 +111,7 @@ class OpenIMMsgDB(DataBaseBase): def get_messages_by_username(self, username: str, time_range: Tuple[int | float | str | date, int | float | str | date] = None, ): - result = self._get_messages_by_username(self.DB.cursor(), username, time_range) - return [result] + return self._get_messages_by_username(self.DB.cursor(), username, time_range) def get_message_by_server_id(self, username, server_id): """ @@ -141,7 +140,7 @@ class OpenIMMsgDB(DataBaseBase): return try: # 获取列名 - increase_update_data(db_path, self.cursor, self.DB, 'ChatCRMsg', 'MsgSvrID', 1, exclude_first_column=True) + increase_data(db_path, self.cursor, self.DB, 'ChatCRMsg', 'MsgSvrID', 1, exclude_column='localId') except: print(f"数据库操作错误: {traceback.format_exc()}") self.DB.rollback() \ No newline at end of file diff --git a/wxManager/db_v3/public_msg.py b/wxManager/db_v3/public_msg.py index b2fb969..a10d739 100644 --- a/wxManager/db_v3/public_msg.py +++ b/wxManager/db_v3/public_msg.py @@ -176,7 +176,7 @@ class PublicMsg(DataBaseBase): return try: # 获取列名 - increase_data(db_path, self.cursor, self.DB, 'PublicMsg', 'MsgSvrID', 1, exclude_first_column=True) + increase_data(db_path, self.cursor, self.DB, 'PublicMsg', 'MsgSvrID', 1, exclude_column='localId') except: print(f"数据库操作错误: {traceback.format_exc()}") self.DB.rollback() diff --git a/wxManager/db_v3/sns.py b/wxManager/db_v3/sns.py index d7385ab..ced5215 100644 --- a/wxManager/db_v3/sns.py +++ b/wxManager/db_v3/sns.py @@ -195,16 +195,4 @@ class Sns: return result def __del__(self): - self.close() - - -if __name__ == '__main__': - db_path = "./Msg1/Sns.db" - sns_db = Sns() - sns_db.init_database() - print(sns_db.get_sns_bg_url()) - feeds = sns_db.get_feeds_by_username('wxid_27hqbq7vx5hf22') - print(feeds) - for feed in feeds: - comment = sns_db.get_comment(feed[0]) - print(comment) + self.close() \ No newline at end of file diff --git a/wxManager/db_v4/audio2text.py b/wxManager/db_v4/audio2text.py new file mode 100644 index 0000000..64d1d83 --- /dev/null +++ b/wxManager/db_v4/audio2text.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +@Time : 2025/4/1 20:31 +@Author : SiYuan +@Email : 863909694@qq.com +@File : wxManager-audio2text.py +@Description : +""" + +import os +import sqlite3 +import traceback + +from wxManager.merge import increase_update_data, increase_data +from wxManager.model.db_model import DataBaseBase + + +class Audio2TextDB(DataBaseBase): + def create(self): + sql = ''' + CREATE TABLE IF NOT EXISTS Audio2Text ( + ID INTEGER PRIMARY KEY, + msgSvrId INTEGER UNIQUE, + Text TEXT NOT NULL + ); + ''' + cursor = self.DB.cursor() + cursor.execute(sql) + # 创建索引 + cursor.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_msg_id ON Audio2Text (msgSvrId);''') + self.commit() + + def get_audio_text(self, server_id): + sql = '''select text from Audio2Text where msgSvrId=?''' + cursor = self.DB.cursor() + cursor.execute(sql, [server_id]) + result = cursor.fetchone() + if result: + return result[0] + else: + return '' + + def add_text(self, server_id, text): + try: + cursor = self.DB.cursor() + sql = '''INSERT INTO Audio2Text (msgSvrId, Text) VALUES (?, ?)''' + cursor.execute(sql, [server_id, text]) + self.commit() + return True + except sqlite3.IntegrityError: + return False + except: + return False + + def merge(self, db_path): + if not (os.path.exists(db_path) or os.path.isfile(db_path)): + print(f'{db_path} 不存在') + return + try: + # 获取列名 + increase_data(db_path, self.cursor, self.DB, 'Audio2Text', 'msgSvrId') + except: + print(f"数据库操作错误: {traceback.format_exc()}") + self.DB.rollback() diff --git a/wxManager/db_v4/biz_message.py b/wxManager/db_v4/biz_message.py index c1f7643..b2ca817 100644 --- a/wxManager/db_v4/biz_message.py +++ b/wxManager/db_v4/biz_message.py @@ -12,6 +12,7 @@ import concurrent import hashlib import os import shutil +import sqlite3 import threading from concurrent.futures import ThreadPoolExecutor from datetime import date, datetime @@ -280,14 +281,18 @@ order by sort_seq """ increase_data(db_path, cursor, db, 'Name2Id', 'user_name') increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp') - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - result = cursor.fetchall() + tgt_conn = sqlite3.connect(db_path) + tgt_cur = tgt_conn.cursor() + tgt_cur.execute("SELECT name FROM sqlite_master WHERE type='table';") + result = tgt_cur.fetchall() + tgt_cur.close() + tgt_conn.close() # print(result) if result: for row in result: table_name = row[0] if table_name.startswith('Msg'): - increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True) + increase_data(db_path, cursor, db, table_name, 'server_id', exclude_column='local_id') tasks = [] for i in range(100): diff --git a/wxManager/db_v4/hardlink.py b/wxManager/db_v4/hardlink.py index 8ed84e2..d5b0abb 100644 --- a/wxManager/db_v4/hardlink.py +++ b/wxManager/db_v4/hardlink.py @@ -264,9 +264,9 @@ class HardLinkDB(DataBaseBase): return try: # 获取列名 - increase_data(db_path, self.cursor, self.DB, 'file_hardlink_info_v3', 'md5') - increase_data(db_path, self.cursor, self.DB, 'image_hardlink_info_v3', 'md5') - increase_data(db_path, self.cursor, self.DB, 'video_hardlink_info_v3', 'md5') + increase_data(db_path, self.cursor, self.DB, 'file_hardlink_info_v3', 'md5', exclude_column='_rowid_') + increase_data(db_path, self.cursor, self.DB, 'image_hardlink_info_v3', 'md5', exclude_column='_rowid_') + increase_data(db_path, self.cursor, self.DB, 'video_hardlink_info_v3', 'md5', exclude_column='_rowid_') increase_data(db_path, self.cursor, self.DB, 'dir2id', 'username') except: print(f"数据库操作错误: {traceback.format_exc()}") diff --git a/wxManager/db_v4/message.py b/wxManager/db_v4/message.py index 62af58f..d1068ff 100644 --- a/wxManager/db_v4/message.py +++ b/wxManager/db_v4/message.py @@ -12,6 +12,7 @@ import concurrent import hashlib import os import shutil +import sqlite3 import threading import traceback from concurrent.futures import ThreadPoolExecutor @@ -281,14 +282,19 @@ order by sort_seq """ increase_data(db_path, cursor, db, 'Name2Id', 'user_name') increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp') - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - result = cursor.fetchall() + tgt_conn = sqlite3.connect(db_path) + tgt_cur = tgt_conn.cursor() + tgt_cur.execute("SELECT name FROM sqlite_master WHERE type='table';") + result = tgt_cur.fetchall() + tgt_cur.close() + tgt_conn.close() + # print(result) if result: for row in result: table_name = row[0] if table_name.startswith('Msg'): - increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True) + increase_data(db_path, cursor, db, table_name, 'server_id', exclude_column='local_id') tasks = [] for i in range(100): diff --git a/wxManager/db_v4/session.py b/wxManager/db_v4/session.py index 0c77b96..22fb63f 100644 --- a/wxManager/db_v4/session.py +++ b/wxManager/db_v4/session.py @@ -42,10 +42,3 @@ order by sort_timestamp desc except: print(f"数据库操作错误: {traceback.format_exc()}") self.DB.rollback() - - -if __name__ == '__main__': - cd = SessionDB('session/session.db') - cd.init_database(r'E:\Project\Python\MemoTrace\app\DataBase\Msg\wxid_27hqbq7vx5hf22\db_storage') - r = cd.get_session() - print(r) diff --git a/wxManager/decrypt/decrypt_dat.py b/wxManager/decrypt/decrypt_dat.py index a4e82a9..cc2d131 100644 --- a/wxManager/decrypt/decrypt_dat.py +++ b/wxManager/decrypt/decrypt_dat.py @@ -6,7 +6,7 @@ @Author : SiYuan @Email : 863909694@qq.com @File : wxManager-decrypt_dat.py -@Description : +@Description : 微信4.0图片加密原理解析:https://blog.lc044.love/post/16 """ import os import struct @@ -26,6 +26,19 @@ pic_head = (0xff, 0xd8, 0x89, 0x50, 0x47, 0x49) decode_code = 0 decode_code_v4 = -1 +AES_KEY_MAP = { + b'\x07\x08V1\x08\x07': b'cfcd208495d565ef', # 4.0第一代图片密钥 + b'\x07\x08V2\x08\x07': b'43e7d25eb1b9bb64', # 4.0第二代图片密钥,微信4.0.3正式版使用 +} + + +def get_aes_key(header): + return AES_KEY_MAP.get(header[:6], b'') + + +def is_v4_image(header): + return header[:6] in AES_KEY_MAP + def get_code(dat_read): """ @@ -68,14 +81,14 @@ def decode_dat(xor_key: int, file_path, out_path, dst_name='') -> str | bytes: return '' # print(file_path,out_path,dst_name) with open(file_path, 'rb') as file_in: - data = file_in.read(0xf) - if data.startswith(b'\x07\x08V1\x08\x07'): + header = file_in.read(0xf) + if is_v4_image(header): # 微信4.0 return decode_dat_v4(xor_key, file_path, out_path, dst_name) with open(file_path, 'rb') as file_in: - data = file_in.read(2) - file_type, decode_code = get_code(data) + header = file_in.read(2) + file_type, decode_code = get_code(header) if decode_code == -1: return '' @@ -96,12 +109,12 @@ def decode_dat(xor_key: int, file_path, out_path, dst_name='') -> str | bytes: # 分块读取和写入 buffer_size = 1024 # 定义缓冲区大小 with open(file_outpath, 'wb') as file_out: - file_out.write(bytes([byte ^ decode_code for byte in data])) + file_out.write(bytes([byte ^ decode_code for byte in header])) while True: - data = file_in.read(buffer_size) - if not data: + header = file_in.read(buffer_size) + if not header: break - file_out.write(bytes([byte ^ decode_code for byte in data])) + file_out.write(bytes([byte ^ decode_code for byte in header])) # print(os.path.basename(file_outpath)) return file_outpath @@ -121,7 +134,7 @@ def get_decode_code_v4(wx_dir): src_file_path = os.path.join(root, file) with open(src_file_path, 'rb') as f: data = f.read() - if not data.startswith(b'\x07\x08V1\x08\x07'): + if not is_v4_image(data): continue file_tail = data[-2:] @@ -184,7 +197,7 @@ def decode_dat_v4(xor_key: int, file_path, out_path, dst_name='') -> str | bytes padding_length = 16 - (len(encrypted_data) % 16) encrypted_data += b'\x00' * padding_length - aes_key = b'cfcd208495d565ef' + aes_key = get_aes_key(header) # 初始化AES解密器(ECB模式) cipher = AES.new(aes_key, AES.MODE_ECB) @@ -236,7 +249,7 @@ async def decode_dat_v4_async(xor_key: int, file_path, out_path, dst_name='') -> encrypted_data = await f.read(encrypt_length0) res_data = await f.read() - aes_key = b'cfcd208495d565ef' + aes_key = get_aes_key(header) # 初始化AES解密器(ECB模式) cipher = AES.new(aes_key, AES.MODE_ECB) @@ -303,5 +316,5 @@ def batch_decode_image_multiprocessing(xor_key, file_infos: List[Tuple[str, str, if __name__ == '__main__': wx_dir = '' xor_key = get_decode_code_v4(wx_dir) - dat_file = "2_1730948126.dat" + dat_file = "1c5d8c0cf05d97869b0bc9fe16a8e3c2.dat" decode_dat_v4(xor_key, dat_file, '.', dst_name='解密后的图片') diff --git a/wxManager/decrypt/decrypt_v3.py b/wxManager/decrypt/decrypt_v3.py index 0453a86..508c5d8 100644 --- a/wxManager/decrypt/decrypt_v3.py +++ b/wxManager/decrypt/decrypt_v3.py @@ -17,6 +17,7 @@ import hmac import hashlib import os import traceback +from concurrent.futures import ProcessPoolExecutor from typing import Union, List from Crypto.Cipher import AES @@ -85,6 +86,11 @@ def decrypt_db_file_v3(key: str, db_path, out_path): return True, [db_path, out_path, key] +def decode_wrapper(tasks): + """用于包装解码函数的顶层定义""" + return decrypt_db_file_v3(*tasks) + + def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(src_dir): print(f"源文件夹 {src_dir} 不存在") @@ -92,7 +98,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(dest_dir): os.makedirs(dest_dir) # 如果目标文件夹不存在,创建它 - + decrypt_tasks = [] for root, dirs, files in os.walk(src_dir): for file in files: if file.endswith(".db"): @@ -108,4 +114,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(dest_sub_dir): os.makedirs(dest_sub_dir) print(dest_file_path) - decrypt_db_file_v3(key, src_file_path, dest_file_path) + decrypt_tasks.append((key, src_file_path, dest_file_path)) + # decrypt_db_file_v3(key, src_file_path, dest_file_path) + with ProcessPoolExecutor(max_workers=16) as executor: + results = list(executor.map(decode_wrapper, decrypt_tasks)) # 使用顶层定义的函数 diff --git a/wxManager/decrypt/decrypt_v4.py b/wxManager/decrypt/decrypt_v4.py index aa5505f..74bfdd2 100644 --- a/wxManager/decrypt/decrypt_v4.py +++ b/wxManager/decrypt/decrypt_v4.py @@ -1,6 +1,8 @@ import hmac import os import struct +from concurrent.futures import ProcessPoolExecutor + from Crypto.Cipher import AES from Crypto.Protocol.KDF import PBKDF2 from Crypto.Hash import SHA512 @@ -101,6 +103,11 @@ def decrypt_db_file_v4(pkey, in_db_path, out_db_path): return True +def decode_wrapper(tasks): + """用于包装解码函数的顶层定义""" + return decrypt_db_file_v4(*tasks) + + def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(src_dir): print(f"源文件夹 {src_dir} 不存在") @@ -108,7 +115,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(dest_dir): os.makedirs(dest_dir) # 如果目标文件夹不存在,创建它 - + decrypt_tasks = [] for root, dirs, files in os.walk(src_dir): for file in files: if file.endswith(".db"): @@ -124,4 +131,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str): if not os.path.exists(dest_sub_dir): os.makedirs(dest_sub_dir) print(dest_file_path) - decrypt_db_file_v4(key, src_file_path, dest_file_path) + decrypt_tasks.append((key, src_file_path, dest_file_path)) + # decrypt_db_file_v4(key, src_file_path, dest_file_path) + with ProcessPoolExecutor(max_workers=16) as executor: + results = list(executor.map(decode_wrapper, decrypt_tasks)) # 使用顶层定义的函数 diff --git a/wxManager/decrypt/version_list.json b/wxManager/decrypt/version_list.json index d09e60a..b5e125c 100644 --- a/wxManager/decrypt/version_list.json +++ b/wxManager/decrypt/version_list.json @@ -1139,5 +1139,26 @@ 94554984, 0, 94556448 + ], + "2.4.0.1": [ + 14907156, + 14907512, + 14907208, + 0, + 0 + ], + "3.7.5.19": [ + 37895672, + 37896480, + 37895552, + 0, + 0 + ], + "3.9.2.114": [ + 50370000, + 0, + 50369856, + 0, + 0 ] } \ No newline at end of file diff --git a/wxManager/decrypt/wx_info_v4.py b/wxManager/decrypt/wx_info_v4.py index 54242b3..f901f2d 100644 --- a/wxManager/decrypt/wx_info_v4.py +++ b/wxManager/decrypt/wx_info_v4.py @@ -6,7 +6,7 @@ @Author : SiYuan @Email : 863909694@qq.com @File : wxManager-wx_info_v4.py -@Description : +@Description : 部分思路参考:https://github.com/0xlane/wechat-dump-rs """ import ctypes @@ -330,7 +330,7 @@ def get_key_inner(pid, process_infos): keys = [] key_set = set() for pre_address in pre_addresses: - if any([base_address <= pre_address <= base_address + region_size - KEY_SIZE for base_address, region_size in + if True or any([base_address <= pre_address <= base_address + region_size - KEY_SIZE for base_address, region_size in process_infos]): key = read_bytes_from_pid(pid, pre_address, 32) if key not in key_set: diff --git a/wxManager/manager_v3.py b/wxManager/manager_v3.py index a077acd..5c4b04a 100644 --- a/wxManager/manager_v3.py +++ b/wxManager/manager_v3.py @@ -10,6 +10,7 @@ """ import concurrent import os +import re import traceback from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor from datetime import date @@ -19,6 +20,7 @@ import xmltodict from wxManager import MessageType from wxManager.db_main import DataBaseInterface +from wxManager.db_v3.audio2text import Audio2TextDB from wxManager.db_v3.hard_link_file import HardLinkFile from wxManager.db_v3.hard_link_image import HardLinkImage from wxManager.db_v3.hard_link_video import HardLinkVideo @@ -188,11 +190,7 @@ class DataBaseV3(DataBaseInterface): self.open_contact_db = OpenIMContactDB('OpenIMContact.db') self.open_media_db = OpenIMMediaDB('OpenIMMedia.db') self.open_msg_db = OpenIMMsgDB('OpenIMMsg.db') - # self.sns_db = Sns() - - # self.audio_to_text = Audio2TextDB() - # self.public_msg_db = PublicMsg() - # self.favorite_db = Favorite() + self.audio2text_db = Audio2TextDB('Audio2Text.db') def init_database(self, db_dir=''): # print('初始化数据库', db_dir) @@ -211,6 +209,8 @@ class DataBaseV3(DataBaseInterface): flag &= self.open_contact_db.init_database(db_dir) flag &= self.open_media_db.init_database(db_dir) flag &= self.open_msg_db.init_database(db_dir) + flag &= self.audio2text_db.init_database(db_dir) + self.audio2text_db.create() # 初始化数据转文字数据库 return flag # self.sns_db.init_database(db_dir) @@ -231,9 +231,7 @@ class DataBaseV3(DataBaseInterface): self.open_contact_db.close() self.open_media_db.close() self.open_msg_db.close() - # self.sns_db.close() - # self.audio_to_text.close() - # self.public_msg_db.close() + self.audio2text_db.close() def get_session(self): """ @@ -446,8 +444,10 @@ class DataBaseV3(DataBaseInterface): return self.media_msg_db.get_audio_path(reserved0, output_path, filename) def get_audio_text(self, msgSvrId): - return '' - return self.media_msg_db.get_audio_text(msgSvrId) + return self.audio2text_db.get_audio_text(msgSvrId) + + def add_audio_txt(self, msgSvrId, text): + return self.audio2text_db.add_text(msgSvrId, text) def update_audio_to_text(self): messages = self.get_messages_all() @@ -491,10 +491,8 @@ class DataBaseV3(DataBaseInterface): gender = '男' elif gender_code == 2: gender = '女' - type_ = contact_info_list[2] wxid = contact_info_list[0] - contact = Contact( wxid=contact_info_list[0], remark=remark, @@ -524,10 +522,10 @@ class DataBaseV3(DataBaseInterface): contact.type |= ContactType.Star if is_nth_bit_set(type_, 11): contact.type |= ContactType.Sticky - if type_ == 10086: contact.type = ContactType.Unknown contact.is_unknown = True + contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark) return contact def create_open_im_contact(self, contact_info_list) -> Person: @@ -560,6 +558,7 @@ class DataBaseV3(DataBaseInterface): ) contact.type = ContactType.Normal contact.type |= ContactType.OpenIM + contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark) return contact def get_contacts(self) -> List[Person]: @@ -660,8 +659,6 @@ class DataBaseV3(DataBaseInterface): return chatroom_name.rstrip('、') # 联系人结束 - def add_audio_txt(self, msgSvrId, text): - return self.audio_to_text.add_text(msgSvrId, text) def get_favorite_items(self, time_range): return self.favorite_db.get_items(time_range) diff --git a/wxManager/manager_v4.py b/wxManager/manager_v4.py index 5acec81..8aa5214 100644 --- a/wxManager/manager_v4.py +++ b/wxManager/manager_v4.py @@ -10,6 +10,7 @@ """ import concurrent import os +import re from concurrent.futures import ProcessPoolExecutor, as_completed, ThreadPoolExecutor from datetime import date, datetime from multiprocessing import Pool, cpu_count @@ -18,6 +19,7 @@ from typing import Tuple, List, Any import zstandard as zstd from wxManager import MessageType +from wxManager.db_v4.audio2text import Audio2TextDB from wxManager.db_v4.biz_message import BizMessageDB from wxManager.db_v4.emotion import EmotionDB from wxManager.db_v4.media import MediaDB @@ -82,6 +84,7 @@ class DataBaseV4(DataBaseInterface): self.media_db = MediaDB('message/media_0.db', is_series=True) self.hardlink_db = HardLinkDB('hardlink/hardlink.db') self.emotion_db = EmotionDB('emoticon/emoticon.db') + self.audio2text_db = Audio2TextDB('Audio2Text.db') def init_database(self, db_dir=''): Me().load_from_json(os.path.join(db_dir, 'info.json')) # 加载自己的信息 @@ -96,6 +99,8 @@ class DataBaseV4(DataBaseInterface): flag &= self.media_db.init_database(db_dir) flag &= self.hardlink_db.init_database(db_dir) flag &= self.emotion_db.init_database(db_dir) + flag &= self.audio2text_db.init_database(db_dir) + self.audio2text_db.create() # 初始化数据转文字数据库 return flag def close(self): @@ -271,13 +276,16 @@ class DataBaseV4(DataBaseInterface): def get_audio_path(self, reserved0, output_path, filename=''): return self.media_db.get_audio_path(reserved0, output_path, filename) - def get_audio_text(self, msgSvrId): - return '' + def get_audio_text(self, server_id): + return self.audio2text_db.get_audio_text(server_id) def update_audio_to_text(self): # todo return + def add_audio_txt(self, server_id, text): + return self.audio2text_db.add_text(server_id, text) + # 语音结束 # 联系人 @@ -353,10 +361,10 @@ class DataBaseV4(DataBaseInterface): contact.type |= ContactType.Star if is_nth_bit_set(flag, 11): contact.type |= ContactType.Sticky - if local_type == 10086: contact.type = ContactType.Unknown contact.is_unknown = True + contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark) return contact def get_contacts(self) -> List[Person]: @@ -437,9 +445,6 @@ class DataBaseV4(DataBaseInterface): # 联系人结束 - def add_audio_txt(self, msgSvrId, text): - return self.audio_to_text.add_text(msgSvrId, text) - def get_favorite_items(self, time_range): return self.favorite_db.get_items(time_range) diff --git a/wxManager/merge.py b/wxManager/merge.py index a6452cf..5206367 100644 --- a/wxManager/merge.py +++ b/wxManager/merge.py @@ -19,7 +19,7 @@ def get_create_statements(conn, table_name, object_type): return [row[0] for row in cursor.fetchall() if row[0]] # 过滤掉 None 值 -def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index=-1, exclude_first_column=False): +def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index=-1, exclude_column=''): """ 将db_path数据库的内容增量写入connect数据库中 @param db_path: 新的数据库路径 @@ -28,7 +28,7 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index @param table_name: 待写入的表名 @param col_name: 根据该列进行判断是否是新增数据 @param col_index: 待写入的列号 - @param exclude_first_column: 是否不考虑低一列(针对第一列是自增ID的表) + @param exclude_column: 是否不考虑某一列(针对某一列是自增ID的表) @return: """ if not (os.path.exists(db_path) or os.path.isfile(db_path)): @@ -41,26 +41,30 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index tgt_cur = tgt_conn.cursor() try: - if not table_exists(tgt_conn, table_name): + if not table_exists(src_conn, table_name): # 复制表结构 - create_table_sql = get_create_statements(src_conn, table_name, "table") + create_table_sql = get_create_statements(tgt_conn, table_name, "table") if create_table_sql: - tgt_conn.execute(create_table_sql[0]) # 执行 CREATE TABLE 语句 + src_conn.execute(create_table_sql[0]) # 执行 CREATE TABLE 语句 print(f"表 {table_name} 结构已复制") # 复制索引 - create_index_sql_list = get_create_statements(src_conn, table_name, "index") + create_index_sql_list = get_create_statements(tgt_conn, table_name, "index") for create_index_sql in create_index_sql_list: - tgt_conn.execute(create_index_sql) # 执行 CREATE INDEX 语句 + src_conn.execute(create_index_sql) # 执行 CREATE INDEX 语句 print(f"索引已复制: {create_index_sql}") # 获取列名 src_cursor.execute(f"PRAGMA table_info({table_name})") columns_info = src_cursor.fetchall() - if columns_info and exclude_first_column: - columns_info = columns_info[1:] column_names = [info[1] for info in columns_info] + if columns_info and exclude_column: + try: + exclude_col_index = column_names.index(exclude_column) + except ValueError: + print(f"错误: 列 {exclude_column} 在表 {table_name} 中不存在") + return + column_names = column_names[:exclude_col_index]+column_names[exclude_col_index+1:] num_columns = len(column_names) - if col_index == -1: try: col_index = column_names.index(col_name) @@ -69,7 +73,7 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index return # 从数据库B中选择主键不在数据库A中的行 query = f""" - SELECT {', '.join([name for name in column_names])} + SELECT {', '.join(column_names)} FROM {table_name} """ tgt_cur.execute(query) @@ -83,7 +87,6 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index source_rows = {r[0] for r in source_rows} rows_to_insert = [row for row in target_rows if row[col_index] not in source_rows] - if rows_to_insert: insert_query = f""" INSERT INTO {table_name} ({', '.join(column_names)}) @@ -93,7 +96,8 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index src_conn.commit() print(f"{len(rows_to_insert)} 行已插入到 {table_name} 表中") else: - print(f"没有需要插入的数据,{table_name} 表已是最新") + pass + # print(f"没有需要插入的数据,{table_name} 表已是最新") except sqlite3.Error as e: print(f"{db_path} 数据库操作错误: {e}") finally: @@ -170,7 +174,8 @@ def increase_update_data(db_path, src_cur, src_conn, table_name, col_name, col_i src_conn.commit() print(f"{len(rows_to_insert)} 行已更新到 {table_name} 表中。") else: - print(f"没有需要插入的数据,{table_name} 表已是最新。") + pass + # print(f"没有需要插入的数据,{table_name} 表已是最新。") except sqlite3.Error as e: print(f"{db_path} 数据库操作错误: {e}") finally: diff --git a/wxManager/model/db_model.py b/wxManager/model/db_model.py index 8dacd43..986b206 100644 --- a/wxManager/model/db_model.py +++ b/wxManager/model/db_model.py @@ -25,7 +25,7 @@ class DataBaseBase: def init_database(self, db_dir=''): self.db_dir = db_dir db_path = os.path.join(db_dir, self.db_file_name) - if not os.path.exists(db_path): + if not os.path.exists(db_path) and self.db_file_name != 'Audio2Text.db': return False db_file_name = self.db_file_name self.db_file_name = [] @@ -44,11 +44,10 @@ class DataBaseBase: self.cursor.append(cursor) self.open_flag = True else: - if os.path.exists(db_path): - self.DB = sqlite3.connect(db_path, check_same_thread=False) - # '''创建游标''' - self.cursor = self.DB.cursor() - self.open_flag = True + self.DB = sqlite3.connect(db_path, check_same_thread=False) + # '''创建游标''' + self.cursor = self.DB.cursor() + self.open_flag = True # print('初始化数据库完成:', db_path) self.self_init() return True diff --git a/wxManager/parser/audio_parser.py b/wxManager/parser/audio_parser.py index 2a0a536..f79e9e9 100644 --- a/wxManager/parser/audio_parser.py +++ b/wxManager/parser/audio_parser.py @@ -14,16 +14,16 @@ import xmltodict def parser_audio(xml_content): result = { 'audio_length': 0, - 'audio_text':'' + 'audio_text': '' } xml_content = xml_content.strip() try: xml_dict = xmltodict.parse(xml_content) voice_length = xml_dict.get('msg', {}).get('voicemsg', {}).get('@voicelength', 0) - audio_text = xml_dict.get('msg',{}).get('voicetrans',{}).get('@transtext','') + audio_text = xml_dict.get('msg', {}).get('voicetrans', {}).get('@transtext', '') result = { 'audio_length': voice_length, - 'audio_text':audio_text + 'audio_text': audio_text } except: if xml_content and ':' in xml_content: diff --git a/wxManager/parser/util/protocbuf/packed_info_data_img2.proto b/wxManager/parser/util/protocbuf/packed_info_data_img2.proto new file mode 100644 index 0000000..4fd3a1c --- /dev/null +++ b/wxManager/parser/util/protocbuf/packed_info_data_img2.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; +// 2025年3月微信4.0.3正式版修改了img命名方式才有了这个东西 +message PackedInfoDataImg2 { + int32 field1 = 1; + int32 field2 = 2; + ImageInfo imageInfo = 3; +} + +message ImageInfo { + int32 height = 1; + int32 width = 2; + string filename = 4; +} \ No newline at end of file diff --git a/wxManager/parser/util/protocbuf/packed_info_data_img2_pb2.py b/wxManager/parser/util/protocbuf/packed_info_data_img2_pb2.py new file mode 100644 index 0000000..ea163dc --- /dev/null +++ b/wxManager/parser/util/protocbuf/packed_info_data_img2_pb2.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: packed_info_data_img2.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1bpacked_info_data_img2.proto\"S\n\x12PackedInfoDataImg2\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x05\x12\x0e\n\x06\x66ield2\x18\x02 \x01(\x05\x12\x1d\n\timageInfo\x18\x03 \x01(\x0b\x32\n.ImageInfo\"<\n\tImageInfo\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x10\n\x08\x66ilename\x18\x04 \x01(\tb\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'packed_info_data_img2_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _PACKEDINFODATAIMG2._serialized_start=31 + _PACKEDINFODATAIMG2._serialized_end=114 + _IMAGEINFO._serialized_start=116 + _IMAGEINFO._serialized_end=176 +# @@protoc_insertion_point(module_scope) diff --git a/wxManager/parser/wechat_v3.py b/wxManager/parser/wechat_v3.py index b233ca0..08bd819 100644 --- a/wxManager/parser/wechat_v3.py +++ b/wxManager/parser/wechat_v3.py @@ -292,6 +292,8 @@ class AudioMessageFactory(MessageFactory, Singleton): audio_dic = parser_audio(msg.xml_content) msg.duration = audio_dic.get('audio_length', 0) msg.audio_text = audio_dic.get('audio_text', '') + if not msg.audio_text: + msg.audio_text = manager.get_audio_text(msg.server_id) self.add_message(msg) return msg @@ -439,6 +441,7 @@ class LinkMessageFactory(MessageFactory, Singleton): contact = manager.get_contact_by_username(source_username) msg.app_name = contact.nickname msg.app_icon = contact.small_head_img_url + msg.app_id = source_username elif (type_, sub_type) in {(49, 33), (49, 36)}: # 小程序 msg.type = MessageType.Applet diff --git a/wxManager/parser/wechat_v4.py b/wxManager/parser/wechat_v4.py index 141bba6..6cb68c2 100644 --- a/wxManager/parser/wechat_v4.py +++ b/wxManager/parser/wechat_v4.py @@ -24,7 +24,8 @@ from wxManager.model.message import VoipMessage, BusinessCardMessage, MergedMess from wxManager.parser.link_parser import parser_link, parser_voip, parser_applet, parser_business, \ parser_merged_messages, parser_wechat_video, parser_position, parser_reply, parser_transfer, parser_red_envelop, \ parser_file, parser_favorite_note, parser_pat -from wxManager.parser.util.protocbuf import packed_info_data_pb2, packed_info_data_merged_pb2,packed_info_data_img_pb2 +from wxManager.parser.util.protocbuf import packed_info_data_pb2, packed_info_data_merged_pb2, packed_info_data_img_pb2, \ + packed_info_data_img2_pb2 from .audio_parser import parser_audio from .emoji_parser import parser_emoji from .file_parser import parse_video @@ -248,14 +249,27 @@ class ImageMessageFactory(MessageFactory, Singleton): is_sender, wxid, message_content = self.common_attribute(message, username, manager) filename = '' try: - # 2025年3月微信测试版修改了img命名方式才有了这个东西 - packed_info_data_proto = packed_info_data_img_pb2.PackedInfoDataImg() + # 2025年3月微信4.0.3正式版修改了img命名方式才有了这个东西 + packed_info_data_proto = packed_info_data_img2_pb2.PackedInfoDataImg2() packed_info_data_proto.ParseFromString(message[14]) # 转换为 JSON 格式 packed_info_data = MessageToDict(packed_info_data_proto) - filename = packed_info_data.get('filename', '').strip().strip('"').strip() + image_info = packed_info_data.get('imageInfo', {}) + width = image_info.get('width',0) + height = image_info.get('height',0) + filename = image_info.get('filename', '').strip().strip('"').strip() except: pass + if not filename: + try: + # 2025年3月微信测试版修改了img命名方式才有了这个东西 + packed_info_data_proto = packed_info_data_img_pb2.PackedInfoDataImg() + packed_info_data_proto.ParseFromString(message[14]) + # 转换为 JSON 格式 + packed_info_data = MessageToDict(packed_info_data_proto) + filename = packed_info_data.get('filename', '').strip().strip('"').strip() + except: + pass msg = ImageMessage( local_id=message[0], server_id=message[1], @@ -277,8 +291,6 @@ class ImageMessageFactory(MessageFactory, Singleton): file_name=filename, file_type='png' ) - # with open(f'{msg.str_time}{msg.server_id}.bin', 'wb') as f: - # f.write(message[14]) path = manager.get_image(content=message_content, bytesExtra=msg, up_dir='', thumb=False, talker_username=username) msg.path = path @@ -300,6 +312,8 @@ class AudioMessageFactory(MessageFactory, Singleton): # 转换为 JSON 格式 packed_info_data = MessageToDict(packed_info_data_proto) audio_text = packed_info_data.get('info', {}).get('audioTxt', '') + if not audio_text: + audio_text = manager.get_audio_text(message[1]) msg = AudioMessage( local_id=message[0], server_id=message[1], @@ -454,6 +468,7 @@ class LinkMessageFactory(MessageFactory, Singleton): contact = manager.get_contact_by_username(source_username) msg.app_name = contact.nickname msg.app_icon = contact.small_head_img_url + msg.app_id = source_username elif message[2] == MessageType.Applet or message[2] == MessageType.Applet2: info = parser_applet(message_content) @@ -627,7 +642,8 @@ class MergedMessageFactory(MessageFactory, Singleton): inner_msg.path = os.path.join('msg', 'attach', wxid_md5, month, - 'Rec', dir0, 'F', f"{level}{'_' if level else ''}{index}", inner_msg.file_name) + 'Rec', dir0, 'F', f"{level}{'_' if level else ''}{index}", + inner_msg.file_name) else: inner_msg.path = manager.get_file(inner_msg.md5) elif inner_msg.type == MessageType.MergedMessages: