适配微信4.0.3正式版,修复合并数据库的一些问题

This commit is contained in:
SiYuan 2025-04-02 12:35:17 +08:00
parent f09a00d4ec
commit 5689de2ca6
26 changed files with 355 additions and 108 deletions

View File

@ -36,6 +36,9 @@ def dump_v3():
info_data = me.to_json()
output_dir = wx_info.wxid
key = wx_info.key
if not key:
print('error! 未找到key请重启微信后再试')
continue
wx_dir = wx_info.wx_dir
decrypt_v3.decrypt_db_files(key, src_dir=wx_dir, dest_dir=output_dir)
# 导出的数据库在 output_dir/Msg 文件夹下,后面会用到
@ -59,6 +62,9 @@ def dump_v4():
info_data = me.to_json()
output_dir = wx_info.wxid # 数据库输出文件夹
key = wx_info.key
if not key:
print('error! 未找到key请重启微信后再试')
continue
wx_dir = wx_info.wx_dir
decrypt_v4.decrypt_db_files(key, src_dir=wx_dir, dest_dir=output_dir)
# 导出的数据库在 output_dir/db_storage 文件夹下,后面会用到

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2025/4/1 20:31
@Author : SiYuan
@Email : 863909694@qq.com
@File : wxManager-audio2text.py
@Description :
"""
import os
import sqlite3
import traceback
from wxManager.merge import increase_update_data, increase_data
from wxManager.model.db_model import DataBaseBase
class Audio2TextDB(DataBaseBase):
    """Speech-to-text results for voice messages, stored in the ``Audio2Text`` table.

    Each row maps a message server id (``msgSvrId``) to its transcribed text.
    """

    def create(self):
        """Create the ``Audio2Text`` table and its unique index if they do not exist yet."""
        sql = '''
CREATE TABLE IF NOT EXISTS Audio2Text (
ID INTEGER PRIMARY KEY,
msgSvrId INTEGER UNIQUE,
Text TEXT NOT NULL
);
'''
        cursor = self.DB.cursor()
        cursor.execute(sql)
        # Create the index
        cursor.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_msg_id ON Audio2Text (msgSvrId);''')
        self.commit()

    def get_audio_text(self, server_id):
        """Return the stored transcription for ``server_id``.

        @param server_id: message server id to look up
        @return: the transcribed text, or ``''`` when no row matches
        """
        sql = '''select text from Audio2Text where msgSvrId=?'''
        cursor = self.DB.cursor()
        cursor.execute(sql, [server_id])
        result = cursor.fetchone()
        return result[0] if result else ''

    def add_text(self, server_id, text):
        """Insert a transcription for ``server_id``.

        @param server_id: message server id the text belongs to
        @param text: transcribed text (the column is ``NOT NULL``)
        @return: ``True`` when the row was inserted and committed, ``False`` otherwise
        """
        try:
            cursor = self.DB.cursor()
            sql = '''INSERT INTO Audio2Text (msgSvrId, Text) VALUES (?, ?)'''
            cursor.execute(sql, [server_id, text])
            self.commit()
        except sqlite3.IntegrityError:
            # Expected failure: the server id is already present (UNIQUE) or text is NULL.
            return False
        except Exception:
            # Still best-effort, but no longer silent: report unexpected failures
            # the same way merge() does instead of hiding them.
            print(f"数据库操作错误: {traceback.format_exc()}")
            return False
        return True

    def merge(self, db_path):
        """Incrementally copy rows from the database at ``db_path`` into this one.

        Rows are matched on ``msgSvrId``; only server ids not yet present are inserted.

        @param db_path: path of the database file to merge from
        @return: None
        """
        # isfile() already implies existence and also rejects directories.
        if not os.path.isfile(db_path):
            print(f'{db_path} 不存在')
            return
        try:
            # ID is the auto-assigned INTEGER PRIMARY KEY. Copying it verbatim from
            # another database collides with rows already stored here, so leave it
            # out and let SQLite assign fresh ids (same approach as the other
            # merge() implementations that pass exclude_column).
            increase_data(db_path, self.cursor, self.DB, 'Audio2Text', 'msgSvrId', exclude_column='ID')
        except Exception:
            print(f"数据库操作错误: {traceback.format_exc()}")
            self.DB.rollback()

View File

@ -126,10 +126,10 @@ class Emotion(DataBaseBase):
cursor = self.DB.cursor()
# 获取列名
increase_data(db_path, cursor, self.DB, 'CustomEmotion', 'MD5', 0)
increase_data(db_path, cursor, self.DB, 'EmotionDes1', 'MD5', 1, True)
increase_data(db_path, cursor, self.DB, 'EmotionItem', 'MD5', 1, True)
increase_data(db_path, cursor, self.DB, 'EmotionPackageItem', 'ProductId', 0, False)
increase_data(db_path, cursor, self.DB, 'EmotionOrderInfo', 'MD5', 0, False)
increase_data(db_path, cursor, self.DB, 'EmotionDes1', 'MD5', 1, 'localId')
increase_data(db_path, cursor, self.DB, 'EmotionItem', 'MD5', 1, 'localId')
increase_data(db_path, cursor, self.DB, 'EmotionPackageItem', 'ProductId', 0, 'localId')
increase_data(db_path, cursor, self.DB, 'EmotionOrderInfo', 'MD5', 0, 'localId')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()

View File

@ -277,7 +277,7 @@ class Msg(DataBaseBase):
"""
increase_data(db_path, cursor, db, 'Name2Id', 'UsrName')
increase_update_data(db_path, cursor, db, 'DBInfo', 'tableIndex')
increase_data(db_path, cursor, db, 'MSG', 'MsgSvrID', exclude_first_column=True)
increase_data(db_path, cursor, db, 'MSG', 'MsgSvrID', exclude_column='localId')
tasks = []
for i in range(100):

View File

@ -111,8 +111,7 @@ class OpenIMMsgDB(DataBaseBase):
def get_messages_by_username(self, username: str,
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
result = self._get_messages_by_username(self.DB.cursor(), username, time_range)
return [result]
return self._get_messages_by_username(self.DB.cursor(), username, time_range)
def get_message_by_server_id(self, username, server_id):
"""
@ -141,7 +140,7 @@ class OpenIMMsgDB(DataBaseBase):
return
try:
# 获取列名
increase_update_data(db_path, self.cursor, self.DB, 'ChatCRMsg', 'MsgSvrID', 1, exclude_first_column=True)
increase_data(db_path, self.cursor, self.DB, 'ChatCRMsg', 'MsgSvrID', 1, exclude_column='localId')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()

View File

@ -176,7 +176,7 @@ class PublicMsg(DataBaseBase):
return
try:
# 获取列名
increase_data(db_path, self.cursor, self.DB, 'PublicMsg', 'MsgSvrID', 1, exclude_first_column=True)
increase_data(db_path, self.cursor, self.DB, 'PublicMsg', 'MsgSvrID', 1, exclude_column='localId')
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()

View File

@ -195,16 +195,4 @@ class Sns:
return result
def __del__(self):
self.close()
if __name__ == '__main__':
db_path = "./Msg1/Sns.db"
sns_db = Sns()
sns_db.init_database()
print(sns_db.get_sns_bg_url())
feeds = sns_db.get_feeds_by_username('wxid_27hqbq7vx5hf22')
print(feeds)
for feed in feeds:
comment = sns_db.get_comment(feed[0])
print(comment)
self.close()

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2025/4/1 20:31
@Author : SiYuan
@Email : 863909694@qq.com
@File : wxManager-audio2text.py
@Description :
"""
import os
import sqlite3
import traceback
from wxManager.merge import increase_update_data, increase_data
from wxManager.model.db_model import DataBaseBase
class Audio2TextDB(DataBaseBase):
    """Speech-to-text results for voice messages, stored in the ``Audio2Text`` table.

    Each row maps a message server id (``msgSvrId``) to its transcribed text.
    """

    def create(self):
        """Create the ``Audio2Text`` table and its unique index if they do not exist yet."""
        sql = '''
CREATE TABLE IF NOT EXISTS Audio2Text (
ID INTEGER PRIMARY KEY,
msgSvrId INTEGER UNIQUE,
Text TEXT NOT NULL
);
'''
        cursor = self.DB.cursor()
        cursor.execute(sql)
        # Create the index
        cursor.execute('''CREATE UNIQUE INDEX IF NOT EXISTS idx_msg_id ON Audio2Text (msgSvrId);''')
        self.commit()

    def get_audio_text(self, server_id):
        """Return the stored transcription for ``server_id``.

        @param server_id: message server id to look up
        @return: the transcribed text, or ``''`` when no row matches
        """
        sql = '''select text from Audio2Text where msgSvrId=?'''
        cursor = self.DB.cursor()
        cursor.execute(sql, [server_id])
        result = cursor.fetchone()
        return result[0] if result else ''

    def add_text(self, server_id, text):
        """Insert a transcription for ``server_id``.

        @param server_id: message server id the text belongs to
        @param text: transcribed text (the column is ``NOT NULL``)
        @return: ``True`` when the row was inserted and committed, ``False`` otherwise
        """
        try:
            cursor = self.DB.cursor()
            sql = '''INSERT INTO Audio2Text (msgSvrId, Text) VALUES (?, ?)'''
            cursor.execute(sql, [server_id, text])
            self.commit()
        except sqlite3.IntegrityError:
            # Expected failure: the server id is already present (UNIQUE) or text is NULL.
            return False
        except Exception:
            # Still best-effort, but no longer silent: report unexpected failures
            # the same way merge() does instead of hiding them.
            print(f"数据库操作错误: {traceback.format_exc()}")
            return False
        return True

    def merge(self, db_path):
        """Incrementally copy rows from the database at ``db_path`` into this one.

        Rows are matched on ``msgSvrId``; only server ids not yet present are inserted.

        @param db_path: path of the database file to merge from
        @return: None
        """
        # isfile() already implies existence and also rejects directories.
        if not os.path.isfile(db_path):
            print(f'{db_path} 不存在')
            return
        try:
            # ID is the auto-assigned INTEGER PRIMARY KEY. Copying it verbatim from
            # another database collides with rows already stored here, so leave it
            # out and let SQLite assign fresh ids (same approach as the other
            # merge() implementations that pass exclude_column).
            increase_data(db_path, self.cursor, self.DB, 'Audio2Text', 'msgSvrId', exclude_column='ID')
        except Exception:
            print(f"数据库操作错误: {traceback.format_exc()}")
            self.DB.rollback()

View File

@ -12,6 +12,7 @@ import concurrent
import hashlib
import os
import shutil
import sqlite3
import threading
from concurrent.futures import ThreadPoolExecutor
from datetime import date, datetime
@ -280,14 +281,18 @@ order by sort_seq
"""
increase_data(db_path, cursor, db, 'Name2Id', 'user_name')
increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp')
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = cursor.fetchall()
tgt_conn = sqlite3.connect(db_path)
tgt_cur = tgt_conn.cursor()
tgt_cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = tgt_cur.fetchall()
tgt_cur.close()
tgt_conn.close()
# print(result)
if result:
for row in result:
table_name = row[0]
if table_name.startswith('Msg'):
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True)
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_column='local_id')
tasks = []
for i in range(100):

View File

@ -264,9 +264,9 @@ class HardLinkDB(DataBaseBase):
return
try:
# 获取列名
increase_data(db_path, self.cursor, self.DB, 'file_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'image_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'video_hardlink_info_v3', 'md5')
increase_data(db_path, self.cursor, self.DB, 'file_hardlink_info_v3', 'md5', exclude_column='_rowid_')
increase_data(db_path, self.cursor, self.DB, 'image_hardlink_info_v3', 'md5', exclude_column='_rowid_')
increase_data(db_path, self.cursor, self.DB, 'video_hardlink_info_v3', 'md5', exclude_column='_rowid_')
increase_data(db_path, self.cursor, self.DB, 'dir2id', 'username')
except:
print(f"数据库操作错误: {traceback.format_exc()}")

View File

@ -12,6 +12,7 @@ import concurrent
import hashlib
import os
import shutil
import sqlite3
import threading
import traceback
from concurrent.futures import ThreadPoolExecutor
@ -281,14 +282,19 @@ order by sort_seq
"""
increase_data(db_path, cursor, db, 'Name2Id', 'user_name')
increase_update_data(db_path, cursor, db, 'TimeStamp', 'timestamp')
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = cursor.fetchall()
tgt_conn = sqlite3.connect(db_path)
tgt_cur = tgt_conn.cursor()
tgt_cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
result = tgt_cur.fetchall()
tgt_cur.close()
tgt_conn.close()
# print(result)
if result:
for row in result:
table_name = row[0]
if table_name.startswith('Msg'):
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_first_column=True)
increase_data(db_path, cursor, db, table_name, 'server_id', exclude_column='local_id')
tasks = []
for i in range(100):

View File

@ -42,10 +42,3 @@ order by sort_timestamp desc
except:
print(f"数据库操作错误: {traceback.format_exc()}")
self.DB.rollback()
if __name__ == '__main__':
cd = SessionDB('session/session.db')
cd.init_database(r'E:\Project\Python\MemoTrace\app\DataBase\Msg\wxid_27hqbq7vx5hf22\db_storage')
r = cd.get_session()
print(r)

View File

@ -6,7 +6,7 @@
@Author : SiYuan
@Email : 863909694@qq.com
@File : wxManager-decrypt_dat.py
@Description :
@Description : 微信4.0图片加密原理解析https://blog.lc044.love/post/16
"""
import os
import struct
@ -26,6 +26,19 @@ pic_head = (0xff, 0xd8, 0x89, 0x50, 0x47, 0x49)
decode_code = 0
decode_code_v4 = -1
# Maps the 6-byte magic prefix of a WeChat 4.0 image file to the AES key used for it.
AES_KEY_MAP = {
    b'\x07\x08V1\x08\x07': b'cfcd208495d565ef',  # first-generation 4.0 image key
    b'\x07\x08V2\x08\x07': b'43e7d25eb1b9bb64',  # second-generation 4.0 image key, used by the WeChat 4.0.3 release
}


def get_aes_key(header):
    """Return the AES key registered for the magic prefix of ``header``.

    Only the first six bytes are inspected; ``b''`` is returned when the
    prefix is not a known key in ``AES_KEY_MAP``.
    """
    magic = header[:6]
    if magic in AES_KEY_MAP:
        return AES_KEY_MAP[magic]
    return b''


def is_v4_image(header):
    """Tell whether ``header`` starts with one of the known 4.0 image magic prefixes."""
    magic = header[:6]
    return magic in AES_KEY_MAP
def get_code(dat_read):
"""
@ -68,14 +81,14 @@ def decode_dat(xor_key: int, file_path, out_path, dst_name='') -> str | bytes:
return ''
# print(file_path,out_path,dst_name)
with open(file_path, 'rb') as file_in:
data = file_in.read(0xf)
if data.startswith(b'\x07\x08V1\x08\x07'):
header = file_in.read(0xf)
if is_v4_image(header):
# 微信4.0
return decode_dat_v4(xor_key, file_path, out_path, dst_name)
with open(file_path, 'rb') as file_in:
data = file_in.read(2)
file_type, decode_code = get_code(data)
header = file_in.read(2)
file_type, decode_code = get_code(header)
if decode_code == -1:
return ''
@ -96,12 +109,12 @@ def decode_dat(xor_key: int, file_path, out_path, dst_name='') -> str | bytes:
# 分块读取和写入
buffer_size = 1024 # 定义缓冲区大小
with open(file_outpath, 'wb') as file_out:
file_out.write(bytes([byte ^ decode_code for byte in data]))
file_out.write(bytes([byte ^ decode_code for byte in header]))
while True:
data = file_in.read(buffer_size)
if not data:
header = file_in.read(buffer_size)
if not header:
break
file_out.write(bytes([byte ^ decode_code for byte in data]))
file_out.write(bytes([byte ^ decode_code for byte in header]))
# print(os.path.basename(file_outpath))
return file_outpath
@ -121,7 +134,7 @@ def get_decode_code_v4(wx_dir):
src_file_path = os.path.join(root, file)
with open(src_file_path, 'rb') as f:
data = f.read()
if not data.startswith(b'\x07\x08V1\x08\x07'):
if not is_v4_image(data):
continue
file_tail = data[-2:]
@ -184,7 +197,7 @@ def decode_dat_v4(xor_key: int, file_path, out_path, dst_name='') -> str | bytes
padding_length = 16 - (len(encrypted_data) % 16)
encrypted_data += b'\x00' * padding_length
aes_key = b'cfcd208495d565ef'
aes_key = get_aes_key(header)
# 初始化AES解密器ECB模式
cipher = AES.new(aes_key, AES.MODE_ECB)
@ -236,7 +249,7 @@ async def decode_dat_v4_async(xor_key: int, file_path, out_path, dst_name='') ->
encrypted_data = await f.read(encrypt_length0)
res_data = await f.read()
aes_key = b'cfcd208495d565ef'
aes_key = get_aes_key(header)
# 初始化AES解密器ECB模式
cipher = AES.new(aes_key, AES.MODE_ECB)
@ -303,5 +316,5 @@ def batch_decode_image_multiprocessing(xor_key, file_infos: List[Tuple[str, str,
if __name__ == '__main__':
wx_dir = ''
xor_key = get_decode_code_v4(wx_dir)
dat_file = "2_1730948126.dat"
dat_file = "1c5d8c0cf05d97869b0bc9fe16a8e3c2.dat"
decode_dat_v4(xor_key, dat_file, '.', dst_name='解密后的图片')

View File

@ -17,6 +17,7 @@ import hmac
import hashlib
import os
import traceback
from concurrent.futures import ProcessPoolExecutor
from typing import Union, List
from Crypto.Cipher import AES
@ -85,6 +86,11 @@ def decrypt_db_file_v3(key: str, db_path, out_path):
return True, [db_path, out_path, key]
def decode_wrapper(tasks):
    """Top-level wrapper around the decrypt function.

    ``tasks`` is one argument tuple; it is unpacked and forwarded to
    ``decrypt_db_file_v3``, whose result is returned unchanged.
    NOTE(review): presumably kept at module level so a process pool can
    pickle it — confirm against the caller.
    """
    outcome = decrypt_db_file_v3(*tasks)
    return outcome
def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(src_dir):
print(f"源文件夹 {src_dir} 不存在")
@ -92,7 +98,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(dest_dir):
os.makedirs(dest_dir) # 如果目标文件夹不存在,创建它
decrypt_tasks = []
for root, dirs, files in os.walk(src_dir):
for file in files:
if file.endswith(".db"):
@ -108,4 +114,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(dest_sub_dir):
os.makedirs(dest_sub_dir)
print(dest_file_path)
decrypt_db_file_v3(key, src_file_path, dest_file_path)
decrypt_tasks.append((key, src_file_path, dest_file_path))
# decrypt_db_file_v3(key, src_file_path, dest_file_path)
with ProcessPoolExecutor(max_workers=16) as executor:
results = list(executor.map(decode_wrapper, decrypt_tasks)) # 使用顶层定义的函数

View File

@ -1,6 +1,8 @@
import hmac
import os
import struct
from concurrent.futures import ProcessPoolExecutor
from Crypto.Cipher import AES
from Crypto.Protocol.KDF import PBKDF2
from Crypto.Hash import SHA512
@ -101,6 +103,11 @@ def decrypt_db_file_v4(pkey, in_db_path, out_db_path):
return True
def decode_wrapper(tasks):
    """Top-level wrapper around the decrypt function.

    ``tasks`` is one argument tuple; it is unpacked and forwarded to
    ``decrypt_db_file_v4``, whose result is returned unchanged.
    NOTE(review): presumably kept at module level so a process pool can
    pickle it — confirm against the caller.
    """
    outcome = decrypt_db_file_v4(*tasks)
    return outcome
def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(src_dir):
print(f"源文件夹 {src_dir} 不存在")
@ -108,7 +115,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(dest_dir):
os.makedirs(dest_dir) # 如果目标文件夹不存在,创建它
decrypt_tasks = []
for root, dirs, files in os.walk(src_dir):
for file in files:
if file.endswith(".db"):
@ -124,4 +131,7 @@ def decrypt_db_files(key, src_dir: str, dest_dir: str):
if not os.path.exists(dest_sub_dir):
os.makedirs(dest_sub_dir)
print(dest_file_path)
decrypt_db_file_v4(key, src_file_path, dest_file_path)
decrypt_tasks.append((key, src_file_path, dest_file_path))
# decrypt_db_file_v4(key, src_file_path, dest_file_path)
with ProcessPoolExecutor(max_workers=16) as executor:
results = list(executor.map(decode_wrapper, decrypt_tasks)) # 使用顶层定义的函数

View File

@ -1139,5 +1139,26 @@
94554984,
0,
94556448
],
"2.4.0.1": [
14907156,
14907512,
14907208,
0,
0
],
"3.7.5.19": [
37895672,
37896480,
37895552,
0,
0
],
"3.9.2.114": [
50370000,
0,
50369856,
0,
0
]
}

View File

@ -6,7 +6,7 @@
@Author : SiYuan
@Email : 863909694@qq.com
@File : wxManager-wx_info_v4.py
@Description :
@Description : 部分思路参考https://github.com/0xlane/wechat-dump-rs
"""
import ctypes
@ -330,7 +330,7 @@ def get_key_inner(pid, process_infos):
keys = []
key_set = set()
for pre_address in pre_addresses:
if any([base_address <= pre_address <= base_address + region_size - KEY_SIZE for base_address, region_size in
if True or any([base_address <= pre_address <= base_address + region_size - KEY_SIZE for base_address, region_size in
process_infos]):
key = read_bytes_from_pid(pid, pre_address, 32)
if key not in key_set:

View File

@ -10,6 +10,7 @@
"""
import concurrent
import os
import re
import traceback
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from datetime import date
@ -19,6 +20,7 @@ import xmltodict
from wxManager import MessageType
from wxManager.db_main import DataBaseInterface
from wxManager.db_v3.audio2text import Audio2TextDB
from wxManager.db_v3.hard_link_file import HardLinkFile
from wxManager.db_v3.hard_link_image import HardLinkImage
from wxManager.db_v3.hard_link_video import HardLinkVideo
@ -188,11 +190,7 @@ class DataBaseV3(DataBaseInterface):
self.open_contact_db = OpenIMContactDB('OpenIMContact.db')
self.open_media_db = OpenIMMediaDB('OpenIMMedia.db')
self.open_msg_db = OpenIMMsgDB('OpenIMMsg.db')
# self.sns_db = Sns()
# self.audio_to_text = Audio2TextDB()
# self.public_msg_db = PublicMsg()
# self.favorite_db = Favorite()
self.audio2text_db = Audio2TextDB('Audio2Text.db')
def init_database(self, db_dir=''):
# print('初始化数据库', db_dir)
@ -211,6 +209,8 @@ class DataBaseV3(DataBaseInterface):
flag &= self.open_contact_db.init_database(db_dir)
flag &= self.open_media_db.init_database(db_dir)
flag &= self.open_msg_db.init_database(db_dir)
flag &= self.audio2text_db.init_database(db_dir)
self.audio2text_db.create() # 初始化数据转文字数据库
return flag
# self.sns_db.init_database(db_dir)
@ -231,9 +231,7 @@ class DataBaseV3(DataBaseInterface):
self.open_contact_db.close()
self.open_media_db.close()
self.open_msg_db.close()
# self.sns_db.close()
# self.audio_to_text.close()
# self.public_msg_db.close()
self.audio2text_db.close()
def get_session(self):
"""
@ -446,8 +444,10 @@ class DataBaseV3(DataBaseInterface):
return self.media_msg_db.get_audio_path(reserved0, output_path, filename)
def get_audio_text(self, msgSvrId):
return ''
return self.media_msg_db.get_audio_text(msgSvrId)
return self.audio2text_db.get_audio_text(msgSvrId)
def add_audio_txt(self, msgSvrId, text):
return self.audio2text_db.add_text(msgSvrId, text)
def update_audio_to_text(self):
messages = self.get_messages_all()
@ -491,10 +491,8 @@ class DataBaseV3(DataBaseInterface):
gender = ''
elif gender_code == 2:
gender = ''
type_ = contact_info_list[2]
wxid = contact_info_list[0]
contact = Contact(
wxid=contact_info_list[0],
remark=remark,
@ -524,10 +522,10 @@ class DataBaseV3(DataBaseInterface):
contact.type |= ContactType.Star
if is_nth_bit_set(type_, 11):
contact.type |= ContactType.Sticky
if type_ == 10086:
contact.type = ContactType.Unknown
contact.is_unknown = True
contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark)
return contact
def create_open_im_contact(self, contact_info_list) -> Person:
@ -560,6 +558,7 @@ class DataBaseV3(DataBaseInterface):
)
contact.type = ContactType.Normal
contact.type |= ContactType.OpenIM
contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark)
return contact
def get_contacts(self) -> List[Person]:
@ -660,8 +659,6 @@ class DataBaseV3(DataBaseInterface):
return chatroom_name.rstrip('')
# 联系人结束
def add_audio_txt(self, msgSvrId, text):
return self.audio_to_text.add_text(msgSvrId, text)
def get_favorite_items(self, time_range):
return self.favorite_db.get_items(time_range)

View File

@ -10,6 +10,7 @@
"""
import concurrent
import os
import re
from concurrent.futures import ProcessPoolExecutor, as_completed, ThreadPoolExecutor
from datetime import date, datetime
from multiprocessing import Pool, cpu_count
@ -18,6 +19,7 @@ from typing import Tuple, List, Any
import zstandard as zstd
from wxManager import MessageType
from wxManager.db_v4.audio2text import Audio2TextDB
from wxManager.db_v4.biz_message import BizMessageDB
from wxManager.db_v4.emotion import EmotionDB
from wxManager.db_v4.media import MediaDB
@ -82,6 +84,7 @@ class DataBaseV4(DataBaseInterface):
self.media_db = MediaDB('message/media_0.db', is_series=True)
self.hardlink_db = HardLinkDB('hardlink/hardlink.db')
self.emotion_db = EmotionDB('emoticon/emoticon.db')
self.audio2text_db = Audio2TextDB('Audio2Text.db')
def init_database(self, db_dir=''):
Me().load_from_json(os.path.join(db_dir, 'info.json')) # 加载自己的信息
@ -96,6 +99,8 @@ class DataBaseV4(DataBaseInterface):
flag &= self.media_db.init_database(db_dir)
flag &= self.hardlink_db.init_database(db_dir)
flag &= self.emotion_db.init_database(db_dir)
flag &= self.audio2text_db.init_database(db_dir)
self.audio2text_db.create() # 初始化数据转文字数据库
return flag
def close(self):
@ -271,13 +276,16 @@ class DataBaseV4(DataBaseInterface):
def get_audio_path(self, reserved0, output_path, filename=''):
return self.media_db.get_audio_path(reserved0, output_path, filename)
def get_audio_text(self, msgSvrId):
return ''
def get_audio_text(self, server_id):
return self.audio2text_db.get_audio_text(server_id)
def update_audio_to_text(self):
# todo
return
def add_audio_txt(self, server_id, text):
return self.audio2text_db.add_text(server_id, text)
# 语音结束
# 联系人
@ -353,10 +361,10 @@ class DataBaseV4(DataBaseInterface):
contact.type |= ContactType.Star
if is_nth_bit_set(flag, 11):
contact.type |= ContactType.Sticky
if local_type == 10086:
contact.type = ContactType.Unknown
contact.is_unknown = True
contact.remark = re.sub(r'[\\/:*?"<>|\s\.\x00-\x08\x0B\x0C\x0E-\x1F]', '_', contact.remark)
return contact
def get_contacts(self) -> List[Person]:
@ -437,9 +445,6 @@ class DataBaseV4(DataBaseInterface):
# 联系人结束
def add_audio_txt(self, msgSvrId, text):
return self.audio_to_text.add_text(msgSvrId, text)
def get_favorite_items(self, time_range):
return self.favorite_db.get_items(time_range)

View File

@ -19,7 +19,7 @@ def get_create_statements(conn, table_name, object_type):
return [row[0] for row in cursor.fetchall() if row[0]] # 过滤掉 None 值
def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index=-1, exclude_first_column=False):
def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index=-1, exclude_column=''):
"""
将db_path数据库的内容增量写入connect数据库中
@param db_path: 新的数据库路径
@ -28,7 +28,7 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index
@param table_name: 待写入的表名
@param col_name: 根据该列进行判断是否是新增数据
@param col_index: 待写入的列号
@param exclude_first_column: 是否不考虑低一列针对第一列是自增ID的表
@param exclude_column: 是否不考虑某一列针对某一列是自增ID的表
@return:
"""
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
@ -41,26 +41,30 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index
tgt_cur = tgt_conn.cursor()
try:
if not table_exists(tgt_conn, table_name):
if not table_exists(src_conn, table_name):
# 复制表结构
create_table_sql = get_create_statements(src_conn, table_name, "table")
create_table_sql = get_create_statements(tgt_conn, table_name, "table")
if create_table_sql:
tgt_conn.execute(create_table_sql[0]) # 执行 CREATE TABLE 语句
src_conn.execute(create_table_sql[0]) # 执行 CREATE TABLE 语句
print(f"{table_name} 结构已复制")
# 复制索引
create_index_sql_list = get_create_statements(src_conn, table_name, "index")
create_index_sql_list = get_create_statements(tgt_conn, table_name, "index")
for create_index_sql in create_index_sql_list:
tgt_conn.execute(create_index_sql) # 执行 CREATE INDEX 语句
src_conn.execute(create_index_sql) # 执行 CREATE INDEX 语句
print(f"索引已复制: {create_index_sql}")
# 获取列名
src_cursor.execute(f"PRAGMA table_info({table_name})")
columns_info = src_cursor.fetchall()
if columns_info and exclude_first_column:
columns_info = columns_info[1:]
column_names = [info[1] for info in columns_info]
if columns_info and exclude_column:
try:
exclude_col_index = column_names.index(exclude_column)
except ValueError:
print(f"错误: 列 {exclude_column} 在表 {table_name} 中不存在")
return
column_names = column_names[:exclude_col_index]+column_names[exclude_col_index+1:]
num_columns = len(column_names)
if col_index == -1:
try:
col_index = column_names.index(col_name)
@ -69,7 +73,7 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index
return
# 从数据库B中选择主键不在数据库A中的行
query = f"""
SELECT {', '.join([name for name in column_names])}
SELECT {', '.join(column_names)}
FROM {table_name}
"""
tgt_cur.execute(query)
@ -83,7 +87,6 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index
source_rows = {r[0] for r in source_rows}
rows_to_insert = [row for row in target_rows if row[col_index] not in source_rows]
if rows_to_insert:
insert_query = f"""
INSERT INTO {table_name} ({', '.join(column_names)})
@ -93,7 +96,8 @@ def increase_data(db_path, src_cursor, src_conn, table_name, col_name, col_index
src_conn.commit()
print(f"{len(rows_to_insert)} 行已插入到 {table_name} 表中")
else:
print(f"没有需要插入的数据,{table_name} 表已是最新")
pass
# print(f"没有需要插入的数据,{table_name} 表已是最新")
except sqlite3.Error as e:
print(f"{db_path} 数据库操作错误: {e}")
finally:
@ -170,7 +174,8 @@ def increase_update_data(db_path, src_cur, src_conn, table_name, col_name, col_i
src_conn.commit()
print(f"{len(rows_to_insert)} 行已更新到 {table_name} 表中。")
else:
print(f"没有需要插入的数据,{table_name} 表已是最新。")
pass
# print(f"没有需要插入的数据,{table_name} 表已是最新。")
except sqlite3.Error as e:
print(f"{db_path} 数据库操作错误: {e}")
finally:

View File

@ -25,7 +25,7 @@ class DataBaseBase:
def init_database(self, db_dir=''):
self.db_dir = db_dir
db_path = os.path.join(db_dir, self.db_file_name)
if not os.path.exists(db_path):
if not os.path.exists(db_path) and self.db_file_name != 'Audio2Text.db':
return False
db_file_name = self.db_file_name
self.db_file_name = []
@ -44,11 +44,10 @@ class DataBaseBase:
self.cursor.append(cursor)
self.open_flag = True
else:
if os.path.exists(db_path):
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
self.DB = sqlite3.connect(db_path, check_same_thread=False)
# '''创建游标'''
self.cursor = self.DB.cursor()
self.open_flag = True
# print('初始化数据库完成:', db_path)
self.self_init()
return True

View File

@ -14,16 +14,16 @@ import xmltodict
def parser_audio(xml_content):
result = {
'audio_length': 0,
'audio_text':''
'audio_text': ''
}
xml_content = xml_content.strip()
try:
xml_dict = xmltodict.parse(xml_content)
voice_length = xml_dict.get('msg', {}).get('voicemsg', {}).get('@voicelength', 0)
audio_text = xml_dict.get('msg',{}).get('voicetrans',{}).get('@transtext','')
audio_text = xml_dict.get('msg', {}).get('voicetrans', {}).get('@transtext', '')
result = {
'audio_length': voice_length,
'audio_text':audio_text
'audio_text': audio_text
}
except:
if xml_content and ':' in xml_content:

View File

@ -0,0 +1,13 @@
syntax = "proto3";
// Added in March 2025: the WeChat 4.0.3 release changed how images are named,
// which is why this message exists.
message PackedInfoDataImg2 {
// NOTE(review): the meaning of field1 and field2 is not evident from this file — confirm.
int32 field1 = 1;
int32 field2 = 2;
// Nested image metadata (see ImageInfo).
ImageInfo imageInfo = 3;
}
// Image dimensions and file name carried inside PackedInfoDataImg2.
message ImageInfo {
int32 height = 1;
int32 width = 2;
// NOTE(review): tag 3 is not declared here; filename uses tag 4 — confirm this matches the wire data.
string filename = 4;
}

View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: packed_info_data_img2.proto
"""Generated protocol buffer code."""
from google.protobuf.internal import builder as _builder
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1bpacked_info_data_img2.proto\"S\n\x12PackedInfoDataImg2\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x05\x12\x0e\n\x06\x66ield2\x18\x02 \x01(\x05\x12\x1d\n\timageInfo\x18\x03 \x01(\x0b\x32\n.ImageInfo\"<\n\tImageInfo\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x10\n\x08\x66ilename\x18\x04 \x01(\tb\x06proto3')
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'packed_info_data_img2_pb2', globals())
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
_PACKEDINFODATAIMG2._serialized_start=31
_PACKEDINFODATAIMG2._serialized_end=114
_IMAGEINFO._serialized_start=116
_IMAGEINFO._serialized_end=176
# @@protoc_insertion_point(module_scope)

View File

@ -292,6 +292,8 @@ class AudioMessageFactory(MessageFactory, Singleton):
audio_dic = parser_audio(msg.xml_content)
msg.duration = audio_dic.get('audio_length', 0)
msg.audio_text = audio_dic.get('audio_text', '')
if not msg.audio_text:
msg.audio_text = manager.get_audio_text(msg.server_id)
self.add_message(msg)
return msg
@ -439,6 +441,7 @@ class LinkMessageFactory(MessageFactory, Singleton):
contact = manager.get_contact_by_username(source_username)
msg.app_name = contact.nickname
msg.app_icon = contact.small_head_img_url
msg.app_id = source_username
elif (type_, sub_type) in {(49, 33), (49, 36)}:
# 小程序
msg.type = MessageType.Applet

View File

@ -24,7 +24,8 @@ from wxManager.model.message import VoipMessage, BusinessCardMessage, MergedMess
from wxManager.parser.link_parser import parser_link, parser_voip, parser_applet, parser_business, \
parser_merged_messages, parser_wechat_video, parser_position, parser_reply, parser_transfer, parser_red_envelop, \
parser_file, parser_favorite_note, parser_pat
from wxManager.parser.util.protocbuf import packed_info_data_pb2, packed_info_data_merged_pb2,packed_info_data_img_pb2
from wxManager.parser.util.protocbuf import packed_info_data_pb2, packed_info_data_merged_pb2, packed_info_data_img_pb2, \
packed_info_data_img2_pb2
from .audio_parser import parser_audio
from .emoji_parser import parser_emoji
from .file_parser import parse_video
@ -248,14 +249,27 @@ class ImageMessageFactory(MessageFactory, Singleton):
is_sender, wxid, message_content = self.common_attribute(message, username, manager)
filename = ''
try:
# 2025年3月微信测试版修改了img命名方式才有了这个东西
packed_info_data_proto = packed_info_data_img_pb2.PackedInfoDataImg()
# 2025年3月微信4.0.3正式版修改了img命名方式才有了这个东西
packed_info_data_proto = packed_info_data_img2_pb2.PackedInfoDataImg2()
packed_info_data_proto.ParseFromString(message[14])
# 转换为 JSON 格式
packed_info_data = MessageToDict(packed_info_data_proto)
filename = packed_info_data.get('filename', '').strip().strip('"').strip()
image_info = packed_info_data.get('imageInfo', {})
width = image_info.get('width',0)
height = image_info.get('height',0)
filename = image_info.get('filename', '').strip().strip('"').strip()
except:
pass
if not filename:
try:
# 2025年3月微信测试版修改了img命名方式才有了这个东西
packed_info_data_proto = packed_info_data_img_pb2.PackedInfoDataImg()
packed_info_data_proto.ParseFromString(message[14])
# 转换为 JSON 格式
packed_info_data = MessageToDict(packed_info_data_proto)
filename = packed_info_data.get('filename', '').strip().strip('"').strip()
except:
pass
msg = ImageMessage(
local_id=message[0],
server_id=message[1],
@ -277,8 +291,6 @@ class ImageMessageFactory(MessageFactory, Singleton):
file_name=filename,
file_type='png'
)
# with open(f'{msg.str_time}{msg.server_id}.bin', 'wb') as f:
# f.write(message[14])
path = manager.get_image(content=message_content, bytesExtra=msg, up_dir='',
thumb=False, talker_username=username)
msg.path = path
@ -300,6 +312,8 @@ class AudioMessageFactory(MessageFactory, Singleton):
# 转换为 JSON 格式
packed_info_data = MessageToDict(packed_info_data_proto)
audio_text = packed_info_data.get('info', {}).get('audioTxt', '')
if not audio_text:
audio_text = manager.get_audio_text(message[1])
msg = AudioMessage(
local_id=message[0],
server_id=message[1],
@ -454,6 +468,7 @@ class LinkMessageFactory(MessageFactory, Singleton):
contact = manager.get_contact_by_username(source_username)
msg.app_name = contact.nickname
msg.app_icon = contact.small_head_img_url
msg.app_id = source_username
elif message[2] == MessageType.Applet or message[2] == MessageType.Applet2:
info = parser_applet(message_content)
@ -627,7 +642,8 @@ class MergedMessageFactory(MessageFactory, Singleton):
inner_msg.path = os.path.join('msg', 'attach',
wxid_md5,
month,
'Rec', dir0, 'F', f"{level}{'_' if level else ''}{index}", inner_msg.file_name)
'Rec', dir0, 'F', f"{level}{'_' if level else ''}{index}",
inner_msg.file_name)
else:
inner_msg.path = manager.get_file(inner_msg.md5)
elif inner_msg.type == MessageType.MergedMessages: