mirror of
https://github.com/LC044/WeChatMsg
synced 2025-04-08 05:28:03 +08:00
适配微信4.0.3正式版,修复表情包和合并转发的聊天记录解析失败的问题
This commit is contained in:
parent
e4a39135c2
commit
d95fc1972c
@ -188,8 +188,6 @@ class ExcelExporter(ExporterBase):
|
||||
logger.error(traceback.format_exc())
|
||||
continue
|
||||
type_ = message.type
|
||||
timestamp = message.timestamp
|
||||
msgSvrId = message.server_id
|
||||
if type_ == MessageType.Image:
|
||||
message.set_file_name()
|
||||
image_index[message.server_id] = self.row
|
||||
@ -256,6 +254,8 @@ class ExcelExporter(ExporterBase):
|
||||
if MessageType.Image in self.message_types:
|
||||
for index, message in enumerate(messages):
|
||||
if message.type == MessageType.Image:
|
||||
if not self.is_selected(message):
|
||||
continue
|
||||
row = image_index[message.server_id]
|
||||
img_path = find_image_with_known_extensions(os.path.join(self.origin_path, message.path))
|
||||
if not img_path:
|
||||
|
@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, date
|
||||
from typing import Tuple
|
||||
|
||||
from wxManager import MessageType
|
||||
from wxManager.merge import increase_data, increase_update_data
|
||||
from wxManager.log import logger
|
||||
from wxManager.model import DataBaseBase
|
||||
@ -61,6 +62,45 @@ def convert_to_timestamp(time_range) -> Tuple[int, int]:
|
||||
return convert_to_timestamp_(time_range[0]), convert_to_timestamp_(time_range[1])
|
||||
|
||||
|
||||
def get_local_type(type_: MessageType):
    """Translate a ``MessageType`` into the raw ``(Type, SubType)`` pair.

    The pair is what the message tables store in their ``Type`` and
    ``SubType`` columns, so callers can use it to filter rows by message kind.

    :param type_: the message kind to look up.
    :return: a ``(type, sub_type)`` tuple of ints, or ``(0, 0)`` when the
        message kind has no known raw pair.
    """
    # One direction only: MessageType -> (Type, SubType).
    #
    # The previous literal mixed this direction with reversed
    # "(Type, SubType) -> MessageType / description" entries, which made
    # Quote, Transfer, Voip and Pat unreachable (they fell back to (0, 0)).
    # Raw pairs that have no MessageType member at all -- (49, 8), (49, 53),
    # (49, 63), (49, 87), (49, 88), (49, 2003) -- cannot be keyed here and are
    # therefore left out.
    #
    # Several kinds exist under more than one raw pair.  A single return value
    # cannot express that, so the pair that was effective before (the last
    # duplicate in the old literal) is kept and the alias is noted.
    local_types = {
        MessageType.Text: (1, 0),
        MessageType.Image: (3, 0),
        MessageType.Audio: (34, 0),
        MessageType.Video: (43, 0),
        MessageType.Emoji: (47, 0),
        MessageType.BusinessCard: (42, 0),
        MessageType.OpenIMBCard: (66, 0),
        MessageType.Position: (48, 0),
        MessageType.FavNote: (49, 24),  # alias not returned: (49, 40)
        MessageType.File: (49, 6),  # alias not returned: (49, 0)
        MessageType.Text2: (49, 1),
        MessageType.Music: (49, 76),  # alias not returned: (49, 3)
        MessageType.LinkMessage: (49, 5),
        # "location sharing started" notice.
        # NOTE(review): the reversed entries also tied (10000, 0) and
        # (10000, 8000) to System -- confirm which pair callers expect.
        MessageType.System: (49, 17),
        MessageType.MergedMessages: (49, 19),
        MessageType.Applet: (49, 33),
        MessageType.Applet2: (49, 36),
        MessageType.WeChatVideo: (49, 51),
        MessageType.Quote: (49, 57),
        MessageType.Transfer: (49, 2000),
        MessageType.Voip: (50, 0),
        MessageType.Pat: (10000, 4),
    }
    return local_types.get(type_, (0, 0))
|
||||
|
||||
|
||||
class OpenIMMsgDB(DataBaseBase):
|
||||
|
||||
def _get_messages_by_num(self, cursor, username_, start_sort_seq, msg_num):
|
||||
@ -134,6 +174,29 @@ class OpenIMMsgDB(DataBaseBase):
|
||||
|
||||
return None
|
||||
|
||||
def _get_messages_by_type(self, cursor, username: str, type_: MessageType,
                          time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
    """Fetch one conversation's messages of a single kind, oldest first.

    :param cursor: database cursor the query is executed on.
    :param username: value matched against the ``StrTalker`` column.
    :param type_: message kind; converted to a ``(Type, SubType)`` pair with
        ``get_local_type``.
    :param time_range: optional ``(start, end)`` pair; when given, only rows
        with ``start < CreateTime < end`` (both exclusive) are returned.
    :return: the list of rows from ``cursor.fetchall()``, or ``None`` when
        no row matches.
    """
    local_type, sub_type = get_local_type(type_)
    params = [username, local_type, sub_type]

    # The optional time window is bound like every other filter value rather
    # than being formatted into the SQL text.
    time_filter = ''
    if time_range:
        start_time, end_time = convert_to_timestamp(time_range)
        time_filter = 'AND CreateTime>? AND CreateTime<?'
        params.extend((start_time, end_time))

    sql = f'''
    select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
    from MSG
    where StrTalker=? and Type=? and SubType = ?
    {time_filter}
    order by CreateTime
    '''
    cursor.execute(sql, params)
    # An empty result is reported as None, matching the previous contract.
    return cursor.fetchall() or None
|
||||
|
||||
def get_messages_by_type(self, username: str, type_: MessageType,
                         time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
    """Return one conversation's messages of a single kind.

    Public entry point: it obtains a cursor from ``self.DB`` and hands the
    query to ``_get_messages_by_type``.

    :param username: conversation identifier to filter on.
    :param type_: message kind to select.
    :param time_range: optional ``(start, end)`` window forwarded unchanged.
    :return: whatever ``_get_messages_by_type`` returns.
    """
    # Delegate to the private helper.  Calling the public name here (as the
    # code did before) re-enters this method with one positional argument too
    # many.  ``cursor`` must also be called: the helper needs a cursor object,
    # not the bound method.
    return self._get_messages_by_type(self.DB.cursor(), username, type_, time_range)
|
||||
|
||||
def merge(self, db_path):
|
||||
if not (os.path.exists(db_path) or os.path.isfile(db_path)):
|
||||
print(f'{db_path} 不存在')
|
||||
@ -143,4 +206,4 @@ class OpenIMMsgDB(DataBaseBase):
|
||||
increase_data(db_path, self.cursor, self.DB, 'ChatCRMsg', 'MsgSvrID', 1, exclude_column='localId')
|
||||
except:
|
||||
print(f"数据库操作错误: {traceback.format_exc()}")
|
||||
self.DB.rollback()
|
||||
self.DB.rollback()
|
||||
|
@ -8,79 +8,36 @@ from datetime import date
|
||||
from typing import Tuple
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from wxManager import MessageType
|
||||
from wxManager.merge import increase_data
|
||||
from wxManager.db_v3.msg import convert_to_timestamp
|
||||
from wxManager.db_v3.msg import convert_to_timestamp,get_local_type
|
||||
from wxManager.model import DataBaseBase
|
||||
|
||||
|
||||
class PublicMsg(DataBaseBase):
|
||||
|
||||
def get_messages(
|
||||
self,
|
||||
username_: str,
|
||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
||||
):
|
||||
"""
|
||||
return list
|
||||
a[0]: localId,
|
||||
a[1]: talkerId, (和strtalker对应的,不是群聊信息发送人)
|
||||
a[2]: type,
|
||||
a[3]: subType,
|
||||
a[4]: is_sender,
|
||||
a[5]: timestamp,
|
||||
a[6]: status, (没啥用)
|
||||
a[7]: str_content,
|
||||
a[8]: str_time, (格式化的时间)
|
||||
a[9]: msgSvrId,
|
||||
a[10]: BytesExtra,
|
||||
a[11]: CompressContent,
|
||||
a[12]: DisplayContent,
|
||||
a[13]: 联系人的类(如果是群聊就有,不是的话没有这个字段)
|
||||
"""
|
||||
if not self.open_flag:
|
||||
return []
|
||||
def _get_messages_by_type(self, cursor, username: str, type_: MessageType,
|
||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
|
||||
if time_range:
|
||||
start_time, end_time = convert_to_timestamp(time_range)
|
||||
local_type, sub_type = get_local_type(type_)
|
||||
sql = f'''
|
||||
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
|
||||
from PublicMsg
|
||||
where StrTalker=?
|
||||
from MSG
|
||||
where StrTalker=? and Type=? and SubType = ?
|
||||
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
|
||||
order by CreateTime
|
||||
'''
|
||||
try:
|
||||
lock.acquire(True)
|
||||
self.cursor.execute(sql, [username_])
|
||||
result = self.cursor.fetchall()
|
||||
finally:
|
||||
lock.release()
|
||||
return result
|
||||
cursor.execute(sql, [username, local_type, sub_type])
|
||||
result = cursor.fetchall()
|
||||
if result:
|
||||
return result
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_messages_by_type(
|
||||
self,
|
||||
username_: str,
|
||||
type_,
|
||||
sub_type=None,
|
||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
||||
):
|
||||
if not self.open_flag:
|
||||
return []
|
||||
if time_range:
|
||||
start_time, end_time = convert_to_timestamp(time_range)
|
||||
sql = f'''
|
||||
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
|
||||
from PublicMsg
|
||||
where StrTalker=? AND Type=? {'AND SubType=' + str(sub_type) if sub_type else ''}
|
||||
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
|
||||
order by CreateTime
|
||||
'''
|
||||
try:
|
||||
lock.acquire(True)
|
||||
self.cursor.execute(sql, [username_, type_])
|
||||
result = self.cursor.fetchall()
|
||||
finally:
|
||||
lock.release()
|
||||
return result
|
||||
def get_messages_by_type(self, username: str, type_: MessageType,
                         time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
    """Return one conversation's messages of a single kind.

    Thin public wrapper that opens a cursor on ``self.DB`` and forwards the
    query to ``_get_messages_by_type``.

    :param username: conversation identifier to filter on.
    :param type_: message kind to select.
    :param time_range: optional ``(start, end)`` window forwarded unchanged.
    :return: whatever ``_get_messages_by_type`` returns.
    """
    # Must call the underscore-prefixed helper with a real cursor
    # (``self.DB.cursor()``); calling ``self.get_messages_by_type`` here would
    # re-enter this wrapper with a mismatched argument list.
    return self._get_messages_by_type(self.DB.cursor(), username, type_, time_range)
|
||||
|
||||
def get_sport_score_by_name(self, username,
|
||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
|
||||
@ -126,7 +83,7 @@ class PublicMsg(DataBaseBase):
|
||||
|
||||
def get_messages_by_username(self, username: str,
|
||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None, ):
|
||||
return self._get_messages_by_username(self.DB.cursor(),username,time_range)
|
||||
return self._get_messages_by_username(self.DB.cursor(), username, time_range)
|
||||
|
||||
def get_message_by_server_id(self, username, server_id):
|
||||
"""
|
||||
|
@ -26,7 +26,7 @@ def parser_emoji(xml_content):
|
||||
'height': 0,
|
||||
'desc': ''
|
||||
}
|
||||
xml_content = xml_content.strip()
|
||||
xml_content = xml_content.strip().replace('&', '&')
|
||||
try:
|
||||
xml_dict = xmltodict.parse(xml_content)
|
||||
emoji_dic = xml_dict.get('msg', {}).get('emoji', {})
|
||||
|
@ -9,6 +9,7 @@
|
||||
@Description :
|
||||
"""
|
||||
import html
|
||||
import re
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
import xml.etree.ElementTree as ET
|
||||
@ -206,13 +207,27 @@ def parser_business(xml_content):
|
||||
return result
|
||||
|
||||
|
||||
def replace_entity(match):
    """Choose the replacement for one numeric character reference.

    References to code points that XML 1.0 does not allow (for example
    ``&#1;``) make the whole document unparseable, so they are replaced by an
    empty string.  References to legal characters are returned unchanged so
    that no message content is lost.

    :param match: regex match whose full text is ``&#<digits>;`` or
        ``&#x<hex digits>;``.
    :return: the original reference text, or ``''`` if it must be dropped.
    """
    reference = match.group(0)
    body = reference[2:-1]  # the text between "&#" and ";"
    if not body.isascii():
        # Non-ASCII digits are not valid in a character reference.
        return ''
    try:
        code_point = int(body[1:], 16) if body.startswith('x') else int(body)
    except ValueError:
        return ''
    # The "Char" production of XML 1.0.
    is_xml_char = (
        code_point in (0x9, 0xA, 0xD)
        or 0x20 <= code_point <= 0xD7FF
        or 0xE000 <= code_point <= 0xFFFD
        or 0x10000 <= code_point <= 0x10FFFF
    )
    return reference if is_xml_char else ''


def process_xml(xml_string):
    """Remove character references that would make *xml_string* unparseable.

    Both decimal (``&#1;``) and hexadecimal (``&#x01;``) references are
    inspected; only those naming a code point outside the XML 1.0 character
    range are removed.

    :param xml_string: XML text that may contain illegal character references.
    :return: the text with the illegal references removed.
    """
    processed_xml = re.sub(r'&#(?:x[0-9a-fA-F]+|\d+);', replace_entity, xml_string)
    return processed_xml
|
||||
|
||||
|
||||
def parser_record_item(recorditem, output_dir, wxid, msg_time, level=0):
|
||||
xml_string = recorditem
|
||||
if isinstance(xml_string, dict):
|
||||
recorditem_dic = xml_string
|
||||
else:
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
|
||||
try:
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
except:
|
||||
xml_string = process_xml(xml_string)
|
||||
recorditem_dic = xmltodict.parse(xml_string)
|
||||
# logger.error(recorditem_dic)
|
||||
datalist = recorditem_dic.get('recordinfo', {}).get('datalist', {})
|
||||
count = datalist.get('@count', 0)
|
||||
@ -522,7 +537,7 @@ def parser_record_item(recorditem, output_dir, wxid, msg_time, level=0):
|
||||
return result
|
||||
|
||||
|
||||
def parser_merged_messages(xml, output_dir, wxid, msg_time, level=0):
|
||||
def parser_merged_messages(xml: str, output_dir, wxid, msg_time, level=0):
|
||||
try:
|
||||
try:
|
||||
data_dic = xmltodict.parse(xml).get('msg', {})
|
||||
@ -543,8 +558,8 @@ def parser_merged_messages(xml, output_dir, wxid, msg_time, level=0):
|
||||
}
|
||||
except:
|
||||
logger.error(xml)
|
||||
logger.error(new_xml1)
|
||||
logger.error(new_xml2)
|
||||
# logger.error(new_xml1)
|
||||
# logger.error(new_xml2)
|
||||
logger.error(traceback.format_exc())
|
||||
# raise ValueError('合并转发的消息解析失败')
|
||||
return {
|
||||
|
@ -3,9 +3,11 @@ syntax = "proto3";
|
||||
message PackedInfoDataImg2 {
|
||||
int32 field1 = 1;
|
||||
int32 field2 = 2;
|
||||
ImageInfo imageInfo = 3;
|
||||
VideoInfo videoInfo = 4;
|
||||
FileInfo fileInfo = 7;
|
||||
ImageInfo imageInfo = 3; // 图片
|
||||
VideoInfo videoInfo = 4; // 视频
|
||||
AudioInfo audioInfo = 5; // 语音
|
||||
FileInfo fileInfo = 7; // 文件
|
||||
MergeInfo mergeInfo = 9; // 合并转发的聊天记录
|
||||
}
|
||||
|
||||
message ImageInfo {
|
||||
@ -35,4 +37,13 @@ message FileSubMessage2 {
|
||||
string field1 = 1;
|
||||
string field2 = 2;
|
||||
string field3 = 3;
|
||||
}
|
||||
|
||||
message MergeInfo {
|
||||
string dir = 1;
|
||||
}
|
||||
|
||||
message AudioInfo {
|
||||
uint32 field1 = 1;
|
||||
string audioTxt = 2; // 语音转文字结果
|
||||
}
|
@ -190,7 +190,7 @@ class Singleton:
|
||||
self.contacts[wxid] = manager.get_contact_by_username(wxid)
|
||||
if isinstance(message[12], bytes):
|
||||
message_content = decompress(message[12])
|
||||
message_content = message_content.replace('', '').replace(' ', ' ')
|
||||
message_content = message_content.replace('&#x01;', '').replace(' ', ' ')
|
||||
# logger.error(message_content)
|
||||
else:
|
||||
message_content = message[12]
|
||||
@ -198,7 +198,9 @@ class Singleton:
|
||||
2] != MessageType.Pat:
|
||||
# 群聊文字消息格式:<wxid>:<content>
|
||||
message_content = ':'.join(message_content.split(':')[1:]).strip()
|
||||
|
||||
if message_content and message_content.startswith(username):
|
||||
# md 微信不知道在搞什么,弄一些乱七八糟的东西 4.0.3.22
|
||||
message_content = message_content.strip(f'{username}:').replace('<?xml version="1.0"?>', '')
|
||||
return is_sender, wxid, message_content
|
||||
|
||||
|
||||
@ -876,7 +878,7 @@ class FileMessageFactory(MessageFactory, Singleton):
|
||||
is_sender, wxid, message_content = self.common_attribute(message, username, manager)
|
||||
info = parser_file(message_content)
|
||||
md5 = info.get('md5', '')
|
||||
filename = info.get('filename','')
|
||||
filename = info.get('filename', '')
|
||||
if not filename:
|
||||
try:
|
||||
# 2025年3月微信4.0.3正式版修改了img命名方式才有了这个东西
|
||||
|
Loading…
Reference in New Issue
Block a user