mirror of
https://github.com/LC044/WeChatMsg
synced 2025-04-08 05:28:03 +08:00
527 lines
23 KiB
Python
527 lines
23 KiB
Python
import os
|
||
import time
|
||
import traceback
|
||
|
||
from wxManager import Me, MessageType
|
||
from wxManager.decrypt.decrypt_dat import batch_decode_image_multiprocessing
|
||
from wxManager.log import logger
|
||
from wxManager.model import Message
|
||
from exporter.exporter import ExporterBase, copy_files, decode_audios, get_new_filename
|
||
|
||
from PIL import JpegImagePlugin
|
||
from PIL import ImageFile
|
||
|
||
from PIL import Image as PILImage
|
||
|
||
from wxManager.parser.link_parser import wx_sport, wx_collection_data, wx_pay_data
|
||
|
||
# Replace Pillow's private JPEG ``_getmp`` hook with a stub that always returns None.
# NOTE(review): presumably this stops Pillow from parsing multi-picture (MPO) metadata
# that trips on some WeChat images — confirm against the Pillow version in use.
JpegImagePlugin._getmp = lambda x: None
|
||
# Let Pillow load image files whose data is truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||
|
||
|
||
def add_hyperlink(doc, row, column, hyperlink):
    """Attach *hyperlink* to one worksheet cell and style it like a link.

    Args:
        doc: The openpyxl worksheet that owns the cell.
        row: 1-based row index of the cell.
        column: 1-based column index of the cell.
        hyperlink: Link target (the callers in this file pass a relative path).
    """
    # openpyxl is imported lazily, as everywhere else in this module.
    from openpyxl.styles import Font

    cell = doc.cell(row=row, column=column)
    cell.hyperlink = hyperlink
    # Restyle the cell text so it reads as a link: blue with a single underline.
    cell.font = Font(color="0000FF", underline="single")
|
||
|
||
|
||
def find_image_with_known_extensions(img_path):
    """Return the first existing ``<img_path><ext>`` file, or ``None``.

    *img_path* is an extension-less path. Each common image suffix is appended
    in a fixed order and the first candidate that is a regular file is returned.
    """
    parent, stem = os.path.split(img_path)
    candidates = (
        os.path.join(parent, f"{stem}{suffix}")
        for suffix in ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp')
    )
    return next((path for path in candidates if os.path.isfile(path)), None)
|
||
|
||
|
||
class ExcelExporter(ExporterBase):
    """Export a contact's chat history to an ``.xlsx`` workbook.

    ``run`` selects the export routine: three hard-coded official-account
    wxids get dedicated layouts (``wx_pay``, ``wx_collect``, ``wx_sport``),
    every other official account goes through ``public_to_excel`` and all
    remaining contacts through ``to_excel``.
    """

    # Next worksheet row ``to_excel`` will write; row 1 holds the header.
    row = 2

    # 1-based column indexes of the chat sheet used by the helpers below.
    _ID_COLUMN = 1       # first column, forced to text format before saving
    _CONTENT_COLUMN = 5  # '内容' column: receives hyperlinks and embedded images

    # Card titles that ``wx_pay`` skips instead of exporting as a row.
    _WX_PAY_SKIPPED_TITLES = frozenset({
        '记账日报', '「先享后付」服务使用通知', '转入零钱通,五一享收益',
        '转入零钱通,端午享收益', '智能手表支付服务已启用', '优惠券领取提醒',
        '清明假期收益规则', '「先享后付」服务完成通知', '礼包领取提醒',
        '五一假期收益规则提醒', '端午节假期收益规则', '中秋节假期收益规则',
        '元旦假期收益规则', '春节假期收益规则', '五一假期收益规则',
        '中秋及国庆假期收益规则', '春节赚收益攻略', '「先享后付」服务取消通知',
        '揭开骗局,远离诈骗',
    })

    # wxid of an official account -> name of the method that exports it.
    _PUBLIC_EXPORTERS = {
        'gh_3dfda90e39d6': 'wx_pay',
        'gh_f0a92aa7146c': 'wx_collect',
        'gh_43f2581f6fd6': 'wx_sport',
    }

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _prepare_output_file(self):
        """Announce the export, create the output directory and return the workbook path."""
        print(f"【开始导出 XLSX {self.contact.remark}】")
        os.makedirs(self.origin_path, exist_ok=True)
        filename = os.path.join(self.origin_path, f"{self.contact.remark}.xlsx")
        return get_new_filename(filename)

    def _finish_export(self, workbook, sheet, filename):
        """Format the first column as text, save the workbook and fire the callbacks."""
        from openpyxl.utils import get_column_letter

        # "@" is the text number format; applied to every cell of the first column.
        for cell in sheet[get_column_letter(self._ID_COLUMN)]:
            cell.number_format = "@"
        try:
            workbook.save(filename)
        except PermissionError:
            # Presumably the target is locked (e.g. open in a spreadsheet app);
            # retry under a timestamped name instead of failing the export.
            stem, _ = os.path.splitext(filename)
            workbook.save(f"{stem}{int(time.time())}.xlsx")
        self.update_progress_callback(1)
        self.finish_callback(self.exporter_id)
        print(f"【完成导出 XLSX {self.contact.remark}】")

    def _export_link_messages(self, columns, build_row):
        """Export every link message of the contact as one worksheet row.

        Args:
            columns: Header row of the sheet.
            build_row: Callable taking a message and returning the row values,
                or ``None`` to skip the message. Exceptions it raises are
                logged and the message is skipped.
        """
        import openpyxl

        filename = self._prepare_output_file()
        messages = self.database.get_messages(self.contact.wxid, time_range=self.time_range)
        workbook = openpyxl.Workbook()
        sheet = workbook.create_sheet("聊天记录", 0)
        sheet.append(columns)
        total_num = len(messages)
        for index, message in enumerate(messages):
            if not self._is_running:
                break
            if index % 1000 == 0:
                self.update_progress_callback(index / total_num)
            if message.type != MessageType.LinkMessage:
                continue
            try:
                values = build_row(message)
                if values is not None:
                    sheet.append(values)
            except Exception:
                logger.error(traceback.format_exc())
        self._finish_export(workbook, sheet, filename)

    @staticmethod
    def _contact_row(contact, include_phone=False):
        """Build the member-sheet row for *contact*.

        With ``include_phone`` an empty cell is inserted for the '电话' column
        so the values line up with the single-contact header.
        """
        values = [
            contact.wxid, contact.alias, contact.flag, contact.remark, contact.nickname,
            contact.small_head_img_url, contact.big_head_img_url, contact.label_name(),
            contact.gender,
        ]
        if include_phone:
            # NOTE(review): no phone attribute is visible on the contact model from
            # this file, so the cell is left blank — fill it in if one exists.
            values.append('')
        values.extend([contact.signature, *contact.region])
        return values

    # ------------------------------------------------------------------
    # Regular chats
    # ------------------------------------------------------------------

    def add_member_info(self, sheet):
        """Fill *sheet* with profile data of the chat participants.

        For a chatroom the members are loaded from the database, cached on
        ``self.group_contacts`` (``message_to_list`` reads it) and written one
        per row. For an official account nothing is written. For any other
        contact a single row describing that contact is written.
        """
        if self.contact.is_chatroom():
            columns = ['wxid', '微信号', '类型', '群昵称', '昵称', '头像地址',
                       '头像原图', '标签', '性别', '个性签名', '国家(地区)', '省份', '城市']
            self.group_contacts = self.database.get_chatroom_members(self.contact.wxid)
            sheet.append(columns)
            for member in self.group_contacts.values():
                sheet.append(self._contact_row(member))
            return

        if self.contact.is_public():
            return

        columns = (
            'wxid', '微信号', '类型', '群昵称', '昵称', '头像地址', '头像原图', '标签', '性别', '电话',
            '个性签名', '国家(地区)', '省份', '城市')
        sheet.append(columns)
        # The header has a '电话' column the chatroom layout lacks; pad the row so
        # signature and region land under their own headers.
        sheet.append(self._contact_row(self.contact, include_phone=True))

    def message_to_list(self, message: Message):
        """Convert *message* into the value list of one chat-sheet row.

        The remark and nickname columns default to the message's display name.
        In a chatroom they come from the cached member entry of the sender when
        one exists; in a private chat they come from ``Me()`` for sent messages
        and from the contact otherwise.
        """
        remark = message.display_name
        nickname = message.display_name
        if self.contact.is_chatroom():
            contact = self.group_contacts.get(message.sender_id)
            if contact:
                remark = contact.remark
                nickname = contact.nickname
        else:
            contact = Me() if message.is_sender else self.contact
            remark = contact.remark
            nickname = contact.nickname
        return [
            str(message.server_id), message.type_name(), message.display_name,
            message.str_time, message.to_text(), remark, nickname, 'more',
        ]

    def _embed_images(self, sheet, messages, image_rows, max_height=500):
        """Anchor the exported picture of each image message in its own row.

        Args:
            sheet: The chat worksheet.
            messages: All messages that were iterated during the export.
            image_rows: Maps the server id of each exported image message to
                the worksheet row it was written to.
            max_height: Pictures taller than this many pixels are scaled down.
        """
        from openpyxl.drawing.image import Image
        from openpyxl.utils import get_column_letter

        column_letter = get_column_letter(self._CONTENT_COLUMN)
        for message in messages:
            if message.type != MessageType.Image:
                continue
            # Images that were filtered out, failed to append, or were never
            # reached after an early stop have no row; skip them.
            row = image_rows.get(message.server_id)
            if row is None:
                continue
            # Prefer the full-size picture and fall back to the thumbnail.
            img_path = (
                find_image_with_known_extensions(os.path.join(self.origin_path, message.path))
                or find_image_with_known_extensions(os.path.join(self.origin_path, message.thumb_path))
            )
            if not img_path:
                continue
            try:
                # Open the file only to read its pixel size.
                with PILImage.open(img_path) as probe:
                    width, height = probe.size
                # Never enlarge; only shrink pictures taller than max_height.
                scale = min(1.0, max_height / height)
                scaled_width = int(width * scale)
                scaled_height = int(height * scale)

                picture = Image(img_path)
                picture.width = scaled_width
                picture.height = scaled_height
                sheet.add_image(picture, f"{column_letter}{row}")
                # 0.75 converts pixels to the unit Excel uses for row heights.
                sheet.row_dimensions[row].height = scaled_height * 0.75
            except Exception:
                logger.error(traceback.format_exc())

    def to_excel(self):
        """Export a regular chat (private or chatroom) with its media files.

        Writes one row per selected message, queues images, videos, files and
        voice messages for export into sub-folders of ``self.origin_path``,
        links or embeds the media in the content column, then saves the
        workbook and fires the progress and finish callbacks.
        """
        import openpyxl

        # The original set this on openpyxl's Image class, where it has no effect.
        # The limit that matters is Pillow's decompression-bomb threshold.
        PILImage.MAX_IMAGE_PIXELS = None

        filename = self._prepare_output_file()
        columns = ['消息ID', '类型', '发送人', '时间', '内容', '备注', '昵称', '更多信息']
        messages = self.database.get_messages(self.contact.wxid, time_range=self.time_range)
        new_workbook = openpyxl.Workbook()
        new_sheet = new_workbook.create_sheet("聊天记录", 0)
        member_sheet = new_workbook.create_sheet("成员信息", 1)
        self.add_member_info(member_sheet)
        new_sheet.append(columns)

        total_num = len(messages)
        image_tasks, video_tasks, file_tasks, audio_tasks = [], [], [], []
        image_dir = os.path.join(self.origin_path, 'image')
        video_dir = os.path.join(self.origin_path, 'video')
        audio_dir = os.path.join(self.origin_path, 'voice')
        file_dir = os.path.join(self.origin_path, 'file')
        image_rows = {}  # server id of an image message -> worksheet row

        def queue_image(msg):
            """Queue the picture and its thumbnail, then point msg at the exported copies."""
            msg.set_file_name()
            month = msg.str_time[:7]
            target_dir = os.path.join(image_dir, month)
            image_tasks.append((os.path.join(Me().wx_dir, msg.path), target_dir, msg.file_name))
            image_tasks.append((os.path.join(Me().wx_dir, msg.thumb_path), target_dir, msg.file_name + '_t'))
            msg.path = f"./image/{month}/{msg.file_name}"
            msg.thumb_path = f"./image/{month}/{msg.file_name + '_t'}"

        def queue_file(msg, only_if_exists):
            """Queue the attachment; return True when msg.path now points at the copy."""
            month = msg.str_time[:7]
            origin_file_path = os.path.join(Me().wx_dir, msg.path)
            file_tasks.append((origin_file_path, os.path.join(file_dir, month), ''))
            if only_if_exists and not os.path.isfile(origin_file_path):
                return False
            msg.path = f'./file/{month}/{os.path.basename(origin_file_path)}'
            return True

        def queue_video(msg):
            """Queue the video and point msg.path at the exported copy."""
            msg.set_file_name()
            month = msg.str_time[:7]
            video_tasks.append(
                (os.path.join(Me().wx_dir, msg.path), os.path.join(video_dir, month), msg.file_name)
            )
            ext = os.path.basename(msg.path).split('.')[-1]
            msg.path = f'./video/{month}/{msg.file_name}.{ext}'

        def queue_audio(msg):
            """Queue the voice buffer for decoding and point msg.path at the mp3."""
            msg.set_file_name()
            month = msg.str_time[:7]
            audio_tasks.append(
                (self.database.get_media_buffer(msg.server_id), os.path.join(audio_dir, month), msg.file_name)
            )
            msg.path = f'./voice/{month}/{msg.file_name + ".mp3"}'

        def queue_merged(merged_message):
            """Recursively queue the media nested inside a merged (forwarded) message."""
            for msg in merged_message.messages:
                if msg.type == MessageType.Image:
                    queue_image(msg)
                elif msg.type == MessageType.File:
                    queue_file(msg, only_if_exists=False)
                elif msg.type == MessageType.Video:
                    queue_video(msg)
                elif msg.type == MessageType.MergedMessages:
                    queue_merged(msg)

        # Restart the row counter so repeated exports on one instance stay aligned.
        self.row = 2
        for index, message in enumerate(messages):
            if not self._is_running:
                break
            if index % 1000 == 0:
                self.update_progress_callback(index / total_num)
            if not self.is_selected(message):
                continue
            try:
                new_sheet.append(self.message_to_list(message))
            except Exception:
                logger.error(traceback.format_exc())
                continue
            # Capture the row just written BEFORE advancing the counter, so links
            # and pictures attach to this message rather than the next one.
            message_row = self.row
            self.row += 1

            type_ = message.type
            if type_ == MessageType.Image:
                queue_image(message)
                image_rows[message.server_id] = message_row
            elif type_ == MessageType.File:
                # Link only attachments that actually exist on disk.
                if queue_file(message, only_if_exists=True):
                    add_hyperlink(new_sheet, message_row, self._CONTENT_COLUMN, message.path)
            elif type_ == MessageType.Video:
                queue_video(message)
                add_hyperlink(new_sheet, message_row, self._CONTENT_COLUMN, message.path)
            elif type_ == MessageType.Audio:
                queue_audio(message)
                add_hyperlink(new_sheet, message_row, self._CONTENT_COLUMN, message.path)
            elif type_ == MessageType.MergedMessages:
                queue_merged(message)

        # Decode all queued images (multiprocessing helper).
        batch_decode_image_multiprocessing(Me().xor_key, image_tasks)
        # Copy videos and files into the export folder.
        copy_files(video_tasks + file_tasks)
        decode_audios(audio_tasks)

        if MessageType.Image in self.message_types:
            self._embed_images(new_sheet, messages, image_rows)

        self._finish_export(new_workbook, new_sheet, filename)

    # ------------------------------------------------------------------
    # Official accounts
    # ------------------------------------------------------------------

    def public_to_excel(self):
        """Export the link messages of a generic official account."""
        columns = ['日期', '时间', '标题', '描述', '链接', '更多信息']

        def build_row(message):
            return [*message.str_time.split(' '), message.title, message.description, message.href]

        self._export_link_messages(columns, build_row)

    def wx_pay(self):
        """Export payment cards, skipping the titles in ``_WX_PAY_SKIPPED_TITLES``."""
        columns = ['类型', '收款单位', '日期', '时间', '金额', '付款方式', '收单机构', '更多信息']

        def build_row(message):
            card_data = wx_pay_data(message.xml_content)
            date, str_time = message.str_time.split(' ')
            if card_data.get('title') in self._WX_PAY_SKIPPED_TITLES:
                return None
            return [
                card_data.get('title'), card_data.get('display_name'), date, str_time,
                card_data.get('money'), card_data.get('payment_type'),
                card_data.get('acquiring_institution'), card_data.get('more'),
            ]

        self._export_link_messages(columns, build_row)

    def wx_collect(self):
        """Export collection (money received) cards."""
        columns = ['类型', '日期', '时间', '金额', '详细信息', '汇总', '备注', '更多信息']

        def build_row(message):
            card_data = wx_collection_data(message.xml_content)
            date, str_time = message.str_time.split(' ')
            # NOTE(review): seven values for eight headers, so 'more' lands under
            # '备注' and '更多信息' stays empty — confirm the intended mapping.
            return [
                card_data.get('title'), date, str_time, card_data.get('money'),
                card_data.get('display_name'), card_data.get('summary'), card_data.get('more'),
            ]

        self._export_link_messages(columns, build_row)

    def wx_sport(self):
        """Export step-ranking cards, one row per card."""
        columns = ['日期', '排名', '步数', '当日冠军', '当日冠军步数', '更多信息']

        def build_row(message):
            # ``wx_sport`` here is the module-level parser, not this method:
            # class attributes are not visible from function scope.
            card_data = wx_sport(message.xml_content)
            champion_name = ''
            rank_list = card_data.get('rank_list')
            if not rank_list:
                champion = {}
            else:
                # The first rank_list entry is exported as the day's champion.
                champion = rank_list[0]
                contact = self.database.get_contact_by_username(champion.get('username'))
                champion_name = contact.remark
            return [
                message.str_time.split(' ')[0], card_data.get('rank'), card_data.get('score'),
                champion_name, champion.get('score'),
            ]

        self._export_link_messages(columns, build_row)

    def run(self):
        """Run the export routine that matches the contact type."""
        if not self.contact.is_public():
            self.to_excel()
            return
        method_name = self._PUBLIC_EXPORTERS.get(self.contact.wxid, 'public_to_excel')
        getattr(self, method_name)()
|