mirror of
https://github.com/LC044/WeChatMsg
synced 2025-04-17 00:48:23 +08:00
650 lines
26 KiB
Python
650 lines
26 KiB
Python
|
import csv
|
|||
|
import html
|
|||
|
import io
|
|||
|
import os
|
|||
|
import re
|
|||
|
import shutil
|
|||
|
import subprocess
|
|||
|
import sys
|
|||
|
import time
|
|||
|
import traceback
|
|||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|||
|
from typing import List, Tuple
|
|||
|
|
|||
|
import pysilk
|
|||
|
|
|||
|
from wxManager import MessageType, DataBaseInterface
|
|||
|
from wxManager.model import Contact, Me, Message
|
|||
|
|
|||
|
from wxManager.log import logger
|
|||
|
from exporter.config import FileType
|
|||
|
|
|||
|
|
|||
|
def makedirs(path):
|
|||
|
if not os.path.exists(path):
|
|||
|
os.makedirs(path, exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'image'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'emoji'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'video'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'voice'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'file'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'avatar'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'music'), exist_ok=True)
|
|||
|
os.makedirs(os.path.join(path, 'icon'), exist_ok=True)
|
|||
|
|
|||
|
|
|||
|
def escape_js_and_html(input_str):
|
|||
|
if not input_str:
|
|||
|
return ''
|
|||
|
# 转义HTML特殊字符
|
|||
|
html_escaped = html.escape(input_str, quote=False)
|
|||
|
|
|||
|
# 手动处理JavaScript转义字符
|
|||
|
js_escaped = (
|
|||
|
html_escaped
|
|||
|
.replace('\\r\\n', '<br>')
|
|||
|
.replace('\\n', '<br>')
|
|||
|
.replace('\\t', ' ')
|
|||
|
.replace("\\", "\\\\")
|
|||
|
.replace("'", r"\'")
|
|||
|
.replace('"', r'\"')
|
|||
|
.replace("\n", r'\n')
|
|||
|
.replace("\r", r'\r')
|
|||
|
.replace("\t", r'\t')
|
|||
|
)
|
|||
|
|
|||
|
return js_escaped
|
|||
|
|
|||
|
|
|||
|
class ExporterBaseBase:
|
|||
|
exporter_id = 0
|
|||
|
|
|||
|
def __init__(self):
|
|||
|
ExporterBaseBase.exporter_id += 1
|
|||
|
self.id = ExporterBaseBase.exporter_id
|
|||
|
self._is_running = True
|
|||
|
self._is_paused = False
|
|||
|
|
|||
|
def cancel(self):
|
|||
|
print('cancel')
|
|||
|
|
|||
|
def pause(self):
|
|||
|
self._is_paused = True
|
|||
|
|
|||
|
def resume(self):
|
|||
|
self._is_paused = False
|
|||
|
|
|||
|
def stop(self):
|
|||
|
self._is_running = False
|
|||
|
self.resume() # 确保在停止时唤醒线程
|
|||
|
|
|||
|
|
|||
|
class ExporterBase(ExporterBaseBase):
|
|||
|
i = 1
|
|||
|
|
|||
|
def __init__(
|
|||
|
self,
|
|||
|
database: DataBaseInterface,
|
|||
|
contact: Contact,
|
|||
|
output_dir,
|
|||
|
type_=FileType.TXT, # 导出文件类型
|
|||
|
message_types: set[MessageType] = None, # 导出的消息类型
|
|||
|
time_range=None, # 导出的日期范围
|
|||
|
group_members: set[str] = None, # 群聊中只导出这些人的聊天记录
|
|||
|
progress_callback=None, # 进度回调函数,func(progress:float)
|
|||
|
finish_callback=None # 导出完成回调函数
|
|||
|
):
|
|||
|
"""
|
|||
|
@param database:
|
|||
|
@param contact: 要导出的联系人
|
|||
|
@param output_dir: 输出文件夹
|
|||
|
@param type_: 导出文件类型
|
|||
|
@param message_types: 导出的消息类型
|
|||
|
@param time_range: 导出的日期范围
|
|||
|
@param group_members: 群聊中筛选的群成员
|
|||
|
@param progress_callback: 导出进度回调函数
|
|||
|
"""
|
|||
|
super().__init__()
|
|||
|
if progress_callback:
|
|||
|
self.update_progress_callback = progress_callback
|
|||
|
else:
|
|||
|
self.update_progress_callback = self.print_progress
|
|||
|
if finish_callback:
|
|||
|
self.finish_callback = finish_callback
|
|||
|
else:
|
|||
|
self.finish_callback = self.finish
|
|||
|
self.database = database
|
|||
|
self.avatar_urls_dict = {} # 联系人头像地址的字典
|
|||
|
self.avatar_urls = [] # 联系人的头像地址(写入HTML)
|
|||
|
self.avatar_paths_dict = {} # 联系人本地头像地址的字典
|
|||
|
self.avatar_paths = [] # 联系人的本地头像地址(写入HTML)
|
|||
|
self.message_types = message_types # 导出的消息类型
|
|||
|
self.contact: Contact = contact # 联系人
|
|||
|
self.output_type = type_ # 导出文件类型
|
|||
|
self.total_num = 1 # 总的消息数量
|
|||
|
self.num = 0 # 当前处理的消息数量
|
|||
|
self.last_timestamp = 0
|
|||
|
self.time_range = time_range
|
|||
|
self.group_contacts = {} # 群聊里的所有联系人
|
|||
|
self.group_members = group_members # 要导出的群聊成员(用于群消息筛选)
|
|||
|
self.group_members_set = group_members
|
|||
|
self.origin_path = os.path.join(output_dir, '聊天记录', f'{self.contact.remark}({self.contact.wxid})')
|
|||
|
makedirs(self.origin_path)
|
|||
|
|
|||
|
def print_progress(self, progress):
|
|||
|
logger.info(f'导出进度:{progress * 100:.2f}%')
|
|||
|
# print()
|
|||
|
|
|||
|
def finish(self, success):
|
|||
|
if success:
|
|||
|
logger.info(f'导出完成\n{"-" * 20}')
|
|||
|
else:
|
|||
|
logger.info(f'导出失败\n{"-" * 20}')
|
|||
|
|
|||
|
def set_update_callback(self, callback):
|
|||
|
self.update_progress_callback = callback
|
|||
|
|
|||
|
def _is_select_by_type(self, message):
|
|||
|
# 筛选特定的消息类型
|
|||
|
if not self.message_types:
|
|||
|
return True
|
|||
|
else:
|
|||
|
return message.type in self.message_types
|
|||
|
|
|||
|
def _is_select_by_contact(self, message):
|
|||
|
# 筛选群聊里的指定群成员
|
|||
|
if self.contact.is_chatroom() and self.group_members_set:
|
|||
|
wxid = message.sender_id
|
|||
|
if wxid in self.group_members_set:
|
|||
|
return True
|
|||
|
else:
|
|||
|
return False
|
|||
|
else:
|
|||
|
return True
|
|||
|
|
|||
|
def is_selected(self, message):
|
|||
|
# 判断该消息是否应该导出
|
|||
|
return self._is_select_by_type(message) and self._is_select_by_contact(message)
|
|||
|
|
|||
|
def run(self):
|
|||
|
self.export()
|
|||
|
|
|||
|
def export(self):
|
|||
|
return True
|
|||
|
|
|||
|
def start(self):
|
|||
|
self.run()
|
|||
|
|
|||
|
def is_5_min(self, timestamp) -> bool:
|
|||
|
if abs(timestamp - self.last_timestamp) > 300:
|
|||
|
self.last_timestamp = timestamp
|
|||
|
return True
|
|||
|
return False
|
|||
|
|
|||
|
def save_avatars(self):
|
|||
|
if self.contact.is_chatroom():
|
|||
|
self.group_contacts = self.database.get_chatroom_members(self.contact.wxid)
|
|||
|
self.group_contacts[Me().wxid] = Me()
|
|||
|
else:
|
|||
|
self.group_contacts = {
|
|||
|
Me().wxid: Me(),
|
|||
|
self.contact.wxid: self.contact
|
|||
|
}
|
|||
|
for wxid, contact in self.group_contacts.items():
|
|||
|
self.save_avatar(contact)
|
|||
|
|
|||
|
def save_avatar(self, contact):
|
|||
|
avatar_buffer = self.database.get_avatar_buffer(contact.wxid)
|
|||
|
avatar_path = os.path.join(self.origin_path, 'avatar', f'{contact.wxid}.png')
|
|||
|
contact.avatar_path = avatar_path
|
|||
|
if not avatar_buffer:
|
|||
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|||
|
# 构建要读取的文件路径
|
|||
|
file_path = os.path.join(current_dir, 'resources', 'default_avatar.png')
|
|||
|
with open(file_path, 'rb') as f:
|
|||
|
avatar_buffer = f.read()
|
|||
|
try:
|
|||
|
with open(avatar_path, 'wb') as f:
|
|||
|
f.write(avatar_buffer)
|
|||
|
except:
|
|||
|
logger.error(traceback.format_exc())
|
|||
|
finally:
|
|||
|
return avatar_path
|
|||
|
|
|||
|
def get_avatar_path(self, message: Message, is_absolute_path=False) -> str | int:
|
|||
|
"""
|
|||
|
获取消息发送者的头像
|
|||
|
@param message: 消息元组
|
|||
|
@param is_absolute_path: 是否是绝对路径
|
|||
|
@return: True 返回本地的绝对路径,False 返回联系人的索引下标
|
|||
|
"""
|
|||
|
is_send = message.is_sender
|
|||
|
if is_absolute_path:
|
|||
|
# 返回头像的本地绝对路径
|
|||
|
if message.sender_id in self.group_contacts:
|
|||
|
avatar = self.group_contacts[message.sender_id].avatar_path
|
|||
|
else:
|
|||
|
# 针对那些退群的人,就保存为默认头像
|
|||
|
contact = self.database.get_contact_by_username(message.sender_id)
|
|||
|
avatar = self.save_avatar(contact)
|
|||
|
self.group_contacts[contact.wxid] = contact
|
|||
|
else:
|
|||
|
if self.contact.is_chatroom():
|
|||
|
avatar = self.avatar_urls_dict[message.sender_id]
|
|||
|
else:
|
|||
|
avatar = 0 if is_send else 1
|
|||
|
return avatar
|
|||
|
|
|||
|
def get_avatar_urls(self):
|
|||
|
index = 0
|
|||
|
if self.contact.is_chatroom():
|
|||
|
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
|||
|
for message in messages:
|
|||
|
contact = message[13]
|
|||
|
if contact.wxid not in self.avatar_urls_dict:
|
|||
|
avatar_path = os.path.join(self.origin_path, 'avatar', f'{contact.wxid}.png')
|
|||
|
contact.save_avatar(avatar_path)
|
|||
|
self.avatar_urls.append(contact.small_head_img_url)
|
|||
|
self.avatar_paths.append(f'./avatar/{contact.wxid}.png')
|
|||
|
self.avatar_urls_dict[contact.wxid] = index
|
|||
|
index += 1
|
|||
|
else:
|
|||
|
self.avatar_urls = [Me().small_head_img_url, self.contact.small_head_img_url]
|
|||
|
avatar_path = os.path.join(self.origin_path, 'avatar', f'{Me().wxid}.png')
|
|||
|
QMe().save_avatar(avatar_path)
|
|||
|
avatar_path1 = os.path.join(self.origin_path, 'avatar', f'{self.contact.wxid}.png')
|
|||
|
# self.contact.save_avatar(avatar_path1)
|
|||
|
self.avatar_paths = [f'./avatar/{Me().wxid}.png', f'./avatar/{self.contact.wxid}.png']
|
|||
|
return self.avatar_urls, self.avatar_paths
|
|||
|
|
|||
|
def get_avatar_paths(self):
|
|||
|
"""
|
|||
|
获取全部头像
|
|||
|
@return:
|
|||
|
"""
|
|||
|
index = 0
|
|||
|
if self.contact.is_chatroom():
|
|||
|
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
|||
|
for message in messages:
|
|||
|
contact = message[13]
|
|||
|
if contact.wxid not in self.avatar_paths_dict:
|
|||
|
self.avatar_paths.append(contact.small_head_img_url)
|
|||
|
self.avatar_paths_dict[contact.wxid] = index
|
|||
|
index += 1
|
|||
|
else:
|
|||
|
self.avatar_paths = [Me().small_head_img_url, self.contact.small_head_img_url]
|
|||
|
return self.avatar_paths
|
|||
|
|
|||
|
|
|||
|
class ImageExporter(ExporterBaseBase):
|
|||
|
def __init__(self, parent=None):
|
|||
|
super().__init__(parent)
|
|||
|
|
|||
|
def run(self):
|
|||
|
self.startSignal.emit(1)
|
|||
|
messages = database.get_messages_all()
|
|||
|
num = len(messages)
|
|||
|
os.makedirs(os.path.join(config.OUTPUT_DIR, 'image'), exist_ok=True)
|
|||
|
for index, message in enumerate(messages):
|
|||
|
type_ = message[2]
|
|||
|
timestamp = message[5]
|
|||
|
# 把时间戳转换为格式化时间
|
|||
|
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
|||
|
str_time = time.strftime("%Y%m%d_%H%M%S_", time_struct) # 把结构化时间转换为格式化时间
|
|||
|
MsgSvrID = str(message[9])
|
|||
|
if type_ == 3:
|
|||
|
base_path = os.path.join(config.OUTPUT_DIR, 'image')
|
|||
|
str_content = message[7]
|
|||
|
BytesExtra = message[10]
|
|||
|
str_content = escape_js_and_html(str_content)
|
|||
|
image_path = hard_link_db.get_image(str_content, BytesExtra, up_dir=Me().wx_dir, thumb=False)
|
|||
|
image_path = get_image(image_path, base_path=base_path, dst_name=str_time + MsgSvrID[:6])
|
|||
|
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
|||
|
self.okSignal.emit(self.id)
|
|||
|
|
|||
|
|
|||
|
class VideoExporter(ExporterBaseBase):
|
|||
|
def run(self):
|
|||
|
self.startSignal.emit(1)
|
|||
|
messages = database.get_messages_all()
|
|||
|
num = len(messages)
|
|||
|
os.makedirs(os.path.join(config.OUTPUT_DIR, 'video'), exist_ok=True)
|
|||
|
for index, message in enumerate(messages):
|
|||
|
type_ = message[2]
|
|||
|
timestamp = message[5]
|
|||
|
# 把时间戳转换为格式化时间
|
|||
|
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
|||
|
str_time = time.strftime("%Y%m%d_%H%M%S_", time_struct) # 把结构化时间转换为格式化时间
|
|||
|
MsgSvrID = str(message[9])
|
|||
|
if type_ == 43:
|
|||
|
str_content = message[7]
|
|||
|
BytesExtra = message[10]
|
|||
|
video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False)
|
|||
|
image_path = hard_link_db.get_video(str_content, BytesExtra, thumb=True)
|
|||
|
if video_path:
|
|||
|
video_path = f'{Me().wx_dir}/{video_path}'
|
|||
|
if os.path.exists(video_path):
|
|||
|
new_path = os.path.join(config.OUTPUT_DIR, 'video', str_time + MsgSvrID[:6] + '.mp4')
|
|||
|
if not os.path.exists(new_path):
|
|||
|
shutil.copy(video_path, new_path)
|
|||
|
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
|||
|
self.okSignal.emit(self.id)
|
|||
|
|
|||
|
|
|||
|
class FileExporter(ExporterBaseBase):
|
|||
|
def run(self):
|
|||
|
self.startSignal.emit(1)
|
|||
|
messages = database.get_messages_all()
|
|||
|
num = len(messages)
|
|||
|
origin_path = os.path.join(config.OUTPUT_DIR, 'files')
|
|||
|
os.makedirs(os.path.join(config.OUTPUT_DIR, 'files'), exist_ok=True)
|
|||
|
for index, message in enumerate(messages):
|
|||
|
type_ = message[2]
|
|||
|
sub_type = message[3]
|
|||
|
timestamp = message[5]
|
|||
|
# 把时间戳转换为格式化时间
|
|||
|
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
|||
|
str_time = time.strftime("%Y%m%d%H%M%S", time_struct) # 把结构化时间转换为格式化时间
|
|||
|
if type_ == 49 and sub_type == 6:
|
|||
|
bytesExtra = message[10]
|
|||
|
compress_content = message[13]
|
|||
|
file_info = file(bytesExtra, compress_content, output_path=origin_path)
|
|||
|
if not file_info.get('is_error'):
|
|||
|
file_path = file_info.get('file_path')
|
|||
|
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{file_path}', 1))
|
|||
|
self.okSignal.emit(self.id)
|
|||
|
|
|||
|
|
|||
|
class AudioExporter(ExporterBaseBase):
|
|||
|
def run(self):
|
|||
|
self.startSignal.emit(1)
|
|||
|
messages = msg_db.get_messages_all()
|
|||
|
num = len(messages)
|
|||
|
for index, message in enumerate(messages):
|
|||
|
type_ = message[2]
|
|||
|
timestamp = message[5]
|
|||
|
# 把时间戳转换为格式化时间
|
|||
|
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
|||
|
str_time = time.strftime("%Y%m%d%H%M%S", time_struct) # 把结构化时间转换为格式化时间
|
|||
|
MsgSvrID = str(message[9])
|
|||
|
if type_ == 43:
|
|||
|
str_content = message[7]
|
|||
|
BytesExtra = message[10]
|
|||
|
video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False)
|
|||
|
image_path = hard_link_db.get_video(str_content, BytesExtra, thumb=True)
|
|||
|
if video_path:
|
|||
|
video_path = f'{Me().wx_dir}/{video_path}'
|
|||
|
if os.path.exists(video_path):
|
|||
|
new_path = os.path.join(config.OUTPUT_DIR, 'video', str_time + '.mp4')
|
|||
|
if not os.path.exists(new_path):
|
|||
|
shutil.copy(video_path, new_path)
|
|||
|
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
|||
|
self.okSignal.emit(self.id)
|
|||
|
|
|||
|
|
|||
|
class ContactExporter(ExporterBaseBase):
|
|||
|
def __init__(self, database, output_path):
|
|||
|
super().__init__()
|
|||
|
self.okSignal = None
|
|||
|
self.database = database
|
|||
|
self.output_path = output_path
|
|||
|
|
|||
|
def start(self):
|
|||
|
self.run()
|
|||
|
|
|||
|
def run(self):
|
|||
|
|
|||
|
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
|||
|
columns = ['UserName', 'Alias', 'Type', 'Remark', 'NickName', 'smallHeadImgUrl',
|
|||
|
'bigHeadImgUrl', 'label', 'gender', 'signature', 'country/region', 'province', 'city']
|
|||
|
|
|||
|
contacts = self.database.get_contacts()
|
|||
|
try:
|
|||
|
# 写入CSV文件
|
|||
|
with open(self.output_path, mode='w', newline='', encoding='utf-8-sig') as file:
|
|||
|
writer = csv.writer(file)
|
|||
|
writer.writerow(columns)
|
|||
|
# 写入数据
|
|||
|
for contact in contacts:
|
|||
|
writer.writerow(
|
|||
|
[contact.wxid, contact.alias, contact.flag, contact.remark, contact.nickname,
|
|||
|
contact.small_head_img_url, contact.big_head_img_url, contact.label_name(), contact.gender,
|
|||
|
contact.signature, *contact.region
|
|||
|
]
|
|||
|
)
|
|||
|
except PermissionError:
|
|||
|
print('另一个程序正在使用此文件,无法访问。')
|
|||
|
|
|||
|
|
|||
|
class GroupContactExporter(ExporterBaseBase):
|
|||
|
def __init__(self, database, output_dir, contact):
|
|||
|
super().__init__()
|
|||
|
self.contact = contact
|
|||
|
self.database = database
|
|||
|
if self.contact:
|
|||
|
if not isinstance(self.contact, list):
|
|||
|
self.origin_path = os.path.join(output_dir, '聊天记录',
|
|||
|
f'{self.contact.remark}({self.contact.wxid})')
|
|||
|
os.makedirs(self.origin_path, exist_ok=True)
|
|||
|
|
|||
|
def start(self):
|
|||
|
self.run()
|
|||
|
|
|||
|
def run(self):
|
|||
|
filename = os.path.join(self.origin_path, 'contacts.csv')
|
|||
|
filename = get_new_filename(filename)
|
|||
|
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
|||
|
columns = ['UserName', '微信号', '类型', '群昵称', '昵称', '头像地址',
|
|||
|
'头像原图', '标签', '性别', '个性签名', '国家(地区)', '省份', '城市']
|
|||
|
contacts = self.database.get_chatroom_members(self.contact.wxid)
|
|||
|
try:
|
|||
|
# 写入CSV文件
|
|||
|
with open(filename, mode='w', newline='', encoding='utf-8-sig') as file:
|
|||
|
writer = csv.writer(file)
|
|||
|
writer.writerow(columns)
|
|||
|
# 写入数据
|
|||
|
# writer.writerows(contacts)
|
|||
|
for wxid, contact in contacts.items():
|
|||
|
writer.writerow(
|
|||
|
[
|
|||
|
contact.wxid, contact.alias, contact.flag, contact.remark, contact.nickname,
|
|||
|
contact.small_head_img_url, contact.big_head_img_url, contact.label_name(),
|
|||
|
contact.gender, contact.signature, *contact.region
|
|||
|
]
|
|||
|
)
|
|||
|
except PermissionError:
|
|||
|
print('另一个程序正在使用此文件,无法访问。')
|
|||
|
|
|||
|
|
|||
|
class CsvAllExporter(ExporterBaseBase):
|
|||
|
def run(self):
|
|||
|
filename = QFileDialog.getSaveFileName(None, "save file", os.path.join(os.getcwd(), 'messages.csv'),
|
|||
|
"csv files (*.csv);;all files(*.*)")
|
|||
|
if not filename[0]:
|
|||
|
return
|
|||
|
self.startSignal.emit(1)
|
|||
|
filename = filename[0]
|
|||
|
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
|||
|
columns = ['localId', 'TalkerId', 'Type', 'SubType',
|
|||
|
'IsSender', 'CreateTime', 'Status', 'StrContent',
|
|||
|
'StrTime', 'Remark', 'NickName', 'Sender']
|
|||
|
|
|||
|
packagemsg = PackageMsg()
|
|||
|
messages = packagemsg.get_package_message_all()
|
|||
|
try:
|
|||
|
# 写入CSV文件
|
|||
|
with open(filename, mode='w', newline='', encoding='utf-8-sig') as file:
|
|||
|
writer = csv.writer(file)
|
|||
|
writer.writerow(columns)
|
|||
|
# 写入数据
|
|||
|
writer.writerows(messages)
|
|||
|
except PermissionError:
|
|||
|
globalSignals.information.emit('另一个程序正在使用此文件,无法访问。')
|
|||
|
self.okSignal.emit(self.id)
|
|||
|
|
|||
|
|
|||
|
def copy_file(source_file, destination_file):
|
|||
|
if os.path.isfile(source_file) and not os.path.exists(destination_file):
|
|||
|
try:
|
|||
|
# logger.info(f'开始复制:{destination_file}')
|
|||
|
shutil.copy(source_file, destination_file)
|
|||
|
except:
|
|||
|
pass
|
|||
|
# logger.error(traceback.format_exc())
|
|||
|
finally:
|
|||
|
print(f'复制:{destination_file}')
|
|||
|
# logger.info(f'复制:{destination_file}')
|
|||
|
|
|||
|
|
|||
|
def copy_files(file_tasks: List[Tuple[str, str, str]]):
|
|||
|
"""
|
|||
|
|
|||
|
:param file_tasks: List[
|
|||
|
(原始文件路径,
|
|||
|
输出文件夹,
|
|||
|
输出文件名
|
|||
|
)]
|
|||
|
:return:
|
|||
|
"""
|
|||
|
if len(file_tasks) < 1:
|
|||
|
return
|
|||
|
futures = []
|
|||
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
|||
|
for source_file, output_dir, dst_name in file_tasks:
|
|||
|
if dst_name:
|
|||
|
ext = os.path.basename(source_file).split('.')[-1]
|
|||
|
destination_file = os.path.join(output_dir, f'{dst_name}.{ext}')
|
|||
|
else:
|
|||
|
destination_file = os.path.join(output_dir, os.path.basename(source_file))
|
|||
|
if os.path.exists(destination_file):
|
|||
|
continue
|
|||
|
if not os.path.exists(os.path.dirname(destination_file)):
|
|||
|
os.makedirs(os.path.dirname(destination_file), exist_ok=True)
|
|||
|
futures.append(executor.submit(copy_file, source_file, destination_file))
|
|||
|
|
|||
|
# 等待所有任务完成
|
|||
|
for future in futures:
|
|||
|
future.result()
|
|||
|
|
|||
|
|
|||
|
def get_ffmpeg_path():
|
|||
|
# 获取打包后的资源目录
|
|||
|
resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
|
|||
|
|
|||
|
# 构建 FFmpeg 可执行文件的路径
|
|||
|
ffmpeg_path = os.path.join(resource_dir, 'ffmpeg.exe')
|
|||
|
if not os.path.exists(ffmpeg_path):
|
|||
|
ffmpeg_path = os.path.join(resource_dir, 'resources', 'ffmpeg.exe')
|
|||
|
return ffmpeg_path
|
|||
|
|
|||
|
|
|||
|
def decode_audio_to_mp3(media_buffer, output_dir, filename):
|
|||
|
silk_path = f"{output_dir}/{filename}.silk"
|
|||
|
pcm_path = f"{output_dir}/{filename}.pcm"
|
|||
|
mp3_path = f"{output_dir}/{filename}.mp3"
|
|||
|
if os.path.exists(mp3_path):
|
|||
|
return mp3_path
|
|||
|
if not os.path.exists(output_dir):
|
|||
|
os.makedirs(output_dir, exist_ok=True)
|
|||
|
buf = media_buffer
|
|||
|
if not buf:
|
|||
|
return ''
|
|||
|
with open(silk_path, "wb") as f:
|
|||
|
f.write(buf)
|
|||
|
# open(silk_path, "wb").write()
|
|||
|
try:
|
|||
|
pcm_buf = pysilk.decode(buf, to_wav=False, sample_rate=44100)
|
|||
|
with open(pcm_path, 'wb') as f:
|
|||
|
f.write(pcm_buf)
|
|||
|
# pysilk.decode_file(open("brainpower.pcm", "rb"), to_wav=False)
|
|||
|
# 调用系统上的 ffmpeg 可执行文件
|
|||
|
# 获取 FFmpeg 可执行文件的路径
|
|||
|
ffmpeg_path = get_ffmpeg_path()
|
|||
|
# print(ffmpeg_path)
|
|||
|
# # 调用 FFmpeg
|
|||
|
if os.path.exists(ffmpeg_path):
|
|||
|
cmd = f'''"{ffmpeg_path}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
|||
|
# system(cmd)
|
|||
|
# 使用subprocess.run()执行命令
|
|||
|
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|||
|
else:
|
|||
|
# 源码运行的时候下面的有效
|
|||
|
# 这里不知道怎么捕捉异常
|
|||
|
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
|||
|
# system(cmd)
|
|||
|
# 使用subprocess.run()执行命令
|
|||
|
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|||
|
# if os.path.exists(silk_path):
|
|||
|
# os.remove(silk_path)
|
|||
|
# if os.path.exists(pcm_path):
|
|||
|
# os.remove(pcm_path)
|
|||
|
except Exception as e:
|
|||
|
print(f"Error: {e}")
|
|||
|
logger.error(f'语音错误\n{traceback.format_exc()}')
|
|||
|
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
|||
|
# system(cmd)
|
|||
|
# 使用subprocess.run()执行命令
|
|||
|
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|||
|
finally:
|
|||
|
return mp3_path
|
|||
|
|
|||
|
|
|||
|
def decode_audios(file_tasks: List[Tuple[str, str, str]]):
|
|||
|
"""
|
|||
|
|
|||
|
:param database:
|
|||
|
:param file_tasks: List[
|
|||
|
(原始文件路径,
|
|||
|
输出文件夹,
|
|||
|
输出文件名
|
|||
|
)]
|
|||
|
:return:
|
|||
|
"""
|
|||
|
if len(file_tasks) < 1:
|
|||
|
return
|
|||
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
|||
|
futures = []
|
|||
|
for media_buffer, output_dir, dst_name in file_tasks:
|
|||
|
futures.append(executor.submit(decode_audio_to_mp3, media_buffer, output_dir, dst_name))
|
|||
|
|
|||
|
# 等待所有任务完成
|
|||
|
for future in futures:
|
|||
|
future.result()
|
|||
|
|
|||
|
|
|||
|
def remove_privacy_info(text):
|
|||
|
# 正则表达式模式
|
|||
|
patterns = {
|
|||
|
'phone': r'\b(\+?86[-\s]?)?1[3-9]\d{9}\b', # 手机号
|
|||
|
'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', # 邮箱
|
|||
|
'id_card': r'\b\d{15}|\d{18}|\d{17}X\b', # 身份证号
|
|||
|
'password': r'\b(?:password|pwd|pass|psw)[\s=:]*\S+\b', # 密码
|
|||
|
'account': r'\b(?:account|username|user|acct)[\s=:]*\S+\b' # 账号
|
|||
|
}
|
|||
|
|
|||
|
for key, pattern in patterns.items():
|
|||
|
text = re.sub(pattern, f'[{key} xxx]', text)
|
|||
|
|
|||
|
return text
|
|||
|
|
|||
|
|
|||
|
def get_new_filename(filename):
|
|||
|
"""
|
|||
|
检查给定的文件是否存在,如果存在就加个括号标个号,返回新的文件名
|
|||
|
@param filename:
|
|||
|
@return:
|
|||
|
"""
|
|||
|
if not os.path.exists(filename):
|
|||
|
return filename
|
|||
|
else:
|
|||
|
for i in range(1, 10086):
|
|||
|
basename = os.path.basename(filename)
|
|||
|
tmp = basename.split('.')
|
|||
|
name = '.'.join(tmp[:-1])
|
|||
|
ext = tmp[-1]
|
|||
|
dir_name = os.path.dirname(filename)
|
|||
|
new_filename = os.path.join(dir_name, f'{name}({i}).{ext}')
|
|||
|
if not os.path.exists(new_filename):
|
|||
|
return new_filename
|
|||
|
return filename
|