mirror of
https://github.com/LC044/WeChatMsg
synced 2025-04-08 05:28:03 +08:00
650 lines
26 KiB
Python
650 lines
26 KiB
Python
import csv
|
||
import html
|
||
import io
|
||
import os
|
||
import re
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
import time
|
||
import traceback
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from typing import List, Tuple
|
||
|
||
import pysilk
|
||
|
||
from wxManager import MessageType, DataBaseInterface
|
||
from wxManager.model import Contact, Me, Message
|
||
|
||
from wxManager.log import logger
|
||
from exporter.config import FileType
|
||
|
||
|
||
def makedirs(path):
|
||
if not os.path.exists(path):
|
||
os.makedirs(path, exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'image'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'emoji'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'video'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'voice'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'file'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'avatar'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'music'), exist_ok=True)
|
||
os.makedirs(os.path.join(path, 'icon'), exist_ok=True)
|
||
|
||
|
||
def escape_js_and_html(input_str):
|
||
if not input_str:
|
||
return ''
|
||
# 转义HTML特殊字符
|
||
html_escaped = html.escape(input_str, quote=False)
|
||
|
||
# 手动处理JavaScript转义字符
|
||
js_escaped = (
|
||
html_escaped
|
||
.replace('\\r\\n', '<br>')
|
||
.replace('\\n', '<br>')
|
||
.replace('\\t', ' ')
|
||
.replace("\\", "\\\\")
|
||
.replace("'", r"\'")
|
||
.replace('"', r'\"')
|
||
.replace("\n", r'\n')
|
||
.replace("\r", r'\r')
|
||
.replace("\t", r'\t')
|
||
)
|
||
|
||
return js_escaped
|
||
|
||
|
||
class ExporterBaseBase:
|
||
exporter_id = 0
|
||
|
||
def __init__(self):
|
||
ExporterBaseBase.exporter_id += 1
|
||
self.id = ExporterBaseBase.exporter_id
|
||
self._is_running = True
|
||
self._is_paused = False
|
||
|
||
def cancel(self):
|
||
print('cancel')
|
||
|
||
def pause(self):
|
||
self._is_paused = True
|
||
|
||
def resume(self):
|
||
self._is_paused = False
|
||
|
||
def stop(self):
|
||
self._is_running = False
|
||
self.resume() # 确保在停止时唤醒线程
|
||
|
||
|
||
class ExporterBase(ExporterBaseBase):
|
||
i = 1
|
||
|
||
def __init__(
|
||
self,
|
||
database: DataBaseInterface,
|
||
contact: Contact,
|
||
output_dir,
|
||
type_=FileType.TXT, # 导出文件类型
|
||
message_types: set[MessageType] = None, # 导出的消息类型
|
||
time_range=None, # 导出的日期范围
|
||
group_members: set[str] = None, # 群聊中只导出这些人的聊天记录
|
||
progress_callback=None, # 进度回调函数,func(progress:float)
|
||
finish_callback=None # 导出完成回调函数
|
||
):
|
||
"""
|
||
@param database:
|
||
@param contact: 要导出的联系人
|
||
@param output_dir: 输出文件夹
|
||
@param type_: 导出文件类型
|
||
@param message_types: 导出的消息类型
|
||
@param time_range: 导出的日期范围
|
||
@param group_members: 群聊中筛选的群成员
|
||
@param progress_callback: 导出进度回调函数
|
||
"""
|
||
super().__init__()
|
||
if progress_callback:
|
||
self.update_progress_callback = progress_callback
|
||
else:
|
||
self.update_progress_callback = self.print_progress
|
||
if finish_callback:
|
||
self.finish_callback = finish_callback
|
||
else:
|
||
self.finish_callback = self.finish
|
||
self.database = database
|
||
self.avatar_urls_dict = {} # 联系人头像地址的字典
|
||
self.avatar_urls = [] # 联系人的头像地址(写入HTML)
|
||
self.avatar_paths_dict = {} # 联系人本地头像地址的字典
|
||
self.avatar_paths = [] # 联系人的本地头像地址(写入HTML)
|
||
self.message_types = message_types # 导出的消息类型
|
||
self.contact: Contact = contact # 联系人
|
||
self.output_type = type_ # 导出文件类型
|
||
self.total_num = 1 # 总的消息数量
|
||
self.num = 0 # 当前处理的消息数量
|
||
self.last_timestamp = 0
|
||
self.time_range = time_range
|
||
self.group_contacts = {} # 群聊里的所有联系人
|
||
self.group_members = group_members # 要导出的群聊成员(用于群消息筛选)
|
||
self.group_members_set = group_members
|
||
self.origin_path = os.path.join(output_dir, '聊天记录', f'{self.contact.remark}({self.contact.wxid})')
|
||
makedirs(self.origin_path)
|
||
|
||
def print_progress(self, progress):
|
||
logger.info(f'导出进度:{progress * 100:.2f}%')
|
||
# print()
|
||
|
||
def finish(self, success):
|
||
if success:
|
||
logger.info(f'导出完成\n{"-" * 20}')
|
||
else:
|
||
logger.info(f'导出失败\n{"-" * 20}')
|
||
|
||
def set_update_callback(self, callback):
|
||
self.update_progress_callback = callback
|
||
|
||
def _is_select_by_type(self, message):
|
||
# 筛选特定的消息类型
|
||
if not self.message_types:
|
||
return True
|
||
else:
|
||
return message.type in self.message_types
|
||
|
||
def _is_select_by_contact(self, message):
|
||
# 筛选群聊里的指定群成员
|
||
if self.contact.is_chatroom() and self.group_members_set:
|
||
wxid = message.sender_id
|
||
if wxid in self.group_members_set:
|
||
return True
|
||
else:
|
||
return False
|
||
else:
|
||
return True
|
||
|
||
def is_selected(self, message):
|
||
# 判断该消息是否应该导出
|
||
return self._is_select_by_type(message) and self._is_select_by_contact(message)
|
||
|
||
def run(self):
|
||
self.export()
|
||
|
||
def export(self):
|
||
return True
|
||
|
||
def start(self):
|
||
self.run()
|
||
|
||
def is_5_min(self, timestamp) -> bool:
|
||
if abs(timestamp - self.last_timestamp) > 300:
|
||
self.last_timestamp = timestamp
|
||
return True
|
||
return False
|
||
|
||
def save_avatars(self):
|
||
if self.contact.is_chatroom():
|
||
self.group_contacts = self.database.get_chatroom_members(self.contact.wxid)
|
||
self.group_contacts[Me().wxid] = Me()
|
||
else:
|
||
self.group_contacts = {
|
||
Me().wxid: Me(),
|
||
self.contact.wxid: self.contact
|
||
}
|
||
for wxid, contact in self.group_contacts.items():
|
||
self.save_avatar(contact)
|
||
|
||
def save_avatar(self, contact):
|
||
avatar_buffer = self.database.get_avatar_buffer(contact.wxid)
|
||
avatar_path = os.path.join(self.origin_path, 'avatar', f'{contact.wxid}.png')
|
||
contact.avatar_path = avatar_path
|
||
if not avatar_buffer:
|
||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||
# 构建要读取的文件路径
|
||
file_path = os.path.join(current_dir, 'resources', 'default_avatar.png')
|
||
with open(file_path, 'rb') as f:
|
||
avatar_buffer = f.read()
|
||
try:
|
||
with open(avatar_path, 'wb') as f:
|
||
f.write(avatar_buffer)
|
||
except:
|
||
logger.error(traceback.format_exc())
|
||
finally:
|
||
return avatar_path
|
||
|
||
def get_avatar_path(self, message: Message, is_absolute_path=False) -> str | int:
|
||
"""
|
||
获取消息发送者的头像
|
||
@param message: 消息元组
|
||
@param is_absolute_path: 是否是绝对路径
|
||
@return: True 返回本地的绝对路径,False 返回联系人的索引下标
|
||
"""
|
||
is_send = message.is_sender
|
||
if is_absolute_path:
|
||
# 返回头像的本地绝对路径
|
||
if message.sender_id in self.group_contacts:
|
||
avatar = self.group_contacts[message.sender_id].avatar_path
|
||
else:
|
||
# 针对那些退群的人,就保存为默认头像
|
||
contact = self.database.get_contact_by_username(message.sender_id)
|
||
avatar = self.save_avatar(contact)
|
||
self.group_contacts[contact.wxid] = contact
|
||
else:
|
||
if self.contact.is_chatroom():
|
||
avatar = self.avatar_urls_dict[message.sender_id]
|
||
else:
|
||
avatar = 0 if is_send else 1
|
||
return avatar
|
||
|
||
def get_avatar_urls(self):
|
||
index = 0
|
||
if self.contact.is_chatroom():
|
||
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
||
for message in messages:
|
||
contact = message[13]
|
||
if contact.wxid not in self.avatar_urls_dict:
|
||
avatar_path = os.path.join(self.origin_path, 'avatar', f'{contact.wxid}.png')
|
||
contact.save_avatar(avatar_path)
|
||
self.avatar_urls.append(contact.small_head_img_url)
|
||
self.avatar_paths.append(f'./avatar/{contact.wxid}.png')
|
||
self.avatar_urls_dict[contact.wxid] = index
|
||
index += 1
|
||
else:
|
||
self.avatar_urls = [Me().small_head_img_url, self.contact.small_head_img_url]
|
||
avatar_path = os.path.join(self.origin_path, 'avatar', f'{Me().wxid}.png')
|
||
QMe().save_avatar(avatar_path)
|
||
avatar_path1 = os.path.join(self.origin_path, 'avatar', f'{self.contact.wxid}.png')
|
||
# self.contact.save_avatar(avatar_path1)
|
||
self.avatar_paths = [f'./avatar/{Me().wxid}.png', f'./avatar/{self.contact.wxid}.png']
|
||
return self.avatar_urls, self.avatar_paths
|
||
|
||
def get_avatar_paths(self):
|
||
"""
|
||
获取全部头像
|
||
@return:
|
||
"""
|
||
index = 0
|
||
if self.contact.is_chatroom():
|
||
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
||
for message in messages:
|
||
contact = message[13]
|
||
if contact.wxid not in self.avatar_paths_dict:
|
||
self.avatar_paths.append(contact.small_head_img_url)
|
||
self.avatar_paths_dict[contact.wxid] = index
|
||
index += 1
|
||
else:
|
||
self.avatar_paths = [Me().small_head_img_url, self.contact.small_head_img_url]
|
||
return self.avatar_paths
|
||
|
||
|
||
class ImageExporter(ExporterBaseBase):
|
||
def __init__(self, parent=None):
|
||
super().__init__(parent)
|
||
|
||
def run(self):
|
||
self.startSignal.emit(1)
|
||
messages = database.get_messages_all()
|
||
num = len(messages)
|
||
os.makedirs(os.path.join(config.OUTPUT_DIR, 'image'), exist_ok=True)
|
||
for index, message in enumerate(messages):
|
||
type_ = message[2]
|
||
timestamp = message[5]
|
||
# 把时间戳转换为格式化时间
|
||
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
||
str_time = time.strftime("%Y%m%d_%H%M%S_", time_struct) # 把结构化时间转换为格式化时间
|
||
MsgSvrID = str(message[9])
|
||
if type_ == 3:
|
||
base_path = os.path.join(config.OUTPUT_DIR, 'image')
|
||
str_content = message[7]
|
||
BytesExtra = message[10]
|
||
str_content = escape_js_and_html(str_content)
|
||
image_path = hard_link_db.get_image(str_content, BytesExtra, up_dir=Me().wx_dir, thumb=False)
|
||
image_path = get_image(image_path, base_path=base_path, dst_name=str_time + MsgSvrID[:6])
|
||
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
||
self.okSignal.emit(self.id)
|
||
|
||
|
||
class VideoExporter(ExporterBaseBase):
|
||
def run(self):
|
||
self.startSignal.emit(1)
|
||
messages = database.get_messages_all()
|
||
num = len(messages)
|
||
os.makedirs(os.path.join(config.OUTPUT_DIR, 'video'), exist_ok=True)
|
||
for index, message in enumerate(messages):
|
||
type_ = message[2]
|
||
timestamp = message[5]
|
||
# 把时间戳转换为格式化时间
|
||
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
||
str_time = time.strftime("%Y%m%d_%H%M%S_", time_struct) # 把结构化时间转换为格式化时间
|
||
MsgSvrID = str(message[9])
|
||
if type_ == 43:
|
||
str_content = message[7]
|
||
BytesExtra = message[10]
|
||
video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False)
|
||
image_path = hard_link_db.get_video(str_content, BytesExtra, thumb=True)
|
||
if video_path:
|
||
video_path = f'{Me().wx_dir}/{video_path}'
|
||
if os.path.exists(video_path):
|
||
new_path = os.path.join(config.OUTPUT_DIR, 'video', str_time + MsgSvrID[:6] + '.mp4')
|
||
if not os.path.exists(new_path):
|
||
shutil.copy(video_path, new_path)
|
||
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
||
self.okSignal.emit(self.id)
|
||
|
||
|
||
class FileExporter(ExporterBaseBase):
|
||
def run(self):
|
||
self.startSignal.emit(1)
|
||
messages = database.get_messages_all()
|
||
num = len(messages)
|
||
origin_path = os.path.join(config.OUTPUT_DIR, 'files')
|
||
os.makedirs(os.path.join(config.OUTPUT_DIR, 'files'), exist_ok=True)
|
||
for index, message in enumerate(messages):
|
||
type_ = message[2]
|
||
sub_type = message[3]
|
||
timestamp = message[5]
|
||
# 把时间戳转换为格式化时间
|
||
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
||
str_time = time.strftime("%Y%m%d%H%M%S", time_struct) # 把结构化时间转换为格式化时间
|
||
if type_ == 49 and sub_type == 6:
|
||
bytesExtra = message[10]
|
||
compress_content = message[13]
|
||
file_info = file(bytesExtra, compress_content, output_path=origin_path)
|
||
if not file_info.get('is_error'):
|
||
file_path = file_info.get('file_path')
|
||
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{file_path}', 1))
|
||
self.okSignal.emit(self.id)
|
||
|
||
|
||
class AudioExporter(ExporterBaseBase):
|
||
def run(self):
|
||
self.startSignal.emit(1)
|
||
messages = msg_db.get_messages_all()
|
||
num = len(messages)
|
||
for index, message in enumerate(messages):
|
||
type_ = message[2]
|
||
timestamp = message[5]
|
||
# 把时间戳转换为格式化时间
|
||
time_struct = time.localtime(timestamp) # 首先把时间戳转换为结构化时间
|
||
str_time = time.strftime("%Y%m%d%H%M%S", time_struct) # 把结构化时间转换为格式化时间
|
||
MsgSvrID = str(message[9])
|
||
if type_ == 43:
|
||
str_content = message[7]
|
||
BytesExtra = message[10]
|
||
video_path = hard_link_db.get_video(str_content, BytesExtra, thumb=False)
|
||
image_path = hard_link_db.get_video(str_content, BytesExtra, thumb=True)
|
||
if video_path:
|
||
video_path = f'{Me().wx_dir}/{video_path}'
|
||
if os.path.exists(video_path):
|
||
new_path = os.path.join(config.OUTPUT_DIR, 'video', str_time + '.mp4')
|
||
if not os.path.exists(new_path):
|
||
shutil.copy(video_path, new_path)
|
||
globalSignals.status_bar_message.emit((f'导出进度:{index + 1}/{num}——{image_path}', 1))
|
||
self.okSignal.emit(self.id)
|
||
|
||
|
||
class ContactExporter(ExporterBaseBase):
|
||
def __init__(self, database, output_path):
|
||
super().__init__()
|
||
self.okSignal = None
|
||
self.database = database
|
||
self.output_path = output_path
|
||
|
||
def start(self):
|
||
self.run()
|
||
|
||
def run(self):
|
||
|
||
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
||
columns = ['UserName', 'Alias', 'Type', 'Remark', 'NickName', 'smallHeadImgUrl',
|
||
'bigHeadImgUrl', 'label', 'gender', 'signature', 'country/region', 'province', 'city']
|
||
|
||
contacts = self.database.get_contacts()
|
||
try:
|
||
# 写入CSV文件
|
||
with open(self.output_path, mode='w', newline='', encoding='utf-8-sig') as file:
|
||
writer = csv.writer(file)
|
||
writer.writerow(columns)
|
||
# 写入数据
|
||
for contact in contacts:
|
||
writer.writerow(
|
||
[contact.wxid, contact.alias, contact.flag, contact.remark, contact.nickname,
|
||
contact.small_head_img_url, contact.big_head_img_url, contact.label_name(), contact.gender,
|
||
contact.signature, *contact.region
|
||
]
|
||
)
|
||
except PermissionError:
|
||
print('另一个程序正在使用此文件,无法访问。')
|
||
|
||
|
||
class GroupContactExporter(ExporterBaseBase):
|
||
def __init__(self, database, output_dir, contact):
|
||
super().__init__()
|
||
self.contact = contact
|
||
self.database = database
|
||
if self.contact:
|
||
if not isinstance(self.contact, list):
|
||
self.origin_path = os.path.join(output_dir, '聊天记录',
|
||
f'{self.contact.remark}({self.contact.wxid})')
|
||
os.makedirs(self.origin_path, exist_ok=True)
|
||
|
||
def start(self):
|
||
self.run()
|
||
|
||
def run(self):
|
||
filename = os.path.join(self.origin_path, 'contacts.csv')
|
||
filename = get_new_filename(filename)
|
||
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
||
columns = ['UserName', '微信号', '类型', '群昵称', '昵称', '头像地址',
|
||
'头像原图', '标签', '性别', '个性签名', '国家(地区)', '省份', '城市']
|
||
contacts = self.database.get_chatroom_members(self.contact.wxid)
|
||
try:
|
||
# 写入CSV文件
|
||
with open(filename, mode='w', newline='', encoding='utf-8-sig') as file:
|
||
writer = csv.writer(file)
|
||
writer.writerow(columns)
|
||
# 写入数据
|
||
# writer.writerows(contacts)
|
||
for wxid, contact in contacts.items():
|
||
writer.writerow(
|
||
[
|
||
contact.wxid, contact.alias, contact.flag, contact.remark, contact.nickname,
|
||
contact.small_head_img_url, contact.big_head_img_url, contact.label_name(),
|
||
contact.gender, contact.signature, *contact.region
|
||
]
|
||
)
|
||
except PermissionError:
|
||
print('另一个程序正在使用此文件,无法访问。')
|
||
|
||
|
||
class CsvAllExporter(ExporterBaseBase):
|
||
def run(self):
|
||
filename = QFileDialog.getSaveFileName(None, "save file", os.path.join(os.getcwd(), 'messages.csv'),
|
||
"csv files (*.csv);;all files(*.*)")
|
||
if not filename[0]:
|
||
return
|
||
self.startSignal.emit(1)
|
||
filename = filename[0]
|
||
# columns = ["用户名", "消息内容", "发送时间", "发送状态", "消息类型", "isSend", "msgId"]
|
||
columns = ['localId', 'TalkerId', 'Type', 'SubType',
|
||
'IsSender', 'CreateTime', 'Status', 'StrContent',
|
||
'StrTime', 'Remark', 'NickName', 'Sender']
|
||
|
||
packagemsg = PackageMsg()
|
||
messages = packagemsg.get_package_message_all()
|
||
try:
|
||
# 写入CSV文件
|
||
with open(filename, mode='w', newline='', encoding='utf-8-sig') as file:
|
||
writer = csv.writer(file)
|
||
writer.writerow(columns)
|
||
# 写入数据
|
||
writer.writerows(messages)
|
||
except PermissionError:
|
||
globalSignals.information.emit('另一个程序正在使用此文件,无法访问。')
|
||
self.okSignal.emit(self.id)
|
||
|
||
|
||
def copy_file(source_file, destination_file):
|
||
if os.path.isfile(source_file) and not os.path.exists(destination_file):
|
||
try:
|
||
# logger.info(f'开始复制:{destination_file}')
|
||
shutil.copy(source_file, destination_file)
|
||
except:
|
||
pass
|
||
# logger.error(traceback.format_exc())
|
||
finally:
|
||
print(f'复制:{destination_file}')
|
||
# logger.info(f'复制:{destination_file}')
|
||
|
||
|
||
def copy_files(file_tasks: List[Tuple[str, str, str]]):
|
||
"""
|
||
|
||
:param file_tasks: List[
|
||
(原始文件路径,
|
||
输出文件夹,
|
||
输出文件名
|
||
)]
|
||
:return:
|
||
"""
|
||
if len(file_tasks) < 1:
|
||
return
|
||
futures = []
|
||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||
for source_file, output_dir, dst_name in file_tasks:
|
||
if dst_name:
|
||
ext = os.path.basename(source_file).split('.')[-1]
|
||
destination_file = os.path.join(output_dir, f'{dst_name}.{ext}')
|
||
else:
|
||
destination_file = os.path.join(output_dir, os.path.basename(source_file))
|
||
if os.path.exists(destination_file):
|
||
continue
|
||
if not os.path.exists(os.path.dirname(destination_file)):
|
||
os.makedirs(os.path.dirname(destination_file), exist_ok=True)
|
||
futures.append(executor.submit(copy_file, source_file, destination_file))
|
||
|
||
# 等待所有任务完成
|
||
for future in futures:
|
||
future.result()
|
||
|
||
|
||
def get_ffmpeg_path():
|
||
# 获取打包后的资源目录
|
||
resource_dir = getattr(sys, '_MEIPASS', os.path.abspath(os.path.dirname(__file__)))
|
||
|
||
# 构建 FFmpeg 可执行文件的路径
|
||
ffmpeg_path = os.path.join(resource_dir, 'ffmpeg.exe')
|
||
if not os.path.exists(ffmpeg_path):
|
||
ffmpeg_path = os.path.join(resource_dir, 'resources', 'ffmpeg.exe')
|
||
return ffmpeg_path
|
||
|
||
|
||
def decode_audio_to_mp3(media_buffer, output_dir, filename):
|
||
silk_path = f"{output_dir}/{filename}.silk"
|
||
pcm_path = f"{output_dir}/{filename}.pcm"
|
||
mp3_path = f"{output_dir}/{filename}.mp3"
|
||
if os.path.exists(mp3_path):
|
||
return mp3_path
|
||
if not os.path.exists(output_dir):
|
||
os.makedirs(output_dir, exist_ok=True)
|
||
buf = media_buffer
|
||
if not buf:
|
||
return ''
|
||
with open(silk_path, "wb") as f:
|
||
f.write(buf)
|
||
# open(silk_path, "wb").write()
|
||
try:
|
||
pcm_buf = pysilk.decode(buf, to_wav=False, sample_rate=44100)
|
||
with open(pcm_path, 'wb') as f:
|
||
f.write(pcm_buf)
|
||
# pysilk.decode_file(open("brainpower.pcm", "rb"), to_wav=False)
|
||
# 调用系统上的 ffmpeg 可执行文件
|
||
# 获取 FFmpeg 可执行文件的路径
|
||
ffmpeg_path = get_ffmpeg_path()
|
||
# print(ffmpeg_path)
|
||
# # 调用 FFmpeg
|
||
if os.path.exists(ffmpeg_path):
|
||
cmd = f'''"{ffmpeg_path}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
||
# system(cmd)
|
||
# 使用subprocess.run()执行命令
|
||
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||
else:
|
||
# 源码运行的时候下面的有效
|
||
# 这里不知道怎么捕捉异常
|
||
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
||
# system(cmd)
|
||
# 使用subprocess.run()执行命令
|
||
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||
# if os.path.exists(silk_path):
|
||
# os.remove(silk_path)
|
||
# if os.path.exists(pcm_path):
|
||
# os.remove(pcm_path)
|
||
except Exception as e:
|
||
print(f"Error: {e}")
|
||
logger.error(f'语音错误\n{traceback.format_exc()}')
|
||
cmd = f'''"{os.path.join(os.getcwd(), 'app', 'resources', 'data', 'ffmpeg.exe')}" -loglevel quiet -y -f s16le -i "{pcm_path}" -ar 44100 -ac 1 "{mp3_path}"'''
|
||
# system(cmd)
|
||
# 使用subprocess.run()执行命令
|
||
subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||
finally:
|
||
return mp3_path
|
||
|
||
|
||
def decode_audios(file_tasks: List[Tuple[str, str, str]]):
|
||
"""
|
||
|
||
:param database:
|
||
:param file_tasks: List[
|
||
(原始文件路径,
|
||
输出文件夹,
|
||
输出文件名
|
||
)]
|
||
:return:
|
||
"""
|
||
if len(file_tasks) < 1:
|
||
return
|
||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||
futures = []
|
||
for media_buffer, output_dir, dst_name in file_tasks:
|
||
futures.append(executor.submit(decode_audio_to_mp3, media_buffer, output_dir, dst_name))
|
||
|
||
# 等待所有任务完成
|
||
for future in futures:
|
||
future.result()
|
||
|
||
|
||
def remove_privacy_info(text):
|
||
# 正则表达式模式
|
||
patterns = {
|
||
'phone': r'\b(\+?86[-\s]?)?1[3-9]\d{9}\b', # 手机号
|
||
'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', # 邮箱
|
||
'id_card': r'\b\d{15}|\d{18}|\d{17}X\b', # 身份证号
|
||
'password': r'\b(?:password|pwd|pass|psw)[\s=:]*\S+\b', # 密码
|
||
'account': r'\b(?:account|username|user|acct)[\s=:]*\S+\b' # 账号
|
||
}
|
||
|
||
for key, pattern in patterns.items():
|
||
text = re.sub(pattern, f'[{key} xxx]', text)
|
||
|
||
return text
|
||
|
||
|
||
def get_new_filename(filename):
|
||
"""
|
||
检查给定的文件是否存在,如果存在就加个括号标个号,返回新的文件名
|
||
@param filename:
|
||
@return:
|
||
"""
|
||
if not os.path.exists(filename):
|
||
return filename
|
||
else:
|
||
for i in range(1, 10086):
|
||
basename = os.path.basename(filename)
|
||
tmp = basename.split('.')
|
||
name = '.'.join(tmp[:-1])
|
||
ext = tmp[-1]
|
||
dir_name = os.path.dirname(filename)
|
||
new_filename = os.path.join(dir_name, f'{name}({i}).{ext}')
|
||
if not os.path.exists(new_filename):
|
||
return new_filename
|
||
return filename
|