利用多线程处理语音和表情包

This commit is contained in:
shuaikangzhou 2023-12-22 22:42:24 +08:00
parent e2100f9b77
commit b1a6f52148
5 changed files with 145 additions and 29 deletions

View File

@ -55,12 +55,16 @@ class MediaMsg:
try:
lock.acquire(True)
self.cursor.execute(sql, [reserved0])
return self.cursor.fetchone()[0]
result = self.cursor.fetchone()
finally:
lock.release()
return result[0] if result else None
def get_audio(self, reserved0, output_path):
buf = self.get_media_buffer(reserved0)
if not buf:
return ''
silk_path = f"{output_path}\\{reserved0}.silk"
pcm_path = f"{output_path}\\{reserved0}.pcm"
mp3_path = f"{output_path}\\{reserved0}.mp3"
@ -76,10 +80,10 @@ class MediaMsg:
try:
# 调用系统上的 ffmpeg 可执行文件
# 获取 FFmpeg 可执行文件的路径
ffmpeg_path = get_ffmpeg_path()
# 调用 FFmpeg
cmd = f'''{ffmpeg_path} -loglevel quiet -y -f s16le -i {pcm_path} -ar 44100 -ac 1 {mp3_path}'''
system(cmd)
# ffmpeg_path = get_ffmpeg_path()
# # 调用 FFmpeg
# cmd = f'''{ffmpeg_path} -loglevel quiet -y -f s16le -i {pcm_path} -ar 44100 -ac 1 {mp3_path}'''
# system(cmd)
# 源码运行的时候下面的有效
# 这里不知道怎么捕捉异常
cmd = f'''{os.path.join(os.getcwd(), 'app', 'resources', 'ffmpeg.exe')} -loglevel quiet -y -f s16le -i {pcm_path} -ar 44100 -ac 1 {mp3_path}'''
@ -88,11 +92,14 @@ class MediaMsg:
print(f"Error: {e}")
cmd = f'''{os.path.join(os.getcwd(),'app','resources','ffmpeg.exe')} -loglevel quiet -y -f s16le -i {pcm_path} -ar 44100 -ac 1 {mp3_path}'''
system(cmd)
system(f'del {silk_path}')
system(f'del {pcm_path}')
# system(f'del {silk_path}')
# system(f'del {pcm_path}')
print(mp3_path)
return mp3_path
def get_audio_path(self, reserved0, output_path):
    """Build the Windows-style path of the mp3 file for a voice message.

    This only composes a string: nothing is read from the database and the
    file is neither created nor checked for existence.

    Args:
        reserved0: Identifier used as the file's base name.
        output_path: Directory the mp3 is expected to live in.

    Returns:
        ``<output_path>\\<reserved0>.mp3`` with every forward slash turned
        into a backslash.
    """
    joined = f"{output_path}\\{reserved0}.mp3"
    # Splitting on "/" and re-joining with "\" normalises mixed separators.
    return "\\".join(joined.split("/"))
def get_audio_text(self, content):
try:
root = ET.fromstring(content)

View File

@ -154,15 +154,15 @@ class Msg:
sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
from MSG
where StrTalker=? and Type=?
where StrTalker=? and Type=?
order by CreateTime
'''
try:
lock.acquire(True)
self.cursor.execute(sql, [username_, type_])
result = self.cursor.fetchall()
finally:
lock.release()
result = self.cursor.fetchall()
else:
sql = '''
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent
@ -610,3 +610,4 @@ if __name__ == '__main__':
msg.init_database()
print(msg.get_latest_time_of_message('wxid_0o18ef858vnu22', year_='2023'))
print(msg.get_messages_number('wxid_0o18ef858vnu22', year_='2023'))
print(msg.get_messages_by_type('wxid_0o18ef858vnu22',34))

View File

@ -15,7 +15,7 @@ from ..util import path
import shutil
from ..util.compress_content import parser_reply
from ..util.emoji import get_emoji
from ..util.emoji import get_emoji, get_emoji_path
os.makedirs('./data/聊天记录', exist_ok=True)
@ -131,17 +131,33 @@ class Output(QThread):
self.Child = ChildThread(self.contact, type_=self.output_type, message_types=self.message_types)
self.Child.progressSignal.connect(self.progress)
self.Child.rangeSignal.connect(self.rangeSignal)
self.Child.okSignal.connect(self.okSignal)
self.Child.okSignal.connect(self.count_finish_num)
self.Child.start()
# 语音消息单独的线程
self.output_media = OutputMedia(self.contact)
self.output_media.okSingal.connect(self.count_finish_num)
self.output_media.progressSignal.connect(self.progress)
self.output_media.start()
# emoji消息单独的线程
self.output_emoji = OutputEmoji(self.contact)
self.output_emoji.okSingal.connect(self.count_finish_num)
self.output_emoji.progressSignal.connect(self.progress)
self.output_emoji.start()
self.total_num = 3
def count_finish_num(self, num):
    """Count one finished worker and signal completion after the last one.

    Connected as the slot for the finish signals of the child threads
    started by this object.

    Args:
        num: Value carried by the finish signal; it is not used here.
    """
    finished = self.num + 1
    self.num = finished
    if finished != self.total_num:
        # Some workers are still running; keep waiting.
        return
    self.okSignal.emit(1)
# Request interruption of this thread through QThread.requestInterruption().
# NOTE(review): presumably the running work has to poll for the request to
# actually stop; no such check is visible in this view - confirm.
def cancel(self):
self.requestInterruption()
def modify_audio_metadata(audiofile, new_artist): # 修改音频元数据中的“创作者”标签
def modify_audio_metadata(audiofile, new_artist): # 修改音频元数据中的“创作者”标签
return
audiofile = load(audiofile)
# 检查文件是否有标签
if audiofile.tag is None:
audiofile.initTag()
@ -246,7 +262,7 @@ class ChildThread(QThread):
try:
os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp))
except:
print("网络图片",image_path)
print("网络图片", image_path)
pass
image_path = image_path.replace('\\', '/')
doc.write(
@ -280,7 +296,7 @@ class ChildThread(QThread):
displayname = escape_js_and_html(displayname)
if self.output_type == Output.HTML:
try:
audio_path = media_msg_db.get_audio(msgSvrId, output_path=origin_docx_path + "/voice")
audio_path = media_msg_db.get_audio_path(msgSvrId, output_path=origin_docx_path + "/voice")
audio_path = audio_path.replace('/', '\\')
modify_audio_metadata(audio_path, displayname)
os.utime(audio_path, (timestamp, timestamp))
@ -318,7 +334,7 @@ class ChildThread(QThread):
displayname = MePC().name if is_send else self.contact.remark
displayname = escape_js_and_html(displayname)
if self.output_type == Output.HTML:
emoji_path = get_emoji(str_content, thumb=True, output_path=origin_docx_path + '/emoji')
emoji_path = get_emoji_path(str_content, thumb=True, output_path=origin_docx_path + '/emoji')
emoji_path = './emoji/' + os.path.basename(emoji_path)
doc.write(
f'''{{ type:{3}, text: '{emoji_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
@ -388,7 +404,8 @@ class ChildThread(QThread):
str_time = message[8]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
str_content = str_content.replace('<![CDATA[', "").replace(' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
str_content = str_content.replace('<![CDATA[', "").replace(
' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
res = findall('(</{0,1}(img|revo|_wc_cus|a).*?>)', str_content)
for xmlstr, b in res:
str_content = str_content.replace(xmlstr, "")
@ -495,13 +512,13 @@ class ChildThread(QThread):
stream.setCodec('utf-8')
content = stream.readAll()
file.close()
html_head,html_end = content.split('/*注意看这是分割线*/')
html_head, html_end = content.split('/*注意看这是分割线*/')
f = open(filename, 'w', encoding='utf-8')
f.write(html_head.replace("<title>Chat Records</title>", f"<title>{self.contact.remark}</title>"))
MePC().avatar.save(os.path.join(f"{origin_docx_path}/avatar/{MePC().wxid}.png"))
if self.contact.is_chatroom:
for message in messages:
if message[4]: # is_send
if message[4]: # is_send
continue
try:
chatroom_avatar_path = f"{origin_docx_path}/avatar/{message[12].wxid}.png"
@ -513,12 +530,12 @@ class ChildThread(QThread):
else:
self.contact.avatar.save(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
self.rangeSignal.emit(len(messages))
total_steps = len(messages)
for index, message in enumerate(messages):
type_ = message[2]
sub_type = message[3]
timestamp = message[5]
self.progressSignal.emit(int((index + 1) / total_steps * 100))
if type_ != 34 and type_ != 47:
self.progressSignal.emit(1)
if self.is_5_min(timestamp):
str_time = message[8]
f.write(
@ -582,4 +599,52 @@ class ChildThread(QThread):
self.to_txt()
def cancel(self):
self.requestInterruption()
self.requestInterruption()
class OutputMedia(QThread):
    """Worker thread that exports the voice messages (type 34) of one contact.

    Signals:
        okSingal: Emitted once with 34 after every voice message was handled.
        progressSignal: Emitted with 1 after each handled voice message.
    """
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact):
        """Remember the contact whose voice messages will be exported."""
        super().__init__()
        self.contact = contact

    def run(self):
        """Convert every voice message of the contact and report progress."""
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
        voice_dir = origin_docx_path + "/voice"
        for record in msg_db.get_messages_by_type(self.contact.wxid, 34):
            sent_by_me = record[4]
            server_id = record[9]
            mp3_file = media_msg_db.get_audio(server_id, output_path=voice_dir)
            mp3_file = mp3_file.replace('/', '\\')
            in_chatroom = self.contact.is_chatroom
            if sent_by_me:
                sender = MePC().name
            elif in_chatroom:
                # NOTE(review): index 12 presumably holds the sender's contact
                # object; the SELECT visible in get_messages_by_type lists
                # only 12 columns (0-11) - confirm it is appended elsewhere.
                sender = record[12].remark
            else:
                sender = self.contact.remark
            sender = escape_js_and_html(sender)
            modify_audio_metadata(mp3_file, sender)
            # The file timestamp is not adjusted here (os.utime is disabled).
            self.progressSignal.emit(1)
        self.okSingal.emit(34)
class OutputEmoji(QThread):
    """Worker thread that fetches the emoji images (type 47) of one contact.

    Signals:
        okSingal: Emitted once with 47 (the emoji message type) after every
            emoji message was handled.
        progressSignal: Emitted with 1 after each handled emoji message.
    """
    okSingal = pyqtSignal(int)
    progressSignal = pyqtSignal(int)

    def __init__(self, contact):
        """Remember the contact whose emoji messages will be exported."""
        super().__init__()
        self.contact = contact

    def run(self):
        """Fetch the thumbnail of every emoji message and report progress."""
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
        emoji_dir = origin_docx_path + '/emoji'
        messages = msg_db.get_messages_by_type(self.contact.wxid, 47)
        for message in messages:
            # get_emoji is called for its effect of placing the image under
            # emoji_dir; the returned path is not needed here.
            get_emoji(message[7], thumb=True, output_path=emoji_dir)
            self.progressSignal.emit(1)
        # Report this worker's own message type; the original emitted 34,
        # which is the voice-message type handled by OutputMedia.
        self.okSingal.emit(47)

View File

@ -1,3 +1,6 @@
import time
from PyQt5.QtCore import QTimer
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QDialog, QVBoxLayout, QCheckBox, QHBoxLayout, \
QProgressBar, QLabel, QMessageBox
@ -20,6 +23,7 @@ QPushButton:hover {
}
"""
class ExportDialog(QDialog):
def __init__(self, contact=None, title="选择导出的类型", file_type="csv", parent=None):
super(ExportDialog, self).__init__(parent)
@ -27,14 +31,15 @@ class ExportDialog(QDialog):
self.contact = contact
if file_type == 'html':
self.export_type = Output.HTML
self.export_choices = {"文本": True, "图片": True, "语音": True, "视频": True, "表情包": True,
self.export_choices = {"文本": True, "图片": True, "语音": False, "视频": False, "表情包": False,
'拍一拍等系统消息': True} # 定义导出的数据类型,默认全部选择
elif file_type == 'csv':
self.export_type = Output.CSV
self.export_choices = {"文本": True, "图片": True, "视频": True, "表情包": True} # 定义导出的数据类型,默认全部选择
elif file_type == 'txt':
self.export_type = Output.TXT
self.export_choices = {"文本": True, "图片": True, "语音": True, "视频": True, "表情包": True} # 定义导出的数据类型,默认全部选择
self.export_choices = {"文本": True, "图片": True, "语音": True, "视频": True,
"表情包": True} # 定义导出的数据类型,默认全部选择
else:
self.export_choices = {"文本": True, "图片": True, "视频": True, "表情包": True} # 定义导出的数据类型,默认全部选择
self.setWindowTitle(title)
@ -43,14 +48,17 @@ class ExportDialog(QDialog):
self.worker = None # 导出线程
self.progress_bar = QProgressBar(self)
self.progress_label = QLabel(self)
self.time_label = QLabel(self)
for export_type, default_state in self.export_choices.items():
checkbox = QCheckBox(export_type)
checkbox.setChecked(default_state)
layout.addWidget(checkbox)
layout.addWidget(self.progress_bar)
layout.addWidget(self.progress_label)
layout.addWidget(self.time_label)
self.notice_label = QLabel(self)
self.notice_label.setText("注意:导出HTML时选择图片、视频、语音、表情包特别是表情包\n会导致大大影响导出速度,请合理选择导出的类型")
self.notice_label.setText(
"注意:导出HTML时选择图片、视频、语音、表情包特别是表情包\n会导致大大影响导出速度,请合理选择导出的类型")
layout.addWidget(self.notice_label)
hlayout = QHBoxLayout(self)
self.export_button = QPushButton("导出")
@ -62,6 +70,11 @@ class ExportDialog(QDialog):
hlayout.addWidget(self.cancel_button)
layout.addLayout(hlayout)
self.setLayout(layout)
self.timer = QTimer(self)
self.time = 0
self.total_msg_num = 100 # 总的消息个数
self.num = 0 # 当前完成的消息个数
self.timer.timeout.connect(self.update_elapsed_time)
def export_data(self):
self.export_button.setEnabled(False)
@ -75,12 +88,22 @@ class ExportDialog(QDialog):
self.worker = Output(self.contact, type_=self.export_type, message_types=selected_types)
self.worker.progressSignal.connect(self.update_progress)
self.worker.okSignal.connect(self.export_finished)
self.worker.rangeSignal.connect(self.set_total_msg_num)
self.worker.start()
# 启动定时器每1000毫秒更新一次任务进度
self.timer.start(1000)
self.start_time = time.time()
# self.accept() # 使用accept关闭对话框
# Slot for the worker's rangeSignal: store the reported number of messages.
# update_progress divides the running count by this value.
def set_total_msg_num(self, num):
self.total_msg_num = num
def export_finished(self):
self.export_button.setEnabled(True)
self.cancel_button.setEnabled(True)
self.time = 0
end_time = time.time()
print(f'总耗时:{end_time - self.start_time}s')
reply = QMessageBox(self)
reply.setIcon(QMessageBox.Information)
reply.setWindowTitle('OK')
@ -90,7 +113,13 @@ class ExportDialog(QDialog):
api = reply.exec_()
self.accept()
def update_elapsed_time(self):
    """Advance the elapsed-time counter by one and refresh its label.

    Connected to the dialog timer's ``timeout`` signal.
    """
    elapsed = self.time + 1
    self.time = elapsed
    self.time_label.setText(f"耗时: {elapsed}s")
def update_progress(self, progress_percentage):
    """Count one finished message and refresh the progress bar and label.

    The value carried by the signal is ignored; the percentage is derived
    from the number of calls so far and ``self.total_msg_num``.

    Args:
        progress_percentage: Signal payload; overwritten by the computed
            percentage.
    """
    self.num += 1
    total = self.total_msg_num
    if total > 0:
        # Several worker threads report progress against one total, so the
        # count may overshoot it; cap the displayed value at 100.
        progress_percentage = min(100, int(self.num / total * 100))
    else:
        # A total of 0 would raise ZeroDivisionError; treat it as complete.
        progress_percentage = 100
    self.progress_bar.setValue(progress_percentage)
    self.progress_label.setText(f"导出进度: {progress_percentage}%")

View File

@ -174,9 +174,9 @@ def get_most_emoji(messages):
except:
dic[md5] = [1, emoji_info]
md5_nums = [(num[0], key, num[1]) for key, num in dic.items()]
md5_nums.sort(key=lambda x: x[0],reverse=True)
md5_nums.sort(key=lambda x: x[0], reverse=True)
if not md5_nums:
return '',0
return '', 0
md5 = md5_nums[0][1]
num = md5_nums[0][0]
emoji_info = md5_nums[0][2]
@ -195,7 +195,6 @@ def get_emoji(xml_string, thumb=True, output_path=root_path) -> str:
prefix = 'th_' if thumb else ''
file_path = os.path.join(output_path, prefix + md5 + f)
if os.path.exists(file_path):
print('表情包已存在')
return file_path
url = emoji_info['thumburl'] if thumb else emoji_info['cdnurl']
if not url or url == "":
@ -231,6 +230,21 @@ def get_emoji(xml_string, thumb=True, output_path=root_path) -> str:
return output_path
def get_emoji_path(xml_string, thumb=True, output_path=root_path) -> str:
    """Return the expected local path of an emoji image without fetching it.

    Args:
        xml_string: Emoji message XML, parsed with ``parser_xml`` to obtain
            the image's md5.
        thumb: When true the file name gets the ``th_`` prefix.
        output_path: Directory the emoji image is expected to live in.

    Returns:
        ``<output_path>/[th_]<md5>.png``, or ``<output_path>/404.png`` when
        the XML cannot be parsed or has no md5.
    """
    try:
        emoji_info = parser_xml(xml_string)
        md5 = emoji_info['md5']
        prefix = 'th_' if thumb else ''
        # The former loop over ['.png', '.gif', '.jpeg'] returned on its first
        # pass, so '.png' was the only extension ever produced.
        return os.path.join(output_path, prefix + md5 + '.png')
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not
        # swallowed; parse failures still fall back to the placeholder.
        logger.error(traceback.format_exc())
        return os.path.join(output_path, "404.png")
if __name__ == '__main__':
# xml_string = '<msg><emoji fromusername = "wxid_0o18ef858vnu22" tousername = "wxid_27hqbq7vx5hf22" type="2" idbuffer="media:0_0" md5="71ce49ed3ce9e57e43e07f802983bf45" len = "352588" productid="com.tencent.xin.emoticon.person.stiker_1678703862259eb01f2ef4a313" androidmd5="71ce49ed3ce9e57e43e07f802983bf45" androidlen="352588" s60v3md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v3len="352588" s60v5md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v5len="352588" cdnurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=71ce49ed3ce9e57e43e07f802983bf45&amp;filekey=30350201010421301f020201060402535a041071ce49ed3ce9e57e43e07f802983bf45020305614c040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b000720d03274c5820000010600004f50535a1ca0c950b64287022&amp;bizid=1023" designerid = "" thumburl = "http://mmbiz.qpic.cn/mmemoticon/ajNVdqHZLLDSKTMRgM8agiadpFhKz9IJ3cD5Ra2sTROibOaShdt3D4z6PfE92WkjQY/0" encrypturl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=cbaae1d847aac6389652b65562bacaa2&amp;filekey=30350201010421301f020201060402535a0410cbaae1d847aac6389652b65562bacaa20203056150040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b0008d8223274c5820000010600004f50535a17b82910b64764739&amp;bizid=1023" aeskey= "7051ab2a34442dec63434832463f45ce" externurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=960f68693454dfa64b9966ca5d70dbd3&amp;filekey=30340201010420301e020201060402535a0410960f68693454dfa64b9966ca5d70dbd3020221a0040d00000004627466730000000132&amp;hy=SZ&amp;storeid=26423dbe3000793a8720e40de0000010600004f50535a1d40c950b71be0a50&amp;bizid=1023" externmd5 = "41895664fc5a77878e2155fc96209a19" width= "240" height= "240" tpurl= "" tpauthkey= "" attachedtext= "" attachedtextcolor= "" lensid= "" emojiattr= "" linkid= "" desc= "ChEKB2RlZmF1bHQSBuWNlee6rw==" ></emoji> </msg>'
# res1 = parser_xml(xml_string)