2023-11-25 00:40:09 +08:00
|
|
|
|
import csv
|
2023-11-28 21:30:56 +08:00
|
|
|
|
import html
|
2023-11-15 23:53:39 +08:00
|
|
|
|
import os
|
2023-12-10 18:55:17 +08:00
|
|
|
|
from re import findall
|
2023-12-22 21:21:30 +08:00
|
|
|
|
from PyQt5.QtCore import pyqtSignal, QThread, QFile, QIODevice, QTextStream
|
2023-12-07 20:35:51 +08:00
|
|
|
|
from PyQt5.QtWidgets import QFileDialog
|
2023-12-17 20:31:38 +08:00
|
|
|
|
# from eyed3 import load
|
2023-11-16 22:39:59 +08:00
|
|
|
|
|
2023-12-07 20:35:51 +08:00
|
|
|
|
from . import msg_db, micro_msg_db
|
2023-12-06 15:34:56 +08:00
|
|
|
|
from .package_msg import PackageMsg
|
2023-12-01 22:37:45 +08:00
|
|
|
|
from ..DataBase import hard_link_db
|
2023-12-12 01:17:21 +08:00
|
|
|
|
from ..DataBase import media_msg_db
|
2023-12-13 22:12:50 +08:00
|
|
|
|
from ..person import MePC
|
2023-12-06 20:50:32 +08:00
|
|
|
|
from ..util import path
|
2023-12-09 22:48:15 +08:00
|
|
|
|
import shutil
|
|
|
|
|
|
2023-12-11 22:49:17 +08:00
|
|
|
|
from ..util.compress_content import parser_reply
|
2023-12-09 22:48:15 +08:00
|
|
|
|
from ..util.emoji import get_emoji
|
2023-11-28 21:51:49 +08:00
|
|
|
|
|
2023-12-03 00:03:00 +08:00
|
|
|
|
# Make sure the root export directory exists as soon as this module is imported.
os.makedirs('./data/聊天记录', exist_ok=True)
|
2023-11-20 22:30:31 +08:00
|
|
|
|
|
2023-11-15 23:53:39 +08:00
|
|
|
|
|
2023-12-06 20:50:32 +08:00
|
|
|
|
def makedirs(path):
    """Create the export directory *path* together with its media sub-folders.

    The sub-folders are ``image``, ``emoji``, ``video``, ``voice``, ``file``
    and ``avatar``. Directories that already exist are left untouched.
    """
    os.makedirs(path, exist_ok=True)
    for subfolder in ('image', 'emoji', 'video', 'voice', 'file', 'avatar'):
        os.makedirs(os.path.join(path, subfolder), exist_ok=True)
|
2023-12-06 20:50:32 +08:00
|
|
|
|
|
|
|
|
|
|
2023-11-28 21:30:56 +08:00
|
|
|
|
# Characters that must be backslash-escaped inside a quoted JavaScript string.
# U+2028 / U+2029 are line terminators for JavaScript engines older than ES2019.
_JS_ESCAPE_TABLE = str.maketrans({
    "\\": "\\\\",
    "'": "\\'",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\u2028": "\\u2028",
    "\u2029": "\\u2029",
})


def escape_js_and_html(input_str):
    """Escape *input_str* for use inside a quoted JavaScript string in an HTML page.

    HTML special characters (``&``, ``<``, ``>``) are replaced by entities first,
    then backslashes, quotes and line-breaking characters are backslash-escaped.

    :param input_str: text to escape; ``None`` is treated as an empty string.
    :return: the escaped text.
    """
    if input_str is None:
        return ''
    # quote=False: quotes are handled by the JavaScript escaping step below.
    html_escaped = html.escape(input_str, quote=False)
    # Single pass; every source character is mapped exactly once, so the
    # backslashes inserted here are never escaped a second time.
    return html_escaped.translate(_JS_ESCAPE_TABLE)
|
|
|
|
|
|
|
|
|
|
|
2023-11-15 23:53:39 +08:00
|
|
|
|
class Output(QThread):
    """
    Export dispatcher thread.

    Depending on the requested output type it either writes a CSV file itself
    (all messages, or all contacts) or delegates the export of a single
    contact's chat history to a ``ChildThread``.
    """
    progressSignal = pyqtSignal(int)
    rangeSignal = pyqtSignal(int)
    okSignal = pyqtSignal(int)
    i = 1
    # Supported output types.
    CSV = 0
    DOCX = 1
    HTML = 2
    CSV_ALL = 3
    CONTACT_CSV = 4
    TXT = 5

    def __init__(self, contact, type_=DOCX, message_types=None, parent=None):
        """
        @param contact: contact whose chat is exported; may be falsy for the
                        whole-database exports
        @param type_: one of the output type constants of this class
        @param message_types: mapping handed to the child thread to select which
                              message types are exported; defaults to an empty dict
        @param parent: optional Qt parent object
        """
        super().__init__(parent)
        self.Child0 = None
        # Worker created by run() for per-contact exports.
        self.Child = None
        self.last_timestamp = 0
        # A fresh dict per instance instead of one shared mutable default.
        self.message_types = {} if message_types is None else message_types
        self.sec = 2  # NOTE(review): not read anywhere in this class; purpose unclear
        self.contact = contact
        self.ta_username = contact.wxid if contact else ''
        self.msg_id = 0
        self.output_type = type_
        self.total_num = 0
        self.num = 0

    def progress(self, value):
        """Forward a progress value from the child thread to ``progressSignal``."""
        self.progressSignal.emit(value)

    @staticmethod
    def _ask_csv_path(default_name):
        """Ask the user where to save a CSV file; return '' when the dialog is cancelled."""
        # NOTE(review): run() calls this from the worker thread, while Qt widgets
        # are meant to be used from the GUI thread - confirm this is safe here.
        chosen = QFileDialog.getSaveFileName(None, "save file", os.path.join(os.getcwd(), default_name),
                                             "csv files (*.csv);;all files(*.*)")
        return chosen[0]

    @staticmethod
    def _write_csv(filename, columns, rows):
        """Write the header *columns* followed by *rows* to *filename* as UTF-8 CSV."""
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(columns)
            writer.writerows(rows)

    def to_csv_all(self):
        """Export every message returned by ``PackageMsg`` to a user-chosen CSV file."""
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/"
        os.makedirs(origin_docx_path, exist_ok=True)
        filename = self._ask_csv_path('messages.csv')
        if not filename:
            return
        columns = ['localId', 'TalkerId', 'Type', 'SubType',
                   'IsSender', 'CreateTime', 'Status', 'StrContent',
                   'StrTime', 'Remark', 'NickName', 'Sender']
        messages = PackageMsg().get_package_message_all()
        self._write_csv(filename, columns, messages)
        self.okSignal.emit(1)

    def contact_to_csv(self):
        """Export every contact returned by ``micro_msg_db`` to a user-chosen CSV file."""
        filename = self._ask_csv_path('contacts.csv')
        if not filename:
            return
        columns = ['UserName', 'Alias', 'Type', 'Remark', 'NickName', 'PYInitial', 'RemarkPYInitial',
                   'smallHeadImgUrl', 'bigHeadImgUrl']
        contacts = micro_msg_db.get_contact()
        self._write_csv(filename, columns, contacts)
        self.okSignal.emit(1)

    def run(self):
        """Thread entry point: run the export selected by ``self.output_type``."""
        if self.output_type == self.DOCX:
            # DOCX export is not implemented.
            return
        elif self.output_type == self.CSV_ALL:
            self.to_csv_all()
        elif self.output_type == self.CONTACT_CSV:
            self.contact_to_csv()
        else:
            # Per-contact exports are done by a child thread whose signals are
            # relayed through this object's own signals.
            self.Child = ChildThread(self.contact, type_=self.output_type, message_types=self.message_types)
            self.Child.progressSignal.connect(self.progress)
            self.Child.rangeSignal.connect(self.rangeSignal)
            self.Child.okSignal.connect(self.okSignal)
            self.Child.start()

    def cancel(self):
        """Request interruption of this thread and of the child it started, if any."""
        self.requestInterruption()
        if self.Child is not None:
            self.Child.requestInterruption()
|
2023-11-22 00:22:50 +08:00
|
|
|
|
|
|
|
|
|
|
2023-12-15 14:48:54 +08:00
|
|
|
|
def modify_audio_metadata(audiofile, new_artist):
    """Placeholder for setting the "artist" tag of an exported audio file.

    Currently a no-op that always returns ``None``: the tagging code relied on
    ``eyed3.load``, whose import is disabled in this module, so it could never
    run. The signature is kept so that existing callers keep working.

    :param audiofile: path of the audio file (unused).
    :param new_artist: artist name that would be written to the tag (unused).
    """
    return
|
|
|
|
|
|
|
|
|
|
|
2023-11-22 00:22:50 +08:00
|
|
|
|
class ChildThread(QThread):
    """
    Child thread that exports the chat history of a single contact.

    The output format (CSV, HTML or TXT) is selected with the output type
    constants defined on ``Output``. Every renderer receives a message row and
    reads its fields by position: 2 = type, 3 = sub type, 4 = is_send,
    5 = timestamp, 7 = content, 8 = formatted time, 9 = server message id,
    10 = BytesExtra, 11 = payload given to ``parser_reply``, 12 = sender object
    (only read for chatrooms).
    """
    progressSignal = pyqtSignal(int)
    rangeSignal = pyqtSignal(int)
    okSignal = pyqtSignal(int)
    i = 1
    CSV = 0
    DOCX = 1
    HTML = 2

    # Message type -> name of the method that renders it.
    _HANDLERS = {1: 'text', 3: 'image', 34: 'audio', 43: 'video', 47: 'emoji', 10000: 'system_msg'}

    def __init__(self, contact, type_=DOCX, message_types=None, parent=None):
        """
        @param contact: contact (or chatroom) whose messages are exported
        @param type_: output type constant (see ``Output``)
        @param message_types: mapping ``message type -> truthy`` selecting the
                              message types to export; defaults to an empty dict
        @param parent: optional Qt parent object
        """
        super().__init__(parent)
        self.contact = contact
        # A fresh dict per instance instead of one shared mutable default.
        self.message_types = {} if message_types is None else message_types
        self.last_timestamp = 0
        self.sec = 2  # NOTE(review): not read anywhere in this class; purpose unclear
        self.msg_id = 0
        self.output_type = type_

    # ------------------------------------------------------------------ helpers

    def _export_dir(self):
        """Return the directory that receives this contact's export."""
        return f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"

    def _txt_name(self, is_send):
        """Return the speaker label used in TXT output."""
        return '你' if is_send else self.contact.remark

    def _sender_info(self, message):
        """Return ``(is_chatroom, avatar, displayname)`` for an HTML record.

        ``displayname`` is already escaped for embedding in a JavaScript string.
        Only chatroom messages read the sender object at ``message[12]``.
        """
        is_send = message[4]
        is_chatroom = 1 if self.contact.is_chatroom else 0
        if is_chatroom:
            avatar = f"./avatar/{message[12].wxid}.png"
            displayname = MePC().name if is_send else message[12].remark
        else:
            avatar = f"./avatar/{MePC().wxid if is_send else self.contact.wxid}.png"
            displayname = MePC().name if is_send else self.contact.remark
        return is_chatroom, avatar, escape_js_and_html(displayname)

    def _export_message(self, doc, message, refer_enabled):
        """Render *message* into *doc* with the handler matching its type.

        Quoted replies (type 49, sub type 57) are rendered only when
        *refer_enabled* is truthy; every other type must be enabled in
        ``self.message_types``.
        """
        type_ = message[2]
        handler = self._HANDLERS.get(type_)
        if handler is not None:
            if self.message_types.get(type_):
                getattr(self, handler)(doc, message)
        elif type_ == 49 and message[3] == 57 and refer_enabled:
            self.refermsg(doc, message)

    @staticmethod
    def _load_template():
        """Read the HTML template resource and return its ``(head, end)`` halves."""
        file = QFile(':/data/template.html')
        if not file.open(QIODevice.ReadOnly | QIODevice.Text):
            # Previously this surfaced as an unbound-variable error further down.
            raise OSError("cannot open HTML template resource ':/data/template.html'")
        try:
            stream = QTextStream(file)
            stream.setCodec('utf-8')
            content = stream.readAll()
        finally:
            file.close()
        html_head, html_end = content.split('/*注意看这是分割线*/')
        return html_head, html_end

    def _save_avatars(self, origin_docx_path, messages):
        """Save the avatar images referenced by the HTML export (best effort for chatrooms)."""
        MePC().avatar.save(os.path.join(f"{origin_docx_path}/avatar/{MePC().wxid}.png"))
        if not self.contact.is_chatroom:
            self.contact.avatar.save(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
            return
        for message in messages:
            if message[4]:  # is_send: own avatar is already saved
                continue
            try:
                chatroom_avatar_path = f"{origin_docx_path}/avatar/{message[12].wxid}.png"
                if not os.path.exists(chatroom_avatar_path):
                    message[12].avatar.save(chatroom_avatar_path)
            except Exception:
                # A missing sender or avatar must not abort the whole export.
                print(message)

    def is_5_min(self, timestamp):
        """Return True, and remember *timestamp*, when it is more than 300 seconds
        away from the last remembered timestamp; otherwise return False."""
        if abs(timestamp - self.last_timestamp) > 300:
            self.last_timestamp = timestamp
            return True
        return False

    # ---------------------------------------------------------------- renderers

    def text(self, doc, message):
        """Write a text message to *doc* in the current output format."""
        str_content = message[7]
        str_time = message[8]
        is_send = message[4]
        if self.output_type == Output.HTML:
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            str_content = escape_js_and_html(str_content)
            doc.write(
                f'''{{ type:1, text: '{str_content}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n{str_content}\n\n'''
            )

    def image(self, doc, message):
        """Write an image message to *doc*; HTML output is skipped when no image file is found."""
        str_time = message[8]
        is_send = message[4]
        if self.output_type == Output.HTML:
            origin_docx_path = self._export_dir()
            type_ = message[2]
            bytes_extra = message[10]
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            # NOTE(review): the content is escaped before being handed to
            # hard_link_db.get_image, as in the original code - confirm intended.
            str_content = escape_js_and_html(message[7])
            image_path = hard_link_db.get_image(str_content, bytes_extra, thumb=False)
            image_thumb_path = hard_link_db.get_image(str_content, bytes_extra, thumb=True)
            wx_dir = MePC().wx_dir
            # Treat paths that are missing on disk like "no path"; the None check
            # avoids passing None to os.path.join.
            if image_path is not None and not os.path.exists(os.path.join(wx_dir, image_path)):
                image_path = None
            if image_thumb_path is not None and not os.path.exists(os.path.join(wx_dir, image_thumb_path)):
                image_thumb_path = None
            if image_path is None:
                if image_thumb_path is None:
                    return
                # Fall back to the thumbnail when the full image is unavailable.
                image_path = image_thumb_path
            image_path = path.get_relative_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image')
            image_path = image_path.replace('/', '\\')
            try:
                # Stamp the file with the message time; [1:] drops the leading '.'.
                os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp))
            except OSError:
                # The path does not point to a local file (e.g. a network image).
                print("网络图片", image_path)
            image_path = image_path.replace('\\', '/')
            doc.write(
                f'''{{ type:{type_}, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n[图片]\n\n'''
            )

    def audio(self, doc, message):
        """Write a voice message to *doc*; HTML output is skipped when the audio cannot be extracted."""
        str_content = message[7]
        str_time = message[8]
        is_send = message[4]
        if self.output_type == Output.HTML:
            origin_docx_path = self._export_dir()
            msg_svr_id = message[9]
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            try:
                audio_path = media_msg_db.get_audio(msg_svr_id, output_path=origin_docx_path + "/voice")
                audio_path = audio_path.replace('/', '\\')
                modify_audio_metadata(audio_path, displayname)
                os.utime(audio_path, (timestamp, timestamp))
                audio_path = audio_path.replace('\\', '/')
                audio_path = "./voice/" + os.path.basename(audio_path)
                voice_to_text = escape_js_and_html(media_msg_db.get_audio_text(str_content))
            except Exception:
                # Best effort: a voice message that cannot be exported is dropped.
                return
            doc.write(
                f'''{{ type:34, text:'{audio_path}',is_send:{is_send},avatar_path:'{avatar}',voice_to_text:'{voice_to_text}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n[语音]\n\n'''
            )

    def emoji(self, doc, message):
        """Write an emoji/sticker message to *doc* in the current output format."""
        str_content = message[7]
        str_time = message[8]
        is_send = message[4]
        if self.output_type == Output.HTML:
            origin_docx_path = self._export_dir()
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            emoji_path = get_emoji(str_content, thumb=True, output_path=origin_docx_path + '/emoji')
            emoji_path = './emoji/' + os.path.basename(emoji_path)
            doc.write(
                f'''{{ type:3, text: '{emoji_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n[表情包]\n\n'''
            )

    def wx_file(self, doc, isSend, content, status):
        """Placeholder for file messages; not implemented."""
        return

    def retract_message(self, doc, isSend, content, status):
        """Placeholder for retracted messages; not implemented."""
        return

    def refermsg(self, doc, message):
        """
        Handle a reply (quote) message.
        @param doc: writable text stream receiving the output
        @param message: message row; index 11 is parsed with ``parser_reply``
        @return: None
        """
        str_time = message[8]
        is_send = message[4]
        content = parser_reply(message[11])
        refer_msg = content.get('refer')
        if self.output_type == Output.HTML:
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            contentText = escape_js_and_html(content.get('title'))
            if refer_msg:
                referText = f"{escape_js_and_html(refer_msg.get('displayname'))}:{escape_js_and_html(refer_msg.get('content'))}"
                doc.write(
                    f'''{{ type:49, text: '{contentText}',is_send:{is_send},sub_type:{content.get('type')},refer_text: '{referText}',avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
                )
            else:
                doc.write(
                    f'''{{ type:49, text: '{contentText}',is_send:{is_send},sub_type:{content.get('type')},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
                )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            if refer_msg:
                doc.write(
                    f'''{str_time} {name}\n{content.get('title')}\n引用:{refer_msg.get('displayname')}:{refer_msg.get('content')}\n\n'''
                )
            else:
                doc.write(
                    f'''{str_time} {name}\n{content.get('title')}\n引用:未知\n\n'''
                )

    def system_msg(self, doc, message):
        """Write a system message to *doc* after stripping its embedded markup."""
        str_content = message[7]
        is_send = message[4]
        str_time = message[8]
        timestamp = message[5]
        is_chatroom = 1 if self.contact.is_chatroom else 0
        str_content = str_content.replace('<![CDATA[', "").replace(' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
        # Remove the remaining <img>, <revo...>, <_wc_cus...> and <a> tags.
        res = findall('(</{0,1}(img|revo|_wc_cus|a).*?>)', str_content)
        for xmlstr, _ in res:
            str_content = str_content.replace(xmlstr, "")
        if self.output_type == Output.HTML:
            # Escaping applies to the HTML/JavaScript output only; plain-text
            # output keeps the readable text.
            str_content = escape_js_and_html(str_content)
            doc.write(
                f'''{{ type:0, text: '{str_content}',is_send:{is_send},avatar_path:'',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:''}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n{str_content}\n\n'''
            )

    def video(self, doc, message):
        """Write a video message to *doc*; for HTML the video file is copied into the export folder."""
        str_time = message[8]
        is_send = message[4]
        if self.output_type == Output.HTML:
            origin_docx_path = self._export_dir()
            type_ = message[2]
            str_content = message[7]
            bytes_extra = message[10]
            timestamp = message[5]
            is_chatroom, avatar, displayname = self._sender_info(message)
            video_path = hard_link_db.get_video(str_content, bytes_extra, thumb=False)
            image_path = hard_link_db.get_video(str_content, bytes_extra, thumb=True)
            if video_path is None:
                if image_path is None:
                    return
                # Only the thumbnail is available: export it as an image record.
                image_path = path.get_relative_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image')
                try:
                    # todo: network image problem
                    os.utime(origin_docx_path + image_path[1:], (timestamp, timestamp))
                except Exception:
                    # Any failure here is reported as a lost video, as before.
                    doc.write(
                        f'''{{ type:1, text: '视频丢失',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
                    )
                else:
                    image_path = image_path.replace('\\', '/')
                    doc.write(
                        f'''{{ type:3, text: '{image_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
                    )
                return
            video_path = f'{MePC().wx_dir}/{video_path}'
            if os.path.exists(video_path):
                new_path = origin_docx_path + '/video/' + os.path.basename(video_path)
                if not os.path.exists(new_path):
                    shutil.copy(video_path, os.path.join(origin_docx_path, 'video'))
                os.utime(new_path, (timestamp, timestamp))
                video_path = f'./video/{os.path.basename(video_path)}'
            video_path = video_path.replace('\\', '/')
            doc.write(
                f'''{{ type:{type_}, text: '{video_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{displayname}'}},'''
            )
        elif self.output_type == Output.TXT:
            name = self._txt_name(is_send)
            doc.write(
                f'''{str_time} {name}\n[视频]\n\n'''
            )

    # ---------------------------------------------------------------- exporters

    def to_csv(self):
        """Export this contact's messages to ``<remark>_utf8.csv`` in the export folder."""
        origin_docx_path = self._export_dir()
        os.makedirs(origin_docx_path, exist_ok=True)
        filename = f"{origin_docx_path}/{self.contact.remark}_utf8.csv"
        columns = ['localId', 'TalkerId', 'Type', 'SubType',
                   'IsSender', 'CreateTime', 'Status', 'StrContent',
                   'StrTime']
        messages = msg_db.get_messages(self.contact.wxid)
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(columns)
            writer.writerows(messages)
        # okSignal is declared as pyqtSignal(int); emitting a str raises TypeError.
        self.okSignal.emit(1)

    def to_html_(self):
        """Export this contact's messages to ``<remark>.html`` built from the HTML template."""
        origin_docx_path = self._export_dir()
        makedirs(origin_docx_path)
        if self.contact.is_chatroom:
            # Chatroom rows come from PackageMsg, which the renderers expect to
            # carry the sender object at index 12.
            messages = PackageMsg().get_package_message_by_wxid(self.contact.wxid)
        else:
            messages = msg_db.get_messages(self.contact.wxid)
        filename = f"{origin_docx_path}/{self.contact.remark}.html"
        html_head, html_end = self._load_template()
        # The remark is user-controlled text: escape it before embedding in <title>.
        title = html.escape(self.contact.remark)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_head.replace("<title>Chat Records</title>", f"<title>{title}</title>"))
            self._save_avatars(origin_docx_path, messages)
            self.rangeSignal.emit(len(messages))
            total_steps = len(messages)
            for index, message in enumerate(messages):
                timestamp = message[5]
                self.progressSignal.emit(int((index + 1) / total_steps * 100))
                if self.is_5_min(timestamp):
                    # Insert a time separator when messages are over 5 minutes apart.
                    str_time = message[8]
                    f.write(
                        f'''{{ type:0, text: '{str_time}',is_send:0,avatar_path:'',timestamp:{timestamp}}},'''
                    )
                # Quoted replies follow the "text" (type 1) selection.
                self._export_message(f, message, refer_enabled=self.message_types.get(1))
            f.write(html_end)
        self.okSignal.emit(1)

    def to_txt(self):
        """Export this contact's messages to ``<remark>.txt`` in the export folder."""
        origin_docx_path = self._export_dir()
        os.makedirs(origin_docx_path, exist_ok=True)
        filename = f"{origin_docx_path}/{self.contact.remark}.txt"
        messages = msg_db.get_messages(self.contact.wxid)
        total_steps = len(messages)
        with open(filename, mode='w', newline='', encoding='utf-8') as f:
            for index, message in enumerate(messages):
                self.progressSignal.emit(int((index + 1) / total_steps * 100))
                # Quoted replies are always written in TXT output.
                self._export_message(f, message, refer_enabled=True)
        self.okSignal.emit(1)

    def run(self):
        """Thread entry point: run the exporter matching ``self.output_type``."""
        if self.output_type == Output.DOCX:
            # DOCX export is not implemented.
            return
        elif self.output_type == Output.CSV:
            self.to_csv()
        elif self.output_type == Output.HTML:
            self.to_html_()
        elif self.output_type == Output.TXT:
            self.to_txt()
        # Output.CSV_ALL is handled by Output itself; this class has no
        # to_csv_all method, so such a request is ignored here.

    def cancel(self):
        """Request interruption of this thread."""
        self.requestInterruption()
|