修改导出docx的方式

This commit is contained in:
shuaikangzhou 2024-01-18 23:06:37 +08:00
parent 62154823cf
commit 4fa56b888a
4 changed files with 98 additions and 21 deletions

View File

@ -25,6 +25,7 @@ encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x
# 创建一个字典,将要删除的字符映射为 None # 创建一个字典,将要删除的字符映射为 None
char_mapping = {char: None for char in encoded_chars} char_mapping = {char: None for char in encoded_chars}
def filter_control_characters(input_string): def filter_control_characters(input_string):
""" """
过滤掉不可打印字符 过滤掉不可打印字符
@ -39,6 +40,84 @@ def filter_control_characters(input_string):
class DocxExporter(ExporterBase): class DocxExporter(ExporterBase):
def merge_docx(self, n):
    """Merge the partial document written by child thread ``n`` into the final file.

    Connected to each child thread's ``okSignal``; ``n`` is the index the
    child emitted, which is also the suffix of the partial file it saved
    (``<remark>_<n>.docx``).

    Children may finish in any order. ``self.merged_doc_index`` holds the
    contiguous ``[lowest, highest]`` range of part indices already merged
    into ``self.document`` (``[-1, -1]`` means nothing merged yet). A part
    that is not adjacent to that range is parked in ``self.docs`` /
    ``self.docs_set`` and merged later, once the gap has been filled.

    When every child has reported (``process_num == child_thread_num``) the
    combined document is saved as ``<remark>.docx`` and ``okSignal`` is
    emitted.
    """
    self.process_num += 1
    conRemark = self.contact.remark
    origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"
    filename = f"{origin_docx_path}/{conRemark}_{n}.docx"
    doc = docx.Document(filename)
    if self.merged_doc_index == [-1, -1]:
        # First part to arrive: it seeds the merged range.
        self.document.append(doc)
        self.merged_doc_index = [n, n]
    else:
        # Park the new part with the others, then drain everything that has
        # become adjacent to the merged range.
        self.docs.append([doc, n])
        progressed = True
        # Repeat until a full pass merges nothing. A single pass is not
        # enough: with parts 2 and 3 parked and range [4, 5], an ascending
        # pass rejects 2 before 3 is merged, so 2 needs a second pass.
        while progressed and self.docs:
            progressed = False
            still_pending = []
            for doc_, index in sorted(self.docs, key=lambda item: item[1]):
                if index == self.merged_doc_index[0] - 1:
                    # Directly precedes the merged range: prepend.
                    self.document.insert(0, doc_)
                    self.merged_doc_index[0] -= 1
                    progressed = True
                elif index == self.merged_doc_index[1] + 1:
                    # Directly follows the merged range: append.
                    self.document.append(doc_)
                    self.merged_doc_index[1] += 1
                    progressed = True
                else:
                    still_pending.append([doc_, index])
            self.docs = still_pending
        self.docs_set = {index for _, index in self.docs}
    # The partial file is loaded in memory now; remove it from disk.
    os.remove(filename)
    if self.process_num == self.child_thread_num:
        file = os.path.join(origin_docx_path, f'{conRemark}.docx')
        try:
            self.document.save(file)
        except PermissionError:
            # Target could not be written (e.g. it is open elsewhere);
            # save under a timestamped name instead.
            file = file[:-5] + f'{time.time()}' + '.docx'
            self.document.save(file)
        self.okSignal.emit(1)
def export(self):
    """Split the contact's messages into chunks and export each on a child thread.

    Loads the messages for ``self.contact`` within ``self.time_range``,
    prepares the master document (``self.document``, a ``Composer``) and the
    bookkeeping that ``merge_docx`` relies on, then starts one
    ``DocxExporterChildThread`` per chunk. Each child's ``okSignal`` is
    connected to ``merge_docx`` and its ``progressSignal`` is forwarded to
    this exporter's ``progressSignal``.
    """
    self.child_threads = []
    messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
    # Number of chunks to split the export into. Currently a single chunk;
    # something like ``len(messages) // 500 + 1`` would enable parallel export.
    num = 1
    # Chunk length, clamped to >= 1: a zero step makes range() raise
    # ValueError when there are no messages (or fewer messages than chunks).
    sublist_length = max(1, len(messages) // num)
    # Slice the message list into consecutive chunks.
    divided_list = [messages[i:i + sublist_length] for i in range(0, len(messages), sublist_length)]
    if not divided_list:
        # No messages: still run one child on the empty list so the
        # completion signal is emitted and the caller is not left waiting.
        divided_list = [messages]
    self.child_thread_num = len(divided_list)
    self.process_num = 0
    doc = docx.Document()
    doc.styles["Normal"].font.name = "Cambria"
    # East Asian text needs its own font set on the style's rFonts element.
    doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
    self.document = Composer(doc)
    # Merge bookkeeping consumed by merge_docx: merged index range
    # ([-1, -1] = nothing merged yet) and the parts parked out of order.
    self.merged_doc_index = [-1, -1]
    self.docs = []
    self.docs_set = set()
    for i in range(self.child_thread_num):
        child_thread = DocxExporterChildThread(self.contact, type_=self.DOCX, message_types=self.message_types,
                                               time_range=self.time_range, messages=divided_list[i], index=i)
        # Keep a reference so the thread object outlives this method.
        self.child_threads.append(child_thread)
        child_thread.okSignal.connect(self.merge_docx)
        child_thread.progressSignal.connect(self.progressSignal)
        child_thread.start()
class DocxExporterChildThread(ExporterBase):
def text(self, doc, message): def text(self, doc, message):
type_ = message[2] type_ = message[2]
str_content = message[7] str_content = message[7]
@ -313,7 +392,7 @@ class DocxExporter(ExporterBase):
def export(self): def export(self):
print(f"【开始导出 DOCX {self.contact.remark}") print(f"【开始导出 DOCX {self.contact.remark}")
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) messages = self.messages
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png")) Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
if self.contact.is_chatroom: if self.contact.is_chatroom:
for message in messages: for message in messages:
@ -329,6 +408,8 @@ class DocxExporter(ExporterBase):
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png")) self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
self.rangeSignal.emit(len(messages)) self.rangeSignal.emit(len(messages))
index = 0
def newdoc(): def newdoc():
nonlocal n, doc nonlocal n, doc
doc = docx.Document() doc = docx.Document()
@ -347,8 +428,8 @@ class DocxExporter(ExporterBase):
for index, message in enumerate(messages): for index, message in enumerate(messages):
if index % 200 == 0 and index: if index % 200 == 0 and index:
# doc = document.add_paragraph() # doc = document.add_paragraph()
filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx") # filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx")
doc.save(filename) # doc.save(filename)
newdoc() newdoc()
type_ = message[2] type_ = message[2]
@ -391,6 +472,7 @@ class DocxExporter(ExporterBase):
for index, dx in enumerate(docs[::-1]): for index, dx in enumerate(docs[::-1]):
print(f"【合并 DOCX {self.contact.remark}{index + 1}/{len(docs)}") print(f"【合并 DOCX {self.contact.remark}{index + 1}/{len(docs)}")
doc.insert(0, dx) doc.insert(0, dx)
filename = os.path.join(origin_docx_path, f"{self.contact.remark}_{self.index}.docx")
try: try:
# document.save(filename) # document.save(filename)
doc.save(filename) doc.save(filename)
@ -399,4 +481,4 @@ class DocxExporter(ExporterBase):
# document.save(filename) # document.save(filename)
doc.save(filename) doc.save(filename)
print(f"【完成导出 DOCX {self.contact.remark}") print(f"【完成导出 DOCX {self.contact.remark}")
self.okSignal.emit(1) self.okSignal.emit(self.index)

View File

@ -127,10 +127,7 @@ class HtmlExporter(ExporterBase):
if file_path != "": if file_path != "":
file_path = './file/' + file_info.get('file_name') file_path = './file/' + file_info.get('file_name')
doc.write( doc.write(
f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp} f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}',sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}',app_name: '{file_info.get('app_name')}'}},'''
,is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}'
,sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}'
,app_name: '{file_info.get('app_name')}'}},'''
) )
def refermsg(self, doc, message): def refermsg(self, doc, message):
@ -234,9 +231,7 @@ class HtmlExporter(ExporterBase):
display_name = self.get_display_name(is_send, message) display_name = self.get_display_name(is_send, message)
music_path = escape_js_and_html(music_path) music_path = escape_js_and_html(music_path)
doc.write( doc.write(
f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}', f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',
artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
) )
def share_card(self, doc, message): def share_card(self, doc, message):
@ -266,11 +261,7 @@ class HtmlExporter(ExporterBase):
else: else:
app_logo = card_data.get('app_logo') app_logo = card_data.get('app_logo')
doc.write( doc.write(
f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}', f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',app_name:'{card_data.get('app_name')}'}},\n'''
timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',
description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',
app_name:'{card_data.get('app_name')}'
}},\n'''
) )
def export(self): def export(self):

View File

@ -90,19 +90,23 @@ class ExporterBase(QThread):
CONTACT_CSV = 4 CONTACT_CSV = 4
TXT = 5 TXT = 5
def __init__(self, contact, type_=DOCX, message_types={},time_range=None, parent=None): def __init__(self, contact, type_=DOCX, message_types={}, time_range=None, messages=None,index=0, parent=None):
super().__init__(parent) super().__init__(parent)
self.message_types = message_types # 导出的消息类型 self.message_types = message_types # 导出的消息类型
self.contact: Contact = contact # 联系人 self.contact: Contact = contact # 联系人
self.output_type = type_ # 导出文件类型 self.output_type = type_ # 导出文件类型
self.total_num = 1 # 总的消息数量 self.total_num = 1 # 总的消息数量
self.num = 0 # 当前处理的消息数量 self.num = 0 # 当前处理的消息数量
self.index = index #
self.last_timestamp = 0 self.last_timestamp = 0
self.time_range = time_range self.time_range = time_range
self.messages = messages
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
makedirs(origin_docx_path) makedirs(origin_docx_path)
def run(self): def run(self):
self.export() self.export()
def export(self): def export(self):
raise NotImplementedError("export method must be implemented in subclasses") raise NotImplementedError("export method must be implemented in subclasses")

View File

@ -3,7 +3,7 @@ import os
import traceback import traceback
from typing import List from typing import List
from PyQt5.QtCore import pyqtSignal, QThread from PyQt5.QtCore import pyqtSignal, QThread, QObject
from PyQt5.QtWidgets import QFileDialog from PyQt5.QtWidgets import QFileDialog
from app.DataBase.exporter_csv import CSVExporter from app.DataBase.exporter_csv import CSVExporter
@ -20,7 +20,7 @@ from ..util.image import get_image
os.makedirs('./data/聊天记录', exist_ok=True) os.makedirs('./data/聊天记录', exist_ok=True)
class Output(QThread): class Output(QObject):
""" """
发送信息线程 发送信息线程
""" """
@ -222,7 +222,7 @@ class Output(QThread):
Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one) Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
Child.start() Child.start()
def run(self): def start(self):
if self.output_type == self.DOCX: if self.output_type == self.DOCX:
self.to_docx(self.contact, self.message_types) self.to_docx(self.contact, self.message_types)
elif self.output_type == self.CSV_ALL: elif self.output_type == self.CSV_ALL: