mirror of
https://github.com/LC044/WeChatMsg
synced 2025-02-23 11:42:20 +08:00
修改导出docx的方式
This commit is contained in:
parent
62154823cf
commit
4fa56b888a
@ -25,6 +25,7 @@ encoded_chars = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11\x
|
|||||||
# 创建一个字典,将要删除的字符映射为 None
|
# 创建一个字典,将要删除的字符映射为 None
|
||||||
char_mapping = {char: None for char in encoded_chars}
|
char_mapping = {char: None for char in encoded_chars}
|
||||||
|
|
||||||
|
|
||||||
def filter_control_characters(input_string):
|
def filter_control_characters(input_string):
|
||||||
"""
|
"""
|
||||||
过滤掉不可打印字符
|
过滤掉不可打印字符
|
||||||
@ -39,6 +40,84 @@ def filter_control_characters(input_string):
|
|||||||
|
|
||||||
|
|
||||||
class DocxExporter(ExporterBase):
|
class DocxExporter(ExporterBase):
|
||||||
|
def merge_docx(self, n):
    """Merge the partial DOCX written for chunk ``n`` into the combined document.

    Connected to each child exporter's ``okSignal``; ``n`` is the chunk index
    the child emits. Chunks may finish in any order, so ``merged_doc_index``
    tracks the contiguous ``[lowest, highest]`` range already merged and any
    chunk that is not adjacent to that range is parked in ``self.docs`` /
    ``self.docs_set`` until its neighbour arrives.

    Once every child has reported (``process_num == child_thread_num``) the
    combined document is saved and ``okSignal`` is emitted with ``1``.

    :param n: index of the chunk whose ``<remark>_<n>.docx`` file is ready.
    """
    self.process_num += 1
    remark = self.contact.remark
    origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{remark}"
    filename = f"{origin_docx_path}/{remark}_{n}.docx"
    doc = docx.Document(filename)

    if self.merged_doc_index == [-1, -1]:
        # First chunk to arrive seeds the merged range.
        self.document.append(doc)
        self.merged_doc_index = [n, n]
    elif n == self.merged_doc_index[0] - 1:
        self.document.insert(0, doc)
        self.merged_doc_index[0] -= 1
    elif n == self.merged_doc_index[1] + 1:
        self.document.append(doc)
        self.merged_doc_index[1] += 1
    else:
        # Not adjacent to the merged range yet: park it for later.
        self.docs.append([doc, n])
        self.docs_set.add(n)

    self._drain_pending_docs()

    # The chunk's content is now held in memory, so the temp file can go.
    os.remove(filename)

    if self.process_num == self.child_thread_num:
        file = os.path.join(origin_docx_path, f'{remark}.docx')
        try:
            self.document.save(file)
        except PermissionError:
            # Target is not writable (presumably still open in another
            # program): fall back to a timestamp-suffixed file name.
            file = file[:-5] + f'{time.time()}' + '.docx'
            self.document.save(file)
        self.okSignal.emit(1)

def _drain_pending_docs(self):
    """Merge every parked chunk that has become adjacent to the merged range.

    The previous implementation made a single ascending pass, which could not
    merge a run of parked chunks on the low side (e.g. range ``[2, 3]`` with
    chunks 0 and 1 parked only merged chunk 1), so chunks could still be
    parked - and therefore missing - when the final document was saved.
    """
    if not self.docs:
        return
    pending = {index: parked for parked, index in self.docs}

    # Grow the range downwards, then upwards, for as long as the direct
    # neighbour is available. The two directions never unblock each other.
    while self.merged_doc_index[0] - 1 in pending:
        self.document.insert(0, pending.pop(self.merged_doc_index[0] - 1))
        self.merged_doc_index[0] -= 1
    while self.merged_doc_index[1] + 1 in pending:
        self.document.append(pending.pop(self.merged_doc_index[1] + 1))
        self.merged_doc_index[1] += 1

    # Keep the bookkeeping in its original shape: a list of [doc, index]
    # pairs sorted by index, plus the matching set of indices.
    self.docs = [[parked, index] for index, parked in sorted(pending.items())]
    self.docs_set = set(pending)
|
||||||
|
|
||||||
|
def export(self):
    """Split the contact's messages into chunks and export each in a child thread.

    Loads the messages for ``self.contact`` within ``self.time_range``, slices
    them into sub-lists, prepares the ``Composer`` that will hold the combined
    document, and starts one ``DocxExporterChildThread`` per sub-list. Each
    child's ``okSignal`` is connected to ``merge_docx`` and its
    ``progressSignal`` is forwarded to this exporter's ``progressSignal``.
    """
    self.child_threads = []
    messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)

    # Number of chunks to aim for. NOTE: currently a single chunk; an earlier
    # variant used ``len(messages) // 500 + 1``.
    num = 1
    # Length of each sub-list. Never let it reach 0: ``range()`` rejects a
    # zero step, which made an empty conversation raise ValueError.
    sublist_length = max(1, len(messages) // num)

    # Slice the message list into sub-lists. Always keep at least one
    # (possibly empty) chunk so that a child still runs and the completion
    # signal is emitted even when there are no messages.
    divided_list = [
        messages[start:start + sublist_length]
        for start in range(0, len(messages), sublist_length)
    ] or [messages]
    self.child_thread_num = len(divided_list)
    self.process_num = 0

    # Base document that the per-chunk documents are merged into.
    doc = docx.Document()
    doc.styles["Normal"].font.name = "Cambria"
    doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
    self.document = Composer(doc)

    # Merge bookkeeping consumed by merge_docx: [-1, -1] means "nothing
    # merged yet"; docs / docs_set hold chunks waiting for a neighbour.
    self.merged_doc_index = [-1, -1]
    self.docs = []
    self.docs_set = set()

    for i, chunk in enumerate(divided_list):
        child_thread = DocxExporterChildThread(self.contact, type_=self.DOCX, message_types=self.message_types,
                                               time_range=self.time_range, messages=chunk, index=i)
        # Keep a reference so the thread object is not garbage-collected.
        self.child_threads.append(child_thread)
        child_thread.okSignal.connect(self.merge_docx)
        child_thread.progressSignal.connect(self.progressSignal)
        child_thread.start()
|
||||||
|
|
||||||
|
|
||||||
|
class DocxExporterChildThread(ExporterBase):
|
||||||
def text(self, doc, message):
|
def text(self, doc, message):
|
||||||
type_ = message[2]
|
type_ = message[2]
|
||||||
str_content = message[7]
|
str_content = message[7]
|
||||||
@ -313,7 +392,7 @@ class DocxExporter(ExporterBase):
|
|||||||
def export(self):
|
def export(self):
|
||||||
print(f"【开始导出 DOCX {self.contact.remark}】")
|
print(f"【开始导出 DOCX {self.contact.remark}】")
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
||||||
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
messages = self.messages
|
||||||
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
|
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
|
||||||
if self.contact.is_chatroom:
|
if self.contact.is_chatroom:
|
||||||
for message in messages:
|
for message in messages:
|
||||||
@ -329,6 +408,8 @@ class DocxExporter(ExporterBase):
|
|||||||
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
|
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
|
||||||
self.rangeSignal.emit(len(messages))
|
self.rangeSignal.emit(len(messages))
|
||||||
|
|
||||||
|
index = 0
|
||||||
|
|
||||||
def newdoc():
|
def newdoc():
|
||||||
nonlocal n, doc
|
nonlocal n, doc
|
||||||
doc = docx.Document()
|
doc = docx.Document()
|
||||||
@ -347,8 +428,8 @@ class DocxExporter(ExporterBase):
|
|||||||
for index, message in enumerate(messages):
|
for index, message in enumerate(messages):
|
||||||
if index % 200 == 0 and index:
|
if index % 200 == 0 and index:
|
||||||
# doc = document.add_paragraph()
|
# doc = document.add_paragraph()
|
||||||
filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx")
|
# filename = os.path.join(origin_docx_path, f"{self.contact.remark}{n}.docx")
|
||||||
doc.save(filename)
|
# doc.save(filename)
|
||||||
newdoc()
|
newdoc()
|
||||||
|
|
||||||
type_ = message[2]
|
type_ = message[2]
|
||||||
@ -391,6 +472,7 @@ class DocxExporter(ExporterBase):
|
|||||||
for index, dx in enumerate(docs[::-1]):
|
for index, dx in enumerate(docs[::-1]):
|
||||||
print(f"【合并 DOCX {self.contact.remark}】{index + 1}/{len(docs)}")
|
print(f"【合并 DOCX {self.contact.remark}】{index + 1}/{len(docs)}")
|
||||||
doc.insert(0, dx)
|
doc.insert(0, dx)
|
||||||
|
filename = os.path.join(origin_docx_path, f"{self.contact.remark}_{self.index}.docx")
|
||||||
try:
|
try:
|
||||||
# document.save(filename)
|
# document.save(filename)
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
@ -399,4 +481,4 @@ class DocxExporter(ExporterBase):
|
|||||||
# document.save(filename)
|
# document.save(filename)
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
print(f"【完成导出 DOCX {self.contact.remark}】")
|
print(f"【完成导出 DOCX {self.contact.remark}】")
|
||||||
self.okSignal.emit(1)
|
self.okSignal.emit(self.index)
|
||||||
|
@ -127,10 +127,7 @@ class HtmlExporter(ExporterBase):
|
|||||||
if file_path != "":
|
if file_path != "":
|
||||||
file_path = './file/' + file_info.get('file_name')
|
file_path = './file/' + file_info.get('file_name')
|
||||||
doc.write(
|
doc.write(
|
||||||
f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp}
|
f'''{{ type:49, text: '{file_path}',is_send:{is_send},avatar_path:'{avatar}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}',sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}',app_name: '{file_info.get('app_name')}'}},'''
|
||||||
,is_chatroom:{is_chatroom},displayname:'{display_name}',icon_path: '{icon_path}'
|
|
||||||
,sub_type:6,file_name: '{file_info.get('file_name')}',file_size: '{file_info.get('file_len')}'
|
|
||||||
,app_name: '{file_info.get('app_name')}'}},'''
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def refermsg(self, doc, message):
|
def refermsg(self, doc, message):
|
||||||
@ -234,9 +231,7 @@ class HtmlExporter(ExporterBase):
|
|||||||
display_name = self.get_display_name(is_send, message)
|
display_name = self.get_display_name(is_send, message)
|
||||||
music_path = escape_js_and_html(music_path)
|
music_path = escape_js_and_html(music_path)
|
||||||
doc.write(
|
doc.write(
|
||||||
f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',
|
f'''{{ type:49, text:'{music_path}',is_send:{is_send},avatar_path:'{avatar}',link_url:'{content.get('link_url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
|
||||||
timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',sub_type:3,title:'{content.get('title')}',
|
|
||||||
artist:'{content.get('artist')}', website_name:'{content.get('website_name')}'}},'''
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def share_card(self, doc, message):
|
def share_card(self, doc, message):
|
||||||
@ -266,11 +261,7 @@ class HtmlExporter(ExporterBase):
|
|||||||
else:
|
else:
|
||||||
app_logo = card_data.get('app_logo')
|
app_logo = card_data.get('app_logo')
|
||||||
doc.write(
|
doc.write(
|
||||||
f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',
|
f'''{{ type:49,sub_type:5, text:'',is_send:{is_send},avatar_path:'{avatar}',url:'{card_data.get('url')}',timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',app_name:'{card_data.get('app_name')}'}},\n'''
|
||||||
timestamp:{timestamp},is_chatroom:{is_chatroom},displayname:'{display_name}',title:'{card_data.get('title')}',
|
|
||||||
description:'{card_data.get('description')}',thumbnail:'{thumbnail}',app_logo:'{app_logo}',
|
|
||||||
app_name:'{card_data.get('app_name')}'
|
|
||||||
}},\n'''
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def export(self):
|
def export(self):
|
||||||
|
@ -90,19 +90,23 @@ class ExporterBase(QThread):
|
|||||||
CONTACT_CSV = 4
|
CONTACT_CSV = 4
|
||||||
TXT = 5
|
TXT = 5
|
||||||
|
|
||||||
def __init__(self, contact, type_=DOCX, message_types=None, time_range=None, messages=None, index=0, parent=None):
    """Store the export parameters and make sure the contact's output folder exists.

    :param contact: contact whose chat history is exported.
    :param type_: output file type (one of the class-level type constants).
    :param message_types: message types to export; defaults to a fresh empty
        dict so instances never share one mutable default object.
    :param time_range: optional time range passed on when messages are queried.
    :param messages: optional pre-loaded list of messages for this exporter.
    :param index: position of this exporter's chunk when the export is split
        across several child exporters.
    :param parent: optional Qt parent object.
    """
    super().__init__(parent)
    self.message_types = {} if message_types is None else message_types  # message types to export
    self.contact: Contact = contact  # contact being exported
    self.output_type = type_  # output file type
    self.total_num = 1  # total number of messages
    self.num = 0  # number of messages processed so far
    self.index = index  # chunk index of this exporter
    self.last_timestamp = 0
    self.time_range = time_range
    self.messages = messages
    origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
    makedirs(origin_docx_path)
|
||||||
|
|
||||||
def run(self):
    """Thread entry point: run the export implemented by the subclass."""
    self.export()
|
||||||
|
|
||||||
def export(self):
    """Perform the export; concrete exporters must override this method.

    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError("export method must be implemented in subclasses")
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import os
|
|||||||
import traceback
|
import traceback
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from PyQt5.QtCore import pyqtSignal, QThread
|
from PyQt5.QtCore import pyqtSignal, QThread, QObject
|
||||||
from PyQt5.QtWidgets import QFileDialog
|
from PyQt5.QtWidgets import QFileDialog
|
||||||
|
|
||||||
from app.DataBase.exporter_csv import CSVExporter
|
from app.DataBase.exporter_csv import CSVExporter
|
||||||
@ -20,7 +20,7 @@ from ..util.image import get_image
|
|||||||
os.makedirs('./data/聊天记录', exist_ok=True)
|
os.makedirs('./data/聊天记录', exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
class Output(QThread):
|
class Output(QObject):
|
||||||
"""
|
"""
|
||||||
发送信息线程
|
发送信息线程
|
||||||
"""
|
"""
|
||||||
@ -222,7 +222,7 @@ class Output(QThread):
|
|||||||
Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
|
Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
|
||||||
Child.start()
|
Child.start()
|
||||||
|
|
||||||
def run(self):
|
def start(self):
|
||||||
if self.output_type == self.DOCX:
|
if self.output_type == self.DOCX:
|
||||||
self.to_docx(self.contact, self.message_types)
|
self.to_docx(self.contact, self.message_types)
|
||||||
elif self.output_type == self.CSV_ALL:
|
elif self.output_type == self.CSV_ALL:
|
||||||
|
Loading…
Reference in New Issue
Block a user