Merge pull request #296 from zetaloop/faster-docx-clearer-progress

加快docx导出速度、更详细的过程输出
This commit is contained in:
SiYuan 2024-01-11 21:50:09 +08:00 committed by GitHub
commit 189f89a017
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 46 additions and 6 deletions

View File

@ -7,6 +7,7 @@ from app.DataBase.output import ExporterBase
class CSVExporter(ExporterBase): class CSVExporter(ExporterBase):
def to_csv(self): def to_csv(self):
print("【开始导出 CSV {self.contact.remark}")
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
os.makedirs(origin_docx_path, exist_ok=True) os.makedirs(origin_docx_path, exist_ok=True)
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv" filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv"
@ -23,7 +24,8 @@ class CSVExporter(ExporterBase):
for msg in messages: for msg in messages:
other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else [] other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else []
writer.writerow([*msg[:9], *other_data]) writer.writerow([*msg[:9], *other_data])
self.okSignal.emit('ok') print("【完成导出 CSV {self.contact.remark}")
self.okSignal.emit(1)
def run(self): def run(self):
self.to_csv() self.to_csv()

View File

@ -8,6 +8,7 @@ from docx import shared
from docx.enum.table import WD_ALIGN_VERTICAL from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn from docx.oxml.ns import qn
from docxcompose.composer import Composer
from app.DataBase import msg_db, hard_link_db from app.DataBase import msg_db, hard_link_db
from app.DataBase.output import ExporterBase, escape_js_and_html from app.DataBase.output import ExporterBase, escape_js_and_html
@ -282,12 +283,8 @@ class DocxExporter(ExporterBase):
middle_new_docx.save(origin_docx_path + '/' + filename) middle_new_docx.save(origin_docx_path + '/' + filename)
def export(self): def export(self):
print('导出docx') print(f"【开始导出 DOCX {self.contact.remark}")
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
doc = docx.Document()
doc.styles['Normal'].font.name = u'Cambria'
doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png")) Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
if self.contact.is_chatroom: if self.contact.is_chatroom:
@ -303,7 +300,23 @@ class DocxExporter(ExporterBase):
else: else:
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png")) self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
self.rangeSignal.emit(len(messages)) self.rangeSignal.emit(len(messages))
# Start a fresh python-docx Document and make it the current chunk:
# rebinds the enclosing scope's `doc`, appends the new document to `docs`,
# and increments the chunk counter `n`.
def newdoc():
nonlocal n, doc
doc = docx.Document()
# Set the "Normal" style font to Cambria, then set the w:eastAsia font
# attribute to 宋体 (SimSun) directly on the underlying XML element.
doc.styles["Normal"].font.name = "Cambria"
doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
docs.append(doc)
n += 1
doc = None
docs = []
n = 0
index = 0
newdoc()
for index, message in enumerate(messages): for index, message in enumerate(messages):
if index % 200 == 0 and index:
newdoc()
type_ = message[2] type_ = message[2]
sub_type = message[3] sub_type = message[3]
timestamp = message[5] timestamp = message[5]
@ -327,9 +340,27 @@ class DocxExporter(ExporterBase):
self.refermsg(doc, message) self.refermsg(doc, message)
elif type_ == 49 and sub_type == 6 and self.message_types.get(4906): elif type_ == 49 and sub_type == 6 and self.message_types.get(4906):
self.file(doc, message) self.file(doc, message)
if index % 25 == 0:
print(f"【导出 DOCX {self.contact.remark}{index}/{len(messages)}")
if index % 25:
print(f"【导出 DOCX {self.contact.remark}{index+1}/{len(messages)}")
filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
doc = docx.Document()
doc.styles["Normal"].font.name = "Cambria"
doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
# doc = Composer(doc)
# for index, dx in enumerate(docs):
# print(f"【MERGE Export DOCX {self.contact.remark}】{index}/{len(docs)}")
# doc.append(dx)
# print(f"【MERGE Export DOCX {self.contact.remark}】{len(docs)}")
doc = Composer(doc) # Measured on 11188 messages (56 chunks): inserting in reverse order was faster than appending in order (forward 65 s, reverse 54 s)
for index, dx in enumerate(docs[::-1]):
print(f"【合并 DOCX {self.contact.remark}{index+1}/{len(docs)}")
doc.insert(0, dx)
try: try:
doc.save(filename) doc.save(filename)
except PermissionError: except PermissionError:
filename = filename[:-5] + f'{time.time()}' + '.docx' filename = filename[:-5] + f'{time.time()}' + '.docx'
doc.save(filename) doc.save(filename)
print(f"【完成导出 DOCX {self.contact.remark}")
self.okSignal.emit(1) self.okSignal.emit(1)

View File

@ -275,6 +275,7 @@ class HtmlExporter(ExporterBase):
) )
def export(self): def export(self):
print(f"【开始导出 HTML {self.contact.remark}")
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html" filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html"
file_path = './app/resources/data/template.html' file_path = './app/resources/data/template.html'
@ -318,8 +319,11 @@ class HtmlExporter(ExporterBase):
self.music_share(f, message) self.music_share(f, message)
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905): elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
self.share_card(f, message) self.share_card(f, message)
if index % 2000 == 0:
print(f"【导出 HTML {self.contact.remark}{index}/{len(messages)}")
f.write(html_end) f.write(html_end)
f.close() f.close()
print(f"【完成导出 HTML {self.contact.remark}{len(messages)}")
self.count_finish_num(1) self.count_finish_num(1)
def count_finish_num(self, num): def count_finish_num(self, num):

View File

@ -110,6 +110,7 @@ class TxtExporter(ExporterBase):
def export(self): def export(self):
# 实现导出为txt的逻辑 # 实现导出为txt的逻辑
print("【开始导出 TXT {self.contact.remark}")
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
os.makedirs(origin_docx_path, exist_ok=True) os.makedirs(origin_docx_path, exist_ok=True)
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt" filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt"
@ -140,4 +141,5 @@ class TxtExporter(ExporterBase):
self.music_share(f, message) self.music_share(f, message)
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905): elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
self.share_card(f, message) self.share_card(f, message)
print("【完成导出 TXT {self.contact.remark}")
self.okSignal.emit(1) self.okSignal.emit(1)

View File

@ -17,4 +17,5 @@ soupsieve==2.5
lz4==4.3.2 lz4==4.3.2
pilk==0.2.4 pilk==0.2.4
python-docx==1.1.0 python-docx==1.1.0
docxcompose==1.4.0
eyed3==0.9.7 eyed3==0.9.7