Merge pull request #296 from zetaloop/faster-docx-clearer-progress

加快docx导出速度、更详细的过程输出
2025-04-29 01:08:11 +08:00 · 2024-01-11 21:50:09 +08:00 · 2024-01-11 21:50:09 +08:00 · 189f89a017
commit 189f89a017
parent f0d537a51d 2d069aac6d
5 changed files with 46 additions and 6 deletions
--- a/app/DataBase/exporter_csv.py
+++ b/app/DataBase/exporter_csv.py
@ -7,6 +7,7 @@ from app.DataBase.output import ExporterBase
 class CSVExporter(ExporterBase):
    def to_csv(self):
        print(f"【开始导出 CSV {self.contact.remark}】")
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
        os.makedirs(origin_docx_path, exist_ok=True)
        filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv"
@ -23,7 +24,8 @@ class CSVExporter(ExporterBase):
            for msg in messages:
                other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else []
                writer.writerow([*msg[:9], *other_data])
-        self.okSignal.emit('ok')
+        print(f"【完成导出 CSV {self.contact.remark}】")
        self.okSignal.emit(1)
    def run(self):
        """Run the CSV export by delegating to ``to_csv``."""
        self.to_csv()
--- a/app/DataBase/exporter_docx.py
+++ b/app/DataBase/exporter_docx.py
@ -8,6 +8,7 @@ from docx import shared
 from docx.enum.table import WD_ALIGN_VERTICAL
 from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
 from docx.oxml.ns import qn
 from docxcompose.composer import Composer
 from app.DataBase import msg_db, hard_link_db
 from app.DataBase.output import ExporterBase, escape_js_and_html
@ -282,12 +283,8 @@ class DocxExporter(ExporterBase):
        middle_new_docx.save(origin_docx_path + '/' + filename)
    def export(self):
-        print('导出docx')
+        print(f"【开始导出 DOCX {self.contact.remark}】")
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
        filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
        doc = docx.Document()
        doc.styles['Normal'].font.name = u'Cambria'
        doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
        Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
        if self.contact.is_chatroom:
@ -303,7 +300,23 @@ class DocxExporter(ExporterBase):
        else:
            self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
        self.rangeSignal.emit(len(messages))
        def newdoc():
            """Start a fresh chunk document and make it the current ``doc``.

            Rebinds the enclosing ``doc`` to a new ``docx.Document`` with the
            same Normal-style fonts, appends it to the enclosing ``docs`` list,
            and increments the enclosing chunk counter ``n``.
            """
            nonlocal n, doc
            doc = docx.Document()
            # Latin text uses Cambria; the East Asian font slot is set to 宋体.
            doc.styles["Normal"].font.name = "Cambria"
            doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
            # Keep every chunk so the caller can merge them into one file later.
            docs.append(doc)
            n += 1
        doc = None
        docs = []
        n = 0
        index = 0
        newdoc()
        for index, message in enumerate(messages):
            if index % 200 == 0 and index:
                newdoc()
            type_ = message[2]
            sub_type = message[3]
            timestamp = message[5]
@ -327,9 +340,27 @@ class DocxExporter(ExporterBase):
                self.refermsg(doc, message)
            elif type_ == 49 and sub_type == 6 and self.message_types.get(4906):
                self.file(doc, message)
            if index % 25 == 0:
                print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}")
        if index % 25:
            print(f"【导出 DOCX {self.contact.remark}】{index+1}/{len(messages)}")
        filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
        doc = docx.Document()
        doc.styles["Normal"].font.name = "Cambria"
        doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
        # doc = Composer(doc)
        # for index, dx in enumerate(docs):
        #     print(f"【MERGE Export DOCX {self.contact.remark}】{index}/{len(docs)}")
        #     doc.append(dx)
        # print(f"【MERGE Export DOCX {self.contact.remark}】{len(docs)}")
        doc = Composer(doc)  # 针对11188条消息（56组）所测，反排比正排更快，正排65s，反排54s
        for index, dx in enumerate(docs[::-1]):
            print(f"【合并 DOCX {self.contact.remark}】{index+1}/{len(docs)}")
            doc.insert(0, dx)
        try:
            doc.save(filename)
        except PermissionError:
            filename = filename[:-5] + f'{time.time()}' + '.docx'
            doc.save(filename)
        print(f"【完成导出 DOCX {self.contact.remark}】")
        self.okSignal.emit(1)
--- a/app/DataBase/exporter_html.py
+++ b/app/DataBase/exporter_html.py
@ -275,6 +275,7 @@ class HtmlExporter(ExporterBase):
        )
    def export(self):
        print(f"【开始导出 HTML {self.contact.remark}】")
        messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
        filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html"
        file_path = './app/resources/data/template.html'
@ -318,8 +319,11 @@ class HtmlExporter(ExporterBase):
                self.music_share(f, message)
            elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
                self.share_card(f, message)
            if index % 2000 == 0:
                print(f"【导出 HTML {self.contact.remark}】{index}/{len(messages)}")
        f.write(html_end)
        f.close()
        print(f"【完成导出 HTML {self.contact.remark}】{len(messages)}")
        self.count_finish_num(1)
    def count_finish_num(self, num):
--- a/app/DataBase/exporter_txt.py
+++ b/app/DataBase/exporter_txt.py
@ -110,6 +110,7 @@ class TxtExporter(ExporterBase):
    def export(self):
        # 实现导出为txt的逻辑
        print(f"【开始导出 TXT {self.contact.remark}】")
        origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
        os.makedirs(origin_docx_path, exist_ok=True)
        filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt"
@ -140,4 +141,5 @@ class TxtExporter(ExporterBase):
                    self.music_share(f, message)
                elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
                    self.share_card(f, message)
        print(f"【完成导出 TXT {self.contact.remark}】")
        self.okSignal.emit(1)
--- a/requirements.txt
+++ b/requirements.txt
@ -17,4 +17,5 @@ soupsieve==2.5
 lz4==4.3.2
 pilk==0.2.4
 python-docx==1.1.0
 docxcompose==1.4.0
 eyed3==0.9.7