From 20bededea8bb26b304125f0681c5a2cd960e6006 Mon Sep 17 00:00:00 2001 From: zetaloop Date: Thu, 11 Jan 2024 20:01:50 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=B1=BB=E5=9E=8B?= =?UTF-8?q?=E4=B8=8D=E6=AD=A3=E7=A1=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit okSignal = pyqtSignal(int),见 output.py --- app/DataBase/exporter_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/DataBase/exporter_csv.py b/app/DataBase/exporter_csv.py index ae463d8..522b66d 100644 --- a/app/DataBase/exporter_csv.py +++ b/app/DataBase/exporter_csv.py @@ -23,7 +23,7 @@ class CSVExporter(ExporterBase): for msg in messages: other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else [] writer.writerow([*msg[:9], *other_data]) - self.okSignal.emit('ok') + self.okSignal.emit(1) def run(self): self.to_csv() From c89ae94f640e6aaaf2dccd2071f4421fc063f5c9 Mon Sep 17 00:00:00 2001 From: zetaloop Date: Thu, 11 Jan 2024 20:14:21 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=9B=B4=E8=AF=A6?= =?UTF-8?q?=E7=BB=86=E7=9A=84=E8=BF=90=E8=A1=8C=E8=BF=87=E7=A8=8B=E8=BE=93?= =?UTF-8?q?=E5=87=BA=EF=BC=8C=E5=8F=AF=E7=94=A8=E4=BA=8E...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 可用于判断程序是在缓慢处理中还是卡了,对于特别多的对话记录,ui进度条是几乎不动的 --- app/DataBase/exporter_csv.py | 2 ++ app/DataBase/exporter_docx.py | 4 +++- app/DataBase/exporter_html.py | 4 ++++ app/DataBase/exporter_txt.py | 2 ++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/app/DataBase/exporter_csv.py b/app/DataBase/exporter_csv.py index 522b66d..d79831f 100644 --- a/app/DataBase/exporter_csv.py +++ b/app/DataBase/exporter_csv.py @@ -7,6 +7,7 @@ from app.DataBase.output import ExporterBase class CSVExporter(ExporterBase): def to_csv(self): + print(f"【开始导出 CSV {self.contact.remark}】") origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" 
os.makedirs(origin_docx_path, exist_ok=True) filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv" @@ -23,6 +24,7 @@ class CSVExporter(ExporterBase): for msg in messages: other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else [] writer.writerow([*msg[:9], *other_data]) + print(f"【完成导出 CSV {self.contact.remark}】") self.okSignal.emit(1) def run(self): diff --git a/app/DataBase/exporter_docx.py b/app/DataBase/exporter_docx.py index 7ef7db8..9d52566 100644 --- a/app/DataBase/exporter_docx.py +++ b/app/DataBase/exporter_docx.py @@ -282,7 +282,7 @@ class DocxExporter(ExporterBase): middle_new_docx.save(origin_docx_path + '/' + filename) def export(self): - print('导出docx') + print(f"【开始导出 DOCX {self.contact.remark}】") origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx") doc = docx.Document() @@ -327,9 +327,11 @@ class DocxExporter(ExporterBase): self.refermsg(doc, message) elif type_ == 49 and sub_type == 6 and self.message_types.get(4906): self.file(doc, message) + print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}") try: doc.save(filename) except PermissionError: filename = filename[:-5] + f'{time.time()}' + '.docx' doc.save(filename) + print(f"【完成导出 DOCX {self.contact.remark}】") self.okSignal.emit(1) diff --git a/app/DataBase/exporter_html.py b/app/DataBase/exporter_html.py index af506ed..83c6a48 100644 --- a/app/DataBase/exporter_html.py +++ b/app/DataBase/exporter_html.py @@ -275,6 +275,7 @@ class HtmlExporter(ExporterBase): ) def export(self): + print(f"【开始导出 HTML {self.contact.remark}】") messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html" file_path = './app/resources/data/template.html' @@ -318,8 +319,11 @@ class HtmlExporter(ExporterBase): 
self.music_share(f, message) elif type_ == 49 and sub_type == 5 and self.message_types.get(4905): self.share_card(f, message) + if index % 2000 == 0: + print(f"【导出 HTML {self.contact.remark}】{index}/{len(messages)}") f.write(html_end) f.close() + print(f"【完成导出 HTML {self.contact.remark}】{len(messages)}") self.count_finish_num(1) def count_finish_num(self, num): diff --git a/app/DataBase/exporter_txt.py b/app/DataBase/exporter_txt.py index f4df086..da494b0 100644 --- a/app/DataBase/exporter_txt.py +++ b/app/DataBase/exporter_txt.py @@ -110,6 +110,7 @@ class TxtExporter(ExporterBase): def export(self): # 实现导出为txt的逻辑 + print(f"【开始导出 TXT {self.contact.remark}】") origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" os.makedirs(origin_docx_path, exist_ok=True) filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt" @@ -140,4 +141,5 @@ class TxtExporter(ExporterBase): self.music_share(f, message) elif type_ == 49 and sub_type == 5 and self.message_types.get(4905): self.share_card(f, message) + print(f"【完成导出 TXT {self.contact.remark}】") self.okSignal.emit(1) \ No newline at end of file From a01819a061d3a4c38d561013bdc2ae29f89ac183 Mon Sep 17 00:00:00 2001 From: zetaloop Date: Thu, 11 Jan 2024 20:18:24 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=8A=A0=E5=BF=ABdocx=E5=AF=BC=E5=87=BA?= =?UTF-8?q?=E9=80=9F=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 对话记录特别多的时候,越往后word文件越大,添加一个记录所需时间也越长。 先将每200条记录保存,最后合并在一起,将整个过程大大加快。 --- app/DataBase/exporter_docx.py | 39 ++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/app/DataBase/exporter_docx.py b/app/DataBase/exporter_docx.py index 9d52566..6c51bfe 100644 --- a/app/DataBase/exporter_docx.py +++ b/app/DataBase/exporter_docx.py @@ -8,6 +8,7 @@ from docx import shared from docx.enum.table import WD_ALIGN_VERTICAL from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT 
from docx.oxml.ns import qn +from docxcompose.composer import Composer from app.DataBase import msg_db, hard_link_db from app.DataBase.output import ExporterBase, escape_js_and_html @@ -284,10 +285,6 @@ class DocxExporter(ExporterBase): def export(self): print(f"【开始导出 DOCX {self.contact.remark}】") origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}" - filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx") - doc = docx.Document() - doc.styles['Normal'].font.name = u'Cambria' - doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体') messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range) Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png")) if self.contact.is_chatroom: @@ -303,7 +300,23 @@ class DocxExporter(ExporterBase): else: self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png")) self.rangeSignal.emit(len(messages)) + + def newdoc(): + nonlocal n, doc + doc = docx.Document() + doc.styles["Normal"].font.name = "Cambria" + doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + docs.append(doc) + n += 1 + + doc = None + docs = [] + n = 0 + index = 0 + newdoc() for index, message in enumerate(messages): + if index % 200 == 0 and index: + newdoc() type_ = message[2] sub_type = message[3] timestamp = message[5] @@ -327,7 +340,23 @@ class DocxExporter(ExporterBase): self.refermsg(doc, message) elif type_ == 49 and sub_type == 6 and self.message_types.get(4906): self.file(doc, message) - print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}") + if index % 25 == 0: + print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}") + if index % 25: + print(f"【导出 DOCX {self.contact.remark}】{index+1}/{len(messages)}") + filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx") + doc = docx.Document() + doc.styles["Normal"].font.name = "Cambria" + 
doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + # doc = Composer(doc) + # for index, dx in enumerate(docs): + # print(f"【MERGE Export DOCX {self.contact.remark}】{index}/{len(docs)}") + # doc.append(dx) + # print(f"【MERGE Export DOCX {self.contact.remark}】{len(docs)}") + doc = Composer(doc) # 针对11188条消息(56组)所测,反排比正排更快,正排65s,反排54s + for index, dx in enumerate(docs[::-1]): + print(f"【合并 DOCX {self.contact.remark}】{index+1}/{len(docs)}") + doc.insert(0, dx) try: doc.save(filename) except PermissionError: From 2d069aac6d280a4861b510f639aedc64d33ef8c1 Mon Sep 17 00:00:00 2001 From: zetaloop Date: Thu, 11 Jan 2024 20:41:38 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BE=9D=E8=B5=96?= =?UTF-8?q?=E5=BA=93docxcompose?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 90cc6ca..485931a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,5 @@ soupsieve==2.5 lz4==4.3.2 pilk==0.2.4 python-docx==1.1.0 +docxcompose==1.4.0 eyed3==0.9.7 \ No newline at end of file