mirror of
https://github.com/LC044/WeChatMsg
synced 2025-02-23 03:22:17 +08:00
Merge pull request #296 from zetaloop/faster-docx-clearer-progress
加快docx导出速度、更详细的过程输出
This commit is contained in:
commit
189f89a017
@ -7,6 +7,7 @@ from app.DataBase.output import ExporterBase
|
|||||||
|
|
||||||
class CSVExporter(ExporterBase):
|
class CSVExporter(ExporterBase):
|
||||||
def to_csv(self):
|
def to_csv(self):
|
||||||
|
print(f"【开始导出 CSV {self.contact.remark}】")
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
||||||
os.makedirs(origin_docx_path, exist_ok=True)
|
os.makedirs(origin_docx_path, exist_ok=True)
|
||||||
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv"
|
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}_utf8.csv"
|
||||||
@ -23,7 +24,8 @@ class CSVExporter(ExporterBase):
|
|||||||
for msg in messages:
|
for msg in messages:
|
||||||
other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else []
|
other_data = [msg[12].remark, msg[12].nickName, msg[12].wxid] if self.contact.is_chatroom else []
|
||||||
writer.writerow([*msg[:9], *other_data])
|
writer.writerow([*msg[:9], *other_data])
|
||||||
self.okSignal.emit('ok')
|
print(f"【完成导出 CSV {self.contact.remark}】")
|
||||||
|
self.okSignal.emit(1)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.to_csv()
|
self.to_csv()
|
||||||
|
@ -8,6 +8,7 @@ from docx import shared
|
|||||||
from docx.enum.table import WD_ALIGN_VERTICAL
|
from docx.enum.table import WD_ALIGN_VERTICAL
|
||||||
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
|
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
|
||||||
from docx.oxml.ns import qn
|
from docx.oxml.ns import qn
|
||||||
|
from docxcompose.composer import Composer
|
||||||
|
|
||||||
from app.DataBase import msg_db, hard_link_db
|
from app.DataBase import msg_db, hard_link_db
|
||||||
from app.DataBase.output import ExporterBase, escape_js_and_html
|
from app.DataBase.output import ExporterBase, escape_js_and_html
|
||||||
@ -282,12 +283,8 @@ class DocxExporter(ExporterBase):
|
|||||||
middle_new_docx.save(origin_docx_path + '/' + filename)
|
middle_new_docx.save(origin_docx_path + '/' + filename)
|
||||||
|
|
||||||
def export(self):
|
def export(self):
|
||||||
print('导出docx')
|
print(f"【开始导出 DOCX {self.contact.remark}】")
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
||||||
filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
|
|
||||||
doc = docx.Document()
|
|
||||||
doc.styles['Normal'].font.name = u'Cambria'
|
|
||||||
doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
|
|
||||||
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
||||||
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
|
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
|
||||||
if self.contact.is_chatroom:
|
if self.contact.is_chatroom:
|
||||||
@ -303,7 +300,23 @@ class DocxExporter(ExporterBase):
|
|||||||
else:
|
else:
|
||||||
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
|
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
|
||||||
self.rangeSignal.emit(len(messages))
|
self.rangeSignal.emit(len(messages))
|
||||||
|
|
||||||
|
def newdoc():
|
||||||
|
nonlocal n, doc
|
||||||
|
doc = docx.Document()
|
||||||
|
doc.styles["Normal"].font.name = "Cambria"
|
||||||
|
doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
|
||||||
|
docs.append(doc)
|
||||||
|
n += 1
|
||||||
|
|
||||||
|
doc = None
|
||||||
|
docs = []
|
||||||
|
n = 0
|
||||||
|
index = 0
|
||||||
|
newdoc()
|
||||||
for index, message in enumerate(messages):
|
for index, message in enumerate(messages):
|
||||||
|
if index % 200 == 0 and index:
|
||||||
|
newdoc()
|
||||||
type_ = message[2]
|
type_ = message[2]
|
||||||
sub_type = message[3]
|
sub_type = message[3]
|
||||||
timestamp = message[5]
|
timestamp = message[5]
|
||||||
@ -327,9 +340,27 @@ class DocxExporter(ExporterBase):
|
|||||||
self.refermsg(doc, message)
|
self.refermsg(doc, message)
|
||||||
elif type_ == 49 and sub_type == 6 and self.message_types.get(4906):
|
elif type_ == 49 and sub_type == 6 and self.message_types.get(4906):
|
||||||
self.file(doc, message)
|
self.file(doc, message)
|
||||||
|
if index % 25 == 0:
|
||||||
|
print(f"【导出 DOCX {self.contact.remark}】{index}/{len(messages)}")
|
||||||
|
if index % 25:
|
||||||
|
print(f"【导出 DOCX {self.contact.remark}】{index+1}/{len(messages)}")
|
||||||
|
filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
|
||||||
|
doc = docx.Document()
|
||||||
|
doc.styles["Normal"].font.name = "Cambria"
|
||||||
|
doc.styles["Normal"]._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
|
||||||
|
# doc = Composer(doc)
|
||||||
|
# for index, dx in enumerate(docs):
|
||||||
|
# print(f"【MERGE Export DOCX {self.contact.remark}】{index}/{len(docs)}")
|
||||||
|
# doc.append(dx)
|
||||||
|
# print(f"【MERGE Export DOCX {self.contact.remark}】{len(docs)}")
|
||||||
|
doc = Composer(doc) # 针对11188条消息(56组)所测,反排比正排更快,正排65s,反排54s
|
||||||
|
for index, dx in enumerate(docs[::-1]):
|
||||||
|
print(f"【合并 DOCX {self.contact.remark}】{index+1}/{len(docs)}")
|
||||||
|
doc.insert(0, dx)
|
||||||
try:
|
try:
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
filename = filename[:-5] + f'{time.time()}' + '.docx'
|
filename = filename[:-5] + f'{time.time()}' + '.docx'
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
|
print(f"【完成导出 DOCX {self.contact.remark}】")
|
||||||
self.okSignal.emit(1)
|
self.okSignal.emit(1)
|
||||||
|
@ -275,6 +275,7 @@ class HtmlExporter(ExporterBase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def export(self):
|
def export(self):
|
||||||
|
print(f"【开始导出 HTML {self.contact.remark}】")
|
||||||
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
messages = msg_db.get_messages(self.contact.wxid, time_range=self.time_range)
|
||||||
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html"
|
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.html"
|
||||||
file_path = './app/resources/data/template.html'
|
file_path = './app/resources/data/template.html'
|
||||||
@ -318,8 +319,11 @@ class HtmlExporter(ExporterBase):
|
|||||||
self.music_share(f, message)
|
self.music_share(f, message)
|
||||||
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
|
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
|
||||||
self.share_card(f, message)
|
self.share_card(f, message)
|
||||||
|
if index % 2000 == 0:
|
||||||
|
print(f"【导出 HTML {self.contact.remark}】{index}/{len(messages)}")
|
||||||
f.write(html_end)
|
f.write(html_end)
|
||||||
f.close()
|
f.close()
|
||||||
|
print(f"【完成导出 HTML {self.contact.remark}】{len(messages)}")
|
||||||
self.count_finish_num(1)
|
self.count_finish_num(1)
|
||||||
|
|
||||||
def count_finish_num(self, num):
|
def count_finish_num(self, num):
|
||||||
|
@ -110,6 +110,7 @@ class TxtExporter(ExporterBase):
|
|||||||
|
|
||||||
def export(self):
|
def export(self):
|
||||||
# 实现导出为txt的逻辑
|
# 实现导出为txt的逻辑
|
||||||
|
print(f"【开始导出 TXT {self.contact.remark}】")
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
||||||
os.makedirs(origin_docx_path, exist_ok=True)
|
os.makedirs(origin_docx_path, exist_ok=True)
|
||||||
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt"
|
filename = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}/{self.contact.remark}.txt"
|
||||||
@ -140,4 +141,5 @@ class TxtExporter(ExporterBase):
|
|||||||
self.music_share(f, message)
|
self.music_share(f, message)
|
||||||
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
|
elif type_ == 49 and sub_type == 5 and self.message_types.get(4905):
|
||||||
self.share_card(f, message)
|
self.share_card(f, message)
|
||||||
|
print(f"【完成导出 TXT {self.contact.remark}】")
|
||||||
self.okSignal.emit(1)
|
self.okSignal.emit(1)
|
@ -17,4 +17,5 @@ soupsieve==2.5
|
|||||||
lz4==4.3.2
|
lz4==4.3.2
|
||||||
pilk==0.2.4
|
pilk==0.2.4
|
||||||
python-docx==1.1.0
|
python-docx==1.1.0
|
||||||
|
docxcompose==1.4.0
|
||||||
eyed3==0.9.7
|
eyed3==0.9.7
|
Loading…
Reference in New Issue
Block a user