WeChatMsg/app/DataBase/exporter_docx.py
2024-01-02 22:33:46 +08:00

348 lines
15 KiB
Python

import os
import shutil
import sys
import time
import traceback
from re import findall
import docx
from PyQt5.QtCore import pyqtSignal, QThread
from docx import shared
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from app.DataBase import msg_db, hard_link_db, media_msg_db
from app.DataBase.output import ExporterBase, escape_js_and_html
from app.DataBase.package_msg import PackageMsg
from app.log import logger
from app.person import Me
from app.util import path
from app.util.compress_content import parser_reply, share_card, music_share
from app.util.emoji import get_emoji_url
from app.util.file import get_file
from app.util.image import get_image_path, get_image, get_image_abs_path
from app.util.music import get_music_path
class DocxExporter(ExporterBase):
def text(self, doc, message):
type_ = message[2]
str_content = message[7]
str_time = message[8]
is_send = message[4]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run(str_content)
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def image(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
type_ = message[2]
str_content = message[7]
str_time = message[8]
is_send = message[4]
BytesExtra = message[10]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content = self.create_table(doc, is_send, avatar)
run = content.paragraphs[0].add_run()
str_content = escape_js_and_html(str_content)
image_path = hard_link_db.get_image(str_content, BytesExtra, thumb=True)
if not os.path.exists(os.path.join(Me().wx_dir, image_path)):
image_thumb_path = hard_link_db.get_image(str_content, BytesExtra, thumb=False)
if not os.path.exists(os.path.join(Me().wx_dir, image_thumb_path)):
return
image_path = image_thumb_path
image_path = get_image_abs_path(image_path, base_path=f'/data/聊天记录/{self.contact.remark}/image')
try:
run.add_picture(image_path, height=shared.Inches(2))
doc.add_paragraph()
except Exception:
print("Error!image")
def audio(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
str_content = message[7]
str_time = message[8]
is_send = message[4]
msgSvrId = message[9]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run('【表情包】')
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def emoji(self, doc, message):
str_content = message[7]
str_time = message[8]
is_send = message[4]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run('【表情包】')
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def file(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
bytesExtra = message[10]
str_time = message[8]
is_send = message[4]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run('【文件】')
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def refermsg(self, doc, message):
"""
处理回复消息
@param doc:
@param message:
@return:
"""
str_time = message[8]
is_send = message[4]
content = parser_reply(message[11])
refer_msg = content.get('refer')
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run(content.get('title'))
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
reply_p = content_cell.add_paragraph()
reply_content = f"{refer_msg.get('displayname')}:{refer_msg.get('content')}" if refer_msg else '未知引用'
run = content_cell.paragraphs[1].add_run(reply_content)
'''设置被回复内容格式'''
run.font.color.rgb = shared.RGBColor(121, 121, 121)
run.font_size = shared.Inches(0.3)
run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
reply_p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def system_msg(self, doc, message):
str_content = message[7]
is_send = message[4]
str_time = message[8]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
str_content = str_content.replace('<![CDATA[', "").replace(
' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
res = findall('(</{0,1}(img|revo|_wc_cus|a).*?>)', str_content)
for xmlstr, b in res:
str_content = str_content.replace(xmlstr, "")
doc.add_paragraph(str_content).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
def video(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
type_ = message[2]
str_content = message[7]
str_time = message[8]
is_send = message[4]
BytesExtra = message[10]
timestamp = message[5]
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
avatar = self.get_avatar_path(is_send, message, True)
content_cell = self.create_table(doc, is_send, avatar)
content_cell.paragraphs[0].add_run('【视频】')
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
if is_send:
p = content_cell.paragraphs[0]
p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
doc.add_paragraph()
def create_table(self, doc, is_send, avatar_path):
'''
#! 创建一个1*2表格
#! isSend = 1 (0,0)存聊天内容,(0,1)存头像
#! isSend = 0 (0,0)存头像,(0,1)存聊天内容
#! 返回聊天内容的坐标
'''
table = doc.add_table(rows=1, cols=2, style='Normal Table')
table.cell(0, 1).height = shared.Inches(0.5)
table.cell(0, 0).height = shared.Inches(0.5)
if is_send:
'''表格右对齐'''
table.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
avatar = table.cell(0, 1).paragraphs[0].add_run()
'''插入头像,设置头像宽度'''
avatar.add_picture(avatar_path, width=shared.Inches(0.5))
'''设置单元格宽度跟头像一致'''
table.cell(0, 1).width = shared.Inches(0.5)
content_cell = table.cell(0, 0)
'''聊天内容右对齐'''
content_cell.paragraphs[0].paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
else:
avatar = table.cell(0, 0).paragraphs[0].add_run()
avatar.add_picture(avatar_path, width=shared.Inches(0.5))
'''设置单元格宽度'''
table.cell(0, 0).width = shared.Inches(0.5)
content_cell = table.cell(0, 1)
'''聊天内容垂直居中对齐'''
content_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
return content_cell
def music_share(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
is_send = message[4]
timestamp = message[5]
content = music_share(message[11])
music_path = ''
if content.get('audio_url') != '':
music_path = get_music_path(content.get('audio_url'), content.get('title'),
output_path=origin_docx_path + '/music')
if music_path != '':
music_path = f'./music/{os.path.basename(music_path)}'
music_path = music_path.replace('\\', '/')
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
def share_card(self, doc, message):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
is_send = message[4]
timestamp = message[5]
bytesExtra = message[10]
compress_content_ = message[11]
card_data = share_card(bytesExtra, compress_content_)
is_chatroom = 1 if self.contact.is_chatroom else 0
avatar = self.get_avatar_path(is_send, message)
display_name = self.get_display_name(is_send, message)
thumbnail = ''
if card_data.get('thumbnail'):
thumbnail = os.path.join(Me().wx_dir, card_data.get('thumbnail'))
if os.path.exists(thumbnail):
shutil.copy(thumbnail, os.path.join(origin_docx_path, 'image', os.path.basename(thumbnail)))
thumbnail = './image/' + os.path.basename(thumbnail)
else:
thumbnail = ''
app_logo = ''
if card_data.get('app_logo'):
app_logo = os.path.join(Me().wx_dir, card_data.get('app_logo'))
if os.path.exists(app_logo):
shutil.copy(app_logo, os.path.join(origin_docx_path, 'image', os.path.basename(app_logo)))
app_logo = './image/' + os.path.basename(app_logo)
else:
app_logo = ''
def merge_docx(self, conRemark, n):
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"
all_file_path = []
for i in range(n):
file_name = f"{conRemark}{i}.docx"
all_file_path.append(origin_docx_path + '/' + file_name)
filename = f"{conRemark}.docx"
# print(all_file_path)
doc = docx.Document()
doc.save(origin_docx_path + '/' + filename)
master = docx.Document(origin_docx_path + '/' + filename)
middle_new_docx = Composer(master)
num = 0
for word in all_file_path:
word_document = docx.Document(word)
word_document.add_page_break()
if num != 0:
middle_new_docx.append(word_document)
num = num + 1
os.remove(word)
middle_new_docx.save(origin_docx_path + '/' + filename)
def export(self):
print('导出docx')
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
filename = os.path.join(origin_docx_path, f"{self.contact.remark}.docx")
doc = docx.Document()
doc.styles['Normal'].font.name = u'Cambria'
doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
if self.contact.is_chatroom:
packagemsg = PackageMsg()
messages = packagemsg.get_package_message_by_wxid(self.contact.wxid)
else:
messages = msg_db.get_messages(self.contact.wxid)
Me().save_avatar(os.path.join(f"{origin_docx_path}/avatar/{Me().wxid}.png"))
if self.contact.is_chatroom:
for message in messages:
if message[4]: # is_send
continue
try:
chatroom_avatar_path = f"{origin_docx_path}/avatar/{message[12].wxid}.png"
message[12].save_avatar(chatroom_avatar_path)
except:
print(message)
pass
else:
self.contact.save_avatar(os.path.join(f"{origin_docx_path}/avatar/{self.contact.wxid}.png"))
self.rangeSignal.emit(len(messages))
for index, message in enumerate(messages):
type_ = message[2]
sub_type = message[3]
timestamp = message[5]
self.progressSignal.emit(1)
if self.is_5_min(timestamp):
str_time = message[8]
doc.add_paragraph(str_time).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
if type_ == 1 and self.message_types.get(type_):
self.text(doc, message)
elif type_ == 3 and self.message_types.get(type_):
self.image(doc, message)
elif type_ == 34 and self.message_types.get(type_):
self.audio(doc, message)
elif type_ == 43 and self.message_types.get(type_):
self.video(doc, message)
elif type_ == 47 and self.message_types.get(type_):
self.emoji(doc, message)
elif type_ == 10000 and self.message_types.get(type_):
self.system_msg(doc, message)
elif type_ == 49 and sub_type == 57 and self.message_types.get(1):
self.refermsg(doc, message)
elif type_ == 49 and sub_type == 6 and self.message_types.get(4906):
self.file(doc, message)
try:
doc.save(filename)
except PermissionError:
filename = filename[:-5] + f'{time.time()}' + '.docx'
doc.save(filename)
self.okSignal.emit(1)