mirror of
https://github.com/LC044/WeChatMsg
synced 2025-02-23 03:22:17 +08:00
修复不可打印字符导致的docx导出失败#297
This commit is contained in:
parent
c3b6aa379b
commit
60e297321e
@ -4,6 +4,7 @@ import time
|
|||||||
from re import findall
|
from re import findall
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
|
import unicodedata
|
||||||
from docx import shared
|
from docx import shared
|
||||||
from docx.enum.table import WD_ALIGN_VERTICAL
|
from docx.enum.table import WD_ALIGN_VERTICAL
|
||||||
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
|
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
|
||||||
@ -12,10 +13,33 @@ from docxcompose.composer import Composer
|
|||||||
|
|
||||||
from app.DataBase import msg_db, hard_link_db
|
from app.DataBase import msg_db, hard_link_db
|
||||||
from app.DataBase.output import ExporterBase, escape_js_and_html
|
from app.DataBase.output import ExporterBase, escape_js_and_html
|
||||||
|
from app.log import logger
|
||||||
from app.person import Me
|
from app.person import Me
|
||||||
from app.util.compress_content import parser_reply, share_card, music_share
|
from app.util.compress_content import parser_reply, share_card, music_share
|
||||||
from app.util.image import get_image_abs_path
|
from app.util.image import get_image_abs_path
|
||||||
from app.util.music import get_music_path
|
from app.util.music import get_music_path
|
||||||
|
import string
|
||||||
|
|
||||||
|
|
||||||
|
def filter_control_characters(input_string):
    """Strip characters that cannot be stored in an XML (docx) document.

    Used as the fallback when adding a text run raises ``ValueError``
    because the message contains characters XML cannot represent.

    The previous implementation kept only ``string.printable``, which is
    ASCII-only: it silently deleted every non-ASCII character (all Chinese
    text, emoji, ...) and still let ``\\x0b`` / ``\\x0c`` through, both of
    which are illegal in XML. This version keeps exactly the characters
    permitted by the XML 1.0 ``Char`` production.

    @param input_string: text that may contain XML-incompatible characters
    @return: the same text with every XML-incompatible character removed
    """

    def _is_xml_char(char):
        # XML 1.0 Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]
        #                  | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        code = ord(char)
        return (
            code in (0x09, 0x0A, 0x0D)
            or 0x20 <= code <= 0xD7FF
            or 0xE000 <= code <= 0xFFFD
            or 0x10000 <= code <= 0x10FFFF
        )

    return ''.join(char for char in input_string if _is_xml_char(char))
|
||||||
|
|
||||||
|
def is_control_char(ch):
    """Tell whether ``ch`` falls in a Unicode "Other" (``C*``) category.

    The major class ``C`` groups the general categories Cc (control),
    Cf (format), Cs (surrogate), Co (private use) and Cn (unassigned).

    Approach taken from:
    https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python

    @param ch: a single-character string
    @return: True when the character's general category starts with ``C``
    """
    # The general category is a two-letter code; its first letter is the
    # major class.
    major_class = unicodedata.category(ch)[:1]
    return major_class == 'C'
|
||||||
|
|
||||||
|
|
||||||
class DocxExporter(ExporterBase):
|
class DocxExporter(ExporterBase):
|
||||||
@ -30,6 +54,11 @@ class DocxExporter(ExporterBase):
|
|||||||
display_name = self.get_display_name(is_send, message)
|
display_name = self.get_display_name(is_send, message)
|
||||||
avatar = self.get_avatar_path(is_send, message, True)
|
avatar = self.get_avatar_path(is_send, message, True)
|
||||||
content_cell = self.create_table(doc, is_send, avatar)
|
content_cell = self.create_table(doc, is_send, avatar)
|
||||||
|
try:
|
||||||
|
content_cell.paragraphs[0].add_run(str_content)
|
||||||
|
except ValueError:
|
||||||
|
logger.error(f'非法字符:{str_content}')
|
||||||
|
str_content = filter_control_characters(str_content)
|
||||||
content_cell.paragraphs[0].add_run(str_content)
|
content_cell.paragraphs[0].add_run(str_content)
|
||||||
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
|
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
|
||||||
if is_send:
|
if is_send:
|
||||||
@ -77,7 +106,7 @@ class DocxExporter(ExporterBase):
|
|||||||
display_name = self.get_display_name(is_send, message)
|
display_name = self.get_display_name(is_send, message)
|
||||||
avatar = self.get_avatar_path(is_send, message, True)
|
avatar = self.get_avatar_path(is_send, message, True)
|
||||||
content_cell = self.create_table(doc, is_send, avatar)
|
content_cell = self.create_table(doc, is_send, avatar)
|
||||||
content_cell.paragraphs[0].add_run('【表情包】')
|
content_cell.paragraphs[0].add_run('【语音】')
|
||||||
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
|
content_cell.paragraphs[0].font_size = shared.Inches(0.5)
|
||||||
if is_send:
|
if is_send:
|
||||||
p = content_cell.paragraphs[0]
|
p = content_cell.paragraphs[0]
|
||||||
@ -233,7 +262,6 @@ class DocxExporter(ExporterBase):
|
|||||||
avatar = self.get_avatar_path(is_send, message)
|
avatar = self.get_avatar_path(is_send, message)
|
||||||
display_name = self.get_display_name(is_send, message)
|
display_name = self.get_display_name(is_send, message)
|
||||||
|
|
||||||
|
|
||||||
def share_card(self, doc, message):
|
def share_card(self, doc, message):
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{self.contact.remark}"
|
||||||
is_send = message[4]
|
is_send = message[4]
|
||||||
@ -260,6 +288,7 @@ class DocxExporter(ExporterBase):
|
|||||||
app_logo = './image/' + os.path.basename(app_logo)
|
app_logo = './image/' + os.path.basename(app_logo)
|
||||||
else:
|
else:
|
||||||
app_logo = ''
|
app_logo = ''
|
||||||
|
|
||||||
def merge_docx(self, conRemark, n):
|
def merge_docx(self, conRemark, n):
|
||||||
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"
|
origin_docx_path = f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"
|
||||||
all_file_path = []
|
all_file_path = []
|
||||||
|
@ -59,6 +59,9 @@ def parser_chatroom_message(messages):
|
|||||||
message.append(ContactDefault(wxid))
|
message.append(ContactDefault(wxid))
|
||||||
updated_messages.append(tuple(message))
|
updated_messages.append(tuple(message))
|
||||||
continue
|
continue
|
||||||
|
# todo 解析还是有问题,会出现这种带:的东西
|
||||||
|
if ':' in wxid: # wxid_ewi8gfgpp0eu22:25319:1
|
||||||
|
wxid = wxid.split(':')[0]
|
||||||
contact_info_list = micro_msg_db.get_contact_by_username(wxid)
|
contact_info_list = micro_msg_db.get_contact_by_username(wxid)
|
||||||
if contact_info_list is None: # 群聊中已退群的联系人不会保存在数据库里
|
if contact_info_list is None: # 群聊中已退群的联系人不会保存在数据库里
|
||||||
message.append(ContactDefault(wxid))
|
message.append(ContactDefault(wxid))
|
||||||
|
@ -116,15 +116,16 @@ class ExporterBase(QThread):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_avatar_path(self, is_send, message, is_absolute_path=False) -> str:
|
def get_avatar_path(self, is_send, message, is_absolute_path=False) -> str:
|
||||||
if self.contact.is_chatroom:
|
|
||||||
avatar = message[12].smallHeadImgUrl
|
|
||||||
else:
|
|
||||||
avatar = Me().smallHeadImgUrl if is_send else self.contact.smallHeadImgUrl
|
|
||||||
if is_absolute_path:
|
if is_absolute_path:
|
||||||
if self.contact.is_chatroom:
|
if self.contact.is_chatroom:
|
||||||
avatar = message[12].avatar_path
|
avatar = message[12].avatar_path
|
||||||
else:
|
else:
|
||||||
avatar = Me().avatar_path if is_send else self.contact.avatar_path
|
avatar = Me().avatar_path if is_send else self.contact.avatar_path
|
||||||
|
else:
|
||||||
|
if self.contact.is_chatroom:
|
||||||
|
avatar = message[12].smallHeadImgUrl
|
||||||
|
else:
|
||||||
|
avatar = Me().smallHeadImgUrl if is_send else self.contact.smallHeadImgUrl
|
||||||
return avatar
|
return avatar
|
||||||
|
|
||||||
def get_display_name(self, is_send, message) -> str:
|
def get_display_name(self, is_send, message) -> str:
|
||||||
|
Loading…
Reference in New Issue
Block a user