"""Export a WeChat conversation to DOCX or CSV using worker threads."""
import os
import re
import time

import docx
import numpy as np
import pandas as pd
import xmltodict
from PyQt5.QtCore import *
from docx import shared
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_COLOR_INDEX, WD_PARAGRAPH_ALIGNMENT
from docxcompose.composer import Composer

from app import person
from app.DataBase import data
from app.log import log

# Five minutes expressed in milliseconds (message timestamps are divided by
# 1000 before being handed to ``time.localtime``).
_FIVE_MINUTES_MS = 5 * 60 * 1000

# NOTE(review): the regular expressions below were reconstructed. The source
# this file was recovered from had lost its angle-bracket markup, leaving
# invalid patterns such as ``(?P(.*?))``. Group names come from the
# ``groupdict()`` keys the code reads; the surrounding tag names are assumed to
# match those group names. Verify against real message XML.
_FILE_TITLE_RE = re.compile(r"<title>(.*?)<")
_REPLY_TITLE_RE = re.compile(r"<title>(?P<title>(.*?))</title>")
_REPLY_DISPLAYNAME_RE = re.compile(
    r"<displayname>(?P<displayname>(.*?))</displayname>")
# Quoted message that is itself a reply. The nested XML is presumed to be
# entity-escaped; an unescaped pattern would always match the reply's own
# <title> and make the fallbacks below unreachable. TODO confirm.
_REPLY_NESTED_TITLE_RE = re.compile(
    r"\n?title&gt;(?P<content>(.*?))\n?&lt;/title&gt;")
_REPLY_CONTENT_RE = re.compile(r"<content>(?P<content>(.*?))</content>")
_REPLY_OWN_CONTENT_RE = re.compile(
    r"\n?<content>(?P<content>(.*?))\n?</content>")


def _chat_dir(conRemark):
    """Return the export directory used for the contact *conRemark*."""
    return f"{os.path.abspath('.')}/data/聊天记录/{conRemark}"


def IS_5_min(last_m, now_m):
    """Return True when two timestamps are at least five minutes apart.

    :param last_m: timestamp of the previous message, in milliseconds
    :param now_m: timestamp of the current message, in milliseconds
    :return: True if the absolute gap is 300 seconds or more

    The absolute value is taken before comparing so that a negative gap is
    measured the same way as a positive one (floor division of a negative
    number would otherwise round the gap up).
    """
    return abs(now_m - last_m) >= _FIVE_MINUTES_MS


def time_format(timestamp):
    """Format a millisecond timestamp as a local ``%Y-%m-%d %H:%M:%S`` string.

    :param timestamp: numeric timestamp in milliseconds
    :return: formatted local time
    """
    seconds = timestamp / 1000
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))


class Output(QThread):
    """Coordinator thread that exports one conversation.

    For DOCX output the messages are split into ``self.n`` chunks, each
    written by a :class:`ChildThread`; the chunk files are merged once every
    child has finished. For CSV output a single child does all the work.

    Signals:
        progressSignal(int): running count of processed messages.
        rangeSignal(int): total number of messages to export.
        okSignal(int): emitted with 1 when the export is complete.
    """
    progressSignal = pyqtSignal(int)
    rangeSignal = pyqtSignal(int)
    okSignal = pyqtSignal(int)
    i = 1
    CSV = 0
    DOCX = 1
    HTML = 2

    def __init__(self, Me: person.Me, ta_u, parent=None, type_=DOCX):
        """
        :param Me: the local user; ``Me.avatar_path`` is read here
        :param ta_u: username (wxid) of the contact being exported
        :param parent: optional Qt parent object
        :param type_: one of ``Output.CSV`` / ``Output.DOCX`` / ``Output.HTML``
        """
        super().__init__(parent)
        self.Me = Me
        self.sec = 2  # not used in this module
        self.ta_username = ta_u
        self.my_avatar = self.Me.avatar_path
        self.ta_avatar = data.get_avator(ta_u)
        self.msg_id = 0
        self.output_type = type_
        self.total_num = 0
        self.n = 0  # number of DOCX chunks / worker threads
        self.Child = {}
        self._finished_children = 0

    @log
    def merge_docx(self, conRemark, n):
        """Merge the ``n`` chunk files of *conRemark* into one document.

        Chunk files are named ``{conRemark}{i}.docx`` and are deleted after
        they have been appended. Every chunk is appended (including the
        first), chunks are separated by a page break, and a chunk whose file
        is missing is skipped instead of aborting the merge.

        :param conRemark: contact remark, used as directory and file stem
        :param n: number of chunk files to look for
        """
        chat_dir = _chat_dir(conRemark)
        merged_path = f"{chat_dir}/{conRemark}.docx"
        candidates = [f"{chat_dir}/{conRemark}{i}.docx" for i in range(n)]
        part_paths = [p for p in candidates if os.path.exists(p)]

        composer = Composer(docx.Document())
        last_index = len(part_paths) - 1
        for index, part_path in enumerate(part_paths):
            part = docx.Document(part_path)
            if index != last_index:
                # Separate chunks, but do not leave a blank trailing page.
                part.add_page_break()
            composer.append(part)
            os.remove(part_path)
        composer.save(merged_path)

    def progress(self, value):
        """Slot for a child's ``progressSignal``: forward overall progress.

        :param value: the child's own counter (unused; the coordinator keeps
            its own running total in ``self.i``)
        """
        self.i += 1
        self.progressSignal.emit(self.i)

    def _on_child_finished(self):
        """Slot for a DOCX child's ``finished`` signal.

        Merging is started only after every child thread has finished, so all
        chunk files are guaranteed to have been saved before they are read.
        """
        self._finished_children += 1
        if self._finished_children != self.n:
            return
        conRemark = data.get_conRemark(self.ta_username)
        self.merge_docx(conRemark, self.n)
        print('ok')
        self.progressSignal.emit(self.total_num)
        self.okSignal.emit(1)

    @log
    def to_csv(self, conRemark, path):
        """Start a background child thread that writes the CSV export.

        :param conRemark: contact remark, used as directory and file stem
        :param path: unused; kept for interface compatibility
        """
        origin_docx_path = _chat_dir(conRemark)
        messages = data.get_all_message(self.ta_username)
        self.Child0 = ChildThread(self.Me, self.ta_username, messages,
                                  conRemark, 0, type_=ChildThread.CSV)
        self.Child0.progressSignal.connect(self.progress)
        self.Child0.start()
        print("成功导出CSV文件:", origin_docx_path)

    def _start_docx_children(self, conRemark):
        """Split all messages into ``self.n`` chunks and start one child each."""
        messages = data.get_all_message(self.ta_username)
        total = len(messages)
        self.total_num = total
        self.rangeSignal.emit(total)

        self.n = 10
        self.Child = {}
        self._finished_children = 0
        chunk_size = total // self.n
        for index in range(self.n):
            start = index * chunk_size
            # The last chunk also takes the remainder of the division.
            stop = total if index == self.n - 1 else start + chunk_size
            child = ChildThread(self.Me, self.ta_username,
                                messages[start:stop], conRemark, index)
            child.progressSignal.connect(self.progress)
            child.finished.connect(self._on_child_finished)
            self.Child[index] = child
            child.start()

    def run(self):
        """Thread entry point: create the export directory and export."""
        conRemark = data.get_conRemark(self.ta_username)
        data.mkdir(_chat_dir(conRemark))
        self.i = 1
        if self.output_type == self.DOCX:
            self._start_docx_children(conRemark)
        elif self.output_type == self.CSV:
            messages = data.get_all_message(self.ta_username)
            self.Child0 = ChildThread(self.Me, self.ta_username, messages,
                                      conRemark, 0, type_=ChildThread.CSV)
            self.Child0.progressSignal.connect(self.progress)
            # Called directly: this method already runs off the GUI thread.
            self.Child0.run()
            self.okSignal.emit(1)


class ChildThread(QThread):
    """Worker thread that exports one slice of a conversation.

    Each message is a sequence indexed as used below: ``[0]`` message id,
    ``[2]`` type, ``[3]`` status, ``[4]`` isSend, ``[6]`` millisecond
    timestamp, ``[7]`` username, ``[8]`` content, ``[9]`` image path.
    """
    progressSignal = pyqtSignal(int)
    rangeSignal = pyqtSignal(int)
    i = 1
    CSV = 0
    DOCX = 1
    HTML = 2

    def __init__(self, Me: person.Me, ta_u, message, conRemark, num,
                 parent=None, type_=DOCX):
        """
        :param Me: the local user; ``Me.avatar_path`` is read here
        :param ta_u: username (wxid) of the contact being exported
        :param message: the messages this worker is responsible for
        :param conRemark: contact remark, used as directory and file stem
        :param num: index of this worker's chunk
        :param parent: optional Qt parent object
        :param type_: ``ChildThread.CSV`` or ``ChildThread.DOCX``
        """
        super().__init__(parent)
        self.Me = Me
        self.sec = 2  # not used in this module
        self.ta_username = ta_u
        self.num = num
        self.my_avatar = self.Me.avatar_path
        self.ta_avatar = data.get_avator(ta_u)
        self.conRemark = conRemark
        self.message = message
        self.msg_id = 0
        self.output_type = type_

    def create_table(self, doc, isSend):
        """Append a 1x2 table holding an avatar and return the content cell.

        When *isSend* is truthy the table is right-aligned, cell (0, 1) holds
        the sender's own avatar and cell (0, 0) is the content cell;
        otherwise cell (0, 0) holds the contact's avatar and cell (0, 1) is
        the content cell.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :return: the cell that should receive the message content
        """
        table = doc.add_table(rows=1, cols=2, style='Normal Table')
        avatar_width = shared.Inches(0.5)
        right = WD_PARAGRAPH_ALIGNMENT.RIGHT
        if isSend:
            table.alignment = right
            avatar_cell = table.cell(0, 1)
            content_cell = table.cell(0, 0)
            avatar_path = self.my_avatar
        else:
            avatar_cell = table.cell(0, 0)
            content_cell = table.cell(0, 1)
            avatar_path = self.ta_avatar

        avatar_cell.paragraphs[0].add_run().add_picture(
            avatar_path, width=avatar_width)
        # Keep the avatar cell exactly as wide as the avatar itself.
        avatar_cell.width = avatar_width
        if isSend:
            content_cell.paragraphs[0].paragraph_format.alignment = right
        content_cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
        return content_cell

    def text(self, doc, isSend, message, status):
        """Append a plain text message.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :param message: message text
        :param status: message status; 5 marks the text as not delivered
        """
        if status == 5:
            message += '(未发出) '
        paragraph = self.create_table(doc, isSend).paragraphs[0]
        paragraph.add_run(message)
        if isSend:
            paragraph.paragraph_format.alignment = (
                WD_PARAGRAPH_ALIGNMENT.RIGHT)
        doc.add_paragraph()

    def image(self, doc, isSend, Type, content, imgPath):
        """Append an image message.

        For ``Type == 3`` the stored thumbnail name (the part after ``th_``)
        is mapped into the ``./app/data/image2`` directory tree and passed
        through ``data.clearImagePath``. Any failure to resolve or insert the
        picture is reported and leaves the avatar row without a picture.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :param Type: message type (3 for chat images, 47 for emoji)
        :param content: message content (unused)
        :param imgPath: stored image path
        """
        cell = self.create_table(doc, isSend)
        run = cell.paragraphs[0].add_run()
        try:
            if Type == 3:
                name = imgPath.split('th_')[1]
                imgPath = (f'./app/data/image2/{name[0:2]}/{name[2:4]}'
                           f'/th_{name}')
                # NOTE(review): assumed to apply to chat images only;
                # confirm against data.clearImagePath.
                imgPath = data.clearImagePath(imgPath)
            run.add_picture(f'{imgPath}', height=shared.Inches(2))
            doc.add_paragraph()
        except Exception as e:
            # Best effort: a missing or unreadable image must not abort
            # the export of the remaining messages.
            print("Error!image", e)

    def emoji(self, doc, isSend, content, imgPath):
        """Append an emoji/sticker message.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :param content: message content
        :param imgPath: key passed to ``data.get_emoji`` to obtain the path
        """
        emoji_path = data.get_emoji(imgPath)
        self.image(doc, isSend, Type=47, content=content, imgPath=emoji_path)

    def wx_file(self, doc, isSend, content, status):
        """Append a file message, rendered as the file's title.

        The title is the text between ``<title>`` and the next ``<``. When no
        title can be found the raw content is written instead.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :param content: message XML
        :param status: message status, forwarded to :meth:`text`
        """
        match = _FILE_TITLE_RE.search(content)
        filename = match.group(1) if match else content
        self.text(doc, isSend, filename, status)

    def retract_message(self, doc, isSend, content, status):
        """Append a centered paragraph for a recalled message.

        :param doc: the python-docx document being built
        :param isSend: unused
        :param content: text to display
        :param status: unused
        """
        paragraph = doc.add_paragraph(content)
        paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    def reply(self, doc, isSend, content, status):
        """Append a reply message together with the message it quotes.

        The reply text is written first; below it, in grey with a highlight,
        ``<display name>:<quoted text>``. When no quoted text can be
        extracted the placeholder ``图片`` is used.

        :param doc: the python-docx document being built
        :param isSend: truthy if the message was sent by the local user
        :param content: message XML
        :param status: message status; 5 marks the reply as not delivered
        """
        title_match = _REPLY_TITLE_RE.search(content)
        title = title_match.group('title') if title_match else ''
        name_match = _REPLY_DISPLAYNAME_RE.search(content)
        displayname = name_match.group('displayname') if name_match else ''

        # A quoted reply takes precedence; otherwise use the pattern for the
        # message direction, then the newline-tolerant pattern as last resort.
        quoted = _REPLY_NESTED_TITLE_RE.search(content)
        if quoted is None:
            by_direction = (_REPLY_CONTENT_RE if isSend == 0
                            else _REPLY_OWN_CONTENT_RE)
            quoted = (by_direction.search(content)
                      or _REPLY_OWN_CONTENT_RE.search(content))
        text = quoted.group('content') if quoted else '图片'

        if status == 5:
            title = '(未发出) ' + title

        content_cell = self.create_table(doc, isSend)
        title_p = content_cell.paragraphs[0]
        title_p.add_run(title)
        reply_p = content_cell.add_paragraph()
        quoted_run = reply_p.add_run(displayname + ':' + text)
        quoted_run.font.color.rgb = shared.RGBColor(121, 121, 121)
        quoted_run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
        if isSend:
            right = WD_PARAGRAPH_ALIGNMENT.RIGHT
            title_p.paragraph_format.alignment = right
            reply_p.paragraph_format.alignment = right
        doc.add_paragraph()

    def pat_a_pat(self, doc, isSend, content, status):
        """Append a centered, grey "pat" notification.

        The template text is read from the message XML; the status-icon
        placeholders are removed and the ``${user}`` placeholders are replaced
        by the users' remarks. If the XML cannot be processed a fixed error
        text is written instead.

        :param doc: the python-docx document being built
        :param isSend: unused
        :param content: message XML
        :param status: unused
        """
        try:
            record = xmltodict.parse(content)
            record = record['msg']['appmsg']['patMsg']['records']['record']
            fromUser = record['fromUser']
            pattedUser = record['pattedUser']
            template = record['template']
            for icon in ('${pattedusername@textstatusicon}',
                         '${fromusername@textstatusicon}'):
                template = template.replace(icon, '')
            template = template.replace(
                f'${{{fromUser}}}', data.get_conRemark(fromUser))
            template = template.replace(
                f'${{{pattedUser}}}', data.get_conRemark(pattedUser))
            print(template)
        except Exception as e:
            print(e)
            template = '糟糕!出错了。'
        paragraph = doc.add_paragraph()
        run = paragraph.add_run(template)
        paragraph.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        run.font.color.rgb = shared.RGBColor(121, 121, 121)

    def video(self, doc, isSend, content, status, img_path):
        """Video messages are not exported; only log what was received."""
        print(content, img_path)

    def _write_message(self, doc, message):
        """Dispatch one message to the writer that matches its type."""
        Type = int(message[2])
        status = message[3]
        isSend = message[4]
        content = message[8]
        imgPath = message[9]
        if Type == 1:
            self.text(doc, isSend, content, status)
        elif Type == 3:
            self.image(doc, isSend, 3, content, imgPath)
        elif Type == 47:
            self.emoji(doc, isSend, content, imgPath)
        elif Type == 1090519089:
            self.wx_file(doc, isSend, content, status)
        elif Type == 268445456:
            self.retract_message(doc, isSend, content, status)
        elif Type == 822083633:
            self.reply(doc, isSend, content, status)
        elif Type == 922746929:
            self.pat_a_pat(doc, isSend, content, status)
        elif Type == 43:
            self.video(doc, isSend, content, status, imgPath)

    def to_docx(self, messages, i, conRemark):
        """Write *messages* to the chunk file ``{conRemark}{i}.docx``.

        A centered timestamp line is inserted whenever two consecutive
        messages are at least five minutes apart (and before the first
        message). ``progressSignal`` is emitted once per message. A message
        that cannot be written is reported and skipped so that the chunk
        file is still saved.

        :param messages: the messages to write
        :param i: chunk index, used in the file name
        :param conRemark: contact remark, used as directory and file stem
        """
        filename = f"{_chat_dir(conRemark)}/{conRemark}{i}.docx"
        doc = docx.Document()
        last_timestamp = 0
        for message in messages:
            self.progressSignal.emit(self.i)
            self.i += 1
            try:
                now_timestamp = message[6]
                if IS_5_min(last_timestamp, now_timestamp):
                    stamp = doc.add_paragraph(time_format(now_timestamp))
                    stamp.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
                last_timestamp = now_timestamp
                self._write_message(doc, message)
            except Exception as e:
                # Best effort per message: keep exporting the rest.
                print(e)
        print(filename)
        doc.save(filename)

    def to_csv(self, messages, i, conRemark):
        """Write *messages* to ``{conRemark}.csv`` and emit one progress tick.

        :param messages: the messages to write
        :param i: unused
        :param conRemark: contact remark, used as directory and file stem
        """
        filename = f"{_chat_dir(conRemark)}/{conRemark}.csv"
        # (column name, index into the message row, optional converter)
        column_spec = (
            ("用户名", 7, None),
            ("消息内容", 8, None),
            ("发送时间", 6, time_format),
            ("发送状态", 3, None),
            ("消息类型", 2, None),
            ("isSend", 4, None),
            ("msgId", 0, None),
        )
        df = pd.DataFrame()
        for name, index, convert in column_spec:
            values = [row[index] for row in messages]
            if convert is not None:
                values = [convert(value) for value in values]
            df[name] = np.array(values)
        df.to_csv(filename)
        print(df)
        self.progressSignal.emit(self.num)

    def to_html(self, messages, i, conRemark):
        """HTML export is not implemented."""
        pass

    def run(self):
        """Thread entry point: export ``self.message`` in the chosen format."""
        if self.output_type == self.DOCX:
            self.to_docx(self.message, self.num, self.conRemark)
        elif self.output_type == self.CSV:
            print("导出csv001")
            self.to_csv(self.message, self.num, self.conRemark)


if __name__ == '__main__':
    me = data.Me_Person('wxid_fdkbu92el15h22')
    t = Output(Me=me, ta_u='wxid_0o18ef858vnu22', type_=Output.CSV)
    t.run()