mirror of
https://github.com/LC044/WeChatMsg
synced 2024-11-09 09:31:18 +08:00
新增导出AI对话专用TXT
This commit is contained in:
parent
8e33a2db57
commit
8be7d0d34e
@ -3,6 +3,7 @@ import random
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import threading
|
import threading
|
||||||
import traceback
|
import traceback
|
||||||
|
from collections import defaultdict
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
@ -222,7 +223,7 @@ class Msg:
|
|||||||
# result.sort(key=lambda x: x[5])
|
# result.sort(key=lambda x: x[5])
|
||||||
# return self.add_sender(result)
|
# return self.add_sender(result)
|
||||||
|
|
||||||
def get_messages_all(self,time_range=None):
|
def get_messages_all(self, time_range=None):
|
||||||
if time_range:
|
if time_range:
|
||||||
start_time, end_time = convert_to_timestamp(time_range)
|
start_time, end_time = convert_to_timestamp(time_range)
|
||||||
sql = f'''
|
sql = f'''
|
||||||
@ -242,6 +243,45 @@ class Msg:
|
|||||||
result.sort(key=lambda x: x[5])
|
result.sort(key=lambda x: x[5])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def get_messages_group_by_day(
|
||||||
|
self,
|
||||||
|
username_: str,
|
||||||
|
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
||||||
|
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
return dict {
|
||||||
|
date: messages
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
if not self.open_flag:
|
||||||
|
return {}
|
||||||
|
if time_range:
|
||||||
|
start_time, end_time = convert_to_timestamp(time_range)
|
||||||
|
sql = f'''
|
||||||
|
select localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,MsgSvrID,BytesExtra,CompressContent,DisplayContent
|
||||||
|
from MSG
|
||||||
|
where StrTalker=? AND type=1
|
||||||
|
{'AND CreateTime>' + str(start_time) + ' AND CreateTime<' + str(end_time) if time_range else ''}
|
||||||
|
order by CreateTime;
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
lock.acquire(True)
|
||||||
|
self.cursor.execute(sql, [username_])
|
||||||
|
result = self.cursor.fetchall()
|
||||||
|
finally:
|
||||||
|
lock.release()
|
||||||
|
result = parser_chatroom_message(result) if username_.__contains__('@chatroom') else result
|
||||||
|
|
||||||
|
# 按天分组存储聊天记录
|
||||||
|
grouped_results = defaultdict(list)
|
||||||
|
for row in result:
|
||||||
|
'2024-01-01'
|
||||||
|
date = row[8][:10] # 获取日期部分
|
||||||
|
grouped_results[date].append(row) # 将消息加入对应的日期列表中
|
||||||
|
|
||||||
|
return grouped_results
|
||||||
|
|
||||||
def get_messages_length(self):
|
def get_messages_length(self):
|
||||||
sql = '''
|
sql = '''
|
||||||
select count(*)
|
select count(*)
|
||||||
@ -329,7 +369,7 @@ class Msg:
|
|||||||
result = self.cursor.fetchall()
|
result = self.cursor.fetchall()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10,time_range=None, year_='all'):
|
def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10, time_range=None, year_='all'):
|
||||||
if not self.open_flag:
|
if not self.open_flag:
|
||||||
return None
|
return None
|
||||||
if time_range:
|
if time_range:
|
||||||
@ -491,7 +531,7 @@ class Msg:
|
|||||||
lock.release()
|
lock.release()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_messages_by_hour(self, username_, time_range=None,year_='all'):
|
def get_messages_by_hour(self, username_, time_range=None, year_='all'):
|
||||||
result = []
|
result = []
|
||||||
if not self.open_flag:
|
if not self.open_flag:
|
||||||
return result
|
return result
|
||||||
@ -535,7 +575,7 @@ class Msg:
|
|||||||
lock.release()
|
lock.release()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_latest_time_of_message(self, username_='', time_range=None,year_='all'):
|
def get_latest_time_of_message(self, username_='', time_range=None, year_='all'):
|
||||||
if not self.open_flag:
|
if not self.open_flag:
|
||||||
return None
|
return None
|
||||||
if time_range:
|
if time_range:
|
||||||
@ -743,7 +783,7 @@ class Msg:
|
|||||||
def get_send_messages_number_by_hour(
|
def get_send_messages_number_by_hour(
|
||||||
self,
|
self,
|
||||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
||||||
)->list:
|
) -> list:
|
||||||
"""
|
"""
|
||||||
统计每个(小时)时段自己总共发了多少消息,从最多到最少排序\n
|
统计每个(小时)时段自己总共发了多少消息,从最多到最少排序\n
|
||||||
return be like [('23', 9526), ('00', 7890), ('22', 7600), ..., ('05', 29)]
|
return be like [('23', 9526), ('00', 7890), ('22', 7600), ..., ('05', 29)]
|
||||||
@ -773,11 +813,12 @@ class Msg:
|
|||||||
finally:
|
finally:
|
||||||
lock.release()
|
lock.release()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_message_length(
|
def get_message_length(
|
||||||
self,
|
self,
|
||||||
username_='',
|
username_='',
|
||||||
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
time_range: Tuple[int | float | str | date, int | float | str | date] = None,
|
||||||
)->int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
统计自己总共发消息的字数,包含type=1的文本和type=49,subtype=57里面自己发的文本
|
统计自己总共发消息的字数,包含type=1的文本和type=49,subtype=57里面自己发的文本
|
||||||
"""
|
"""
|
||||||
@ -806,9 +847,9 @@ class Msg:
|
|||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
lock.acquire(True)
|
lock.acquire(True)
|
||||||
self.cursor.execute(sql_type_1,[username_])
|
self.cursor.execute(sql_type_1, [username_])
|
||||||
result_type_1 = self.cursor.fetchall()[0][0]
|
result_type_1 = self.cursor.fetchall()[0][0]
|
||||||
self.cursor.execute(sql_type_49,[username_])
|
self.cursor.execute(sql_type_49, [username_])
|
||||||
result_type_49 = self.cursor.fetchall()
|
result_type_49 = self.cursor.fetchall()
|
||||||
except sqlite3.DatabaseError:
|
except sqlite3.DatabaseError:
|
||||||
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
|
logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')
|
||||||
@ -822,6 +863,7 @@ class Msg:
|
|||||||
sum_type_49 += len(content["title"])
|
sum_type_49 += len(content["title"])
|
||||||
sum_type_1 = result_type_1 if result_type_1 else 0
|
sum_type_1 = result_type_1 if result_type_1 else 0
|
||||||
return sum_type_1 + sum_type_49
|
return sum_type_1 + sum_type_49
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.open_flag:
|
if self.open_flag:
|
||||||
try:
|
try:
|
||||||
|
@ -45,6 +45,7 @@ class ContactInfo(QWidget, Ui_Form):
|
|||||||
self.toCSVAct = QAction(Icon.ToCSV, '导出CSV', self)
|
self.toCSVAct = QAction(Icon.ToCSV, '导出CSV', self)
|
||||||
self.toHtmlAct = QAction(Icon.ToHTML, '导出HTML', self)
|
self.toHtmlAct = QAction(Icon.ToHTML, '导出HTML', self)
|
||||||
self.toTxtAct = QAction(Icon.ToTXT, '导出TXT', self)
|
self.toTxtAct = QAction(Icon.ToTXT, '导出TXT', self)
|
||||||
|
self.toAiTxtAct = QAction(Icon.ToTXT, '导出AI对话专用TXT', self)
|
||||||
self.toJsonAct = QAction(Icon.ToTXT, '导出json', self)
|
self.toJsonAct = QAction(Icon.ToTXT, '导出json', self)
|
||||||
self.toolButton_output.setPopupMode(QToolButton.MenuButtonPopup)
|
self.toolButton_output.setPopupMode(QToolButton.MenuButtonPopup)
|
||||||
self.toolButton_output.clicked.connect(self.toolButton_show)
|
self.toolButton_output.clicked.connect(self.toolButton_show)
|
||||||
@ -52,6 +53,7 @@ class ContactInfo(QWidget, Ui_Form):
|
|||||||
menu.addAction(self.toCSVAct)
|
menu.addAction(self.toCSVAct)
|
||||||
menu.addAction(self.toHtmlAct)
|
menu.addAction(self.toHtmlAct)
|
||||||
menu.addAction(self.toTxtAct)
|
menu.addAction(self.toTxtAct)
|
||||||
|
menu.addAction(self.toAiTxtAct)
|
||||||
menu.addAction(self.toJsonAct)
|
menu.addAction(self.toJsonAct)
|
||||||
self.toolButton_output.setMenu(menu)
|
self.toolButton_output.setMenu(menu)
|
||||||
self.toolButton_output.setIcon(Icon.Output)
|
self.toolButton_output.setIcon(Icon.Output)
|
||||||
@ -61,6 +63,7 @@ class ContactInfo(QWidget, Ui_Form):
|
|||||||
self.toCSVAct.triggered.connect(self.output)
|
self.toCSVAct.triggered.connect(self.output)
|
||||||
self.toTxtAct.triggered.connect(self.output)
|
self.toTxtAct.triggered.connect(self.output)
|
||||||
self.toJsonAct.triggered.connect(self.output)
|
self.toJsonAct.triggered.connect(self.output)
|
||||||
|
self.toAiTxtAct.triggered.connect(self.output)
|
||||||
|
|
||||||
def set_contact(self, contact: Contact):
|
def set_contact(self, contact: Contact):
|
||||||
self.view_userinfo.set_contact(contact)
|
self.view_userinfo.set_contact(contact)
|
||||||
@ -129,6 +132,9 @@ class ContactInfo(QWidget, Ui_Form):
|
|||||||
elif self.sender() == self.toTxtAct:
|
elif self.sender() == self.toTxtAct:
|
||||||
dialog = ExportDialog(self.contact, title='选择导出的消息类型', file_type='txt', parent=self)
|
dialog = ExportDialog(self.contact, title='选择导出的消息类型', file_type='txt', parent=self)
|
||||||
result = dialog.exec_() # 使用exec_()获取用户的操作结果
|
result = dialog.exec_() # 使用exec_()获取用户的操作结果
|
||||||
|
elif self.sender() == self.toAiTxtAct:
|
||||||
|
dialog = ExportDialog(self.contact, title='选择导出的消息类型', file_type='ai_txt', parent=self)
|
||||||
|
result = dialog.exec_() # 使用exec_()获取用户的操作结果
|
||||||
elif self.sender() == self.toJsonAct:
|
elif self.sender() == self.toJsonAct:
|
||||||
dialog = ExportDialog(self.contact, title='选择导出的消息类型', file_type='json', parent=self)
|
dialog = ExportDialog(self.contact, title='选择导出的消息类型', file_type='json', parent=self)
|
||||||
result = dialog.exec_() # 使用exec_()获取用户的操作结果
|
result = dialog.exec_() # 使用exec_()获取用户的操作结果
|
||||||
|
@ -62,6 +62,9 @@ class ExportDialog(QDialog, Ui_Dialog):
|
|||||||
self.export_choices = {"文本": True, "图片": True, "语音": True, "视频": True, "表情包": True,
|
self.export_choices = {"文本": True, "图片": True, "语音": True, "视频": True, "表情包": True,
|
||||||
'音乐与音频': True, '分享卡片': True, '文件': True,
|
'音乐与音频': True, '分享卡片': True, '文件': True,
|
||||||
'拍一拍等系统消息': True} # 定义导出的数据类型,默认全部选择
|
'拍一拍等系统消息': True} # 定义导出的数据类型,默认全部选择
|
||||||
|
elif file_type == 'ai_txt':
|
||||||
|
self.export_type = Output.AI_TXT
|
||||||
|
self.export_choices = {"文本": True} # 定义导出的数据类型,默认全部选择
|
||||||
elif file_type == 'docx':
|
elif file_type == 'docx':
|
||||||
self.export_type = Output.DOCX
|
self.export_type = Output.DOCX
|
||||||
self.export_choices = {"文本": True, "图片": False, "语音": False, "视频": False,
|
self.export_choices = {"文本": True, "图片": False, "语音": False, "视频": False,
|
||||||
|
96
app/util/exporter/exporter_ai_txt.py
Normal file
96
app/util/exporter/exporter_ai_txt.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from app.DataBase import msg_db
|
||||||
|
from app.util.compress_content import parser_reply, share_card
|
||||||
|
from app.util.exporter.exporter import ExporterBase
|
||||||
|
|
||||||
|
|
||||||
|
# Redaction rules, applied in this order; each match is replaced by "[<label> xxx]".
# Digit look-arounds are used instead of \b for numbers: in Python's Unicode
# mode CJK characters count as word characters, so \b never fires between a
# Chinese character and a digit and the number would be left in clear text.
_PRIVACY_PATTERNS = (
    # Mainland mobile number, optionally prefixed with +86 / 86.
    ('phone', re.compile(r'(?<!\d)(?:\+?86[-\s]?)?1[3-9]\d{9}(?!\d)')),
    # E-mail address.
    ('email', re.compile(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}')),
    # ID card number: 18 characters (last may be X/x) tried before the 15-digit
    # form so that an 18-digit number is never cut after its first 15 digits.
    ('id_card', re.compile(r'(?<!\d)(?:\d{17}[\dXx]|\d{15})(?!\d)')),
    # Keyword-based heuristics for credentials written in English.
    ('password', re.compile(r'\b(?:password|pwd|pass|psw)[\s=:]*\S+\b')),
    ('account', re.compile(r'\b(?:account|username|user|acct)[\s=:]*\S+\b')),
)


def remove_privacy_info(text):
    """
    Mask phone numbers, e-mail addresses, ID card numbers and password/account
    fragments in ``text``.

    :param text: message text; ``None`` or an empty string is returned unchanged.
    :return: the text with every match replaced by a ``[<kind> xxx]`` placeholder,
        where ``<kind>`` is one of phone, email, id_card, password, account.
    """
    if not text:
        return text

    for label, pattern in _PRIVACY_PATTERNS:
        text = pattern.sub(f'[{label} xxx]', text)

    return text
|
||||||
|
|
||||||
|
|
||||||
|
class AiTxtExporter(ExporterBase):
    """
    Export the text messages of one conversation to a compact TXT file meant to
    be fed to an AI model: one section per day, with the speaker name written
    only when the speaker changes.
    """

    # IsSender value of the previously written message (-1 = nothing written yet).
    last_is_send = -1
    # Display name of the previously written message (None = nothing written yet).
    last_speaker = None

    def _reset_speaker(self):
        """Forget the previous speaker so the next message gets a name label."""
        self.last_is_send = -1
        self.last_speaker = None

    def title(self, message):
        """
        Return the speaker prefix for ``message``.

        :param message: a row as returned by the message database
            (index 4 is IsSender).
        :return: ``'\\n<name>:'`` when the speaker differs from the previous
            message, otherwise an empty string.
        """
        is_send = message[4]
        # Compare the resolved name as well as IsSender. NOTE(review): this
        # presumes get_display_name returns per-member names in chatrooms, where
        # every other member has the same IsSender value — confirm against
        # ExporterBase.
        name = self.get_display_name(is_send, message)
        if is_send == self.last_is_send and name == self.last_speaker:
            return ''
        self.last_is_send = is_send
        self.last_speaker = name
        return '\n' + name + ':'

    def text(self, doc, message):
        """Write a text message (index 7 is StrContent) with private data masked."""
        # Tolerate a missing content value instead of failing inside the regex code.
        str_content = remove_privacy_info(message[7] or '')
        doc.write(
            f'''{self.title(message)}{str_content} '''
        )

    def image(self, doc, message):
        """Write a placeholder for an image message."""
        doc.write(
            f'''{self.title(message)}[图片]'''
        )

    def audio(self, doc, message):
        """Write a placeholder for a voice message."""
        doc.write(
            f'''{self.title(message)}[语音]'''
        )

    def emoji(self, doc, message):
        """Write a placeholder for a sticker message."""
        doc.write(
            f'''{self.title(message)}[表情包]'''
        )

    def file(self, doc, message):
        """Write a placeholder for a file message."""
        doc.write(
            f'''{self.title(message)}[文件]'''
        )

    def system_msg(self, doc, message):
        """Write a system message as '<time> <content>' with the revoke-edit markup removed."""
        str_content = message[7]
        str_time = message[8]
        str_content = str_content.replace('<![CDATA[', "").replace(
            ' <a href="weixin://revoke_edit_click">重新编辑</a>]]>', "")
        doc.write(
            f'''{str_time} {str_content}'''
        )

    def video(self, doc, message):
        """Write a placeholder for a video message."""
        doc.write(
            f'''{self.title(message)}[视频]'''
        )

    def export(self):
        """
        Write ``<remark>_chat.txt`` under ``self.origin_path``.

        Messages are fetched grouped by day; each day starts with a
        ``****<date>****`` separator. Only Type=1 messages enabled in
        ``self.message_types`` are written. Progress (0-100) is emitted through
        ``progressSignal`` and ``okSignal`` is emitted with 1 when done.
        """
        print(f"【开始导出 TXT {self.contact.remark}】")
        origin_path = self.origin_path
        os.makedirs(origin_path, exist_ok=True)
        filename = os.path.join(origin_path, self.contact.remark + '_chat.txt')
        messages_by_day = msg_db.get_messages_group_by_day(self.contact.wxid, time_range=self.time_range)
        # Progress is measured over all messages, not over the number of days,
        # so the emitted percentage stays within 0-100.
        total_steps = sum(len(day_messages) for day_messages in messages_by_day.values())
        done = 0
        with open(filename, mode='w', newline='', encoding='utf-8') as f:
            for day, day_messages in messages_by_day.items():
                f.write(f"\n\n{'*' * 20}{day}{'*' * 20}\n")
                # Start every day section with an explicit speaker label.
                self._reset_speaker()
                for message in day_messages:
                    done += 1
                    self.progressSignal.emit(int(done / total_steps * 100))
                    type_ = message[2]
                    if type_ == 1 and self.message_types.get(type_):
                        self.text(f, message)
        print(f"【完成导出 TXT {self.contact.remark}】")
        self.okSignal.emit(1)
|
@ -10,6 +10,7 @@ from PyQt5.QtWidgets import QFileDialog
|
|||||||
from docx.oxml.ns import qn
|
from docx.oxml.ns import qn
|
||||||
from docxcompose.composer import Composer
|
from docxcompose.composer import Composer
|
||||||
|
|
||||||
|
from app.util.exporter.exporter_ai_txt import AiTxtExporter
|
||||||
from app.util.exporter.exporter_csv import CSVExporter
|
from app.util.exporter.exporter_csv import CSVExporter
|
||||||
from app.util.exporter.exporter_docx import DocxExporter
|
from app.util.exporter.exporter_docx import DocxExporter
|
||||||
from app.util.exporter.exporter_html import HtmlExporter
|
from app.util.exporter.exporter_html import HtmlExporter
|
||||||
@ -44,6 +45,7 @@ class Output(QThread):
|
|||||||
CONTACT_CSV = 4
|
CONTACT_CSV = 4
|
||||||
TXT = 5
|
TXT = 5
|
||||||
JSON = 6
|
JSON = 6
|
||||||
|
AI_TXT = 7
|
||||||
Batch = 10086
|
Batch = 10086
|
||||||
|
|
||||||
def __init__(self, contact, type_=DOCX, message_types={}, sub_type=[], time_range=None, parent=None):
|
def __init__(self, contact, type_=DOCX, message_types={}, sub_type=[], time_range=None, parent=None):
|
||||||
@ -158,6 +160,9 @@ class Output(QThread):
|
|||||||
elif type_ == self.TXT:
|
elif type_ == self.TXT:
|
||||||
# print('批量导出txt')
|
# print('批量导出txt')
|
||||||
self.to_txt(contact, self.message_types, True)
|
self.to_txt(contact, self.message_types, True)
|
||||||
|
elif type_ == self.AI_TXT:
|
||||||
|
# print('批量导出txt')
|
||||||
|
self.to_ai_txt(contact, self.message_types, True)
|
||||||
elif type_ == self.CSV:
|
elif type_ == self.CSV:
|
||||||
self.to_csv(contact, self.message_types, True)
|
self.to_csv(contact, self.message_types, True)
|
||||||
elif type_ == self.HTML:
|
elif type_ == self.HTML:
|
||||||
@ -232,6 +237,15 @@ class Output(QThread):
|
|||||||
Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
|
Child.okSignal.connect(self.okSignal if not is_batch else self.batch_finish_one)
|
||||||
Child.start()
|
Child.start()
|
||||||
|
|
||||||
|
def to_ai_txt(self, contact, message_types, is_batch=False):
    """
    Start an AiTxtExporter worker for ``contact`` and wire its signals.

    :param contact: contact whose conversation is exported.
    :param message_types: message-type selection forwarded to the exporter.
    :param is_batch: when True the worker's completion is routed to
        ``batch_finish_one`` and its range signal is left unconnected;
        otherwise range and completion are forwarded to this object's own signals.
    """
    # NOTE(review): type_ is self.TXT here, not self.AI_TXT — confirm this is intended.
    exporter = AiTxtExporter(
        contact,
        type_=self.TXT,
        message_types=message_types,
        time_range=self.time_range,
    )
    self.children.append(exporter)
    exporter.progressSignal.connect(self.progress)
    if is_batch:
        exporter.okSignal.connect(self.batch_finish_one)
    else:
        exporter.rangeSignal.connect(self.rangeSignal)
        exporter.okSignal.connect(self.okSignal)
    exporter.start()
|
||||||
|
|
||||||
def to_html(self, contact, message_types, is_batch=False):
|
def to_html(self, contact, message_types, is_batch=False):
|
||||||
Child = HtmlExporter(contact, type_=self.output_type, message_types=message_types, time_range=self.time_range)
|
Child = HtmlExporter(contact, type_=self.output_type, message_types=message_types, time_range=self.time_range)
|
||||||
self.children.append(Child)
|
self.children.append(Child)
|
||||||
@ -284,6 +298,8 @@ class Output(QThread):
|
|||||||
self.contact_to_csv()
|
self.contact_to_csv()
|
||||||
elif self.output_type == self.TXT:
|
elif self.output_type == self.TXT:
|
||||||
self.to_txt(self.contact, self.message_types)
|
self.to_txt(self.contact, self.message_types)
|
||||||
|
elif self.output_type == self.AI_TXT:
|
||||||
|
self.to_ai_txt(self.contact, self.message_types)
|
||||||
elif self.output_type == self.CSV:
|
elif self.output_type == self.CSV:
|
||||||
self.to_csv(self.contact, self.message_types)
|
self.to_csv(self.contact, self.message_types)
|
||||||
elif self.output_type == self.HTML:
|
elif self.output_type == self.HTML:
|
||||||
|
Loading…
Reference in New Issue
Block a user