WeChatMsg/app/DataBase/msg.py

368 lines
12 KiB
Python
Raw Normal View History

2023-11-15 23:53:39 +08:00
import os.path
2023-12-03 21:25:50 +08:00
import random
2023-12-11 21:17:29 +08:00
import html
2023-11-15 23:53:39 +08:00
import sqlite3
import threading
import traceback
2023-11-18 13:25:56 +08:00
from pprint import pprint
2023-12-11 21:17:29 +08:00
import lz4.block
import html
import re
2023-11-15 23:53:39 +08:00
from app.log import logger
2023-11-29 21:23:44 +08:00
# Default location of the message database, relative to the working directory.
# Msg.init_database() rebinds this global when it is given an explicit path.
db_path = "./app/Database/Msg/MSG.db"
# Module-wide lock that Msg holds around every use of its shared cursor.
lock = threading.Lock()
2023-11-25 00:20:35 +08:00
2023-11-16 22:39:59 +08:00
2023-11-25 00:20:35 +08:00
def is_database_exist():
    """Report whether anything exists on disk at the configured ``db_path``."""
    found = os.path.exists(db_path)
    return found
2023-11-25 00:20:35 +08:00
def singleton(cls):
    """Class decorator that makes ``cls`` produce a single shared instance.

    The first call constructs the instance, forwarding any positional and
    keyword arguments to ``cls``. Every later call returns that same object
    and ignores its arguments.

    Args:
        cls: The class to wrap.

    Returns:
        A factory function carrying the name and docstring of ``cls``.
    """
    # Local import so the decorator does not depend on the file-level imports.
    import functools

    _instance = {}

    # updated=() keeps the class namespace from being copied into the
    # wrapper function's __dict__; only the metadata attributes are wanted.
    @functools.wraps(cls, updated=())
    def inner(*args, **kwargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]

    return inner
2023-12-03 21:25:50 +08:00
class MsgType:
    """Named integer codes for message kinds.

    NOTE(review): presumably these mirror values of the ``Type`` column of
    the ``MSG`` table -- confirm against the database schema.
    """

    TEXT = 1    # plain text message
    IMAGE = 3   # image message
    EMOJI = 47  # emoji / sticker message
class Msg:
    """Accessor for the ``MSG`` table of a SQLite message database.

    One connection and one cursor are shared by all methods; the module-level
    ``lock`` is held around every cursor use. Every public query method
    returns ``None`` when no database is open.
    """

    # Leading columns shared by the row-returning queries. Callers index rows
    # positionally, so this order must not change:
    # 0 localId, 1 TalkerId, 2 Type, 3 SubType, 4 IsSender, 5 CreateTime,
    # 6 Status, 7 StrContent, 8 StrTime (formatted local time), 9 MsgSvrID.
    _BASE_COLUMNS = (
        "localId,TalkerId,Type,SubType,IsSender,CreateTime,Status,StrContent,"
        "strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime,"
        "MsgSvrID"
    )

    def __init__(self):
        self.DB = None
        self.cursor = None
        self.open_flag = False
        self.init_database()

    def init_database(self, path=None):
        """Open the database unless one is already open.

        Args:
            path: Optional database file path. When truthy it replaces the
                module-level ``db_path`` before the file is opened.

        Nothing happens when the database is already open or when no file
        exists at ``db_path``.
        """
        global db_path
        if self.open_flag:
            return
        if path:
            db_path = path
        if not os.path.exists(db_path):
            return
        self.DB = sqlite3.connect(db_path, check_same_thread=False)
        # Create the cursor shared by every query method.
        self.cursor = self.DB.cursor()
        self.open_flag = True
        # NOTE(review): this force-releases the module lock even if another
        # thread holds it. It looks like recovery from a lock left held by an
        # earlier failure; kept for compatibility -- confirm it is still needed.
        if lock.locked():
            lock.release()

    def _query(self, sql, params=(), *, fetch_one=False):
        """Run ``sql`` on the shared cursor while holding the module lock.

        Returns the first row when ``fetch_one`` is true, else all rows.
        """
        with lock:
            self.cursor.execute(sql, params)
            if fetch_one:
                return self.cursor.fetchone()
            return self.cursor.fetchall()

    @staticmethod
    def _log_database_error():
        """Log the active exception's traceback with the recovery hint."""
        logger.error(f'{traceback.format_exc()}\n数据库损坏请删除msg文件夹重试')

    def get_messages(self, username_):
        """Return every message of conversation ``username_``, oldest first.

        Rows are the base columns followed by ``BytesExtra`` and
        ``CompressContent``.
        """
        if not self.open_flag:
            return None
        sql = f'''
            select {self._BASE_COLUMNS},BytesExtra,CompressContent
            from MSG
            where StrTalker=?
            order by CreateTime
        '''
        # The SQL already orders by CreateTime, so no re-sort is needed here.
        return self._query(sql, [username_])

    def get_messages_all(self):
        """Return every message of every conversation, oldest first.

        Rows are the base columns followed by ``BytesExtra``, ``StrTalker``,
        ``Reserved1`` and ``CompressContent``.
        """
        if not self.open_flag:
            return None
        sql = f'''
            select {self._BASE_COLUMNS},BytesExtra,StrTalker,Reserved1,CompressContent
            from MSG
            order by CreateTime
        '''
        return self._query(sql)

    def get_messages_length(self):
        """Return the number of rows in ``MSG``.

        Returns ``None`` when the database is closed or the query fails.
        """
        if not self.open_flag:
            return None
        # The previous statement placed GROUP BY before FROM, which SQLite
        # rejects, so the count could never be read.
        sql = '''
            select count(*)
            from MSG
        '''
        try:
            row = self._query(sql, fetch_one=True)
        except sqlite3.Error:
            self._log_database_error()
            return None
        return row[0] if row else None

    def get_message_by_num(self, username_, local_id):
        """Return up to 20 messages of ``username_`` with ``localId`` below
        ``local_id``, newest first.

        Database errors are logged and ``None`` is returned.
        """
        if not self.open_flag:
            return None
        sql = f'''
            select {self._BASE_COLUMNS},BytesExtra,CompressContent
            from MSG
            where StrTalker = ? and localId < ?
            order by CreateTime desc
            limit 20
        '''
        try:
            return self._query(sql, [username_, local_id])
        except sqlite3.DatabaseError:
            self._log_database_error()
            return None

    def get_messages_by_type(self, username_, type_, is_Annual_report_=False, year_='2023'):
        """Return messages of ``username_`` whose ``Type`` equals ``type_``.

        Args:
            username_: Conversation identifier matched against ``StrTalker``.
            type_: Value matched against the ``Type`` column.
            is_Annual_report_: When true, keep only messages from ``year_``.
            year_: Four-digit year string compared in local time.

        Rows are ordered by ``CreateTime`` ascending.
        """
        if not self.open_flag:
            return None
        conditions = 'StrTalker=? and Type=?'
        params = [username_, type_]
        if is_Annual_report_:
            conditions += " and strftime('%Y',CreateTime,'unixepoch','localtime') = ?"
            params.append(year_)
        sql = f'''
            select {self._BASE_COLUMNS},BytesExtra,CompressContent
            from MSG
            where {conditions}
            order by CreateTime
        '''
        return self._query(sql, params)

    def get_messages_by_keyword(self, username_, keyword, num=5, max_len=10):
        """Find short text messages containing ``keyword`` and their replies.

        Args:
            username_: Conversation identifier matched against ``StrTalker``.
            keyword: Substring looked for in ``StrContent``.
            num: Maximum number of matches returned; a random subset of this
                size is taken when more messages match.
            max_len: Only messages shorter than this many characters match.

        Returns:
            A list of ``(match, reply)`` pairs. ``match`` is
            ``(IsSender, CreateTime, StrContent.split(keyword), StrTime)`` and
            ``reply`` is ``(IsSender, CreateTime, StrContent, StrTime)`` of
            the next text message from the other side. A pair whose reply is
            missing is replaced by an all-empty placeholder.
        """
        if not self.open_flag:
            return None
        match_sql = f'''
            select {self._BASE_COLUMNS},BytesExtra
            from MSG
            where StrTalker=? and Type=1 and LENGTH(StrContent)<? and StrContent like ?
            order by CreateTime desc
        '''
        messages = self._query(match_sql, [username_, max_len, f'%{keyword}%'])
        # Compare against num, not a fixed 5: sampling more items than exist
        # raises ValueError, and a fixed threshold ignored smaller num values.
        if len(messages) > num:
            messages = random.sample(messages, num)

        # Ordering by localId makes "the next reply" well-defined.
        reply_sql = f'''
            select {self._BASE_COLUMNS}
            from MSG
            where localId > ? and StrTalker=? and Type=1 and IsSender=?
            order by localId
            limit 1
        '''
        res = []
        for matched in messages:
            local_id = matched[0]
            is_send = matched[4]
            # 1 - is_send selects the opposite sender flag.
            reply = self._query(reply_sql, [local_id, username_, 1 - is_send], fetch_one=True)
            try:
                res.append((
                    (matched[4], matched[5], matched[7].split(keyword), matched[8]),
                    (reply[4], reply[5], reply[7], reply[8])
                ))
            except TypeError:
                # Raised when there is no reply row (reply is None); the
                # whole pair is replaced by an empty placeholder.
                res.append((
                    ('', '', ['', ''], ''),
                    ('', '', '', '')
                ))
        return res

    def _count_by_period(self, period_format, username_, is_Annual_report_, year_):
        """Count messages of ``username_`` grouped by a ``strftime`` bucket.

        ``period_format`` is the strftime pattern applied to the local
        ``CreateTime``. Returns ``(bucket, count)`` rows.
        """
        conditions = 'StrTalker = ?'
        params = [period_format, username_]
        if is_Annual_report_:
            conditions += " AND strftime('%Y', CreateTime, 'unixepoch', 'localtime') = ?"
            params.append(year_)
        sql = f'''
            SELECT strftime(?,CreateTime,'unixepoch','localtime') as period,count(MsgSvrID)
            from (
                SELECT MsgSvrID, CreateTime
                FROM MSG
                WHERE {conditions}
            )
            group by period
        '''
        return self._query(sql, params)

    def get_messages_by_days(self, username_, is_Annual_report_=False, year_='2023'):
        """Return ``('YYYY-MM-DD', count)`` rows for ``username_``.

        With ``is_Annual_report_`` true only messages from ``year_`` are
        counted. Database errors propagate to the caller.
        """
        if not self.open_flag:
            return None
        return self._count_by_period('%Y-%m-%d', username_, is_Annual_report_, year_)

    def get_messages_by_month(self, username_, is_Annual_report_=False, year_='2023'):
        """Return ``('YYYY-MM', count)`` rows for ``username_``.

        With ``is_Annual_report_`` true only messages from ``year_`` are
        counted. Database errors are logged and ``None`` is returned.
        """
        if not self.open_flag:
            return None
        try:
            return self._count_by_period('%Y-%m', username_, is_Annual_report_, year_)
        except sqlite3.DatabaseError:
            self._log_database_error()
            return None

    def get_messages_by_hour(self, username_, is_Annual_report_=False, year_='2023'):
        """Return ``('HH:00', count)`` rows for ``username_``.

        With ``is_Annual_report_`` true only messages from ``year_`` are
        counted. Database errors are logged and ``None`` is returned.
        """
        if not self.open_flag:
            return None
        try:
            return self._count_by_period('%H:00', username_, is_Annual_report_, year_)
        except sqlite3.DatabaseError:
            self._log_database_error()
            return None

    def get_first_time_of_message(self, username_):
        """Return ``(StrContent, StrTime)`` of the earliest message of
        ``username_``, or ``None`` when the conversation has no messages."""
        if not self.open_flag:
            return None
        sql = '''
            select StrContent,strftime('%Y-%m-%d %H:%M:%S',CreateTime,'unixepoch','localtime') as StrTime
            from MSG
            where StrTalker=?
            order by CreateTime
            limit 1
        '''
        return self._query(sql, [username_], fetch_one=True)

    def close(self):
        """Close the connection if it is open; safe to call repeatedly."""
        # getattr: __del__ may run on an instance whose __init__ never set
        # open_flag.
        if getattr(self, 'open_flag', False):
            with lock:
                self.open_flag = False
                self.DB.close()

    def __del__(self):
        self.close()
2023-11-16 22:39:59 +08:00
if __name__ == '__main__':
    # Manual smoke test against a local database file.
    db_path = "./Msg/MSG.db"
    msg = Msg()
    # No-op when the constructor already opened the database.
    msg.init_database()
    result = msg.get_message_by_num('wxid_vtz9jk9ulzjt22', 9999999)
    print(result)
    result = msg.get_messages_by_type('wxid_vtz9jk9ulzjt22', 49)
    # get_messages_by_type returns None when the database could not be
    # opened; iterate an empty list in that case instead of raising.
    for r in result or []:
        type_ = r[2]
        sub_type = r[3]
        if type_ == 49 and sub_type == 57:
            print(r)
            # Last selected column is CompressContent.
            print(r[-1])
            break