Merge pull request #303 from STDquantum/master

修复emoji大部分会bug掉的bug
This commit is contained in:
SiYuan 2024-01-12 19:18:22 +08:00 committed by GitHub
commit 4d0b16d7d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -17,11 +17,12 @@ from PyQt5.QtGui import QPixmap
import requests import requests
from app.log import log, logger from app.log import log, logger
lock = threading.Lock() lock = threading.Lock()
db_path = "./app/Database/Msg/Emotion.db" db_path = "./app/Database/Msg/Emotion.db"
root_path = './data/emoji/' root_path = "./data/emoji/"
if not os.path.exists('./data'): if not os.path.exists("./data"):
os.mkdir('./data') os.mkdir("./data")
if not os.path.exists(root_path): if not os.path.exists(root_path):
os.mkdir(root_path) os.mkdir(root_path)
@ -30,10 +31,10 @@ if not os.path.exists(root_path):
def get_image_format(header): def get_image_format(header):
# 定义图片格式的 magic numbers # 定义图片格式的 magic numbers
image_formats = { image_formats = {
b'\xFF\xD8\xFF': 'jpeg', b"\xFF\xD8\xFF": "jpeg",
b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'png', b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A": "png",
b'\x47\x49\x46': 'gif', b"\x47\x49\x46": "gif",
b'\x42\x4D': 'bmp', b"\x42\x4D": "bmp",
# 添加其他图片格式的 magic numbers # 添加其他图片格式的 magic numbers
} }
# 判断文件的图片格式 # 判断文件的图片格式
@ -47,27 +48,32 @@ def get_image_format(header):
@log @log
def parser_xml(xml_string):
    """Extract the display-relevant attributes of an emoji message.

    Parses ``xml_string`` and reads the attributes of the ``<emoji>`` element
    found directly under the root element. If the text is not well-formed XML
    (typically because a URL attribute contains raw ``&`` characters), bare
    ampersands are escaped and the parse is retried once.

    Args:
        xml_string: XML text whose root element has an ``<emoji>`` child.

    Returns:
        A dict with the keys ``width``, ``height``, ``cdnurl``, ``thumburl``
        and ``md5``. ``thumburl`` falls back to ``cdnurl`` when it is missing
        or empty; ``md5`` falls back to ``androidmd5`` and is lower-cased.
        ``width``, ``height`` and ``cdnurl`` are ``None`` when the attribute
        is absent.

    Raises:
        AssertionError: if ``xml_string`` is not a ``str``.
        xml.etree.ElementTree.ParseError: if the text is still malformed
            after bare ampersands have been escaped.
        AttributeError: if there is no ``<emoji>`` element, or if neither
            ``md5`` nor ``androidmd5`` is present.
    """
    import re  # local import: only needed on the malformed-XML fallback path

    assert isinstance(xml_string, str)
    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError:
        # Escape only ampersands that do not already start a valid XML
        # reference. A blanket replace would double-escape "&amp;" and
        # corrupt URLs that were partially escaped to begin with.
        escaped = re.sub(
            r"&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)",
            "&amp;",
            xml_string,
        )
        root = ET.fromstring(escaped)
    emoji = root.find("./emoji")
    # Accessing attributes of the 'emoji' element
    md5 = emoji.get("md5")
    androidmd5 = emoji.get("androidmd5")
    cdnurl = emoji.get("cdnurl")
    thumburl = emoji.get("thumburl")
    return {
        "width": emoji.get("width"),
        "height": emoji.get("height"),
        "cdnurl": cdnurl,
        "thumburl": thumburl if thumburl else cdnurl,
        "md5": (md5 if md5 else androidmd5).lower(),
    }
def singleton(cls): def singleton(cls):
_instance = {} _instance = {}
@ -78,6 +84,7 @@ def singleton(cls):
return inner return inner
# 一定要保证只有一个实例对象 # 一定要保证只有一个实例对象
@singleton @singleton
class Emotion: class Emotion:
@ -98,9 +105,9 @@ class Emotion:
lock.release() lock.release()
def get_emoji_url(self, md5: str, thumb: bool) -> str | bytes: def get_emoji_url(self, md5: str, thumb: bool) -> str | bytes:
'''供下载用返回可能是url可能是bytes''' """供下载用返回可能是url可能是bytes"""
if thumb: if thumb:
sql = ''' sql = """
select select
case case
when thumburl is NULL or thumburl = '' then cdnurl when thumburl is NULL or thumburl = '' then cdnurl
@ -108,13 +115,13 @@ class Emotion:
end as selected_url end as selected_url
from CustomEmotion from CustomEmotion
where md5 = ? where md5 = ?
''' """
else: else:
sql = ''' sql = """
select CDNUrl select CDNUrl
from CustomEmotion from CustomEmotion
where md5 = ? where md5 = ?
''' """
try: try:
lock.acquire(True) lock.acquire(True)
self.cursor.execute(sql, [md5]) self.cursor.execute(sql, [md5])
@ -128,14 +135,14 @@ class Emotion:
""" """
self.cursor.execute(sql, [md5]) self.cursor.execute(sql, [md5])
res = self.cursor.fetchone() res = self.cursor.fetchone()
return res[0] if res else '' return res[0] if res else ""
finally: finally:
lock.release() lock.release()
def get_emoji_URL(self, md5: str, thumb: bool): def get_emoji_URL(self, md5: str, thumb: bool):
'''只管url另外的不管''' """只管url另外的不管"""
if thumb: if thumb:
sql = ''' sql = """
select select
case case
when thumburl is NULL or thumburl = '' then cdnurl when thumburl is NULL or thumburl = '' then cdnurl
@ -143,13 +150,13 @@ class Emotion:
end as selected_url end as selected_url
from CustomEmotion from CustomEmotion
where md5 = ? where md5 = ?
''' """
else: else:
sql = ''' sql = """
select CDNUrl select CDNUrl
from CustomEmotion from CustomEmotion
where md5 = ? where md5 = ?
''' """
try: try:
lock.acquire(True) lock.acquire(True)
self.cursor.execute(sql, [md5]) self.cursor.execute(sql, [md5])
@ -158,6 +165,7 @@ class Emotion:
return "" return ""
finally: finally:
lock.release() lock.release()
def close(self): def close(self):
if self.open_flag: if self.open_flag:
try: try:
@ -178,12 +186,12 @@ def download(url, output_dir, name, thumb=False):
image_format = get_image_format(byte[:8]) image_format = get_image_format(byte[:8])
if image_format: if image_format:
if thumb: if thumb:
output_path = os.path.join(output_dir, 'th_' + name + '.' + image_format) output_path = os.path.join(output_dir, "th_" + name + "." + image_format)
else: else:
output_path = os.path.join(output_dir, name + '.' + image_format) output_path = os.path.join(output_dir, name + "." + image_format)
else: else:
output_path = os.path.join(output_dir, name) output_path = os.path.join(output_dir, name)
with open(output_path, 'wb') as f: with open(output_path, "wb") as f:
f.write(resp.content) f.write(resp.content)
return output_path return output_path
@ -195,7 +203,7 @@ def get_most_emoji(messages):
emoji_info = parser_xml(str_content) emoji_info = parser_xml(str_content)
if emoji_info is None: if emoji_info is None:
continue continue
md5 = emoji_info['md5'] md5 = emoji_info["md5"]
if not md5: if not md5:
continue continue
try: try:
@ -205,11 +213,11 @@ def get_most_emoji(messages):
md5_nums = [(num[0], key, num[1]) for key, num in dic.items()] md5_nums = [(num[0], key, num[1]) for key, num in dic.items()]
md5_nums.sort(key=lambda x: x[0], reverse=True) md5_nums.sort(key=lambda x: x[0], reverse=True)
if not md5_nums: if not md5_nums:
return '', 0 return "", 0
md5 = md5_nums[0][1] md5 = md5_nums[0][1]
num = md5_nums[0][0] num = md5_nums[0][0]
emoji_info = md5_nums[0][2] emoji_info = md5_nums[0][2]
url = emoji_info['cdnurl'] url = emoji_info["cdnurl"]
if not url or url == "": if not url or url == "":
url = Emotion().get_emoji_url(md5, False) url = Emotion().get_emoji_url(md5, False)
return url, num return url, num
@ -219,14 +227,14 @@ def get_emoji(xml_string, thumb=True, output_path=root_path) -> str:
"""供下载用""" """供下载用"""
try: try:
emoji_info = parser_xml(xml_string) emoji_info = parser_xml(xml_string)
md5 = emoji_info['md5'] md5 = emoji_info["md5"]
image_format = ['.png', '.gif', '.jpeg'] image_format = [".png", ".gif", ".jpeg"]
for f in image_format: for f in image_format:
prefix = 'th_' if thumb else '' prefix = "th_" if thumb else ""
file_path = os.path.join(output_path, prefix + md5 + f) file_path = os.path.join(output_path, prefix + md5 + f)
if os.path.exists(file_path): if os.path.exists(file_path):
return file_path return file_path
url = emoji_info['thumburl'] if thumb else emoji_info['cdnurl'] url = emoji_info["thumburl"] if thumb else emoji_info["cdnurl"]
if not url or url == "": if not url or url == "":
url = Emotion().get_emoji_url(md5, thumb) url = Emotion().get_emoji_url(md5, thumb)
if type(url) == str and url != "": if type(url) == str and url != "":
@ -237,50 +245,53 @@ def get_emoji(xml_string, thumb=True, output_path=root_path) -> str:
image_format = get_image_format(url[:8]) image_format = get_image_format(url[:8])
if image_format: if image_format:
if thumb: if thumb:
output_path = os.path.join(output_path, 'th_' + md5 + '.' + image_format) output_path = os.path.join(
output_path, "th_" + md5 + "." + image_format
)
else: else:
output_path = os.path.join(output_path, md5 + '.' + image_format) output_path = os.path.join(output_path, md5 + "." + image_format)
else: else:
output_path = os.path.join(output_path, md5) output_path = os.path.join(output_path, md5)
with open(output_path, 'wb') as f: with open(output_path, "wb") as f:
f.write(url) f.write(url)
print("表情包数据库加载", output_path) print("表情包数据库加载", output_path)
return output_path return output_path
else: else:
print("!!!未知表情包数据,信息:", xml_string, emoji_info, url) print("!!!未知表情包数据,信息:", xml_string, emoji_info, url)
output_path = os.path.join(output_path, '404.png') output_path = os.path.join(output_path, "404.png")
if not os.path.exists(output_path): if not os.path.exists(output_path):
QPixmap(':/icons/icons/404.png').save(output_path) QPixmap(":/icons/icons/404.png").save(output_path)
return output_path return output_path
except: except:
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
output_path = os.path.join(output_path, "404.png") output_path = os.path.join(output_path, "404.png")
if not os.path.exists(output_path): if not os.path.exists(output_path):
QPixmap(':/icons/icons/404.png').save(output_path) QPixmap(":/icons/icons/404.png").save(output_path)
return output_path return output_path
# get_emoji_path: build the expected local file path for the emoji described
# by xml_string, without checking that the file exists and without downloading.
# The path is output_path joined with an optional "th_" prefix (when thumb is
# truthy), the md5 returned by parser_xml, and an extension taken from
# [".png", ".gif", ".jpeg"].
# If anything raises, the traceback is logged via logger.error and the path of
# "404.png" under output_path is returned instead.
# NOTE(review): indentation is lost in this view, so it is unclear whether
# `return file_path` sits inside the loop (returning the ".png" candidate) or
# after it (returning the ".jpeg" candidate) - confirm against the real file.
def get_emoji_path(xml_string, thumb=True, output_path=root_path) -> str: def get_emoji_path(xml_string, thumb=True, output_path=root_path) -> str:
try: try:
emoji_info = parser_xml(xml_string) emoji_info = parser_xml(xml_string)
md5 = emoji_info['md5'] md5 = emoji_info["md5"]
image_format = ['.png', '.gif', '.jpeg'] image_format = [".png", ".gif", ".jpeg"]
for f in image_format: for f in image_format:
prefix = 'th_' if thumb else '' prefix = "th_" if thumb else ""
file_path = os.path.join(output_path, prefix + md5 + f) file_path = os.path.join(output_path, prefix + md5 + f)
return file_path return file_path
except: except:
logger.error(traceback.format_exc()) logger.error(traceback.format_exc())
output_path = os.path.join(output_path, "404.png") output_path = os.path.join(output_path, "404.png")
return output_path return output_path
def get_emoji_url(xml_string, thumb=True) -> str: def get_emoji_url(xml_string, thumb=True) -> str:
"""不管下载只返回url""" """不管下载只返回url"""
try: try:
emoji_info = parser_xml(xml_string) emoji_info = parser_xml(xml_string)
md5 = emoji_info['md5'] md5 = emoji_info["md5"]
url = emoji_info["thumburl" if thumb else "cdnurl"] url = emoji_info["thumburl" if thumb else "cdnurl"]
if not url or url == '': if not url or url == "":
url = Emotion().get_emoji_URL(md5=md5, thumb=thumb) url = Emotion().get_emoji_URL(md5=md5, thumb=thumb)
return url return url
except: except:
@ -289,7 +300,7 @@ def get_emoji_url(xml_string, thumb=True) -> str:
return output_path return output_path
if __name__ == '__main__': if __name__ == "__main__":
# xml_string = '<msg><emoji fromusername = "wxid_0o18ef858vnu22" tousername = "wxid_27hqbq7vx5hf22" type="2" idbuffer="media:0_0" md5="71ce49ed3ce9e57e43e07f802983bf45" len = "352588" productid="com.tencent.xin.emoticon.person.stiker_1678703862259eb01f2ef4a313" androidmd5="71ce49ed3ce9e57e43e07f802983bf45" androidlen="352588" s60v3md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v3len="352588" s60v5md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v5len="352588" cdnurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=71ce49ed3ce9e57e43e07f802983bf45&amp;filekey=30350201010421301f020201060402535a041071ce49ed3ce9e57e43e07f802983bf45020305614c040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b000720d03274c5820000010600004f50535a1ca0c950b64287022&amp;bizid=1023" designerid = "" thumburl = "http://mmbiz.qpic.cn/mmemoticon/ajNVdqHZLLDSKTMRgM8agiadpFhKz9IJ3cD5Ra2sTROibOaShdt3D4z6PfE92WkjQY/0" encrypturl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=cbaae1d847aac6389652b65562bacaa2&amp;filekey=30350201010421301f020201060402535a0410cbaae1d847aac6389652b65562bacaa20203056150040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b0008d8223274c5820000010600004f50535a17b82910b64764739&amp;bizid=1023" aeskey= "7051ab2a34442dec63434832463f45ce" externurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=960f68693454dfa64b9966ca5d70dbd3&amp;filekey=30340201010420301e020201060402535a0410960f68693454dfa64b9966ca5d70dbd3020221a0040d00000004627466730000000132&amp;hy=SZ&amp;storeid=26423dbe3000793a8720e40de0000010600004f50535a1d40c950b71be0a50&amp;bizid=1023" externmd5 = "41895664fc5a77878e2155fc96209a19" width= "240" height= "240" tpurl= "" tpauthkey= "" attachedtext= "" attachedtextcolor= "" lensid= "" emojiattr= "" linkid= "" desc= "ChEKB2RlZmF1bHQSBuWNlee6rw==" ></emoji> </msg>' # xml_string = '<msg><emoji fromusername = "wxid_0o18ef858vnu22" tousername = "wxid_27hqbq7vx5hf22" type="2" idbuffer="media:0_0" md5="71ce49ed3ce9e57e43e07f802983bf45" len = "352588" 
productid="com.tencent.xin.emoticon.person.stiker_1678703862259eb01f2ef4a313" androidmd5="71ce49ed3ce9e57e43e07f802983bf45" androidlen="352588" s60v3md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v3len="352588" s60v5md5 = "71ce49ed3ce9e57e43e07f802983bf45" s60v5len="352588" cdnurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=71ce49ed3ce9e57e43e07f802983bf45&amp;filekey=30350201010421301f020201060402535a041071ce49ed3ce9e57e43e07f802983bf45020305614c040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b000720d03274c5820000010600004f50535a1ca0c950b64287022&amp;bizid=1023" designerid = "" thumburl = "http://mmbiz.qpic.cn/mmemoticon/ajNVdqHZLLDSKTMRgM8agiadpFhKz9IJ3cD5Ra2sTROibOaShdt3D4z6PfE92WkjQY/0" encrypturl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=cbaae1d847aac6389652b65562bacaa2&amp;filekey=30350201010421301f020201060402535a0410cbaae1d847aac6389652b65562bacaa20203056150040d00000004627466730000000132&amp;hy=SZ&amp;storeid=263ffa00b0008d8223274c5820000010600004f50535a17b82910b64764739&amp;bizid=1023" aeskey= "7051ab2a34442dec63434832463f45ce" externurl = "http://wxapp.tc.qq.com/262/20304/stodownload?m=960f68693454dfa64b9966ca5d70dbd3&amp;filekey=30340201010420301e020201060402535a0410960f68693454dfa64b9966ca5d70dbd3020221a0040d00000004627466730000000132&amp;hy=SZ&amp;storeid=26423dbe3000793a8720e40de0000010600004f50535a1d40c950b71be0a50&amp;bizid=1023" externmd5 = "41895664fc5a77878e2155fc96209a19" width= "240" height= "240" tpurl= "" tpauthkey= "" attachedtext= "" attachedtextcolor= "" lensid= "" emojiattr= "" linkid= "" desc= "ChEKB2RlZmF1bHQSBuWNlee6rw==" ></emoji> </msg>'
# res1 = parser_xml(xml_string) # res1 = parser_xml(xml_string)
# print(res1, res1['md5']) # print(res1, res1['md5'])