"""Parse emoji message XML and cache the referenced images on disk."""

import os
import xml.etree.ElementTree as ET
from typing import Optional

import requests

root_path = './data/emoji/'
# Create the cache directory (and its parent) when the module is loaded.
# exist_ok avoids the race of a separate exists()-then-mkdir() sequence.
os.makedirs(root_path, exist_ok=True)

# Leading file bytes ("magic numbers") of the recognised image formats,
# mapped to the file extension used for the cached copy.
_IMAGE_FORMATS = {
    b'\xFF\xD8\xFF': 'jpeg',
    b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'png',
    b'\x47\x49\x46': 'gif',
    b'\x42\x4D': 'bmp',
    # Add the magic numbers of further image formats here.
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30


def get_image_format(header: bytes) -> Optional[str]:
    """Return the image format whose magic number prefixes *header*.

    Args:
        header: The first bytes of a file (callers in this module pass 8).

    Returns:
        One of 'jpeg', 'png', 'gif' or 'bmp', or None when the header does
        not start with any known magic number.
    """
    for magic_number, image_format in _IMAGE_FORMATS.items():
        if header.startswith(magic_number):
            return image_format
    # Unrecognised format.
    return None


def parser_xml(xml_string: str) -> dict:
    """Extract the emoji attributes from a message XML string.

    The ``emoji`` element is looked up as a direct child of the XML root.

    Args:
        xml_string: XML text whose root contains an ``emoji`` element.

    Returns:
        A dict with the keys 'width', 'height', 'cdnurl', 'thumburl' and
        'md5'. Values are the attribute strings, or None for attributes
        that are absent. 'thumburl' falls back to 'cdnurl' when it is
        missing or empty.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_string* is not
            well-formed XML.
        AttributeError: If the root has no ``emoji`` child.
    """
    root = ET.fromstring(xml_string)
    emoji = root.find('./emoji')
    cdnurl = emoji.get('cdnurl')
    return {
        'width': emoji.get('width'),
        'height': emoji.get('height'),
        'cdnurl': cdnurl,
        'thumburl': emoji.get('thumburl') or cdnurl,
        'md5': emoji.get('md5'),
    }


def download(url: str, output_dir: str, name: str, thumb: bool = False) -> str:
    """Download *url* into *output_dir* and return the written file path.

    The file is named ``name`` plus an extension chosen from the payload's
    magic number; thumbnails get a ``th_`` prefix. When the format is not
    recognised the file is written without an extension.

    Args:
        url: Address to fetch.
        output_dir: Existing directory to write into.
        name: Base file name (this module passes the emoji md5).
        thumb: Whether the payload is a thumbnail.

    Returns:
        The path of the file that was written.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    resp = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # NOTE: the HTTP status is not checked, so an error response body is
    # still written to disk (typically without an extension).
    content = resp.content
    image_format = get_image_format(content[:8])
    # Apply the thumbnail prefix in every case so that a thumbnail of an
    # unrecognised format can no longer overwrite the full-size file.
    file_name = ('th_' if thumb else '') + name
    if image_format:
        file_name += '.' + image_format
    output_path = os.path.join(output_dir, file_name)
    with open(output_path, 'wb') as f:
        f.write(content)
    return output_path


def get_emoji(xml_string, thumb=True) -> str:
    """Return the local path of the emoji described by *xml_string*.

    A previously downloaded copy under ``root_path`` is reused when one
    exists; otherwise the image is downloaded first.

    Args:
        xml_string: Message XML accepted by ``parser_xml``.
        thumb: Fetch the thumbnail (True) or the full image (False).

    Returns:
        Path of the cached or freshly downloaded file.
    """
    emoji_info = parser_xml(xml_string)
    md5 = emoji_info['md5']
    prefix = 'th_' if thumb else ''
    # Probe every extension download() can produce (including bmp) so a
    # cached file is never fetched again. Extension-less files are not
    # probed: they hold unrecognised payloads and are simply re-fetched.
    for extension in _IMAGE_FORMATS.values():
        file_path = os.path.join(root_path, prefix + md5 + '.' + extension)
        if os.path.exists(file_path):
            return file_path
    url = emoji_info['thumburl'] if thumb else emoji_info['cdnurl']
    print("下载表情包ing:", url)
    return download(url, root_path, md5, thumb)


if __name__ == '__main__':
    # NOTE(review): the sample payload looks like it was stripped from the
    # file; paste a real emoji message XML here before running, since a
    # blank string does not parse.
    xml_string = ' '
    res1 = parser_xml(xml_string)
    print(res1, res1['md5'])
    # download(res1['cdnurl'], "./data/emoji/", res1['md5'])
    # download(res1['thumburl'], "./data/emoji/", res1['md5'], True)
    print(get_emoji(xml_string, True))
    print(get_emoji(xml_string, False))