mirror of
https://github.com/LC044/WeChatMsg
synced 2024-11-15 06:11:19 +08:00
142 lines
4.4 KiB
Python
142 lines
4.4 KiB
Python
|
import difflib
|
||
|
import os
|
||
|
import typing
|
||
|
import urllib.request
|
||
|
|
||
|
import simplejson as json
|
||
|
|
||
|
|
||
|
class FuzzyDict(dict):
    """Dictionary whose lookups fall back to fuzzy key matching.

    An exact key hit is returned directly. Otherwise every key is compared
    with the requested one using ``difflib.SequenceMatcher`` and the best
    scoring key is used, provided its ratio reaches ``cutoff``.
    """

    def __init__(self, cutoff: float = 0.6):
        """Create an empty FuzzyDict.

        :param cutoff: match ratio below which a key is not considered a
            match; a float between 0 (no match) and 1 (perfect match).
        """
        super().__init__()
        self.cutoff = cutoff

    # Thin wrappers around the plain ``dict`` behaviour (the overridden
    # ``__contains__`` / ``__getitem__`` below are fuzzy). They are real
    # methods rather than lambdas stored on the instance: closures kept in
    # the instance ``__dict__`` are carried over by ``copy.copy`` and keep
    # pointing at the *original* object, and they also make the instance
    # unpicklable.
    def _dict_contains(self, key: typing.Any) -> bool:
        """Exact (non-fuzzy) membership test."""
        return super().__contains__(key)

    def _dict_getitem(self, key: typing.Any) -> typing.Any:
        """Exact (non-fuzzy) item access."""
        return super().__getitem__(key)

    def _search(self, lookfor: typing.Any, stop_on_first: bool = False):
        """Find the entry whose key best matches *lookfor*.

        :param lookfor: key to search for.
        :param stop_on_first: when true, stop scanning at the first key whose
            ratio reaches the cutoff instead of looking for the best one.
        :return: tuple ``(matched, key, value, ratio)``. ``matched`` tells
            whether ``ratio`` reached ``self.cutoff``; ``key`` and ``value``
            stay ``None`` when no key scored above zero.
        """
        # An exact hit needs no fuzzy matching at all.
        if self._dict_contains(lookfor):
            return True, lookfor, self._dict_getitem(lookfor), 1

        # Set up the fuzzy matching tool.
        ratio_calc = difflib.SequenceMatcher()
        ratio_calc.set_seq1(lookfor)

        best_ratio = 0
        best_match = None
        best_key = None
        for key in self:
            # Keys that are not sequences cannot be compared: skip them.
            try:
                ratio_calc.set_seq2(key)
            except TypeError:
                continue

            # A TypeError here means *lookfor* itself cannot be fuzzy
            # matched; since it was not an exact hit either, it is
            # definitely not in the dictionary, so stop scanning.
            try:
                ratio = ratio_calc.ratio()
            except TypeError:
                break

            # Remember the best candidate seen so far.
            if ratio > best_ratio:
                best_ratio = ratio
                best_key = key
                best_match = self._dict_getitem(key)

            if stop_on_first and ratio >= self.cutoff:
                break

        return best_ratio >= self.cutoff, best_key, best_match, best_ratio

    def __contains__(self, item: typing.Any):
        """Return True when *item* matches a key exactly or fuzzily."""
        return bool(self._search(item, True)[0])

    def __getitem__(self, lookfor: typing.Any):
        """Return the value stored under the key best matching *lookfor*.

        :raises KeyError: when no key reaches the cutoff; the message
            reports the closest key and its ratio.
        """
        matched, key, item, ratio = self._search(lookfor)

        if not matched:
            raise KeyError(
                "'%s'. closest match: '%s' with ratio %.3f"
                % (str(lookfor), str(key), ratio)
            )

        return item
|
||
|
|
||
|
|
||
|
# Directory holding this module; the JSON data files are opened relative to it.
__HERE = os.path.abspath(os.path.dirname(__file__))

# Fuzzy lookup table populated from map_filename.json.
FILENAMES: FuzzyDict = FuzzyDict()
with open(os.path.join(__HERE, "map_filename.json"), "r", encoding="utf8") as f:
    FILENAMES.update(json.load(f).items())

# Fuzzy lookup table populated from city_coordinates.json.
COORDINATES: FuzzyDict = FuzzyDict()
with open(os.path.join(__HERE, "city_coordinates.json"), "r", encoding="utf8") as f:
    COORDINATES.update(json.load(f).items())

# Additional asset tables added by register_url(), keyed by JS file prefix.
EXTRA = {}
|
||
|
|
||
|
|
||
|
def register_url(asset_url: str):
    """Register the assets described by a remote ``registry.json``.

    Downloads ``<asset_url>/registry.json`` and stores in ``EXTRA``, under
    the resulting JavaScript file prefix, a mapping from every name listed
    in the registry to ``[file_name, "js"]``.

    :param asset_url: base URL of the asset host; a falsy value is a no-op.
    :raises KeyError: if the registry lacks ``PINYIN_MAP``, ``FILE_MAP`` or
        ``JS_FOLDER``, or a pinyin name has no entry in ``FILE_MAP``.
        Download and JSON decoding errors propagate unchanged.
    """
    if not asset_url:
        return

    registry = asset_url + "/registry.json"
    # NOTE(review): urlopen accepts every scheme urllib supports, so callers
    # should only pass trusted URLs.
    # The context manager closes the response even if reading fails.
    with urllib.request.urlopen(registry) as response:
        contents = json.loads(response.read())

    files = {}
    pinyin_names = set()
    for name, pinyin in contents["PINYIN_MAP"].items():
        files[name] = [contents["FILE_MAP"][pinyin], "js"]
        pinyin_names.add(pinyin)

    # FILE_MAP keys that are not pinyin names are registered under their
    # own key (English names).
    for key, file_name in contents["FILE_MAP"].items():
        if key not in pinyin_names:
            files[key] = [file_name, "js"]

    js_folder_name = contents["JS_FOLDER"]
    if js_folder_name == "/":
        js_file_prefix = f"{asset_url}/"
    else:
        js_file_prefix = f"{asset_url}/{js_folder_name}/"
    EXTRA[js_file_prefix] = files
|
||
|
|
||
|
|
||
|
def register_files(asset_files: dict):
    """Merge *asset_files* into the module-level ``FILENAMES`` table.

    :param asset_files: mapping of entries to add; a falsy value (``None``
        or an empty mapping) leaves ``FILENAMES`` untouched.
    """
    if not asset_files:
        return
    FILENAMES.update(asset_files)
|
||
|
|
||
|
|
||
|
def register_coords(coords: dict):
    """Merge *coords* into the module-level ``COORDINATES`` table.

    :param coords: mapping of entries to add; a falsy value (``None`` or an
        empty mapping) leaves ``COORDINATES`` untouched.
    """
    if not coords:
        return
    COORDINATES.update(coords)
|