commit 808f1f61000c418a7f63a79946cdd269151e2cd8 Author: shikong <919411476@qq.com> Date: Wed May 31 10:38:42 2023 +0800 驾考宝典题库 计划整改ing diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d62eb2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,241 @@ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/car.iml b/.idea/car.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/car.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/git_toolbox_prj.xml b/.idea/git_toolbox_prj.xml new file mode 100644 index 0000000..02b915b --- /dev/null +++ b/.idea/git_toolbox_prj.xml @@ -0,0 +1,15 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..06bb031 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d0b5817 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..7a4b8a4 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..70595ab --- /dev/null +++ b/backend/main.py @@ -0,0 +1,72 @@ +import distutils +import logging + +from flask import Flask, current_app, request +from flask_sqlalchemy import SQLAlchemy + +from utils.model import model_list +from utils.common import strtobool +from orm.jiakaobaodian import JiaKaoBaoDian +from response.response import response + +app = Flask(__name__) +formatter = logging.Formatter("[%(asctime)s][%(filename)s:%(lineno)d][%(levelname)s][%(thread)d] - %(message)s") + +# 输出到文件 +# handler = TimedRotatingFileHandler("logs.log", +# when="D", +# interval=1, +# backupCount=0, +# encoding="UTF-8", +# delay=False, +# utc=True,) + +handler = logging.StreamHandler() +handler.setFormatter(formatter) +app.logger.handlers[0].setFormatter(formatter) +app.logger.setLevel(logging.DEBUG) +# 设置数据库连接地址 +app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:12341234@10.10.10.200:3306/car' +# 是否追踪数据库修改(开启后会触发一些钩子函数) 一般不开启, 会影响性能 +app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False +# 是否显示底层执行的SQL语句 +app.config['SQLALCHEMY_ECHO'] = True +db = SQLAlchemy(app) + + +@app.get("/health") +def health(): + current_app.logger.info("666") + return response("OK") + + +@app.get("/car/list") +def car_all(): + args = request.args + page = int(args.get("page", 1)) + size = int(args.get("size", 10)) + keyword = str(args.get("keyword", "")) + order_by_wrong_rate = strtobool(args.get("sortByWrongRate", 'True')) + current_app.logger.info("page => %d, size => %d, keyword => %s", page, size, keyword) + + query = db.session.query(JiaKaoBaoDian) + if len(keyword) > 0: + query = query.filter(JiaKaoBaoDian.question.like("%%%s%%" % keyword)) + if order_by_wrong_rate: + query = query.order_by(JiaKaoBaoDian.wrong_rate.desc()) + + total = query.count() + + offset = (page - 1) * size + data = query.offset(offset).limit(size).all() + current_app.logger.info("data num %d", len(data)) + return response({ + "data": model_list(data), + "page": page, + "size": size, + "total": total + }) + + +if __name__ == '__main__': + app.run(host="0.0.0.0", port=5000, debug=True) diff --git a/backend/orm/__init__.py b/backend/orm/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/backend/orm/__init__.py @@ -0,0 +1 @@ + diff --git a/backend/orm/jiakaobaodian.py b/backend/orm/jiakaobaodian.py new file mode 100644 index 0000000..4ef1036 --- /dev/null +++ b/backend/orm/jiakaobaodian.py @@ -0,0 +1,34 @@ +from sqlalchemy import Column,Integer, String, Double +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() + + +class JiaKaoBaoDian(Base): + __tablename__ = 'jiaKaoBaoDian' + + id = Column(Integer, primary_key=True) + answer = Column(String) + media_content = Column(String, name="mediaContent") + question = Column(String) + question_id = Column(Integer, name="questionId") + option_a = Column(String, name="optionA") + option_b = Column(String, name="optionB") + option_c = Column(String, name="optionC") + option_d = Column(String, name="optionD") + option_e = Column(String, name="optionE") + option_f = Column(String, name="optionF") + option_g = Column(String, name="optionG") + option_h = Column(String, name="optionH") + keywords = Column(String) + illiteracy_explain = Column(String, name="illiteracyExplain") + concise_explain = Column(String, name="conciseExplain") + explain = Column(String) + knack = Column(String) + wrong_rate = Column(Double, name="wrongRate") + + def to_json(self): + _dict = self.__dict__ + if "_sa_instance_state" in _dict: + del _dict["_sa_instance_state"] + return _dict diff --git a/backend/orm/main.py b/backend/orm/main.py new file mode 100644 index 0000000..bf84260 --- /dev/null +++ b/backend/orm/main.py @@ -0,0 +1,22 @@ +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from jiakaobaodian import JiaKaoBaoDian + +db = create_engine('mysql+pymysql://', connect_args={ + 'user': "root", + 'password': "12341234", + 'host': '10.10.10.200', + 'port': 3306, + 'database': 'car', + 'charset': 'utf8mb4' +}) +if __name__ == '__main__': + _session = sessionmaker(db) + session = _session() + + data = session.query(JiaKaoBaoDian).filter(JiaKaoBaoDian.question.like("%驾驶%")).all() + for item in data: + print(item.to_json()) + + session.close() diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..817013f Binary files /dev/null and b/backend/requirements.txt differ diff --git a/backend/response/__init__.py b/backend/response/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/response/response.py b/backend/response/response.py new file mode 100644 index 0000000..21f84a9 --- /dev/null +++ b/backend/response/response.py @@ -0,0 +1,9 @@ +from flask import jsonify + + +def response(data: any, code=200, msg="OK"): + return jsonify({ + "code": code, + "data": data, + "msg": msg, + }) diff --git a/backend/utils/__init__.py b/backend/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/utils/common.py b/backend/utils/common.py new file mode 100644 index 0000000..c76aa0f --- /dev/null +++ b/backend/utils/common.py @@ -0,0 +1,13 @@ +def strtobool(val): + """Convert a string representation of truth to true (1) or false (0). + True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values + are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if + 'val' is anything else. + """ + val = val.lower() + if val in ('y', 'yes', 't', 'true', 'on', '1'): + return 1 + elif val in ('n', 'no', 'f', 'false', 'off', '0'): + return 0 + else: + raise ValueError("invalid truth value %r" % (val,)) diff --git a/backend/utils/model.py b/backend/utils/model.py new file mode 100644 index 0000000..8a0be1f --- /dev/null +++ b/backend/utils/model.py @@ -0,0 +1,9 @@ +def model_list(result): + _list = [] + for row in result: + _dict = {} + for k, v in row.__dict__.items(): + if not k.startswith('_sa_instance_state'): + _dict[k] = v + _list.append(_dict) + return _list diff --git a/jiakaobaodian.sql b/jiakaobaodian.sql new file mode 100644 index 0000000..b92beee --- /dev/null +++ b/jiakaobaodian.sql @@ -0,0 +1,24 @@ +CREATE TABLE `car`.`jiaKaoBaoDian` +( + `answer` int NOT NULL COMMENT '答案 单选题时 (2 ** (3+n)) n 为 选项下标 A: 0 B: 1 以此类推 A对应二进制 0001 0000 B 对应 0010 0000 C对应 0100 0000 D对应 1000 0000', + `id` int NOT NULL COMMENT 'id', + `mediaContent` varchar(500) NULL COMMENT '媒体资源', + `question` varchar(500) NOT NULL COMMENT '题目', + `questionId` int NOT NULL COMMENT '题目id', + `optionA` varchar(255) NULL COMMENT 'A选项', + `optionB` varchar(255) NULL COMMENT 'B选项', + `optionC` varchar(255) NULL COMMENT 'C选项', + `optionD` varchar(255) NULL COMMENT 'D选项', + `optionE` varchar(255) NULL COMMENT 'E选项', + `optionF` varchar(255) NULL COMMENT 'F选项', + `optionG` varchar(255) NULL COMMENT 'G选项', + `optionH` varchar(255) NULL COMMENT 'H选项', + `keywords` varchar(255) NULL COMMENT '关键字', + `illiteracyExplain` varchar(500) NULL COMMENT '相关规定', + `conciseExplain` varchar(255) NULL COMMENT '简单解析', + `explain` varchar(500) NULL COMMENT '解析', + `knack` varchar(500) NULL COMMENT '窍门', + `wrongRate` decimal(19,16) DEFAULT 0 COMMENT '错误率', + PRIMARY KEY (`id`), + UNIQUE INDEX `idx_question_id` (`questionId`) +); diff --git a/main_old.py b/main_old.py new file mode 100644 index 0000000..4ac1ebb --- /dev/null +++ b/main_old.py @@ -0,0 +1,70 @@ +import requests +import pymysql +import json +import time +import re +import os + +url = "https://mnks.jxedt.com/get_question?r=0.46514869467754005&index={}" +db = pymysql.connect(host="10.10.10.100",port=3306, user="root", password="12341234", database="gofiber") +cursor = db.cursor() + +TABLE = "car" + +def loads_str(data_str): + try: + result = json.loads(data_str) + return result + except Exception as e: + error_index = re.findall(r"char (\d+)\)", str(e)) + if error_index: + error_str = data_str[int(error_index[0])] + data_str = data_str.replace(error_str, "") + # 该处将处理结果继续递归处理 + return loads_str(data_str) + +for i in range(1, 10000): + if i % 1000 == 0: + time.sleep(3) + + # 请求链接 + response = requests.get(url.format(i)) + # 加载数据,过滤escape + question_data = loads_str(response.text) + + try: + if question_data["question"] != "": + sql = "INSERT INTO %s (`id`, `question`, `options`, `image`, `answer`, `bestanswer`, `type`, `a`, `b`, `c`, `d`, `e`, `f`, `g`, `cid`) " % TABLE + sql += "VALUES (%d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d );" % ( + question_data["id"], + question_data["question"], + question_data["options"], + question_data["imageurl"], + question_data["ta"], + question_data["bestanswer"], + question_data["type"], + question_data["a"], + question_data["b"], + question_data["c"], + question_data["d"], + question_data["e"], + question_data["f"], + question_data["g"], + question_data["cid"], + ) + + try: + # 执行sql语句 + cursor.execute(sql) + # 提交到数据库执行 + db.commit() + except: + continue + else: + continue + except: + print("index %d 存在错误", i) + continue + +# 查询所有问题,解析,正确答案 +# SELECT id, question, IF(answer = 1,a, IF(answer = 2,b,IF(answer = 3,c,d))) as answer,bestanswer,answer as answer_id from car WHERE a != ""; \ No newline at end of file diff --git a/spider.py b/spider.py new file mode 100644 index 0000000..d0653fe --- /dev/null +++ b/spider.py @@ -0,0 +1,147 @@ +import random +import time + +import execjs +import pymysql +import requests + +url = { + "sequence": "https://api2.jiakaobaodian.com/api/open/exercise/sequence.htm", + "question": "https://api2.jiakaobaodian.com/api/open/question/question-list.htm", +} + +cityCode = 440500 + +TABLE = "jiaKaoBaoDian" + + +def calc_r(): + ctx = execjs.compile(""" +s = function (t) { + var a, i, o = Math.abs(parseInt((new Date).getTime() * Math.random() * 1e4)).toString(), n = 0; + for (a = 0; a < o.length; a++) + n += parseInt(o[a]); + return i = function(t) { + return function(a, i) { + return 0 >= i - "" + a.length ? a : (t[i] || (t[i] = Array(i + 1).join(0))) + a + } + }([]), + n += o.length, + n = i(n, 3 - n.toString().length), + t.toString() + o + n +} +function get_r() { + return s(1) +} +""") + return ctx.call("get_r") + + +def get_sequence(): + _r = calc_r() + + resp = requests.get(url["sequence"], params={ + "_r": _r, + "carStyle": "xiaoche", + "carType": "car", + "cityCode": cityCode, + "course": "kemu1", + "kemuStyyle": "kemu1", + "_": 0.08272777821960653 + }) + + if resp.status_code != 200: + print(resp.text) + raise BaseException("获取 题库 信息失败") + + question_list = [] + data = resp.json()["data"] + + question_list = question_list + data + print("获取 题库信息 共计 %d 道题" % len(question_list)) + + return question_list + + +def get_question(questions): + retry_time = 5 + for i in range(retry_time): + try: + _r = calc_r() + params = { + "_r": _r, + "carType": "car", + "course": "kemu1", + "_": 0.06614633144515003, + "questionIds": str(questions).lstrip("[").rstrip("]").replace(" ", "") + } + + resp = requests.get(url["question"], params) + if resp.status_code != 200: + print(resp.text) + raise BaseException("获取 题目 信息失败") + + data = resp.json()["data"] + print("获取 %s 道题目 信息成功" % len(data)) + + return data + except BaseException as e: + if i == (retry_time - 1): + print("%s 重试 %d 次" % (questions, (i + 1))) + raise e + else: + continue + + +if __name__ == '__main__': + db = pymysql.connect(host="10.10.10.200", port=3306, user="root", password="12341234", database="car") + cursor = db.cursor() + + try: + _question_list = get_sequence() + _question_num_of_part = 20 + _counter = 0 + _throttling = 10 + while len(_question_list) != 0: + _part_questions = _question_list[:_question_num_of_part] + _question_list = _question_list[_question_num_of_part:] + + _data = get_question(_part_questions) + _counter += 1 + if _counter == _throttling: + time.sleep(5 + (random.random() * 10)) + + for _datum in _data: + sql = "INSERT INTO %s (`answer`,`id`,`mediaContent`,`question`,`questionId`,`optionA`,`optionB`,`optionC`,`optionD`,`optionE`,`optionF`,`optionG`,`optionH`,`keywords`,`illiteracyExplain`,`conciseExplain`,`explain`,`knack`,`wrongRate`)" % TABLE + sql += "VALUES (%d, %d, '%s', '%s', %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', " \ + "'%s', '%s', '%s', '%s', '%s', %f);" % ( + _datum["answer"], + _datum["id"], + _datum["mediaContent"], + _datum["question"], + _datum["questionId"], + _datum["optionA"], + _datum["optionB"], + _datum["optionC"], + _datum["optionD"], + _datum["optionE"], + _datum["optionF"], + _datum["optionG"], + _datum["optionH"], + _datum["keywords"], + _datum["illiteracyExplain"], + _datum["conciseExplain"], + _datum["explain"], + _datum["knackDetail"], + _datum["wrongRate"], + ) + try: + # 执行sql语句 + cursor.execute(sql) + # 提交到数据库执行 + db.commit() + except Exception as e: + continue + finally: + cursor.close() + db.close() diff --git a/test.js b/test.js new file mode 100644 index 0000000..227f5cf --- /dev/null +++ b/test.js @@ -0,0 +1,17 @@ +s = function (t) { + var a, i, o = Math.abs(parseInt((new Date).getTime() * Math.random() * 1e4)).toString(), n = 0; + for (a = 0; a < o.length; a++) + n += parseInt(o[a]); + return i = function (t) { + return function (a, i) { + return 0 >= i - "" + a.length ? a : (t[i] || (t[i] = Array(i + 1).join(0))) + a + } + }([]), + n += o.length, + n = i(n, 3 - n.toString().length), + t.toString() + o + n +} + +function get_r() { + return s(1) +} diff --git a/test.py b/test.py new file mode 100644 index 0000000..dada95c --- /dev/null +++ b/test.py @@ -0,0 +1,38 @@ +import random +import time +import execjs + +if __name__ == '__main__': + a = [] + for x in range(100): + a.append(x) + + print(a) + + x = 10 + while len(a) != 0: + print(str(a[:x]).lstrip("[").rstrip("]")) + a = a[x:] + + print("%d %f" % (1.1, 1.1)) + + ctx = execjs.compile(""" +s = function (t) { + var a, i, o = Math.abs(parseInt((new Date).getTime() * Math.random() * 1e4)).toString(), n = 0; + for (a = 0; a < o.length; a++) + n += parseInt(o[a]); + return i = function(t) { + return function(a, i) { + return 0 >= i - "" + a.length ? a : (t[i] || (t[i] = Array(i + 1).join(0))) + a + } + }([]), + n += o.length, + n = i(n, 3 - n.toString().length), + t.toString() + o + n +} +function get_r() { + return s(1) +} +""") + print(ctx.call("get_r")) +