驾考宝典题库 计划整改ing

This commit is contained in:
shikong 2023-05-31 10:38:42 +08:00
commit 808f1f6100
23 changed files with 761 additions and 0 deletions

241
.gitignore vendored Normal file
View File

@ -0,0 +1,241 @@
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

9
.idea/car.iml Normal file
View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

15
.idea/git_toolbox_prj.xml Normal file
View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GitToolBoxProjectSettings">
<option name="commitMessageIssueKeyValidationOverride">
<BoolValueOverride>
<option name="enabled" value="true" />
</BoolValueOverride>
</option>
<option name="commitMessageValidationEnabledOverride">
<BoolValueOverride>
<option name="enabled" value="true" />
</BoolValueOverride>
</option>
</component>
</project>

View File

@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N802" />
</list>
</option>
</inspection_tool>
</profile>
</component>

6
.idea/misc.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_20" project-jdk-name="Python 3.10" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/car.iml" filepath="$PROJECT_DIR$/.idea/car.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

72
backend/main.py Normal file
View File

@ -0,0 +1,72 @@
import distutils
import logging
from flask import Flask, current_app, request
from flask_sqlalchemy import SQLAlchemy
from utils.model import model_list
from utils.common import strtobool
from orm.jiakaobaodian import JiaKaoBaoDian
from response.response import response
app = Flask(__name__)

# Log line layout: timestamp, source file:line, level, thread id, message.
formatter = logging.Formatter("[%(asctime)s][%(filename)s:%(lineno)d][%(levelname)s][%(thread)d] - %(message)s")
# Alternative: write to a rotating log file
# handler = TimedRotatingFileHandler("logs.log",
#                                    when="D",
#                                    interval=1,
#                                    backupCount=0,
#                                    encoding="UTF-8",
#                                    delay=False,
#                                    utc=True,)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
if app.logger.handlers:
    # Flask installed its default handler: just re-format it.
    app.logger.handlers[0].setFormatter(formatter)
else:
    # Flask adds no default handler when logging is already configured
    # further up the hierarchy; indexing handlers[0] would raise IndexError,
    # so attach our own stream handler instead.
    app.logger.addHandler(handler)
app.logger.setLevel(logging.DEBUG)

# Database connection URI.
# NOTE(review): credentials are hard-coded in source control; move them to
# environment/secret configuration before deploying.
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:12341234@10.10.10.200:3306/car'
# Track object modifications (fires extra hooks); normally left off because
# it costs performance.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# Echo the SQL statements that are executed.
app.config['SQLALCHEMY_ECHO'] = True

db = SQLAlchemy(app)
@app.get("/health")
def health():
    """Health-check endpoint: logs a marker line and answers with "OK"."""
    logger = current_app.logger
    logger.info("666")
    return response("OK")
@app.get("/car/list")
def car_all():
    """List questions with paging, optional keyword search and sorting.

    Query parameters:
        page: 1-based page number (default 1).
        size: page size (default 10).
        keyword: optional substring to look for in the question text.
        sortByWrongRate: truthy string (default 'True') to sort by wrong
            rate, highest first.

    Returns:
        The standard response envelope whose data holds ``data`` (rows),
        ``page``, ``size`` and ``total``. Invalid parameters produce an
        HTTP 400 with code 400 in the envelope.
    """
    args = request.args
    try:
        page = int(args.get("page", 1))
        size = int(args.get("size", 10))
        order_by_wrong_rate = strtobool(args.get("sortByWrongRate", 'True'))
    except ValueError:
        # int() and strtobool() both raise ValueError on malformed input;
        # report it to the client instead of failing with a 500.
        return response(None, code=400, msg="invalid query parameter"), 400
    if page < 1 or size < 1:
        # A non-positive page/size would yield a negative OFFSET/LIMIT.
        return response(None, code=400, msg="page and size must be >= 1"), 400
    keyword = str(args.get("keyword", ""))

    current_app.logger.info("page => %d, size => %d, keyword => %s", page, size, keyword)

    query = db.session.query(JiaKaoBaoDian)
    if len(keyword) > 0:
        # autoescape makes '%' and '_' in the keyword match literally
        # instead of acting as LIKE wildcards.
        query = query.filter(JiaKaoBaoDian.question.contains(keyword, autoescape=True))

    total = query.count()

    if order_by_wrong_rate:
        query = query.order_by(JiaKaoBaoDian.wrong_rate.desc())
    # Primary-key tie-breaker keeps pages stable between requests.
    query = query.order_by(JiaKaoBaoDian.id)

    offset = (page - 1) * size
    data = query.offset(offset).limit(size).all()
    current_app.logger.info("data num %d", len(data))
    return response({
        "data": model_list(data),
        "page": page,
        "size": size,
        "total": total
    })
if __name__ == '__main__':
    import os

    # The server listens on every interface, so the interactive debugger must
    # not be enabled unconditionally: it allows arbitrary code execution.
    # Opt in explicitly with FLASK_DEBUG=1 during local development.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes", "on")
    app.run(host="0.0.0.0", port=5000, debug=debug)

1
backend/orm/__init__.py Normal file
View File

@ -0,0 +1 @@

View File

@ -0,0 +1,34 @@
from sqlalchemy import Column,Integer, String, Double
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()


class JiaKaoBaoDian(Base):
    """ORM mapping for the ``jiaKaoBaoDian`` question table.

    Attribute names are snake_case; the ``name=`` argument carries the
    camelCase column name used in the database.
    """
    __tablename__ = 'jiaKaoBaoDian'

    id = Column(Integer, primary_key=True)
    # NOTE(review): the DDL in jiakaobaodian.sql declares `answer` as int;
    # confirm whether String is intended here.
    answer = Column(String)
    media_content = Column(String, name="mediaContent")
    question = Column(String)
    question_id = Column(Integer, name="questionId")
    option_a = Column(String, name="optionA")
    option_b = Column(String, name="optionB")
    option_c = Column(String, name="optionC")
    option_d = Column(String, name="optionD")
    option_e = Column(String, name="optionE")
    option_f = Column(String, name="optionF")
    option_g = Column(String, name="optionG")
    option_h = Column(String, name="optionH")
    keywords = Column(String)
    illiteracy_explain = Column(String, name="illiteracyExplain")
    concise_explain = Column(String, name="conciseExplain")
    explain = Column(String)
    knack = Column(String)
    wrong_rate = Column(Double, name="wrongRate")

    def to_json(self):
        """Return the instance attributes as a plain dict.

        A new dict is built so that SQLAlchemy's ``_sa_instance_state``
        bookkeeping entry is left out of the result without being deleted
        from the live instance.
        """
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != "_sa_instance_state"
        }

22
backend/orm/main.py Normal file
View File

@ -0,0 +1,22 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from jiakaobaodian import JiaKaoBaoDian
# Engine for the `car` database; connection details are handed to the driver
# through connect_args.
# NOTE(review): credentials are hard-coded; move them to configuration.
db = create_engine('mysql+pymysql://', connect_args={
    'user': "root",
    'password': "12341234",
    'host': '10.10.10.200',
    'port': 3306,
    'database': 'car',
    'charset': 'utf8mb4'
})

if __name__ == '__main__':
    # Manual smoke test: print every question whose text contains "驾驶".
    _session = sessionmaker(db)
    session = _session()
    try:
        data = session.query(JiaKaoBaoDian).filter(JiaKaoBaoDian.question.like("%驾驶%")).all()
        for item in data:
            print(item.to_json())
    finally:
        # Release the connection even when the query or printing fails.
        session.close()

BIN
backend/requirements.txt Normal file

Binary file not shown.

View File

View File

@ -0,0 +1,9 @@
from flask import jsonify
def response(data: object, code=200, msg="OK"):
    """Wrap a payload in the JSON envelope ``{"code", "data", "msg"}``.

    Args:
        data: payload placed under ``data``; passed to ``jsonify`` as-is.
        code: application status code placed under ``code`` (default 200).
        msg: message placed under ``msg`` (default "OK").

    Returns:
        The value returned by ``flask.jsonify`` for the envelope.
    """
    # `any` is the builtin function, not a type; `object` expresses
    # "any value" without requiring an extra import.
    return jsonify({
        "code": code,
        "data": data,
        "msg": msg,
    })

View File

13
backend/utils/common.py Normal file
View File

@ -0,0 +1,13 @@
def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0' (case-insensitive).
    Non-string input is converted with ``str()`` first, so ``True``/``1``
    and ``False``/``0`` are accepted as well.

    Raises:
        ValueError: if 'val' is anything else.
    """
    # str() keeps the documented contract for non-string input: such values
    # are either recognised or rejected with ValueError, never AttributeError.
    val = str(val).lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        raise ValueError("invalid truth value %r" % (val,))

9
backend/utils/model.py Normal file
View File

@ -0,0 +1,9 @@
def model_list(result):
    """Turn an iterable of model instances into a list of plain dicts.

    Each dict holds the instance's ``__dict__`` entries, minus any key that
    starts with ``_sa_instance_state``.
    """
    return [
        {
            attr: value
            for attr, value in row.__dict__.items()
            if not attr.startswith('_sa_instance_state')
        }
        for row in result
    ]

24
jiakaobaodian.sql Normal file
View File

@ -0,0 +1,24 @@
-- Question bank table filled by spider.py and read by the backend ORM.
-- The answer column is a bit mask: option n (A = 0, B = 1, ...) is 2 ** (4+n),
-- which matches the bit patterns listed in the column comment.
CREATE TABLE `car`.`jiaKaoBaoDian`
(
    `answer`            int            NOT NULL COMMENT '答案 单选题时 (2 ** (4+n)) n 为 选项下标 A: 0 B: 1 以此类推 A对应二进制 0001 0000 B 对应 0010 0000 C对应 0100 0000 D对应 1000 0000',
    `id`                int            NOT NULL COMMENT 'id',
    `mediaContent`      varchar(500)   NULL COMMENT '媒体资源',
    `question`          varchar(500)   NOT NULL COMMENT '题目',
    `questionId`        int            NOT NULL COMMENT '题目id',
    `optionA`           varchar(255)   NULL COMMENT 'A选项',
    `optionB`           varchar(255)   NULL COMMENT 'B选项',
    `optionC`           varchar(255)   NULL COMMENT 'C选项',
    `optionD`           varchar(255)   NULL COMMENT 'D选项',
    `optionE`           varchar(255)   NULL COMMENT 'E选项',
    `optionF`           varchar(255)   NULL COMMENT 'F选项',
    `optionG`           varchar(255)   NULL COMMENT 'G选项',
    `optionH`           varchar(255)   NULL COMMENT 'H选项',
    `keywords`          varchar(255)   NULL COMMENT '关键字',
    `illiteracyExplain` varchar(500)   NULL COMMENT '相关规定',
    `conciseExplain`    varchar(255)   NULL COMMENT '简单解析',
    `explain`           varchar(500)   NULL COMMENT '解析',
    `knack`             varchar(500)   NULL COMMENT '窍门',
    `wrongRate`         decimal(19,16) DEFAULT 0 COMMENT '错误率',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `idx_question_id` (`questionId`)
)
    -- Explicit charset so the Chinese text does not depend on the server
    -- default; the ORM script connects with charset utf8mb4 as well.
    DEFAULT CHARSET = utf8mb4;

70
main_old.py Normal file
View File

@ -0,0 +1,70 @@
import requests
import pymysql
import json
import time
import re
import os
# Destination table for the scraped questions.
TABLE = "car"
# Question endpoint; `{}` is filled with the question index.
url = "https://mnks.jxedt.com/get_question?r=0.46514869467754005&index={}"
# Connection and cursor shared by the whole script.
db = pymysql.connect(
    host="10.10.10.100",
    port=3306,
    user="root",
    password="12341234",
    database="gofiber",
)
cursor = db.cursor()
def loads_str(data_str, max_repairs=1000):
    """Parse JSON text, patching characters that make the parser fail.

    Whenever ``json.loads`` rejects the text, the character at the reported
    error offset is replaced with ``"<?>"`` and parsing is retried.

    Args:
        data_str: the JSON text to parse.
        max_repairs: upper bound on the number of characters patched before
            giving up.

    Returns:
        The decoded object, or ``None`` when the text cannot be repaired.
    """
    last_pos = None
    for _ in range(max_repairs + 1):
        try:
            return json.loads(data_str)
        except json.JSONDecodeError as err:
            pos = err.pos
            # Give up when the error is past the end of the text (nothing to
            # patch) or when the previous patch did not move the error on.
            if pos >= len(data_str) or pos == last_pos:
                return None
            # Patch only the offending character, not every occurrence of it.
            data_str = data_str[:pos] + "<?>" + data_str[pos + 1:]
            last_pos = pos
        except (TypeError, ValueError):
            # Not JSON text at all (wrong type, undecodable bytes, ...).
            return None
    return None
# Keys read from each scraped question, in the order of the INSERT columns.
_SOURCE_KEYS = ("id", "question", "options", "imageurl", "ta", "bestanswer", "type",
                "a", "b", "c", "d", "e", "f", "g", "cid")
# Parameterized statement: values are bound by the driver, so quotes inside
# the question text can neither break nor alter the SQL.
# NOTE(review): assumes every scraped value is a scalar (str/int) — confirm
# for `options`.
_INSERT_SQL = (
    "INSERT INTO `{}` (`id`, `question`, `options`, `image`, `answer`, `bestanswer`, `type`, "
    "`a`, `b`, `c`, `d`, `e`, `f`, `g`, `cid`) VALUES ({});"
).format(TABLE, ", ".join(["%s"] * len(_SOURCE_KEYS)))

for i in range(1, 10000):
    # Pause for a moment after every 1000 requests.
    if i % 1000 == 0:
        time.sleep(3)
    # Fetch the question.
    try:
        response = requests.get(url.format(i), timeout=10)
    except requests.RequestException as e:
        print("index %d 存在错误: %s" % (i, e))
        continue
    # Decode the payload, patching characters that break the JSON parser.
    question_data = loads_str(response.text)
    try:
        if question_data["question"] == "":
            continue
        row = tuple(question_data[key] for key in _SOURCE_KEYS)
    except (KeyError, TypeError):
        # Missing field, or the payload could not be decoded at all.
        print("index %d 存在错误" % i)
        continue
    try:
        # Execute the statement and commit it.
        cursor.execute(_INSERT_SQL, row)
        db.commit()
    except pymysql.MySQLError as e:
        db.rollback()
        print("index %d 写入数据库失败: %s" % (i, e))
        continue

# Query every question with its explanation and correct answer:
# SELECT id, question, IF(answer = 1,a, IF(answer = 2,b,IF(answer = 3,c,d))) as answer,bestanswer,answer as answer_id from car WHERE a != "";

147
spider.py Normal file
View File

@ -0,0 +1,147 @@
import random
import time
import execjs
import pymysql
import requests
# API endpoints, keyed by purpose.
url = dict(
    sequence="https://api2.jiakaobaodian.com/api/open/exercise/sequence.htm",
    question="https://api2.jiakaobaodian.com/api/open/question/question-list.htm",
)
# City code sent with the sequence request.
cityCode = 440500
# Destination table for the scraped questions.
TABLE = "jiaKaoBaoDian"
def calc_r():
    """Compute the value sent as the `_r` request parameter.

    The embedded JavaScript is compiled with execjs and its `get_r`
    function is called; whatever it returns is handed back unchanged.
    """
    js_source = """
s = function (t) {
var a, i, o = Math.abs(parseInt((new Date).getTime() * Math.random() * 1e4)).toString(), n = 0;
for (a = 0; a < o.length; a++)
n += parseInt(o[a]);
return i = function(t) {
return function(a, i) {
return 0 >= i - "" + a.length ? a : (t[i] || (t[i] = Array(i + 1).join(0))) + a
}
}([]),
n += o.length,
n = i(n, 3 - n.toString().length),
t.toString() + o + n
}
function get_r() {
return s(1)
}
"""
    compiled = execjs.compile(js_source)
    return compiled.call("get_r")
def get_sequence():
    """Fetch the list of entries in the question sequence.

    Returns:
        A list holding the items of the response's ``data`` field.

    Raises:
        RuntimeError: when the endpoint answers with a non-200 status.
    """
    _r = calc_r()
    resp = requests.get(url["sequence"], params={
        "_r": _r,
        "carStyle": "xiaoche",
        "carType": "car",
        "cityCode": cityCode,
        "course": "kemu1",
        "kemuStyyle": "kemu1",
        "_": 0.08272777821960653
    }, timeout=10)  # never block forever on a stalled connection
    if resp.status_code != 200:
        print(resp.text)
        # RuntimeError rather than bare BaseException so that ordinary
        # `except Exception` handlers can deal with the failure.
        raise RuntimeError("获取 题库 信息失败")

    question_list = list(resp.json()["data"])
    print("获取 题库信息 共计 %d 道题" % len(question_list))
    return question_list
def get_question(questions):
    """Fetch the details for a batch of question ids, retrying on failure.

    Args:
        questions: list of question ids; sent comma-separated in the
            ``questionIds`` parameter.

    Returns:
        The ``data`` field of the JSON response.

    Raises:
        Exception: the error of the last attempt, once all retries failed.
    """
    retry_time = 5
    question_ids = ",".join(str(question) for question in questions)
    for attempt in range(1, retry_time + 1):
        try:
            params = {
                "_r": calc_r(),
                "carType": "car",
                "course": "kemu1",
                "_": 0.06614633144515003,
                "questionIds": question_ids
            }
            resp = requests.get(url["question"], params=params, timeout=10)
            if resp.status_code != 200:
                print(resp.text)
                raise RuntimeError("获取 题目 信息失败")
            data = resp.json()["data"]
            print("获取 %s 道题目 信息成功" % len(data))
            return data
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit propagate immediately so the script can be stopped.
            if attempt == retry_time:
                print("%s 重试 %d" % (questions, attempt))
                raise
if __name__ == '__main__':
    db = pymysql.connect(host="10.10.10.200", port=3306, user="root", password="12341234", database="car")
    cursor = db.cursor()

    # (table column, key in the API payload), in INSERT order.
    _fields = (
        ("answer", "answer"),
        ("id", "id"),
        ("mediaContent", "mediaContent"),
        ("question", "question"),
        ("questionId", "questionId"),
        ("optionA", "optionA"),
        ("optionB", "optionB"),
        ("optionC", "optionC"),
        ("optionD", "optionD"),
        ("optionE", "optionE"),
        ("optionF", "optionF"),
        ("optionG", "optionG"),
        ("optionH", "optionH"),
        ("keywords", "keywords"),
        ("illiteracyExplain", "illiteracyExplain"),
        ("conciseExplain", "conciseExplain"),
        ("explain", "explain"),
        ("knack", "knackDetail"),
        ("wrongRate", "wrongRate"),
    )
    # Parameterized statement: the driver binds the values, so quotes in the
    # text cannot break the SQL and missing values are stored as NULL.
    sql = "INSERT INTO `%s` (%s) VALUES (%s);" % (
        TABLE,
        ",".join("`%s`" % column for column, _ in _fields),
        ",".join(["%s"] * len(_fields)),
    )

    try:
        _question_list = get_sequence()

        _question_num_of_part = 20
        _counter = 0
        _throttling = 10
        while len(_question_list) != 0:
            _part_questions = _question_list[:_question_num_of_part]
            _question_list = _question_list[_question_num_of_part:]

            _data = get_question(_part_questions)
            _counter += 1
            # Pause after every `_throttling` batches, not just the first time
            # the counter reaches that value.
            if _counter % _throttling == 0:
                time.sleep(5 + (random.random() * 10))

            for _datum in _data:
                _row = tuple(_datum.get(key) for _, key in _fields)
                try:
                    # Execute the statement and commit it.
                    cursor.execute(sql, _row)
                    db.commit()
                except pymysql.MySQLError as e:
                    # Best effort: report the failing row and carry on.
                    db.rollback()
                    print("id %s 写入数据库失败: %s" % (_datum.get("id"), e))
                    continue
    finally:
        cursor.close()
        db.close()

17
test.js Normal file
View File

@ -0,0 +1,17 @@
/**
 * Builds a random numeric token with a trailing three-digit checksum.
 *
 * The token is `t`, followed by the digits of a random integer derived from
 * the current time, followed by (sum of those digits + number of digits)
 * left-padded with zeros to three characters.
 *
 * @param {number|string} t - Prefix placed in front of the token.
 * @returns {string} The assembled token.
 */
function s(t) {
  const seed = Date.now() * Math.random() * 1e4;
  // Parse via the string form with an explicit radix to keep the integer part.
  const digits = String(Math.abs(Number.parseInt(String(seed), 10)));

  let checksum = digits.length;
  for (const digit of digits) {
    checksum += Number.parseInt(digit, 10);
  }

  return t.toString() + digits + String(checksum).padStart(3, "0");
}
/**
 * Entry point: returns the token produced by `s` for the prefix 1.
 *
 * @returns {string} Token built by `s(1)`.
 */
function get_r() {
  const prefix = 1;
  return s(prefix);
}

38
test.py Normal file
View File

@ -0,0 +1,38 @@
import random
import time
import execjs
if __name__ == '__main__':
    # Scratch checks: list chunking, %-formatting, and the execjs token call.
    a = list(range(100))
    print(a)

    # Print the list ten items at a time, without the surrounding brackets.
    x = 10
    while a:
        head, a = a[:x], a[x:]
        print(str(head).lstrip("[").rstrip("]"))

    print("%d %f" % (1.1, 1.1))

    js_source = """
s = function (t) {
var a, i, o = Math.abs(parseInt((new Date).getTime() * Math.random() * 1e4)).toString(), n = 0;
for (a = 0; a < o.length; a++)
n += parseInt(o[a]);
return i = function(t) {
return function(a, i) {
return 0 >= i - "" + a.length ? a : (t[i] || (t[i] = Array(i + 1).join(0))) + a
}
}([]),
n += o.length,
n = i(n, 3 - n.toString().length),
t.toString() + o + n
}
function get_r() {
return s(1)
}
"""
    ctx = execjs.compile(js_source)
    print(ctx.call("get_r"))