爬虫搞定,100篇文章有了

This commit is contained in:
高子兴 2024-11-29 16:09:42 +08:00
commit 37bab39059
15 changed files with 218 additions and 0 deletions

8
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

8
.idea/PsycologyAPI.iml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,54 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<Languages>
<language minSize="110" name="Python" />
</Languages>
</inspection_tool>
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="7">
<item index="0" class="java.lang.String" itemvalue="alibabacloud_tea_console" />
<item index="1" class="java.lang.String" itemvalue="alibabacloud_tea_util" />
<item index="2" class="java.lang.String" itemvalue="alibabacloud_alidns20150109" />
<item index="3" class="java.lang.String" itemvalue="alibabacloud_tea_openapi" />
<item index="4" class="java.lang.String" itemvalue="gradio" />
<item index="5" class="java.lang.String" itemvalue="torch" />
<item index="6" class="java.lang.String" itemvalue="jose" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E722" />
<option value="E501" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N806" />
<option value="N802" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="list.*" />
</list>
</option>
</inspection_tool>
<inspection_tool class="SpellCheckingInspection" enabled="true" level="INFORMATION" enabled_by_default="true">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

32
.idea/misc.xml Normal file
View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="demo1" />
</component>
<component name="ProjectInspectionProfilesVisibleTreeState">
<entry key="Project Default">
<profile-state>
<expanded-state>
<State>
<id>EditorConfig</id>
</State>
<State>
<id>GitHub 操作</id>
</State>
<State>
<id>正则表达式</id>
</State>
<State>
<id>版本控制</id>
</State>
</expanded-state>
<selected-state>
<State>
<id>用户定义</id>
</State>
</selected-state>
</profile-state>
</entry>
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="demo1" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/PsycologyAPI.iml" filepath="$PROJECT_DIR$/.idea/PsycologyAPI.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

BIN
db.sqlite3 Normal file

Binary file not shown.

13
main.py Normal file
View File

@ -0,0 +1,13 @@
from fastapi import FastAPI
# Application instance that the @app.get route decorators in this module attach to.
app = FastAPI()
@app.get("/")
async def root():
    """Handle GET requests to "/" by returning a fixed greeting payload."""
    greeting = {"message": "Hello World"}
    return greeting
@app.get("/hello/{name}")
async def say_hello(name: str):
    """Handle GET requests to "/hello/{name}" with a personalised greeting.

    Args:
        name: Value captured from the ``{name}`` segment of the route path.

    Returns:
        A dict whose ``"message"`` entry greets ``name``.
    """
    greeting = f"Hello {name}"
    return {"message": greeting}

10
models/__init__.py Normal file
View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# @Time : 2024/11/13 下午3:10
# @Author : 河瞬
# @FileName: __init__.py
# @Software: PyCharm
from .source import Source
from .category import Category
from .raw_data import RawData
from .processed_data import ProcessedData

16
models/category.py Normal file
View File

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# @Time : 2024/11/14 下午4:04
# @Author : 河瞬
# @FileName: category.py
# @Software: PyCharm
from tortoise.models import Model
from tortoise import fields
class Category(Model):
    """Category record that can be nested under another category.

    A row may reference a parent category through ``parent``; the rows that
    reference it are reachable through the ``children`` reverse relation.
    """

    id = fields.IntField(primary_key=True)
    name = fields.TextField()
    description = fields.TextField()
    # Categories are layered like a tree, so a self-referencing foreign key
    # links each category to its parent. null=True allows a category to have
    # no parent at all.
    parent = fields.ForeignKeyField("models.Category", related_name="children", null=True)
    # Type hint only: the reverse accessor itself comes from
    # related_name="children" on the foreign key above. It is declared as an
    # annotation rather than an assignment so that no bare typing alias is
    # bound as a class attribute.
    children: fields.ReverseRelation["Category"]

15
models/processed_data.py Normal file
View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
# @Time : 2024/11/14 下午4:02
# @Author : 河瞬
# @FileName: processed_data.py
# @Software: PyCharm
from tortoise.models import Model
from tortoise import fields
class ProcessedData(Model):
    """Result of processing one ``RawData`` row, filed under a ``Category``."""

    id = fields.IntField(primary_key=True)
    # Raw record this result was derived from; reverse accessor on RawData
    # is "processed_data".
    raw_data = fields.ForeignKeyField(
        model_name="models.RawData",
        related_name="processed_data",
    )
    # Category assigned to the result; reverse accessor on Category is
    # "processed_data".
    category = fields.ForeignKeyField(
        model_name="models.Category",
        related_name="processed_data",
    )
    summary = fields.TextField()
    # auto_now_add: timestamp is filled in when the row is first created.
    processed_at = fields.DatetimeField(auto_now_add=True)

18
models/raw_data.py Normal file
View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# @Time : 2024/11/13 下午3:38
# @Author : 河瞬
# @FileName: raw_data.py
# @Software: PyCharm
from tortoise.models import Model
from tortoise import fields
class RawData(Model):
    """Unprocessed item belonging to a ``Source``: URL, title and optional content."""

    id = fields.IntField(primary_key=True)
    url = fields.TextField()
    title = fields.TextField()
    # Nullable, so a row can exist before its content is stored.
    content = fields.TextField(null=True)
    # Owning source; reverse accessor on Source is "raw_data".
    source = fields.ForeignKeyField(
        model_name="models.Source",
        related_name="raw_data",
    )
    # auto_now_add: timestamp is filled in when the row is first created.
    detected_at = fields.DatetimeField(auto_now_add=True)
    # auto_now: timestamp is refreshed on every save of the row.
    # NOTE(review): this means any update (e.g. flipping is_processed) also
    # moves fetched_at - confirm that is intended.
    fetched_at = fields.DatetimeField(auto_now=True)
    # New rows start out unprocessed.
    is_processed = fields.BooleanField(default=False)

13
models/source.py Normal file
View File

@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
# @Time : 2024/11/14 下午3:54
# @Author : 河瞬
# @FileName: source.py
# @Software: PyCharm
from tortoise.models import Model
from tortoise import fields
class Source(Model):
    """Named source identified by an index URL; ``RawData`` rows point back to it."""

    # Integer primary key.
    id = fields.IntField(primary_key=True)
    # Display name of the source.
    name = fields.TextField()
    # URL of the source's index page (presumably the entry point used to
    # discover items - confirm against the crawler code).
    index_url = fields.TextField()

11
test_main.http Normal file
View File

@ -0,0 +1,11 @@
# Test your FastAPI endpoints
GET http://127.0.0.1:8000/
Accept: application/json
###
GET http://127.0.0.1:8000/hello/User
Accept: application/json
###