PsycologyAPI/web_spider/base_spider.py

34 lines
982 B
Python
Raw Normal View History

2024-11-29 09:09:32 +00:00
# -*- coding: utf-8 -*-
# @Time : 2024/11/15 下午4:09
# @Author : 河瞬
# @FileName: base_spider.py
# @Software: PyCharm
from typing import List, Dict
class BaseSpider:
name: str
index_url: List[str]
timeout: int
use_proxy_default: bool
def __init__(self, proxy_url: str = None, timeout: int = 6):
self.use_proxy = proxy_url is not None
self.proxy_url = proxy_url
self.timeout = timeout
def set_proxy(self, proxy_url: str):
self.use_proxy = True
self.proxy_url = proxy_url
async def get_news_urls(self, page_cnt: int) -> List[Dict | None]:
"""
:param page_cnt:
:return: List[Dict] 字典必须包含title和url两个key
"""
raise NotImplementedError("Subclass method not implemented:get_news_urls")
async def get_news_content(self, urls: List[str]) -> List[str | None]:
raise NotImplementedError("Subclass method not implemented:get_news_content")