Coverage for src\scrape\base_scraper.py: 100%
49 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-10-19 21:13 +0800
« prev ^ index » next coverage.py v7.10.1, created at 2025-10-19 21:13 +0800
1from bs4 import BeautifulSoup
2from fastapi import Request
3from requests import Session
4from requests.exceptions import RequestException
5from src.utility.endpoints import Endpoint
6from src.utility.lib import CustomException, Logger
7from src.utility.utils import EndpointType, Utils, ViewType
8from typing import Dict, Type, TypeVar
9from urllib.parse import urlencode
T = TypeVar("T", bound="BaseScraper")


class BaseScraper:
    """Base class that downloads a Filmarks page and wraps its parsed HTML.

    Instances hold the parsed document (``soup``), the request parameters that
    were used to build the URL (``params``) and the endpoint's ``view``.
    Use :meth:`scrape` to build an instance from an incoming request.
    """

    # Seconds to wait for Filmarks before giving up. Without a timeout
    # ``requests`` waits forever on a stalled connection; subclasses may
    # override this value.
    REQUEST_TIMEOUT: float = 15.0

    def __init__(self, soup: BeautifulSoup, params: Dict, view: ViewType) -> None:
        """Store the parsed page, the request parameters and the view type."""
        self.soup = soup
        self.params = params
        self.view = view

    @classmethod
    def scrape(cls: Type[T], endpoint: Endpoint, req: Request) -> T | None:
        """Fetch the Filmarks page for ``endpoint`` and return a scraper for it.

        Args:
            endpoint: Endpoint member whose ``value`` exposes ``type``, ``path``
                and ``view``.
            req: Incoming request supplying ``query_params`` / ``path_params``.

        Returns:
            An instance of ``cls`` built from the parsed page, the parameters
            taken from ``req`` and the endpoint's view.

        Raises:
            ValueError: If the endpoint type is not QUERY, PATH or COMBINED.
            Exception: Whatever ``CustomException.service_unavailable()``
                produces when the request fails, times out, or the page says
                Filmarks is temporarily unavailable; whatever
                ``CustomException.not_found()`` produces when the page is the
                "not found" page.
        """
        spec = endpoint.value
        params, url = cls._resolve_target(spec, req)

        soup = BeautifulSoup(cls._fetch_markup(url), "lxml")
        cls._raise_if_page_service_unavailable(soup)
        cls._raise_if_page_not_found(soup)

        return cls(soup, params, spec.view)

    @staticmethod
    def _resolve_target(spec, req: Request) -> tuple[Dict, str]:
        """Return ``(params, url)`` for the endpoint description ``spec``.

        ``params`` is ``req.query_params``, ``req.path_params`` or a merged
        dict of both, depending on ``spec.type`` (so it is not always a plain
        ``dict``).
        """
        if spec.type == EndpointType.QUERY:
            params = req.query_params
            relative = spec.path + "?" + urlencode(params)
        elif spec.type == EndpointType.PATH:
            params = req.path_params
            relative = spec.path.format(**params)
        elif spec.type == EndpointType.COMBINED:
            # Path parameters win over query parameters on a key clash.
            params = {**req.query_params, **req.path_params}
            relative = spec.path.format(**req.path_params) + "?" + urlencode(req.query_params)
        else:
            raise ValueError(f"Unexpected EndpointType: {spec.type}")  # pragma: no cover

        return params, Utils.create_filmarks_link(relative)

    @classmethod
    def _fetch_markup(cls, url: str) -> str:
        """Download ``url`` and return the response body as text.

        Any ``RequestException`` (including a timeout) is logged and converted
        into ``CustomException.service_unavailable()``, chained to the
        original error.
        """
        try:
            with Session() as session:
                resp = session.get(
                    url=url,
                    headers=Utils.FILMARKS_REQUEST_HEADERS,
                    timeout=cls.REQUEST_TIMEOUT,
                )
                return resp.text
        except RequestException as e:
            Logger.err(f"Request to Filmarks failed: '{e}'")
            raise CustomException.service_unavailable() from e

    @staticmethod
    def _raise_if_page_service_unavailable(soup: BeautifulSoup) -> None:
        """Raise if the page shows the "temporarily unavailable" notice."""
        notice = soup.select_one("p.main__text")
        if not notice:
            return

        # The literal reads "Temporarily inaccessible."
        if notice.text.strip().startswith("一時的にアクセスできない状態です。"):
            Logger.err("Filmarks is temporarily unavailable")
            raise CustomException.service_unavailable()

    @staticmethod
    def _raise_if_page_not_found(soup: BeautifulSoup) -> None:
        """Raise if the page is the "page not found" page."""
        notice = soup.select_one("p.main__status-ja")
        if not notice:
            return

        # The literal reads "The page you are looking for cannot be found."
        if notice.text.strip() == "お探しのページは見つかりません。":
            Logger.err("Invalid Filmarks page requested")
            raise CustomException.not_found()