Source code for mediawiki.mediawiki
"""
MediaWiki class module
"""
# MIT License
# Author: Tyler Barrus (barrust@gmail.com)
import time
from datetime import datetime, timedelta
from decimal import Decimal, DecimalException
from json import JSONDecodeError
from typing import Any, Dict, List, Optional, Tuple, Union
import requests
import requests.exceptions as rex
from mediawiki.configuraton import VERSION, Configuration
from mediawiki.exceptions import (
HTTPTimeoutError,
MediaWikiAPIURLError,
MediaWikiCategoryTreeError,
MediaWikiException,
MediaWikiGeoCoordError,
MediaWikiLoginError,
PageError,
)
from mediawiki.mediawikipage import MediaWikiPage
from mediawiki.utilities import memoize
[docs]
class MediaWiki:
"""MediaWiki API Wrapper Instance
Args:
url (str): API URL of the MediaWiki site; defaults to Wikipedia
lang (str): Language of the MediaWiki site; used to help change API URL
timeout (float): HTTP timeout setting; None means no timeout
rate_limit (bool): Use rate limiting to limit calls to the site
rate_limit_wait (timedelta): Amount of time to wait between requests
cat_prefix (str): The prefix for categories used by the mediawiki site; defaults to Category (en)
user_agent (str): The user agent string to use when making requests; defaults to a library version \
but per the MediaWiki API documentation it recommends setting a unique one and not using the library's \
default user-agent string
username (str): The username to use to log into the MediaWiki
password (str): The password to use to log into the MediaWiki
proxies (dict): A dictionary of specific proxies to use in the Requests library
verify_ssl (bool|str): Verify SSL Certificates to be passed directly into the Requests library"""
__slots__ = [
"_version",
"_config",
"_session",
"_extensions",
"_api_version",
"_api_version_str",
"_base_url",
"__supported_languages",
"__available_languages",
"_is_logged_in",
"_cache",
]
def __init__(
    self,
    url: str = "https://{lang}.wikipedia.org/w/api.php",
    lang: str = "en",
    timeout: float = 15.0,
    rate_limit: bool = False,
    rate_limit_wait: timedelta = timedelta(milliseconds=50),
    cat_prefix: str = "Category",
    user_agent: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    proxies: Optional[Dict] = None,
    verify_ssl: Union[bool, str] = True,
):
    """Build the configuration and HTTP session, optionally log in, then load the site information

    Args:
        url (str): API URL; a ``{lang}`` placeholder is filled with the lower-cased ``lang``
        lang (str): Language of the MediaWiki site
        timeout (float): HTTP timeout passed to each request
        rate_limit (bool): Whether requests are rate limited
        rate_limit_wait (timedelta): Minimum time between rate limited requests
        cat_prefix (str): Prefix used when building category titles
        user_agent (str): User agent string for the HTTP session
        username (str): Username to log in with; login is only attempted when a password is also given
        password (str): Password to log in with
        proxies (dict): Proxies applied to the HTTP session
        verify_ssl (bool|str): Value assigned to the session's ``verify`` setting
    Raises:
        :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the site information cannot be retrieved
    """
    self._version = VERSION
    self._config = Configuration(
        lang=lang,
        api_url=url.format(lang=lang.lower()),
        category_prefix=cat_prefix,
        timeout=timeout,
        user_agent=user_agent,
        proxies=proxies,
        verify_ssl=verify_ssl,
        rate_limit=rate_limit,
        rate_limit_wait=rate_limit_wait,
        username=username,
        password=password,
        refresh_interval=None,
        use_cache=True,
    )

    # requests library parameters; the attribute must exist before _reset_session() inspects it
    self._session: requests.Session = requests.Session()

    # reset library parameters
    self._extensions = None
    self._api_version = None
    self._api_version_str = None
    self._base_url = None
    self.__supported_languages: Optional[Dict[str, str]] = None
    self.__available_languages: Optional[Dict[str, bool]] = None

    # for memoized results
    self._cache: Dict = {}

    self._reset_session()

    # for login information
    self._is_logged_in = False
    if self._config.username is not None and self._config.password is not None:
        self.login(self._config.username, self._config.password)

    # a site that cannot report its own information is treated as a bad API URL
    try:
        self._get_site_info()
    except MediaWikiException as exc:
        raise MediaWikiAPIURLError(self._config.api_url) from exc
# non-settable properties
@property
def version(self) -> str:
    """str: Version of the pymediawiki library captured when the instance was created

    Note:
        Not settable"""
    library_version = self._version
    return library_version
@property
def api_version(self) -> Optional[str]:
    """str: Dotted version string of the MediaWiki site's API; **None** until site information is loaded

    Note:
        Not settable"""
    version_string = self._api_version_str
    return version_string
@property
def base_url(self) -> str:
    """str: Base URL of the MediaWiki site, or an empty string when it is not known

    Note:
        Not settable"""
    return self._base_url or ""
@property
def extensions(self) -> List[str]:
    """list: Names of the extensions installed on the MediaWiki site; empty when none are known

    Note:
        Not settable"""
    return self._extensions or []
# settable properties
@property
def rate_limit(self) -> bool:
    """bool: Whether rate limiting is applied to API requests"""
    return self._config.rate_limit


@rate_limit.setter
def rate_limit(self, rate_limit: bool):
    """Enable or disable rate limiting; the memoize cache is cleared when the configuration flags it"""
    config = self._config
    config.rate_limit = rate_limit
    if config._clear_memoized:
        self.clear_memoized()
@property
def proxies(self) -> Optional[Dict]:
    """dict: Proxy settings handed to the Requests library; **None** when no proxies are configured"""
    return self._config.proxies


@proxies.setter
def proxies(self, proxies: Optional[Dict]):
    """Store new proxy settings; the HTTP session is rebuilt when the configuration flags it"""
    config = self._config
    config.proxies = proxies
    if config._reset_session:
        self._reset_session()
@property
def use_cache(self) -> bool:
    """bool: **True** when caching should be used, **False** when it should not"""
    return self._config.use_cache


@use_cache.setter
def use_cache(self, use_cache: bool):
    """Switch use of the cache on or off"""
    config = self._config
    config.use_cache = use_cache
@property
def rate_limit_min_wait(self) -> timedelta:
    """timedelta: Minimum time between two API calls

    Note:
        Only used if rate_limit is **True**"""
    return self._config.rate_limit_min_wait


@rate_limit_min_wait.setter
def rate_limit_min_wait(self, min_wait: timedelta):
    """Store the minimum wait used for rate limiting"""
    config = self._config
    config.rate_limit_min_wait = min_wait
@property
def timeout(self) -> Optional[float]:
    """float: Timeout applied to API requests

    Note:
        **None** means no response timeout"""
    return self._config.timeout


@timeout.setter
def timeout(self, timeout: Optional[float]):
    """Store the request timeout, in seconds (fractions allowed)"""
    config = self._config
    config.timeout = timeout
@property
def verify_ssl(self) -> Union[bool, str]:
    """bool | str: SSL verification setting for requests: a flag or a path to a cert file"""
    return self._config.verify_ssl


@verify_ssl.setter
def verify_ssl(self, verify_ssl: Union[bool, str]):
    """Store the SSL verification setting; the HTTP session is rebuilt when the configuration flags it"""
    config = self._config
    config.verify_ssl = verify_ssl
    if config._reset_session:
        self._reset_session()
@property
def language(self) -> str:
    """str: Language of the API URL; setting it updates the API URL where that is possible

    Note:
        Use correct language titles with the updated API URL
    Note:
        Some API URLs do not encode language; unable to update if this is the case"""
    return self._config.lang


@language.setter
def language(self, lang: str):
    """Store the language on the configuration; the memoize cache is cleared when the configuration flags it"""
    config = self._config
    config.lang = lang
    if config._clear_memoized:
        self.clear_memoized()
@property
def category_prefix(self) -> str:
    """str: Prefix placed in front of category names by the category based functions

    Note:
        Use the correct category name for the language selected"""
    return self._config.category_prefix


@category_prefix.setter
def category_prefix(self, prefix: str):
    """Store the category prefix on the configuration"""
    config = self._config
    config.category_prefix = prefix
@property
def user_agent(self) -> str:
    """str: User agent string sent with requests

    Note: If using in as part of another project, this should be changed"""
    return self._config.user_agent


@user_agent.setter
def user_agent(self, user_agent: str):
    """Store a new user agent string; the HTTP session is rebuilt when the configuration flags it

    Note: Rebuilding the session marks the instance as logged out, so a new login is needed"""
    config = self._config
    config.user_agent = user_agent
    if config._reset_session:
        self._reset_session()
@property
def api_url(self) -> str:
    """str: API URL currently held by the configuration

    Note:
        Not settable; See :py:func:`mediawiki.MediaWiki.set_api_url`"""
    current_url = self._config.api_url
    return current_url
@property
def memoized(self) -> Dict[Any, Any]:
    """dict: The memoize cache itself (the live object, not a copy)

    Note:
        Not settable; see
        :py:func:`mediawiki.MediaWiki.clear_memoized`"""
    cache = self._cache
    return cache
@property
def refresh_interval(self) -> Optional[int]:
    """int: Interval at which the memoize cache is to be refreshed"""
    return self._config.refresh_interval


@refresh_interval.setter
def refresh_interval(self, refresh_interval: int):
    """Store the new cache refresh interval"""
    config = self._config
    config.refresh_interval = refresh_interval
[docs]
def login(self, username: str, password: str, strict: bool = True) -> bool:
    """Login as specified user

    A login token is requested first and then posted together with the credentials.

    Args:
        username (str): The username to log in with
        password (str): The password for the user
        strict (bool): `True` to throw an error on failure
    Returns:
        bool: `True` if successfully logged in; `False` otherwise
    Raises:
        :py:func:`mediawiki.exceptions.MediaWikiLoginError`: if unable to login and `strict` is `True`; \
            this includes the case where no login token could be retrieved
    Note:
        Per the MediaWiki API, one should use the `bot password`; \
        see https://www.mediawiki.org/wiki/API:Login for more information
    """
    # get login token
    token_params = {
        "action": "query",
        "meta": "tokens",
        "type": "login",
        "format": "json",
    }
    token_res = self._get_response(token_params)
    token = token_res.get("query", {}).get("tokens", {}).get("logintoken")

    if token is None:
        # without a token the login request cannot be built; report it as a login failure
        reason = "Unable to retrieve a login token"
    else:
        login_params = {
            "action": "login",
            "lgname": username,
            "lgpassword": password,
            "lgtoken": token,
            "format": "json",
        }
        # an empty reply (e.g. undecodable JSON) is handled like any other failure
        login_info = self._post_response(login_params).get("login", {})
        if login_info.get("result") == "Success":
            self._is_logged_in = True
            self._config._login = False
            return True
        reason = login_info.get("reason", "Unknown reason")

    self._is_logged_in = False
    if strict:
        raise MediaWikiLoginError(f"MediaWiki login failure: {reason}")
    return False
# non-properties
[docs]
def set_api_url(
    self,
    api_url: str = "https://{lang}.wikipedia.org/w/api.php",
    lang: str = "en",
    username: Optional[str] = None,
    password: Optional[str] = None,
):
    """Set the API URL and language

    The new settings are applied, a login is attempted when both credentials are given,
    and the site information is reloaded. If that fails, the previous API URL, language,
    username and password are put back before the error is raised.

    Args:
        api_url (str): API URL to use; a ``{lang}`` placeholder is filled with the lower-cased ``lang``
        lang (str): Language of the API URL
        username (str): The username, if needed, to log into the MediaWiki site
        password (str): The password, if needed, to log into the MediaWiki site
    Raises:
        :py:func:`mediawiki.exceptions.MediaWikiAPIURLError`: if the \
            url is not a valid MediaWiki site or login fails
    Note:
        The instance is marked as logged out before the switch is attempted and stays \
        logged out if the switch fails
    """
    config = self._config
    # remember everything that is overwritten so a failed switch can be rolled back completely
    previous = (config.api_url, config.lang, config.username, config.password)

    config.lang = lang.lower()
    config.api_url = api_url.format(lang=config.lang)
    config.username = username
    config.password = password

    self._is_logged_in = False
    try:
        if config.username is not None and config.password is not None:
            self.login(config.username, config.password)
        self._get_site_info()
        self.__supported_languages = None  # reset this
        self.__available_languages = None  # reset this
    except (rex.ConnectTimeout, MediaWikiException) as exc:
        # roll back so the configuration does not pair the old URL with the new credentials
        config.api_url, config.lang, config.username, config.password = previous
        raise MediaWikiAPIURLError(api_url) from exc
    self.clear_memoized()
def _reset_session(self):
    """Close the current HTTP session (if any) and build a new one from the configuration

    The new session gets the configured user agent, proxies (when set) and SSL
    verification setting. The instance is marked as logged out and the
    configuration's reset flag is lowered.
    """
    if self._session:
        self._session.close()

    user_agent_header = {"User-Agent": self._config.user_agent}
    session = requests.Session()
    self._session = session
    session.headers.update(user_agent_header)

    proxies = self._config.proxies
    if proxies is not None:
        session.proxies.update(proxies)
    session.verify = self._config.verify_ssl

    # a fresh session carries no login state
    self._is_logged_in = False
    self._config._reset_session = False
[docs]
def clear_memoized(self):
    """Empty the memoize cache, when one exists, and lower the configuration's clear flag"""
    try:
        cache = self._cache
    except AttributeError:
        # no cache attribute has been assigned yet; nothing to clear
        pass
    else:
        cache.clear()
    self._config._clear_memoized = False
# non-setup functions
@property
def supported_languages(self) -> Dict[str, str]:
    """dict: Language codes supported by the MediaWiki site, mapped to the language names

    The site is queried on first access only; later accesses return the stored result.

    Note:
        Not Settable"""
    if self.__supported_languages is None:
        response = self.wiki_request({"meta": "siteinfo", "siprop": "languages"})
        self.__supported_languages = {entry["code"]: entry["*"] for entry in response["query"]["languages"]}
    return self.__supported_languages
@property
def available_languages(self) -> Dict[str, bool]:
    """dict: Every supported language code mapped to whether a MediaWiki instance could be created for it

    The result is computed on first access only. Each language is probed by constructing
    ``MediaWiki(lang=...)`` with the default API URL; a probe that raises a connection
    or MediaWiki error marks the language as unavailable.

    Note:
        Not Settable"""
    if self.__available_languages is None:
        available = {}
        for lang in self.supported_languages:
            try:
                probe = MediaWiki(lang=lang)
            except (rex.ConnectionError, rex.ConnectTimeout, MediaWikiException, MediaWikiAPIURLError):
                available[lang] = False
            else:
                available[lang] = True
                # the probe only had to be constructible; release its HTTP session right away
                probe._session.close()
        self.__available_languages = available
    return self.__available_languages
@property
def logged_in(self) -> bool:
    """bool: **True** when the instance is currently logged into the MediaWiki site"""
    login_state = self._is_logged_in
    return login_state
[docs]
def random(self, pages: int = 1) -> Union[str, List[str]]:
    """Request a random page title or list of random titles

    Args:
        pages (int): Number of random pages to return
    Returns:
        list or str: A list of random page titles, or a single title when exactly one is returned
    Raises:
        ValueError: if pages is **None** or smaller than 1"""
    if pages is None or pages < 1:
        raise ValueError("Number of pages must be greater than 0")

    response = self.wiki_request({"list": "random", "rnnamespace": 0, "rnlimit": pages})
    titles = [entry["title"] for entry in response["query"]["random"]]
    if len(titles) == 1:
        return titles[0]
    return titles
[docs]
@memoize
def allpages(self, query: str = "", results: int = 10) -> List[str]:
    """Request page titles from the MediaWiki instance, starting at the given string

    Args:
        query (str): String at which the listing starts
        results (int): The number of pages to return; **None** requests the maximum of 500
    Returns:
        list: The page titles returned for the query
    Note:
        Could add ability to continue past the limit of 500
    """
    max_pull = 500
    limit = max_pull if results is None else min(results, max_pull)
    response = self.wiki_request({"list": "allpages", "aplimit": limit, "apfrom": query})
    self._check_error_response(response, query)
    return [entry["title"] for entry in response["query"]["allpages"]]
[docs]
@memoize
def search(
    self, query: str, results: int = 10, suggestion: bool = False
) -> Union[List[str], Tuple[List[str], Optional[str]]]:
    """Search for similar titles

    Args:
        query (str): Page title
        results (int): Number of pages to return; **None** requests the maximum of 500
        suggestion (bool): Also ask the site for a search suggestion
    Returns:
        tuple or list: tuple (list results, suggestion) if suggestion is **True**; list of results otherwise
    Raises:
        ValueError: if the query is **None** or blank
    Note:
        Could add ability to continue past the limit of 500
    """
    self._check_query(query, "Query must be specified")

    max_pull = 500
    limit = max_pull if results is None else min(results, max_pull)
    search_params = {
        "list": "search",
        "srprop": "",
        "srlimit": limit,
        "srsearch": query,
        "sroffset": 0,  # this is what will be used to pull more than the max
    }
    if suggestion:
        search_params["srinfo"] = "suggestion"

    raw_results = self.wiki_request(search_params)
    self._check_error_response(raw_results, query)

    titles = [hit["title"] for hit in raw_results["query"]["search"]]
    if not suggestion:
        return titles

    # the suggestion is only read when the response carries non-empty search info
    search_info = raw_results["query"].get("searchinfo")
    suggested = search_info["suggestion"] if search_info else None
    return titles, suggested
[docs]
@memoize
def suggest(self, query: str) -> Optional[str]:
    """Gather a title suggestion for the provided title

    Args:
        query (str): Page title
    Returns:
        String or None: The first search result (or the site's suggestion if that result is empty); \
            **None** if the search returned no results
    """
    results, suggestion = self.search(query, results=1, suggestion=True)
    try:
        first_hit = results[0]
    except IndexError:  # page doesn't exist
        return None
    return first_hit or suggestion
[docs]
@memoize
def geosearch(
self,
latitude: Union[Decimal, float, None] = None,
longitude: Union[Decimal, float, None] = None,
radius: int = 1000,
title: Optional[str] = None,
auto_suggest: bool = True,
results: int = 10,
) -> List[str]:
"""Search for pages that relate to the provided geocoords or near
the page
Args:
latitude (Decimal or None): Latitude geocoord; must be coercible to decimal
longitude (Decimal or None): Longitude geocoord; must be coercible to decimal
radius (int): Radius around page or geocoords to pull back; in meters
title (str): Page title to use as a geocoordinate; this has precedence over lat/long
auto_suggest (bool): Auto-suggest the page title
results (int): Number of pages within the radius to return
Returns:
list: A listing of page titles
Note:
The Geosearch API does not support pulling more than the maximum of 500
Note:
If the page doesn't match the provided title, try setting auto_suggest to `False`
Raises:
ValueError: If either the passed latitude or longitude are not coercible to a Decimal
"""
def test_lat_long(val):
"""handle testing lat and long"""
if not isinstance(val, Decimal):
error = (
"Latitude and Longitude must be specified either as "
"a Decimal or in formats that can be coerced into "
"a Decimal."
)
try:
return Decimal(val)
except (DecimalException, TypeError) as exc:
raise ValueError(error) from exc
return val
# end local function
max_pull = 500
limit = min(results, max_pull) if results is not None else max_pull
params = {"list": "geosearch", "gsradius": radius, "gslimit": limit}
if title is not None:
if auto_suggest:
title = self.suggest(title)
params["gspage"] = title
else:
lat = test_lat_long(latitude)
lon = test_lat_long(longitude)
params["gscoord"] = f"{lat}|{lon}"
raw_results = self.wiki_request(params)
self._check_error_response(raw_results, title if title else "Page Title Not Provided")
return [d["title"] for d in raw_results["query"]["geosearch"]]
[docs]
@memoize
def opensearch(self, query: str, results: int = 10, redirect: bool = True) -> List[Tuple[str, str, str]]:
    """Execute a MediaWiki opensearch request, similar to search box
    suggestions and conforming to the OpenSearch specification

    Args:
        query (str): Title to search for
        results (int): Number of results to return; **None** requests the maximum of 500
        redirect (bool): If **False** return the redirect itself, otherwise resolve redirects
    Returns:
        List: List of results that are stored in a tuple (Title, Summary, URL)
    Note:
        The Opensearch API does not support pulling more than the maximum of 500
    Raises:
        ValueError: if the query is **None** or blank
    """
    self._check_query(query, "Query must be specified")

    max_pull = 500
    limit = max_pull if results is None else min(results, max_pull)
    redirects = "resolve" if redirect else "return"
    query_params = {
        "action": "opensearch",
        "search": query,
        "limit": limit,
        "redirects": redirects,
        "warningsaserror": True,
        "namespace": "",
    }

    out = self.wiki_request(query_params)
    self._check_error_response(out, query)

    # the response is positional: out[1] holds titles, out[2] summaries, out[3] URLs
    return [(found_title, out[2][idx], out[3][idx]) for idx, found_title in enumerate(out[1])]
[docs]
@memoize
def prefixsearch(self, prefix: str, results: int = 10) -> List[str]:
    """Perform a prefix search using the provided prefix string

    Args:
        prefix (str): Prefix string to use for search
        results (int): Number of pages with the prefix to return; **None** or more than 500 \
            requests the API maximum
    Returns:
        list: List of page titles
    Raises:
        ValueError: if the prefix is **None** or blank
    Note:
        **Per the documentation:** "The purpose of this module is \
        similar to action=opensearch: to take user input and provide \
        the best-matching titles. Depending on the search engine \
        backend, this might include typo correction, redirect \
        avoidance, or other heuristics."
    Note:
        Could add ability to continue past the limit of 500
    """
    self._check_query(prefix, "Prefix must be specified")

    # test for None first: comparing None with an int raises TypeError
    limit = "max" if (results is None or results > 500) else results
    query_params = {
        "list": "prefixsearch",
        "pssearch": prefix,
        "pslimit": limit,
        "psnamespace": 0,
        "psoffset": 0,  # parameterize to skip to later in the list?
    }

    raw_results = self.wiki_request(query_params)
    self._check_error_response(raw_results, prefix)
    return [rec["title"] for rec in raw_results["query"]["prefixsearch"]]
[docs]
@memoize
def summary(self, title: str, sentences: int = 0, chars: int = 0, auto_suggest: bool = True, redirect: bool = True):
    """Get the summary for the title in question

    Args:
        title (str): Page title to summarize
        sentences (int): Number of sentences to return in summary
        chars (int): Number of characters to return in summary
        auto_suggest (bool): Run auto-suggest on title before summarizing
        redirect (bool): Use page redirect on title before summarizing
    Returns:
        str: The summarized results of the page
    Note:
        Precedence for parameters: sentences then chars; if both are \
        0 then the entire first section is returned
    Note:
        If the page doesn't match the provided title, try setting auto_suggest to `False`"""
    # the page object does the summarizing; this method only looks the page up
    return self.page(title, auto_suggest=auto_suggest, redirect=redirect).summarize(sentences, chars)
[docs]
@memoize
def categorymembers(
    self, category: str, results: int = 10, subcategories: bool = True
) -> Union[List[str], Tuple[List[str], List[str]]]:
    """Get information about a category: pages and subcategories

    Args:
        category (str): Category name
        results (int): Number of result
        subcategories (bool): Include subcategories (**True**) or not (**False**)
    Returns:
        Tuple or List: Either a tuple ([pages], [subcategories]) or just the list of pages
    Raises:
        ValueError: if the category is **None** or blank
    Note:
        Set results to **None** to get all results"""
    self._check_query(category, "Category must be specified")

    max_pull = 500
    prefix = self.category_prefix
    base_params = {
        "list": "categorymembers",
        "cmprop": "ids|title|type",
        "cmtype": ("page|subcat|file" if subcategories else "page|file"),
        "cmlimit": (max_pull if results is None else min(results, max_pull)),
        "cmtitle": f"{prefix}:{category}",
    }

    pages = []
    subcats = []
    fetched = 0
    continuation: Dict = {}
    while True:
        raw_res = self.wiki_request({**base_params, **continuation})
        self._check_error_response(raw_res, category)

        members = raw_res["query"]["categorymembers"]
        for member in members:
            kind = member["type"]
            if kind == "subcat":
                name = member["title"]
                # subcategory titles are reported with the category prefix; strip "<prefix>:"
                if name.startswith(prefix):
                    name = name[len(prefix) + 1 :]
                subcats.append(name)
            elif kind in ("page", "file"):
                pages.append(member["title"])

        # prefer the "query-continue" form of the continuation, fall back to "continue"
        next_cont = raw_res.get("query-continue", False)
        if next_cont and "categorymembers" in next_cont:
            next_cont = next_cont["categorymembers"]
        else:
            next_cont = raw_res.get("continue", False)

        # nothing further to fetch, or the site handed back the same continuation again
        if next_cont is False or continuation == next_cont:
            break

        fetched += len(members)
        if results is not None:
            remaining = results - fetched
            if remaining <= 0:
                break
            if remaining < max_pull:
                # only ask for what is still missing
                base_params["cmlimit"] = remaining
        continuation = next_cont

    return (pages, subcats) if subcategories else pages
[docs]
def categorytree(self, category: str, depth: int = 5) -> Dict[str, Any]:
    """Generate the Category Tree for the given categories

    Args:
        category(str or list of strings): Category name(s)
        depth(int): Depth to traverse the tree
    Returns:
        dict: Category tree structure
    Raises:
        ValueError: if no category is given or depth is smaller than 1
    Note:
        Set depth to **None** to get the whole tree
    Note:
        Return Data Structure: every entry under 'sub-categories' repeats this structure
        >>> {
            'category': {
                'depth': Number,
                'links': list,
                'parent-categories': list,
                'sub-categories': dict
            }
        }
    .. versionadded:: 0.3.10"""
    # accept a single category as well as a list of them
    cats = category if isinstance(category, list) else [category]
    self.__category_parameter_verification(cats, depth, category)

    results: Dict = {}
    # both lookups are shared by every root category so nothing is fetched twice
    categories: Dict = {}
    links: Dict = {}

    for cat in cats:
        if cat:  # blank entries are skipped
            self.__cat_tree_rec(cat, depth, results, 0, categories, links)
    return results
[docs]
def page(self, title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Get MediaWiki page based on the provided title or pageid

    Args:
        title (str): Page title
        pageid (int): MediaWiki page identifier
        auto-suggest (bool): **True:** Allow page title auto-suggest
        redirect (bool): **True:** Follow page redirects
        preload (bool): **True:** Load most page properties
    Raises:
        ValueError: when title is blank or None and no pageid is provided
    Raises:
        :py:func:`mediawiki.exceptions.PageError`: if page does not exist
    Note:
        Title takes precedence over pageid if both are provided
    Note:
        If the page doesn't match the provided title, try setting auto_suggest to `False`"""
    title_is_blank = title is None or title.strip() == ""
    if title_is_blank and pageid is None:
        raise ValueError("Either a title or a pageid must be specified")

    if not title:
        # no usable title: look the page up by its identifier
        return MediaWikiPage(self, pageid=pageid, preload=preload)

    if auto_suggest:
        suggested = self.suggest(title)
        if suggested is None:  # page doesn't exist
            raise PageError(title=title)
        title = suggested
    return MediaWikiPage(self, title, redirect=redirect, preload=preload)
[docs]
def wiki_request(self, params: Dict[str, Any]) -> Dict[Any, Any]:
    """Make a request to the MediaWiki API using the given search
    parameters

    Args:
        params (dict): Request parameters; ``format`` is set to ``json`` and ``action`` \
            defaults to ``query``. The passed dictionary itself is updated.
    Returns:
        A parsed dict of the JSON response
    Note:
        Useful when wanting to query the MediaWiki site for some \
        value that is not part of the wrapper API
    Note:
        With rate limiting on, the call sleeps until the minimum wait since the \
        previous call has passed"""
    params["format"] = "json"
    params.setdefault("action", "query")

    config = self._config
    last_call = config._rate_limit_last_call
    if config.rate_limit and last_call:
        # read the clock once: a second reading could make the wait negative,
        # and time.sleep() rejects negative values
        remaining = (last_call + config.rate_limit_min_wait) - datetime.now()
        wait_seconds = remaining.total_seconds()
        if wait_seconds > 0:
            # call time too quick for rate limited api requests, wait
            time.sleep(wait_seconds)

    req = self._get_response(params)

    if config.rate_limit:
        config._rate_limit_last_call = datetime.now()

    return req
# Protected functions
def _get_site_info(self):
    """Request the site information and store the API version, base URL and extension names

    Raises:
        MediaWikiException: if the response has no general site information or no server entry
    """
    response = self.wiki_request({"meta": "siteinfo", "siprop": "extensions|general"})

    # parse what we need out here!
    query = response.get("query", None)
    gen = None if query is None else query.get("general", None)
    if gen is None:
        raise MediaWikiException("Missing query in response")

    # the version is the second word of the generator string, up to any "-" suffix
    version_token = gen["generator"].split(" ")[1]
    api_version = version_token.split("-")[0]
    self._api_version = tuple(int(part) for part in api_version.split("."))
    self._api_version_str = ".".join(str(part) for part in self._api_version)

    # parse the base url out
    server = gen.get("server", "")
    if server == "":
        raise MediaWikiException("Unable to parse base url")
    if server.startswith(("http://", "https://")):
        self._base_url = server
    else:
        # the server entry has no scheme of its own; take it from the "base" entry
        scheme = "https:" if gen["base"].startswith("https:") else "http:"
        self._base_url = f"{scheme}{server}"

    # unique extension names, sorted
    self._extensions = sorted({ext["name"] for ext in query["extensions"]})

# end _get_site_info
@staticmethod
def _check_error_response(response, query: str):
    """Raise the matching exception when the response carries an error entry

    Known timeout messages raise HTTPTimeoutError (built from the query), known
    geo coordinate messages raise MediaWikiGeoCoordError, anything else raises
    MediaWikiException. A response without an error entry is accepted silently."""
    if "error" not in response:
        return

    err = response["error"]["info"]
    if err in ("HTTP request timed out.", "Pool queue is full"):
        raise HTTPTimeoutError(query)
    if err in (
        "Page coordinates unknown.",
        "One of the parameters gscoord, gspage, gsbbox is required",
        "Invalid coordinate provided",
    ):
        raise MediaWikiGeoCoordError(err)
    raise MediaWikiException(err)
@staticmethod
def _check_query(value, message: str):
    """Raise ValueError with the given message when the value is None or only whitespace"""
    is_blank = value is None or value.strip() == ""
    if is_blank:
        raise ValueError(message)
@staticmethod
def __category_parameter_verification(cats, depth, category):
    """Validate the category tree parameters

    Raises:
        ValueError: if the only category given is None or an empty string, or if depth is
            neither None nor greater than 0
    """
    lone_blank = len(cats) == 1 and (cats[0] is None or cats[0] == "")
    if lone_blank:
        raise ValueError(
            "CategoryTree: Parameter 'category' must either "
            "be a list of one or more categories or a string; "
            f"provided: '{category}'"
        )

    if depth is not None and depth < 1:
        raise ValueError(
            "CategoryTree: Parameter 'depth' must be either None (for the full tree) or be greater than 0"
        )
def __cat_tree_rec(
    self, cat: str, depth: int, tree: Dict[str, Any], level: int, categories: Dict[str, Any], links: Dict[str, Any]
):
    """Recursively add the entry for one category, and its sub-categories, to the tree

    Args:
        cat (str): Category name, without the category prefix
        depth (int): Level at which recursion stops; a falsy value means no limit
        tree (dict): Tree level that receives the entry for this category
        level (int): Level of this category within the tree
        categories (dict): Category pages fetched so far, keyed by category name
        links (dict): Category member results fetched so far, keyed by category name
    Raises:
        MediaWikiCategoryTreeError: if the category still cannot be loaded after repeated attempts
        PageError: if the category page does not exist
    """
    node = {"depth": level, "sub-categories": {}, "links": [], "parent-categories": []}
    tree[cat] = node

    parent_cats = []
    if cat in categories:
        parent_cats = categories[cat].categories
    else:
        # 11 attempts in total, sleeping one second after each failed one
        for _attempt in range(11):
            try:
                pag = self.page(f"{self.category_prefix}:{cat}")
                categories[cat] = pag
                parent_cats = categories[cat].categories
                links[cat] = self.categorymembers(cat, results=None, subcategories=True)
                break
            except PageError as exc:
                # a missing page will not appear on a retry
                raise PageError(f"{self.category_prefix}:{cat}") from exc
            except KeyboardInterrupt as exc:
                raise exc
            except Exception:
                # TODO: Should this really sleep?
                time.sleep(1)
        else:
            raise MediaWikiCategoryTreeError(cat)

    node["parent-categories"].extend(parent_cats)
    member_pages, member_subcats = links[cat][0], links[cat][1]
    node["links"].extend(member_pages)

    if depth and level >= depth:
        # depth limit reached: list the sub-categories without descending into them
        for ctg in member_subcats:
            node["sub-categories"][ctg] = None
    else:
        for ctg in member_subcats:
            self.__cat_tree_rec(ctg, depth, node["sub-categories"], level + 1, categories, links)
def _get_response(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Send a GET request to the API URL and return the decoded JSON body

    Args:
        params (dict): Query parameters of the request
    Returns:
        dict: The decoded response; an empty dict when the body is not valid JSON
    """
    # keep the request outside the try block so request errors are never mistaken for bad JSON
    response = self._session.get(self._config.api_url, params=params, timeout=self._config.timeout)
    try:
        return response.json()
    except ValueError:
        # every JSON decode error (json, simplejson, requests) derives from ValueError
        return {}
def _post_response(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Send a POST request to the API URL and return the decoded JSON body

    Args:
        params (dict): Form data of the request
    Returns:
        dict: The decoded response; an empty dict when the body is not valid JSON
    """
    # keep the request outside the try block so request errors are never mistaken for bad JSON
    response = self._session.post(self._config.api_url, data=params, timeout=self._config.timeout)
    try:
        return response.json()
    except ValueError:
        # every JSON decode error (json, simplejson, requests) derives from ValueError
        return {}
# end MediaWiki class