# Stub metadata (from the original file header): 206 lines, 5.7 KiB, Python.
"""
|
|
This type stub file was generated by pyright.
|
|
"""
|
|
|
|
from __future__ import annotations

import dataclasses
import functools
import html
import json
import pickle
import re
from collections.abc import Iterable
from importlib import import_module
from os import path
from typing import IO, TYPE_CHECKING, Any

from docutils import nodes
from docutils.nodes import Element, Node

from sphinx import addnodes, package_dir
from sphinx.environment import BuildEnvironment
from sphinx.search.en import SearchEnglish
from sphinx.util.index_entries import split_index_msg
|
|
|
|
"""Create a full-text search index for offline search."""
|
|
if TYPE_CHECKING:
|
|
...
|
|
class SearchLanguage:
|
|
"""
|
|
This class is the base class for search natural language preprocessors. If
|
|
you want to add support for a new language, you should override the methods
|
|
of this class.
|
|
|
|
You should override `lang` class property too (e.g. 'en', 'fr' and so on).
|
|
|
|
.. attribute:: stopwords
|
|
|
|
This is a set of stop words of the target language. Default `stopwords`
|
|
is empty. This word is used for building index and embedded in JS.
|
|
|
|
.. attribute:: js_splitter_code
|
|
|
|
Return splitter function of JavaScript version. The function should be
|
|
named as ``splitQuery``. And it should take a string and return list of
|
|
strings.
|
|
|
|
.. versionadded:: 3.0
|
|
|
|
.. attribute:: js_stemmer_code
|
|
|
|
Return stemmer class of JavaScript version. This class' name should be
|
|
``Stemmer`` and this class must have ``stemWord`` method. This string is
|
|
embedded as-is in searchtools.js.
|
|
|
|
This class is used to preprocess search word which Sphinx HTML readers
|
|
type, before searching index. Default implementation does nothing.
|
|
"""
|
|
lang: str | None = ...
|
|
language_name: str | None = ...
|
|
stopwords: set[str] = ...
|
|
js_splitter_code: str = ...
|
|
js_stemmer_rawcode: str | None = ...
|
|
js_stemmer_code = ...
|
|
_word_re = ...
|
|
def __init__(self, options: dict) -> None:
|
|
...
|
|
|
|
def init(self, options: dict) -> None:
|
|
"""
|
|
Initialize the class with the options the user has given.
|
|
"""
|
|
...
|
|
|
|
def split(self, input: str) -> list[str]:
|
|
"""
|
|
This method splits a sentence into words. Default splitter splits input
|
|
at white spaces, which should be enough for most languages except CJK
|
|
languages.
|
|
"""
|
|
...
|
|
|
|
def stem(self, word: str) -> str:
|
|
"""
|
|
This method implements stemming algorithm of the Python version.
|
|
|
|
Default implementation does nothing. You should implement this if the
|
|
language has any stemming rules.
|
|
|
|
This class is used to preprocess search words before registering them in
|
|
the search index. The stemming of the Python version and the JS version
|
|
(given in the js_stemmer_code attribute) must be compatible.
|
|
"""
|
|
...
|
|
|
|
def word_filter(self, word: str) -> bool:
|
|
"""
|
|
Return true if the target word should be registered in the search index.
|
|
This method is called after stemming.
|
|
"""
|
|
...
|
|
|
|
|
|
|
|
def parse_stop_word(source: str) -> set[str]:
    """Parse a snowball-style stop-word list into a set of words.

    The expected input format is the one used by the snowball project, e.g.:

    * http://snowball.tartarus.org/algorithms/finnish/stop.txt
    """
    ...
|
|
|
|
languages: dict[str, str | type[SearchLanguage]] = ...
|
|
class _JavaScriptIndex:
|
|
"""
|
|
The search index as JavaScript file that calls a function
|
|
on the documentation search object to register the index.
|
|
"""
|
|
PREFIX = ...
|
|
SUFFIX = ...
|
|
def dumps(self, data: Any) -> str:
|
|
...
|
|
|
|
def loads(self, s: str) -> Any:
|
|
...
|
|
|
|
def dump(self, data: Any, f: IO) -> None:
|
|
...
|
|
|
|
def load(self, f: IO) -> Any:
|
|
...
|
|
|
|
|
|
|
|
js_index = ...
|
|
@dataclasses.dataclass
|
|
class WordStore:
|
|
words: list[str] = ...
|
|
titles: list[tuple[str, str]] = ...
|
|
title_words: list[str] = ...
|
|
|
|
|
|
class WordCollector(nodes.NodeVisitor):
|
|
"""
|
|
A special visitor that collects words for the `IndexBuilder`.
|
|
"""
|
|
def __init__(self, document: nodes.document, lang: SearchLanguage) -> None:
|
|
...
|
|
|
|
def dispatch_visit(self, node: Node) -> None:
|
|
...
|
|
|
|
|
|
|
|
class IndexBuilder:
|
|
"""
|
|
Helper class that creates a search index based on the doctrees
|
|
passed to the `feed` method.
|
|
"""
|
|
formats = ...
|
|
def __init__(self, env: BuildEnvironment, lang: str, options: dict, scoring: str) -> None:
|
|
...
|
|
|
|
def load(self, stream: IO, format: Any) -> None:
|
|
"""Reconstruct from frozen data."""
|
|
...
|
|
|
|
def dump(self, stream: IO, format: Any) -> None:
|
|
"""Dump the frozen index to a stream."""
|
|
...
|
|
|
|
def get_objects(self, fn2index: dict[str, int]) -> dict[str, list[tuple[int, int, int, str, str]]]:
|
|
...
|
|
|
|
def get_terms(self, fn2index: dict) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
|
|
...
|
|
|
|
def freeze(self) -> dict[str, Any]:
|
|
"""Create a usable data structure for serializing."""
|
|
...
|
|
|
|
def label(self) -> str:
|
|
...
|
|
|
|
def prune(self, docnames: Iterable[str]) -> None:
|
|
"""Remove data for all docnames not in the list."""
|
|
...
|
|
|
|
def feed(self, docname: str, filename: str, title: str, doctree: nodes.document) -> None:
|
|
"""Feed a doctree to the index."""
|
|
...
|
|
|
|
def context_for_searchtool(self) -> dict[str, Any]:
|
|
...
|
|
|
|
def get_js_stemmer_rawcodes(self) -> list[str]:
|
|
"""Returns a list of non-minified stemmer JS files to copy."""
|
|
...
|
|
|
|
def get_js_stemmer_rawcode(self) -> str | None:
|
|
...
|
|
|
|
def get_js_stemmer_code(self) -> str:
|
|
"""Returns JS code that will be inserted into language_data.js."""
|
|
...
|
|
|
|
|
|
|