Updating documentation

This commit is contained in:
Ad5001 2023-11-03 01:33:56 +01:00
parent 014d7ecda0
commit 11c62b090b
Signed by: Ad5001
GPG key ID: EF45F9C6AFE20160
7 changed files with 155 additions and 53 deletions

1
.gitignore vendored
View file

@ -1,5 +1,6 @@
*/*.pyc */*.pyc
*/__pycache__ */__pycache__
html
.idea .idea
.coverage .coverage
.pytest_cache .pytest_cache

View file

@ -1,3 +1,29 @@
# pybergamot # pybergamot
(Somewhat) stable interface for the **Bergamot Translation Engine Python Bindings**. (Somewhat) stable interface for the **Bergamot Translation Engine Python Bindings**.
## Generate documentation
Documentation for `pybergamot` can be generated using [pdoc](https://pdoc.dev).
Use the script `build-doc.sh` to generate the documentation directly.
## Legal
pybergamot - (Somewhat) stable interface for the **Bergamot Translation Engine Python Bindings**.
Copyright (C) 2023 Ad5001 <mail@ad5001.eu>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
This project is not affiliated with the Bergamot Project or Mozilla.

6
build-doc.sh Executable file
View file

@ -0,0 +1,6 @@
#!/usr/bin/env bash
python3 -m pdoc --force --html pybergamot
cd html/pybergamot || exit
python3 -m http.server 8080

View file

@ -1 +1,6 @@
"""
Welcome to pybergamot!
"""
from .translator import Translator from .translator import Translator

View file

@ -46,18 +46,27 @@ class Engine(ABC):
def translate(self, text: str, html: bool = False, alignment: bool = False, quality_scores: bool = False) -> str: def translate(self, text: str, html: bool = False, alignment: bool = False, quality_scores: bool = False) -> str:
""" """
Translates the text from the engine's source lang to its target. Translates the text from the engine's source lang to its target.
:param text: Text to translate.
:param html: Set to True if the text contains an HTML structure which needs to ## Parameters
be preserved while translated. ---
:param alignment: Toggle for alignment. - **text**: Text to translate.
:param quality_scores: Toggle for whether to include the translation's quality scores - **html**: Set to True if the text contains an HTML structure which needs to
for each word in HTML format. be preserved while translated.
:return: The translated text. - **alignment**: Toggle for alignment.
- **quality_scores**: Toggle for whether to include the translation's quality scores
for each word in HTML format.
## Returns
---
The translated text.
""" """
pass pass
class DirectBergamotModelEngine(Engine): class DirectBergamotModelEngine(Engine):
"""
Internal module. Engine using a single bergamot model to translate.
"""
def __init__(self, source_lang: str, target_lang: str, model: TranslationModel, def __init__(self, source_lang: str, target_lang: str, model: TranslationModel,
service: Service): service: Service):
self._source_lang = source_lang self._source_lang = source_lang
@ -82,6 +91,9 @@ class DirectBergamotModelEngine(Engine):
class ChainBergamotModelsEngine(Engine): class ChainBergamotModelsEngine(Engine):
"""
Internal module. Engine chaining two bergamot models to translate.
"""
def __init__(self, source_lang: str, target_lang: str, model1: TranslationModel, def __init__(self, source_lang: str, target_lang: str, model1: TranslationModel,
model2: TranslationModel, service: Service): model2: TranslationModel, service: Service):
self._source_lang = source_lang self._source_lang = source_lang

View file

@ -64,10 +64,19 @@ class Models:
def get_model_languages(model_name: str) -> tuple: def get_model_languages(model_name: str) -> tuple:
""" """
Returns a tuple of two two-char ISO language names which the model translates from and to. Returns a tuple of two two-char ISO language names which the model translates from and to.
:param model_name: Name of the model
:raises: ## Parameters
ValueError: When the model_name doesn't exist. ---
:return: (from language, to language) - **model_name**: Name of the model
## Exceptions
---
- `ValueError`: When the model_name doesn't exist.
## Returns
---
(from language, to language)
""" """
if model_name not in Models.AVAILABLE: if model_name not in Models.AVAILABLE:
raise ValueError(f"Model {model_name} does not exist. Did you update the repository cache?") raise ValueError(f"Model {model_name} does not exist. Did you update the repository cache?")
@ -83,9 +92,16 @@ class Models:
def get_model_name_for_languages(source_lang: str, target_lang: str) -> str | None: def get_model_name_for_languages(source_lang: str, target_lang: str) -> str | None:
""" """
Finds a model which translates source_lang into target_lang. Finds a model which translates source_lang into target_lang.
:param source_lang: Language to translate from.
:param target_lang: Language to translate to. ## Parameters
:return: None if no model was found, name of the model otherwise. ---
- **source_lang**: Language to translate from.
- **target_lang**: Language to translate to.
## Returns
---
None if no model was found, name of the model otherwise.
""" """
lang_tuple = (source_lang, target_lang) lang_tuple = (source_lang, target_lang)
names = list(filter(lambda name: lang_tuple == Models.get_model_languages(name), Models.AVAILABLE)) names = list(filter(lambda name: lang_tuple == Models.get_model_languages(name), Models.AVAILABLE))
@ -99,9 +115,14 @@ class Models:
def download(model_name: str) -> None: def download(model_name: str) -> None:
""" """
Downloads or updates the given model. Downloads or updates the given model.
:param model_name: Name of the model to download.
:raises: ## Parameters
ValueError: When the model_name doesn't exist. ---
- **model_name**: Name of the model to download.
## Exceptions
---
- `ValueError`: When the model_name doesn't exist.
""" """
if model_name not in Models.AVAILABLE: if model_name not in Models.AVAILABLE:
raise ValueError(f"Model {model_name} does not exist. Did you update the repository cache?") raise ValueError(f"Model {model_name} does not exist. Did you update the repository cache?")

View file

@ -27,17 +27,21 @@ class Translator:
""" """
Main exposed class to provide translation using Bergamot. Main exposed class to provide translation using Bergamot.
Workflow goes as follows: Workflow goes as follows:
1. Create instance 1. Create instance
2. Load languages 2. Load languages
3. Use translation between any of the loaded languages. 3. Use translation between any of the loaded languages.
""" """
def __init__(self, workers_count = 1, cache_size = 0, log_level = 'off'): def __init__(self, workers_count=1, cache_size=0, log_level='off'):
""" """
Creates a Translator instance. Creates a Translator instance.
:param workers_count: Number of workers which can be used at once.
:param cache_size: Size of the cache used in bergamot.. ## Parameters
:param log_level: Level of logs used in bergamot. ---
- **workers_count**: Number of workers which can be used at once.
- **cache_size**: Size of the cache used in bergamot.
- **log_level**: Level of logs used in bergamot.
""" """
self.loaded_languages = [] self.loaded_languages = []
self._loaded_engines = {} self._loaded_engines = {}
@ -47,13 +51,21 @@ class Translator:
def _load_model(self, model_name: str, download: bool = True) -> TranslationModel: def _load_model(self, model_name: str, download: bool = True) -> TranslationModel:
""" """
Loads a tiny model by its name, downloads it if it doesn't exist. Loads a tiny model by its name, downloads it if it doesn't exist.
:param model_name: Name of the model to load.
:param download: If a model does not exist locally, if True, download it, ## Parameters
otherwise emit an error. ---
:raises: - **model_name**: Name of the model to load.
ValueError: If the provided model does not exist. - **download**: If a model does not exist locally, if True, download it,
EnvironmentError: When a model is unavailable and download has been set to false. otherwise emit an error.
:return: Bergamot translation model instance.
## Exceptions
---
- `ValueError`: If the provided model does not exist.
- `EnvironmentError`: When a model is unavailable and download has been set to false.
## Returns
---
Bergamot translation model instance.
""" """
if model_name not in Models.AVAILABLE: if model_name not in Models.AVAILABLE:
raise ValueError(f"Model {model_name} not available.") raise ValueError(f"Model {model_name} not available.")
@ -71,14 +83,22 @@ class Translator:
def _create_engine(self, source_lang: str, target_lang: str, download: bool = True) -> Engine: def _create_engine(self, source_lang: str, target_lang: str, download: bool = True) -> Engine:
""" """
Creates an Engine to translate a source lang to a target lang. Creates an Engine to translate a source lang to a target lang.
:param source_lang: Language to translate from.
:param target_lang: Language to translate to. ## Parameters
:param download: If a model does not exist locally, if True, download it, ---
otherwise emit an error. - **source_lang**: Language to translate from.
:raises: - **target_lang**: Language to translate to.
ValueError: If a model from a lang to english does not exist. - **download**: If a model does not exist locally, if True, download it,
EnvironmentError: When a model is unavailable and download has been set to false. otherwise emit an error.
:return: Engine instance.
## Exceptions
---
- `ValueError`: If a model from a lang to english does not exist.
- `EnvironmentError`: When a model is unavailable and download has been set to false.
## Returns
---
Engine instance.
""" """
direct_model_name = Models.get_model_name_for_languages(source_lang, target_lang) direct_model_name = Models.get_model_name_for_languages(source_lang, target_lang)
if direct_model_name is not None and (download or direct_model_name in Models.INSTALLED): if direct_model_name is not None and (download or direct_model_name in Models.INSTALLED):
@ -107,12 +127,16 @@ class Translator:
Loads a language code and all the associated models (for already added languages) Loads a language code and all the associated models (for already added languages)
into the translator. into the translator.
:param lang: Two-char ISO language name. ## Parameters
:param download: If a model does not exist locally, if True, download it, ---
otherwise emit an error. - **lang**: Two-char ISO language name.
:raises: - **download**: If a model does not exist locally, if True, download it,
ValueError: If a model from a lang to english does not exist. otherwise emit an error.
EnvironmentError: When a model is unavailable and download has been set to false.
## Exceptions
---
- `ValueError`: If a model from a lang to english does not exist.
- `EnvironmentError`: When a model is unavailable and download has been set to false.
""" """
if lang not in Models.LANGS: if lang not in Models.LANGS:
raise ValueError(f"Language {lang} does not exist.") raise ValueError(f"Language {lang} does not exist.")
@ -136,20 +160,27 @@ class Translator:
""" """
Translates a text from a source lang to a target lang. Translates a text from a source lang to a target lang.
:param source_lang: Language to translate from. ## Parameters
:param target_lang: Language to translate to. ---
:param text: Text to translate. - **source_lang**: Language to translate from.
:param html: Set to True if the text contains an HTML structure which needs to - **target_lang**: Language to translate to.
be preserved while translated. - **text**: Text to translate.
:param alignment: Toggle for alignment. - **html**: Set to True if the text contains an HTML structure which needs to
:param quality_scores: Toggle for whether to include the translation's quality scores be preserved while translated.
- **alignment**: Toggle for alignment.
- **quality_scores**: Toggle for whether to include the translation's quality scores
for each word in HTML format. for each word in HTML format.
:raises:
ValueError: Either source_lang or target_lang haven't been loaded yet. ## Exceptions
:return: The translated text. ---
- `ValueError`: Either source_lang or target_lang hasn't been loaded yet.
## Returns
---
The translated text.
""" """
if source_lang not in self.loaded_languages: if source_lang not in self.loaded_languages:
raise ValueError(f"Language {source_lang} is not loaded. Use the load() function first.") raise ValueError(f"Language {source_lang} is not loaded. Use the load() function first.")
if target_lang not in self.loaded_languages: if target_lang not in self.loaded_languages:
raise ValueError(f"Language {target_lang} is not loaded. Use the load() function first.") raise ValueError(f"Language {target_lang} is not loaded. Use the load() function first.")
return self._loaded_engines[source_lang][target_lang].translate(text, html, alignment, quality_scores) return self._loaded_engines[source_lang][target_lang].translate(text, html, alignment, quality_scores)