Spaces:
Runtime error
Runtime error
| from typing import Dict, List, Set | |
| from spacy.cli.download import get_compatibility | |
def metrics_options() -> List[str]:
    """Return the names of the selectable metric groups.

    A new list is built on every call, so callers may mutate the result
    freely.
    """
    metric_names = (
        "descriptive_stats",
        "readability",
        "dependency_distance",
        "pos_proportions",
        "coherence",
        "quality",
        "information_theory",
    )
    return list(metric_names)
def language_options() -> Dict[str, str]:
    """Return a mapping from language display name to its two-letter code.

    The entries are listed alphabetically by display name and the returned
    dict preserves that order.
    """
    name_code_pairs = [
        ("Catalan", "ca"),
        ("Chinese", "zh"),
        ("Croatian", "hr"),
        ("Danish", "da"),
        ("Dutch", "nl"),
        ("English", "en"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("German", "de"),
        ("Greek", "el"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Korean", "ko"),
        ("Lithuanian", "lt"),
        ("Macedonian", "mk"),
        ("Multi-language", "xx"),
        ("Norwegian Bokmål", "nb"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Spanish", "es"),
        ("Swedish", "sv"),
        ("Ukrainian", "uk"),
    ]
    return dict(name_code_pairs)
| ################# | |
| # Model options # | |
| ################# | |
def all_model_size_options_pretty_to_short() -> Dict[str, str]:
    """Return a mapping from model-size display name to its short code."""
    size_pairs = (
        ("Small", "sm"),
        ("Medium", "md"),
        ("Large", "lg"),
        # ("Transformer", "trf") is disabled for now
    )
    return dict(size_pairs)
def all_model_size_options_short_to_pretty() -> Dict[str, str]:
    """Return the inverse of ``all_model_size_options_pretty_to_short()``.

    Keys are the short size codes and values are the display names.
    """
    pretty_by_short: Dict[str, str] = {}
    for pretty_name, short_code in all_model_size_options_pretty_to_short().items():
        pretty_by_short[short_code] = pretty_name
    return pretty_by_short
def available_model_size_options(lang) -> List[str]:
    """Return the sorted display names of the model sizes offered for *lang*.

    Args:
        lang: A language code, or the literal string ``"all"`` to get every
            known size regardless of language.

    Returns:
        Display names sorted alphabetically.
    """
    pretty_by_short = all_model_size_options_short_to_pretty()
    if lang != "all":
        # Restrict to the sizes the checker reports for this language.
        short_codes = ModelAvailabilityChecker.available_model_sizes_for_language(
            lang
        )
        pretty_names = [pretty_by_short[code] for code in short_codes]
    else:
        pretty_names = list(pretty_by_short.values())
    pretty_names.sort()
    return pretty_names
class ModelAvailabilityChecker:
    """Answer availability questions from the ``get_compatibility()`` table.

    The class holds no state. Every helper is a static method, so it can be
    called on the class itself or on an instance; without ``@staticmethod``
    an instance call would pass ``self`` as an unexpected extra argument.
    """

    @staticmethod
    def available_models() -> List[str]:
        """Return the keys of the mapping returned by ``get_compatibility()``.

        NOTE(review): these are presumably full model names such as
        ``"en_core_web_sm"`` -- confirm against the spaCy version in use.
        """
        return list(get_compatibility().keys())

    @staticmethod
    def extract_language_and_size() -> List[List[str]]:
        """Return the first and last underscore-separated part of each name.

        The result looks like ``[["ca", "sm"], ["en", "lg"], ...]``. A name
        without any underscore yields the same part twice.
        """
        pairs = []
        for model_name in ModelAvailabilityChecker.available_models():
            parts = model_name.split("_")
            pairs.append([parts[0], parts[-1]])
        return pairs

    @staticmethod
    def model_is_available(lang: str, size: str) -> bool:
        """Return whether some listed model has this language and size.

        Args:
            lang: Language code, compared with the first part of each name.
            size: Size code, compared with the last part of each name.
        """
        known = {
            "_".join(pair)
            for pair in ModelAvailabilityChecker.extract_language_and_size()
        }
        return f"{lang}_{size}" in known

    @staticmethod
    def available_model_sizes_for_language(lang: str) -> Set[str]:
        """Return the supported size codes that are listed for *lang*.

        Only sizes present in ``all_model_size_options_pretty_to_short()``
        are kept, so codes that function does not list are filtered out.
        """
        # Loop-invariant: compute the supported sizes once, not per model.
        supported = set(all_model_size_options_pretty_to_short().values())
        return {
            size
            for lang_, size in ModelAvailabilityChecker.extract_language_and_size()
            if lang_ == lang and size in supported
        }