diff --git a/babel/messages/extract.py b/babel/messages/extract.py index 3c196f0e0..ad3708538 100644 --- a/babel/messages/extract.py +++ b/babel/messages/extract.py @@ -316,13 +316,31 @@ def check_and_call_extract_file( if pathmatch(opattern, filename): options = odict break + + # Merge keywords and comment_tags from per-format options if present. + file_keywords = keywords + file_comment_tags = comment_tags + if keywords_opt := options.get("keywords"): + if not isinstance(keywords_opt, dict): # pragma: no cover + raise TypeError( + f"The `keywords` option must be a dict of parsed keywords, not {keywords_opt!r}", + ) + file_keywords = {**keywords, **keywords_opt} + + if comments_opt := options.get("add_comments"): + if not isinstance(comments_opt, (list, tuple, set)): # pragma: no cover + raise TypeError( + f"The `add_comments` option must be a collection of comment tags, not {comments_opt!r}.", + ) + file_comment_tags = tuple(set(comment_tags) | set(comments_opt)) + if callback: callback(filename, method, options) for message_tuple in extract_from_file( method, filepath, - keywords=keywords, - comment_tags=comment_tags, + keywords=file_keywords, + comment_tags=file_comment_tags, options=options, strip_comment_tags=strip_comment_tags, ): diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 6ef62ec4a..1e13b6cc6 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -23,7 +23,7 @@ import warnings from configparser import RawConfigParser from io import StringIO -from typing import BinaryIO, Iterable, Literal +from typing import Any, BinaryIO, Iterable, Literal from babel import Locale, localedata from babel import __version__ as VERSION @@ -584,7 +584,7 @@ def _get_mappings(self): method_map, options_map = [], {} for pattern, method, options in mapping: method_map.append((pattern, method)) - options_map[pattern] = options or {} + options_map[pattern] = _parse_string_options(options or {}) mappings.append((path, method_map, 
options_map)) else: @@ -1075,7 +1075,7 @@ def parse_mapping_cfg(fileobj, filename=None): else: method, pattern = (part.strip() for part in section.split(':', 1)) method_map.append((pattern, method)) - options_map[pattern] = dict(parser.items(section)) + options_map[pattern] = _parse_string_options(dict(parser.items(section))) if extractors: for idx, (pattern, method) in enumerate(method_map): @@ -1086,6 +1086,25 @@ def parse_mapping_cfg(fileobj, filename=None): return method_map, options_map +def _parse_string_options(options: dict[str, str]) -> dict[str, Any]: + """ + Parse string-formatted options from a mapping configuration. + + The `keywords` and `add_comments` options are parsed into a canonical + internal format, so they can be merged with global keywords/comment tags + during extraction. + """ + options: dict[str, Any] = options.copy() + + if keywords_val := options.pop("keywords", None): + options['keywords'] = parse_keywords(listify_value(keywords_val)) + + if comments_val := options.pop("add_comments", None): + options['add_comments'] = listify_value(comments_val) + + return options + + def _parse_config_object(config: dict, *, filename="(unknown)"): extractors = {} method_map = [] @@ -1140,6 +1159,26 @@ def _parse_config_object(config: dict, *, filename="(unknown)"): if not isinstance(pattern, list): pattern = [pattern] + if keywords_val := entry.pop("keywords", None): + if isinstance(keywords_val, str): + entry["keywords"] = parse_keywords(listify_value(keywords_val)) + elif isinstance(keywords_val, list): + entry["keywords"] = parse_keywords(keywords_val) + else: + raise ConfigurationError( + f"{filename}: mappings[{idx}]: 'keywords' must be a string or list, got {keywords_val!r}", + ) + + if comments_val := entry.pop("add_comments", None): + if isinstance(comments_val, str): + entry["add_comments"] = [comments_val] + elif isinstance(comments_val, list): + entry["add_comments"] = comments_val + else: + raise ConfigurationError( + f"{filename}: 
mappings[{idx}]: 'add_comments' must be a string or list, got {comments_val!r}", + ) + for pat in pattern: if not isinstance(pat, str): raise ConfigurationError( diff --git a/docs/messages.rst b/docs/messages.rst index 0f57eb117..c835d60a7 100644 --- a/docs/messages.rst +++ b/docs/messages.rst @@ -139,14 +139,6 @@ Genshi markup templates and text templates: [javascript: **.js] extract_messages = $._, jQuery._ -The configuration file syntax is based on the format commonly found in ``.INI`` -files on Windows systems, and as supported by the ``ConfigParser`` module in -the Python standard library. Section names (the strings enclosed in square -brackets) specify both the name of the extraction method, and the extended glob -pattern to specify the files that this extraction method should be used for, -separated by a colon. The options in the sections are passed to the extraction -method. Which options are available is specific to the extraction method used. - The extended glob patterns used in this configuration are similar to the glob patterns provided by most shells. A single asterisk (``*``) is a wildcard for any number of characters (except for the pathname component separator "/"), @@ -155,9 +147,132 @@ two subsequent asterisk characters (``**``) can be used to make the wildcard match any directory level, so the pattern ``**.txt`` matches any file with the extension ``.txt`` in any directory. +Babel supports two configuration file formats: INI and TOML. + +INI Configuration Format +^^^^^^^^^^^^^^^^^^^^^^^^ + +The INI configuration file syntax is based on the format commonly found in ``.INI`` +files on Windows systems, and as supported by the ``ConfigParser`` module in +the Python standard library. Section names (the strings enclosed in square +brackets) specify both the name of the extraction method, and the extended glob +pattern to specify the files that this extraction method should be used for, +separated by a colon. 
The options in the sections are passed to the extraction +method. Which options are available is specific to the extraction method used. + Lines that start with a ``#`` or ``;`` character are ignored and can be used for comments. Empty lines are ignored, too. +TOML Configuration Format +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Babel also supports TOML format for configuration files, when the ``tomllib`` +module is available (Python 3.11+), or when the ``tomli`` package is installed +(for Python versions prior to 3.11). + +TOML provides a more structured format and is particularly useful when combined +with ``pyproject.toml``. + +The same configuration examples shown above can be written in TOML format: + +.. code-block:: toml + + # Extraction from Python source files + [[mappings]] + method = "python" + pattern = "**.py" + + # Extraction from Genshi HTML and text templates + [[mappings]] + method = "genshi" + pattern = "**/templates/**.html" + ignore_tags = "script,style" + include_attrs = "alt title summary" + + [[mappings]] + method = "genshi" + pattern = "**/templates/**.txt" + template_class = "genshi.template:TextTemplate" + encoding = "ISO-8859-15" + + # Extraction from JavaScript files + [[mappings]] + method = "javascript" + pattern = "**.js" + extract_messages = "$._, jQuery._" + +In TOML format, each ``[[mappings]]`` section defines a mapping. The ``method`` +and ``pattern`` fields are required. The ``pattern`` field can be a string or +an array of strings to match multiple patterns with the same configuration. + +If you're using ``pyproject.toml``, nest the configuration under ``[tool.babel]``: + +.. code-block:: toml + + [tool.babel] + [[tool.babel.mappings]] + method = "python" + pattern = "**.py" + +You can reference custom extractors in both formats. In TOML: + +.. 
code-block:: toml + + [extractors] + custom = "mypackage.module:extract_custom" + + [[mappings]] + method = "custom" + pattern = "**.ctm" + some_option = "foo" + +Common Options +^^^^^^^^^^^^^^ + +In addition to extractor-specific options, the following options can be specified +in any mapping section and will be merged with global settings: + +``keywords`` + A list of keywords (function names) to extract messages from. + This uses the same syntax as the ``--keyword`` command-line option. + Keywords specified here are added to (rather than replacing) the default keywords or + those specified via the command line. + + In INI format, whitespace-separated: ``keywords = _ gettext ngettext:1,2 pgettext:1c,2`` + + In TOML format, use either a whitespace-separated string or an array: + ``keywords = "_ gettext ngettext:1,2"`` or + ``keywords = ["_", "gettext", "ngettext:1,2"]`` + +``add_comments`` + A list of comment tag prefixes to extract and include in the + output. This uses the same syntax as the ``--add-comments`` command-line option. + Comment tags specified here are added to those specified via the command line. + + In INI format, whitespace-separated: ``add_comments = TRANSLATOR: NOTE:`` + + In TOML format, use either a string or an array: + ``add_comments = "TRANSLATOR NOTE:"`` (taken as one single tag, not split on whitespace!) or + ``add_comments = ["TRANSLATOR:", "NOTE:"]`` + +**Example in INI format:** + +.. code-block:: ini + + [python: **.py] + keywords = _ _l _n:1,2 + add_comments = TRANSLATOR: + +**Example in TOML format:** + +.. code-block:: toml + + [[mappings]] + method = "python" + pattern = "**.py" + keywords = ["_", "_l", "_n:1,2"] + add_comments = ["TRANSLATOR:"] + .. 
note:: if you're performing message extraction using the command Babel provides for integration into ``setup.py`` scripts, you can also provide this configuration in a different way, namely as a keyword diff --git a/tests/messages/data/mapping_with_keywords.cfg b/tests/messages/data/mapping_with_keywords.cfg new file mode 100644 index 000000000..710e68187 --- /dev/null +++ b/tests/messages/data/mapping_with_keywords.cfg @@ -0,0 +1,5 @@ +# Test mapping file with keywords option (issue #1224) + +[python: **.py] +encoding = utf-8 +keywords = _ _l _n:1,2 _nl:1,2 _p:1c,2 _pl:1c,2 _np:1c,2,3 _npl:1c,2,3 diff --git a/tests/messages/data/mapping_with_keywords_and_comments.toml b/tests/messages/data/mapping_with_keywords_and_comments.toml new file mode 100644 index 000000000..0a5135f14 --- /dev/null +++ b/tests/messages/data/mapping_with_keywords_and_comments.toml @@ -0,0 +1,8 @@ +# Test mapping file with keywords and add_comments options (issue #1224) + +[[mappings]] +method = "python" +pattern = "**.py" +encoding = "utf-8" +keywords = ["_", "_l", "_n:1,2"] +add_comments = ["SPECIAL:"] diff --git a/tests/messages/data/project/issue_1224_test.py b/tests/messages/data/project/issue_1224_test.py new file mode 100644 index 000000000..8e4f7a608 --- /dev/null +++ b/tests/messages/data/project/issue_1224_test.py @@ -0,0 +1,12 @@ +from myproject.i18n import lazy_gettext as _l, lazy_ngettext as _n + + +class Choices: + # SPECIAL: This comment should be extracted + CHOICE_X = 1, _l("Choice X") + # SPECIAL: Another special comment + CHOICE_Y = 2, _l("Choice Y") + # No comment... + OPTION_C = 3, _l("Option C") + # Test for _n too! (but no comment... shush...) 
+ OPTION_A = 4, (_n("Option A", "Options of the A kind", 1)) diff --git a/tests/messages/frontend/test_extract.py b/tests/messages/frontend/test_extract.py index 1c4532f5f..712200fbb 100644 --- a/tests/messages/frontend/test_extract.py +++ b/tests/messages/frontend/test_extract.py @@ -281,3 +281,54 @@ def test_extraction_add_location_file(extract_cmd, pot_file): """ assert expected_content == pot_file.read_text() + + +def test_extraction_with_mapping_file_with_keywords(extract_cmd, pot_file): + """ + Test that keywords specified in mapping config file are properly parsed, + and merged with default keywords. + """ + extract_cmd.mapping_file = 'mapping_with_keywords.cfg' + extract_cmd.output_file = pot_file + extract_cmd.input_paths = 'project' + + extract_cmd.finalize_options() + extract_cmd.run() + + with pot_file.open() as f: + catalog = read_po(f) + + for msgid in ('bar', 'Choice X', 'Choice Y', 'Option C', 'Option A'): + msg = catalog[msgid] + assert not msg.auto_comments # This configuration didn't specify SPECIAL:... + assert msg.pluralizable == (msgid == 'Option A') + + +def test_extraction_with_mapping_file_with_comments(extract_cmd, pot_file): + """ + Test that add_comments specified in mapping config file are properly parsed. + Uses TOML format to test that code path. 
+ """ + extract_cmd.mapping_file = 'mapping_with_keywords_and_comments.toml' + extract_cmd.output_file = pot_file + extract_cmd.input_paths = 'project/issue_1224_test.py' + + extract_cmd.finalize_options() + extract_cmd.run() + + with pot_file.open() as f: + catalog = read_po(f) + + # Check that messages were extracted and have the expected auto_comments + for msgid, expected_comment in [ + ('Choice X', 'extracted'), + ('Choice Y', 'special'), + ('Option C', None), + ('Option A', None), + ]: + msg = catalog[msgid] + if expected_comment: + assert any('SPECIAL' in comment and expected_comment in comment for comment in msg.auto_comments) + else: + assert not msg.auto_comments + assert msg.pluralizable == (msgid == 'Option A') diff --git a/tests/messages/test_toml_config.py b/tests/messages/test_toml_config.py index 6a3c15700..1dd37a7ac 100644 --- a/tests/messages/test_toml_config.py +++ b/tests/messages/test_toml_config.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pathlib from io import BytesIO @@ -9,19 +11,64 @@ assert toml_test_cases_path.is_dir(), "toml-test-cases directory not found" +def parse_toml(cfg: bytes | str): + if isinstance(cfg, str): + cfg = cfg.encode("utf-8") + return frontend._parse_mapping_toml(BytesIO(cfg)) + + def test_toml_mapping_multiple_patterns(): """ Test that patterns may be specified as a list in TOML, and are expanded to multiple entries in the method map. 
""" - method_map, options_map = frontend._parse_mapping_toml(BytesIO(b""" + method_map, options_map = parse_toml(""" [[mappings]] method = "python" pattern = ["xyz/**.py", "foo/**.py"] -""")) - assert len(method_map) == 2 - assert method_map[0] == ('xyz/**.py', 'python') - assert method_map[1] == ('foo/**.py', 'python') +""") + assert method_map == [ + ('xyz/**.py', 'python'), + ('foo/**.py', 'python'), + ] + + +@pytest.mark.parametrize( + ("keywords_val", "expected"), + [ + pytest.param('"foo bar quz"', {'bar': None, 'foo': None, 'quz': None}, id='string'), + pytest.param('["foo", "bar", "quz"]', {'bar': None, 'foo': None, 'quz': None}, id='list'), + pytest.param('"foo:1,2 bar quz"', {'bar': None, 'foo': (1, 2), 'quz': None}, id='s-args'), + pytest.param('["bar", "foo:1,2", "quz"]', {'bar': None, 'foo': (1, 2), 'quz': None}, id='l-args'), + pytest.param('[]', None, id='empty'), + ], +) +def test_toml_mapping_keywords_parsing(keywords_val, expected): + method_map, options_map = parse_toml(f""" +[[mappings]] +method = "python" +pattern = ["**.py"] +keywords = {keywords_val} +""") + assert options_map['**.py'].get('keywords') == expected + + +@pytest.mark.parametrize( + ("add_comments_val", "expected"), + [ + ('"SPECIAL SAUCE"', ['SPECIAL SAUCE']), # TOML will allow this as a single string + ('["SPECIAL", "SAUCE"]', ['SPECIAL', 'SAUCE']), + ('[]', None), + ], +) +def test_toml_mapping_add_comments_parsing(add_comments_val, expected): + method_map, options_map = parse_toml(f""" +[[mappings]] +method = "python" +pattern = ["**.py"] +add_comments = {add_comments_val} +""") + assert options_map['**.py'].get('add_comments') == expected @pytest.mark.parametrize("test_case", toml_test_cases_path.glob("bad.*.toml"), ids=lambda p: p.name)