From a43c2f2c9dc2b745d8f05013ac30dd1bf5c0adcb Mon Sep 17 00:00:00 2001 From: Harshal Parekh Date: Wed, 6 May 2026 21:43:03 -0500 Subject: [PATCH 1/2] feat(pypi): restrict pip.parse hub visibility Add a restrict_visibility_to attribute for bzlmod pip.parse so hub aliases are public only for packages listed in direct requirement files. Keep all lockfile packages available to generated wheel repositories so transitive dependencies continue to resolve internally. Fixes #3413 --- CHANGELOG.md | 4 + docs/pypi/download.md | 26 ++++++- python/private/pypi/extension.bzl | 15 ++++ python/private/pypi/hub_builder.bzl | 1 + python/private/pypi/hub_repository.bzl | 6 +- python/private/pypi/parse_requirements.bzl | 43 ++++++++++- python/private/pypi/render_pkg_aliases.bzl | 53 ++++++++++++- tests/pypi/extension/pip_parse.bzl | 2 + tests/pypi/hub_builder/hub_builder_tests.bzl | 59 +++++++++++++++ .../parse_requirements_tests.bzl | 75 +++++++++++++++++++ .../render_pkg_aliases_test.bzl | 24 ++++++ 11 files changed, 297 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70f30ac67b..61705deb0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,6 +96,10 @@ END_UNRELEASED_TEMPLATE * Python toolchain from [20260414] release. * (pypi) `package_metadata` support, fixes [#2054](https://github.com/bazel-contrib/rules_python/issues/2054). +* (pypi) Added {attr}`pip.parse.restrict_visibility_to` to expose only + packages listed in requirement files while keeping lockfile transitive + dependencies available internally. Fixes + [#3413](https://github.com/bazel-contrib/rules_python/issues/3413). [20260325]: https://github.com/astral-sh/python-build-standalone/releases/tag/20260325 [20260414]: https://github.com/astral-sh/python-build-standalone/releases/tag/20260414 diff --git a/docs/pypi/download.md b/docs/pypi/download.md index d4159eb3a7..10a81ac46c 100644 --- a/docs/pypi/download.md +++ b/docs/pypi/download.md @@ -24,8 +24,30 @@ pip.parse( use_repo(pip, "my_deps") ``` -For more documentation, see the Bzlmod examples under the {gh-path}`examples` folder or the documentation -for the {obj}`@rules_python//python/extensions:pip.bzl` extension. +For more documentation, see the Bzlmod examples under the +{gh-path}`examples` folder or the documentation for the +{obj}`@rules_python//python/extensions:pip.bzl` extension. + +## Restricting exposed hub packages + +By default, every package in {attr}`pip.parse.requirements_lock` gets a public +hub alias, such as `@my_deps//foo`. If you want only direct dependencies to be +available to user code, set {attr}`pip.parse.restrict_visibility_to` to one or +more requirement files that list those direct packages: + +```starlark +pip.parse( + hub_name = "my_deps", + python_version = "3.13", + requirements_lock = "//:requirements_lock.txt", + restrict_visibility_to = ["//:requirements.in"], +) +``` + +Packages in the lock file that are not listed in the restricted requirement +files still get generated wheel repositories, so direct dependencies can use +their transitive dependencies. Their hub aliases are visible only to the +generated wheel repositories and are not public targets for user code. :::note} We are using a host-platform compatible toolchain by default to setup pip dependencies. diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index e6052782fa..72e6c8dcdb 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -786,6 +786,21 @@ The Python version the dependencies are targetting, in Major.Minor format If an interpreter isn't explicitly provided (using `python_interpreter` or `python_interpreter_target`), then the version specified here must have a corresponding `python.toolchain()` configured. +""", + ), + "restrict_visibility_to": attr.label_list( + allow_files = True, + doc = """ +A list of requirement files whose package names are exposed as public hub +targets. Packages in the lock files that are not listed here still get wheel +repositories so they can be used as transitive dependencies, but their hub +aliases are only visible to repositories generated by this `pip.parse` hub. + +This is useful when your lock file contains transitive dependencies that should +remain implementation details of your direct dependencies. + +:::{versionadded} VERSION_NEXT_FEATURE +::: """, ), "simpleapi_skip": attr.string_list( diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 85a31cfc3c..44317c19dc 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -510,6 +510,7 @@ def _create_whl_repos( extra_pip_args = pip_attr.extra_pip_args, get_index_urls = self._get_index_urls.get(pip_attr.python_version), evaluate_markers = _evaluate_markers(self, pip_attr), + exposed_requirements = pip_attr.restrict_visibility_to, logger = logger, ) diff --git a/python/private/pypi/hub_repository.bzl b/python/private/pypi/hub_repository.bzl index f915aa1c77..9fe34720eb 100644 --- a/python/private/pypi/hub_repository.bzl +++ b/python/private/pypi/hub_repository.bzl @@ -26,12 +26,13 @@ exports_files(["requirements.bzl"]) """ def _impl(rctx): - bzl_packages = rctx.attr.packages or rctx.attr.whl_map.keys() + bzl_packages = rctx.attr.packages aliases = render_multiplatform_pkg_aliases( aliases = { key: _whl_config_settings_from_json(values) for key, values in rctx.attr.whl_map.items() }, + exposed_packages = bzl_packages, extra_hub_aliases = rctx.attr.extra_hub_aliases, requirement_cycles = rctx.attr.groups, platform_config_settings = rctx.attr.platform_config_settings, @@ -81,7 +82,8 @@ hub_repository = repository_rule( "packages": attr.string_list( mandatory = False, doc = """\ -The list of packages that will be exposed via all_*requirements macros. Defaults to whl_map keys. +The list of packages that will be exposed via public hub aliases and +all_*requirements macros. """, ), "platform_config_settings": attr.string_list_dict( diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index 2a7793212a..9b50e5e077 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -42,6 +42,7 @@ def parse_requirements( platforms = {}, get_index_urls = None, evaluate_markers = None, + exposed_requirements = [], extract_url_srcs = True, logger): """Get the requirements with platforms that the requirements apply to. @@ -62,6 +63,9 @@ def parse_requirements( the platforms stored as values in the input dict. Returns the same dict, but with values being platforms that are compatible with the requirements line. + exposed_requirements: List of requirements files. When present, only + packages listed in these files should be exposed via the hub + repository. extract_url_srcs: A boolean to enable extracting URLs from requirement lines to enable using bazel downloader. logger: repo_utils.logger, a simple struct to log diagnostic messages. @@ -92,6 +96,7 @@ def parse_requirements( reqs_with_env_markers = {} index_url = None extra_index_urls = [] + exposed_package_names = _exposed_package_names(ctx, exposed_requirements) for file, plats in requirements_by_platform.items(): logger.trace(lambda: "Using {} for {}".format(file, plats)) contents = ctx.read(file) @@ -231,17 +236,34 @@ def parse_requirements( # for p in dist.target_platforms # ] + normalized_name = normalize_name(name) + is_exposed = len(requirement_target_platforms) == len(requirements) + if exposed_package_names != None and normalized_name not in exposed_package_names: + is_exposed = False + item = struct( # Return normalized names - name = normalize_name(name), - is_exposed = len(requirement_target_platforms) == len(requirements), + name = normalized_name, + is_exposed = is_exposed, is_multiple_versions = len(reqs.values()) > 1, index_url = pkg_sources.index_url if pkg_sources else "", srcs = package_srcs, ) ret.append(item) - if not item.is_exposed and logger: - logger.trace(lambda: "Package '{}' will not be exposed because it is only present on a subset of platforms: {} out of {}".format( + if ( + exposed_package_names != None and + normalized_name not in exposed_package_names and + logger + ): + logger.trace(lambda: ( + "Package '{}' will not be exposed because it is not present " + + "in restrict_visibility_to" + ).format(name)) + if len(requirement_target_platforms) != len(requirements) and logger: + logger.trace(lambda: ( + "Package '{}' will not be exposed because it is only present " + + "on a subset of platforms: {} out of {}" + ).format( name, sorted(requirement_target_platforms), sorted(requirements), @@ -251,6 +273,19 @@ def parse_requirements( return ret +def _exposed_package_names(ctx, exposed_requirements): + """Parse the requirement files that define hub-exposed package names.""" + if not exposed_requirements: + return None + + exposed = {} + for file in exposed_requirements: + parse_result = parse_requirements_txt(ctx.read(file)) + for distribution, _ in parse_result.requirements: + exposed[normalize_name(distribution)] = None + + return exposed + def _package_srcs( *, name, diff --git a/python/private/pypi/render_pkg_aliases.bzl b/python/private/pypi/render_pkg_aliases.bzl index 0a1c328491..d000c02a4f 100644 --- a/python/private/pypi/render_pkg_aliases.bzl +++ b/python/private/pypi/render_pkg_aliases.bzl @@ -65,7 +65,7 @@ def _repr_actual(aliases): else: return render.dict(aliases, key_repr = _repr_config_setting) -def _render_common_aliases(*, name, aliases, **kwargs): +def _render_common_aliases(*, name, aliases, visibility, **kwargs): pkg_aliases = render.call( "pkg_aliases", name = repr(name), @@ -80,14 +80,21 @@ def _render_common_aliases(*, name, aliases, **kwargs): return """\ load("@rules_python//python/private/pypi:pkg_aliases.bzl", "pkg_aliases") {extra_loads} -package(default_visibility = ["//visibility:public"]) +package(default_visibility = {visibility}) {aliases}""".format( aliases = pkg_aliases, extra_loads = extra_loads, + visibility = render.list(visibility), ) -def render_pkg_aliases(*, aliases, requirement_cycles = None, extra_hub_aliases = {}, **kwargs): +def render_pkg_aliases( + *, + aliases, + requirement_cycles = None, + extra_hub_aliases = {}, + exposed_packages = None, + **kwargs): """Create alias declarations for each PyPI package. The aliases should be appended to the pip_repository BUILD.bazel file. These aliases @@ -100,6 +107,8 @@ def render_pkg_aliases(*, aliases, requirement_cycles = None, extra_hub_aliases requirement_cycles: any package groups to also add. extra_hub_aliases: The list of extra aliases for each whl to be added in addition to the default ones. + exposed_packages: The public hub packages. When present, other packages + are only visible to generated wheel repositories. **kwargs: Extra kwargs to pass to the rules. Returns: @@ -124,12 +133,20 @@ def render_pkg_aliases(*, aliases, requirement_cycles = None, extra_hub_aliases for whl_name in group_whls } + exposed_packages = _normalize_package_names(exposed_packages) + internal_visibility = _internal_visibility(aliases) + files = { "{}/BUILD.bazel".format(normalize_name(name)): _render_common_aliases( name = normalize_name(name), aliases = pkg_aliases, extra_aliases = extra_hub_aliases.get(normalize_name(name), []), group_name = whl_group_mapping.get(normalize_name(name)), + visibility = _package_visibility( + name = normalize_name(name), + exposed_packages = exposed_packages, + internal_visibility = internal_visibility, + ), **kwargs ).strip() for name, pkg_aliases in aliases.items() @@ -139,6 +156,36 @@ def render_pkg_aliases(*, aliases, requirement_cycles = None, extra_hub_aliases files["_groups/BUILD.bazel"] = generate_group_library_build_bazel("", requirement_cycles) return files +def _normalize_package_names(packages): + if packages == None: + return None + + return { + normalize_name(package): None + for package in packages + } + +def _internal_visibility(aliases): + repo_names = {} + for pkg_aliases in aliases.values(): + if type(pkg_aliases) == type(""): + repo_names[pkg_aliases] = None + continue + + for repo_name in pkg_aliases.values(): + repo_names[repo_name] = None + + return [ + "@{}//:__pkg__".format(repo_name) + for repo_name in sorted(repo_names) + ] + +def _package_visibility(*, name, exposed_packages, internal_visibility): + if exposed_packages == None or name in exposed_packages: + return ["//visibility:public"] + + return internal_visibility + def _major_minor(python_version): major, _, tail = python_version.partition(".") minor, _, _ = tail.partition(".") diff --git a/tests/pypi/extension/pip_parse.bzl b/tests/pypi/extension/pip_parse.bzl index 2d55d5cd1f..75459a7cb7 100644 --- a/tests/pypi/extension/pip_parse.bzl +++ b/tests/pypi/extension/pip_parse.bzl @@ -27,6 +27,7 @@ def pip_parse( requirements_linux = None, requirements_lock = None, requirements_windows = None, + restrict_visibility_to = [], target_platforms = [], simpleapi_skip = [], timeout = 600, @@ -60,6 +61,7 @@ def pip_parse( requirements_linux = requirements_linux, requirements_lock = requirements_lock, requirements_windows = requirements_windows, + restrict_visibility_to = restrict_visibility_to, timeout = timeout, whl_modifications = whl_modifications, parallel_download = False, diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index ccf72c2774..44be22149e 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -153,6 +153,65 @@ def _test_simple(env): _tests.append(_test_simple) +def _test_restrict_visibility_to(env): + builder = hub_builder(env) + builder.pip_parse( + _mock_mctx( + os_name = "osx", + arch_name = "aarch64", + mock_files = { + "requirements.in": "foo>=0.0.1\n", + "requirements.txt": """\ +foo==0.0.1 --hash=sha256:deadbeef +dep-of-foo==0.0.1 --hash=sha256:deadb00f +""", + }, + ), + _parse( + hub_name = "pypi", + python_version = "3.15", + requirements_lock = "requirements.txt", + restrict_visibility_to = ["requirements.in"], + ), + ) + pypi = builder.build() + + pypi.exposed_packages().contains_exactly(["foo"]) + pypi.group_map().contains_exactly({}) + pypi.whl_map().contains_exactly({ + "dep_of_foo": { + "pypi_315_dep_of_foo": [ + whl_config_setting( + version = "3.15", + ), + ], + }, + "foo": { + "pypi_315_foo": [ + whl_config_setting( + version = "3.15", + ), + ], + }, + }) + pypi.whl_libraries().contains_exactly({ + "pypi_315_dep_of_foo": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "dep-of-foo==0.0.1 --hash=sha256:deadb00f", + }, + "pypi_315_foo": { + "config_load": "@pypi//:config.bzl", + "dep_template": "@pypi//{name}:{target}", + "python_interpreter_target": "unit_test_interpreter_target", + "requirement": "foo==0.0.1 --hash=sha256:deadbeef", + }, + }) + pypi.extra_aliases().contains_exactly({}) + +_tests.append(_test_restrict_visibility_to) + def _test_simple_multiple_requirements(env): sub_tests = { ("osx", "aarch64"): "simple==0.0.2 --hash=sha256:deadb00f", diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl index 1786c4e664..3642dc447e 100644 --- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl +++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl @@ -34,6 +34,10 @@ foo @ https://github.com/org/foo/downloads/foo-1.1.tar.gz foo[extra]==0.0.1 \ --hash=sha256:deadbeef +""", + "requirements_exposed_roots": """\ +foo[extra]>=0.0.1 +bar @ https://example.org/bar-0.0.1.whl """, "requirements_foo": """\ foo==0.0.1 \ @@ -61,6 +65,11 @@ bar==0.0.1 --hash=sha256:deadb00f """, "requirements_lock": """\ foo[extra]==0.0.1 --hash=sha256:deadbeef +""", + "requirements_lock_with_transitives": """\ +foo==0.0.1 --hash=sha256:deadbeef +bar==0.0.1 --hash=sha256:deadb00f +baz==0.0.1 --hash=sha256:deadbaaf """, "requirements_lock_dupe": """\ foo[extra,extra_2]==0.0.1 --hash=sha256:deadbeef @@ -150,6 +159,72 @@ def _test_simple(env): _tests.append(_test_simple) +def _test_restrict_visibility_to(env): + got = parse_requirements( + exposed_requirements = ["requirements_exposed_roots"], + requirements_by_platform = { + "requirements_lock_with_transitives": ["linux_x86_64"], + }, + ) + env.expect.that_collection(got).contains_exactly([ + struct( + name = "bar", + index_url = "", + is_exposed = True, + is_multiple_versions = False, + srcs = [ + struct( + distribution = "bar", + extra_pip_args = [], + requirement_line = "bar==0.0.1 --hash=sha256:deadb00f", + target_platforms = ["linux_x86_64"], + url = "", + filename = "", + sha256 = "", + yanked = None, + ), + ], + ), + struct( + name = "baz", + index_url = "", + is_exposed = False, + is_multiple_versions = False, + srcs = [ + struct( + distribution = "baz", + extra_pip_args = [], + requirement_line = "baz==0.0.1 --hash=sha256:deadbaaf", + target_platforms = ["linux_x86_64"], + url = "", + filename = "", + sha256 = "", + yanked = None, + ), + ], + ), + struct( + name = "foo", + index_url = "", + is_exposed = True, + is_multiple_versions = False, + srcs = [ + struct( + distribution = "foo", + extra_pip_args = [], + requirement_line = "foo==0.0.1 --hash=sha256:deadbeef", + target_platforms = ["linux_x86_64"], + url = "", + filename = "", + sha256 = "", + yanked = None, + ), + ], + ), + ]) + +_tests.append(_test_restrict_visibility_to) + def _test_direct_urls_integration(env): """Check that we are using the filename from index_sources.""" got = parse_requirements( diff --git a/tests/pypi/render_pkg_aliases/render_pkg_aliases_test.bzl b/tests/pypi/render_pkg_aliases/render_pkg_aliases_test.bzl index 9114f59279..7bfed6c2b6 100644 --- a/tests/pypi/render_pkg_aliases/render_pkg_aliases_test.bzl +++ b/tests/pypi/render_pkg_aliases/render_pkg_aliases_test.bzl @@ -147,6 +147,30 @@ def _test_aliases_are_created_for_all_wheels(env): _tests.append(_test_aliases_are_created_for_all_wheels) +def _test_restricted_aliases_are_not_public(env): + actual = render_multiplatform_pkg_aliases( + aliases = { + "bar": { + whl_config_setting(version = "3.1"): "pypi_31_bar", + }, + "foo": { + whl_config_setting(version = "3.1"): "pypi_31_foo", + }, + }, + exposed_packages = ["foo"], + ) + + env.expect.that_str(actual["foo/BUILD.bazel"]).contains( + 'package(default_visibility = ["//visibility:public"])', + ) + env.expect.that_str(actual["bar/BUILD.bazel"]).contains("""\ +package(default_visibility = [ + "@pypi_31_bar//:__pkg__", + "@pypi_31_foo//:__pkg__", +])""") + +_tests.append(_test_restricted_aliases_are_not_public) + def _test_aliases_with_groups(env): actual = render_pkg_aliases( aliases = { From d3be2d3820119dda8874168c7af74acc169d5f7f Mon Sep 17 00:00:00 2001 From: Harshal Parekh Date: Wed, 6 May 2026 23:30:10 -0500 Subject: [PATCH 2/2] test: fix exec toolchain matching sentinel --- .../exec_toolchain_matching/exec_toolchain_matching_tests.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/exec_toolchain_matching/exec_toolchain_matching_tests.bzl b/tests/exec_toolchain_matching/exec_toolchain_matching_tests.bzl index a26e4f5f6e..cec3a4c330 100644 --- a/tests/exec_toolchain_matching/exec_toolchain_matching_tests.bzl +++ b/tests/exec_toolchain_matching/exec_toolchain_matching_tests.bzl @@ -86,7 +86,8 @@ def _test_exec_matches_target_python_version(name): ) # This is never matched. It's just here so that toolchains from the - # environment don't match. + # environment don't match. Keep the target settings mismatched so this + # unconstrained toolchain cannot satisfy host execution platforms. native.toolchain( name = "99_target_default", toolchain_type = TARGET_TOOLCHAIN_TYPE, @@ -119,6 +120,7 @@ def _test_exec_matches_target_python_version(name): name = "99_exec_default", toolchain_type = EXEC_TOOLS_TOOLCHAIN_TYPE, toolchain = ":exec_default", + target_settings = ["//python/config_settings:is_python_3.11"], ) analysis_test(