From c0e493a6cebc0da5a1d50e224e114ed74c9284a5 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 14:58:26 +0300 Subject: [PATCH 01/15] Adds skeleton extracted from molcryst v0.2.2 (dev branch) --- .flake8 | 3 + .gitattributes | 2 + .github/workflows/ci.yml | 23 + .github/workflows/docs.yml | 21 + CHANGELOG.md | 9 + COPYING | 674 ++++++++++++++++++++++++++ DEV_PLAN.md | 28 ++ LICENSE | 165 +++++++ NOTICE.md | 12 + README.md | 66 +++ docs/api/atomref.md | 3 + docs/api/index.md | 4 + docs/datasets/atomic_radius.md | 8 + docs/datasets/covalent_radius.md | 6 + docs/datasets/index.md | 10 + docs/datasets/van_der_waals_radius.md | 11 + docs/dev/architecture.md | 7 + docs/dev/data_curation.md | 7 + docs/dev/dev_plan.md | 28 ++ docs/guide/custom_sets.md | 18 + docs/guide/install.md | 8 + docs/guide/non_goals.md | 11 + docs/guide/policies.md | 20 + docs/guide/quickstart.md | 16 + docs/index.md | 66 +++ mkdocs.yml | 37 ++ pyproject.toml | 94 ++++ src/atomref/__about__.py | 1 + src/atomref/__init__.py | 60 +++ src/atomref/data/__init__.py | 1 + src/atomref/data/covalent.csv | 119 +++++ src/atomref/data/periodic_table.csv | 119 +++++ src/atomref/data/registry.json | 434 +++++++++++++++++ src/atomref/data/van_der_waals.csv | 119 +++++ src/atomref/elements.py | 99 ++++ src/atomref/errors.py | 14 + src/atomref/policy.py | 261 ++++++++++ src/atomref/py.typed | 0 src/atomref/radii.py | 233 +++++++++ src/atomref/registry.py | 343 +++++++++++++ src/atomref/transfer.py | 31 ++ tests/conftest.py | 9 + tests/elements/test_elements.py | 19 + tests/meta/test_imports.py | 18 + tests/meta/test_readme_sync.py | 20 + tests/radii/test_assessment.py | 37 ++ tests/radii/test_selection.py | 53 ++ tests/registry/test_registry.py | 32 ++ tests/test_smoke.py | 13 + tools/gen_readme.py | 20 + 50 files changed, 3412 insertions(+) create mode 100644 .flake8 create mode 100644 .gitattributes create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/docs.yml create mode 100644 CHANGELOG.md create mode 100644 COPYING create mode 100644 DEV_PLAN.md create mode 100644 LICENSE create mode 100644 NOTICE.md create mode 100644 README.md create mode 100644 docs/api/atomref.md create mode 100644 docs/api/index.md create mode 100644 docs/datasets/atomic_radius.md create mode 100644 docs/datasets/covalent_radius.md create mode 100644 docs/datasets/index.md create mode 100644 docs/datasets/van_der_waals_radius.md create mode 100644 docs/dev/architecture.md create mode 100644 docs/dev/data_curation.md create mode 100644 docs/dev/dev_plan.md create mode 100644 docs/guide/custom_sets.md create mode 100644 docs/guide/install.md create mode 100644 docs/guide/non_goals.md create mode 100644 docs/guide/policies.md create mode 100644 docs/guide/quickstart.md create mode 100644 docs/index.md create mode 100644 mkdocs.yml create mode 100644 pyproject.toml create mode 100644 src/atomref/__about__.py create mode 100644 src/atomref/__init__.py create mode 100644 src/atomref/data/__init__.py create mode 100644 src/atomref/data/covalent.csv create mode 100644 src/atomref/data/periodic_table.csv create mode 100644 src/atomref/data/registry.json create mode 100644 src/atomref/data/van_der_waals.csv create mode 100644 src/atomref/elements.py create mode 100644 src/atomref/errors.py create mode 100644 src/atomref/policy.py create mode 100644 src/atomref/py.typed create mode 100644 src/atomref/radii.py create mode 100644 src/atomref/registry.py create mode 100644 src/atomref/transfer.py create mode 100644 
tests/conftest.py create mode 100644 tests/elements/test_elements.py create mode 100644 tests/meta/test_imports.py create mode 100644 tests/meta/test_readme_sync.py create mode 100644 tests/radii/test_assessment.py create mode 100644 tests/radii/test_selection.py create mode 100644 tests/registry/test_registry.py create mode 100644 tests/test_smoke.py create mode 100644 tools/gen_readme.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..8dd399a --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 88 +extend-ignore = E203 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3225814 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Enforce Linux-style line endings for all text files +* text=auto eol=lf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..dbc7a70 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,23 @@ +name: CI + +on: + push: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install + run: | + python -m pip install --upgrade pip + python -m pip install .[test] + - name: Test + run: pytest diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..590aad5 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,21 @@ +name: Docs + +on: + push: + branches: [main] + workflow_dispatch: + +jobs: + build-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install docs extras + run: | + python -m pip install --upgrade pip + python -m pip install .[docs] + - name: Build docs + run: mkdocs build --strict diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..faca26a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,9 @@ +# Changelog + +## 0.1.0a0 + +- Initial scaffold extracted from the `molcryst` chemistry data layer. +- Added packaged element metadata and radii tables. +- Added registry design separating operational quantity from scientific + classification. +- Added radii policies with substitution and linear transfer models. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. 
Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/DEV_PLAN.md b/DEV_PLAN.md new file mode 100644 index 0000000..7252862 --- /dev/null +++ b/DEV_PLAN.md @@ -0,0 +1,28 @@ +# Development plan + +## v0.1 + +- element metadata +- covalent and van der Waals radii sets +- explicit provenance +- radii policies +- substitution and linear transfer +- custom element-indexed scalar sets + +## v0.2 + +- X-H bond-length datasets +- experimental plus computational support sets +- restoration of incomplete experimental data from broader-support predictors + +## v0.3 + +- radial atomic reference functions +- simple proto-density support based on spherically averaged atomic data + +## Possible future directions + +- more radii sets +- uncertainty and confidence flags +- ion-specific or atom-type-specific domains +- density-derived radii and related reference transforms diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. 
If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/NOTICE.md b/NOTICE.md new file mode 100644 index 0000000..01f1cf1 --- /dev/null +++ b/NOTICE.md @@ -0,0 +1,12 @@ +# atomref + +atomref is a Python library for curated atomic reference data and transfer +policies for geometry and structure-analysis algorithms. + +Copyright (c) 2026 Ivan Chernyshov +License: LGPL-3.0-or-later (see LICENSE and COPYING) + +## Third-party material + +The initial scaffold reuses and adapts data tables and design ideas from the +Delone Commons `molcryst` repository, also authored by Ivan Chernyshov. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d5b9154 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +# atomref + +`atomref` is a small pure-Python package for curated atomic reference data and +policy-based lookup in geometry and structure-analysis code. + +It is **not** a periodic-table encyclopedia. The package is meant to sit under +higher-level scientific software and provide: + +- stable element metadata, +- named radii sets, +- explicit dataset provenance, +- deterministic lookup policies, +- transfer from broader-support datasets into narrower target sets. + +For v0.1 the public scope is intentionally radii-first. + +## Why this exists + +Many geometry algorithms need a complete reference table, but the scientifically +preferred dataset is often incomplete. 
`atomref` makes that situation explicit: +choose a target dataset, add one or more transfer steps, and keep provenance on +what was returned. + +The default examples mirror the current `molcryst` behavior: + +- covalent radii: use `cordero2008`, substitute from `csd_legacy_cov` +- van der Waals radii: use `alvarez2013`, linearly transfer from + `atomic_radius:rahm2016` + +## Quick example + +```python +import atomref as ar + +r_c = ar.get_covalent_radius("C") +r_vdw = ar.get_vdw_radius("O") + +lookup = ar.lookup_vdw_radius("Pm") +print(lookup.value, lookup.source, lookup.resolved_from) +``` + +## Public API split: `get_*` vs `lookup_*` + +- `get_*` returns only the selected numeric value, or `None`. +- `lookup_*` returns the provenance-carrying `LookupResult` object. + +This follows the current `molcryst` pattern. + +## Current built-in quantities + +- `covalent_radius` +- `van_der_waals_radius` +- `atomic_radius` (support quantity; currently used for transfer from + `rahm2016`) + +## Relationship to the Delone Commons ecosystem + +`atomref` is intended to be reusable outside the surrounding ecosystem, but it +fits naturally beneath: + +- `molcryst` +- `pyvoro2` +- `pbcgraph` + +Those packages should consume atomic reference data from `atomref` rather than +re-curating such datasets independently. diff --git a/docs/api/atomref.md b/docs/api/atomref.md new file mode 100644 index 0000000..dcbc5e0 --- /dev/null +++ b/docs/api/atomref.md @@ -0,0 +1,3 @@ +# atomref + +::: atomref diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000..da15dbf --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,4 @@ +# API + +The top-level package exports the main radii helpers together with the registry, +policy, and transfer data structures. diff --git a/docs/datasets/atomic_radius.md b/docs/datasets/atomic_radius.md new file mode 100644 index 0000000..cbbe61b --- /dev/null +++ b/docs/datasets/atomic_radius.md @@ -0,0 +1,8 @@ +# Atomic radius + +This quantity currently exists to hold transferable support datasets that are +not best described as direct condensed-phase vdW radii. + +Built-in v0.1 support set: + +- `rahm2016` diff --git a/docs/datasets/covalent_radius.md b/docs/datasets/covalent_radius.md new file mode 100644 index 0000000..f298635 --- /dev/null +++ b/docs/datasets/covalent_radius.md @@ -0,0 +1,6 @@ +# Covalent radius + +Built-in v0.1 sets: + +- `cordero2008` +- `csd_legacy_cov` diff --git a/docs/datasets/index.md b/docs/datasets/index.md new file mode 100644 index 0000000..a58d78b --- /dev/null +++ b/docs/datasets/index.md @@ -0,0 +1,10 @@ +# Datasets + +The package distinguishes between: + +- **quantity** — the operational property being requested, +- **semantic class** — what the dataset scientifically represents, +- **origin / phase context** — how and where it was derived. + +This is what keeps support-only datasets such as `rahm2016` usable without +misclassifying them as direct condensed-phase vdW radii. diff --git a/docs/datasets/van_der_waals_radius.md b/docs/datasets/van_der_waals_radius.md new file mode 100644 index 0000000..d757bab --- /dev/null +++ b/docs/datasets/van_der_waals_radius.md @@ -0,0 +1,11 @@ +# van der Waals radius + +Built-in v0.1 target sets: + +- `bondi1964` +- `rowland_taylor1996` +- `alvarez2013` +- `chernyshov2020` +- `csd_legacy_vdw` + +Support-only sets may live under other quantities. 
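To see which of these sets actually supplied a value, the provenance-carrying `lookup_vdw_radius` helper from the quickstart is enough. A minimal sketch under the default policy, assuming only the attribute names documented above (the exact strings held by `source` and `resolved_from` are not specified in this patch):

```python
import atomref as ar

# Pm is the quickstart's example of an element that is expected to resolve
# through a transfer step rather than the alvarez2013 base set.
for symbol in ("C", "O", "Pm"):
    result = ar.lookup_vdw_radius(symbol)
    # Assumption: value may be None if even the fallback cannot supply a number.
    if result.value is None:
        print(f"{symbol}: no value available under the default policy")
    else:
        print(f"{symbol}: {result.value} from {result.source} via {result.resolved_from}")
```

The same pattern should apply to a covalent-radius lookup helper, if one is exported alongside the `get_*` pair shown in the README.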
diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md new file mode 100644 index 0000000..7dd08b4 --- /dev/null +++ b/docs/dev/architecture.md @@ -0,0 +1,7 @@ +# Architecture + +Publicly, v0.1 is radii-first. + +Internally, the package is built around element-indexed scalar datasets plus a +small transfer layer. That keeps the public API simple while leaving a clean +path to later quantities such as X-H bond lengths. diff --git a/docs/dev/data_curation.md b/docs/dev/data_curation.md new file mode 100644 index 0000000..02f406b --- /dev/null +++ b/docs/dev/data_curation.md @@ -0,0 +1,7 @@ +# Data curation + +Packaged tables are stored as CSV files indexed by atomic number. Dataset +metadata and provenance live in `src/atomref/data/registry.json`. + +Placeholder values are modeled as dataset metadata, not as hard-coded Python +constants. diff --git a/docs/dev/dev_plan.md b/docs/dev/dev_plan.md new file mode 100644 index 0000000..7252862 --- /dev/null +++ b/docs/dev/dev_plan.md @@ -0,0 +1,28 @@ +# Development plan + +## v0.1 + +- element metadata +- covalent and van der Waals radii sets +- explicit provenance +- radii policies +- substitution and linear transfer +- custom element-indexed scalar sets + +## v0.2 + +- X-H bond-length datasets +- experimental plus computational support sets +- restoration of incomplete experimental data from broader-support predictors + +## v0.3 + +- radial atomic reference functions +- simple proto-density support based on spherically averaged atomic data + +## Possible future directions + +- more radii sets +- uncertainty and confidence flags +- ion-specific or atom-type-specific domains +- density-derived radii and related reference transforms diff --git a/docs/guide/custom_sets.md b/docs/guide/custom_sets.md new file mode 100644 index 0000000..bfc55cb --- /dev/null +++ b/docs/guide/custom_sets.md @@ -0,0 +1,18 @@ +# Custom sets + +Custom element-indexed scalar datasets can be built with +`ElementScalarSet.from_mapping(...)` and then used directly in a `RadiiPolicy` +or a transfer model. + +```python +from atomref import DatasetRef, ElementScalarSet, RadiiPolicy + +custom = ElementScalarSet.from_mapping( + ref=DatasetRef("covalent_radius", "my_cov"), + values={"C": 0.75, "H": 0.31}, + name="My custom covalent radii", + units="angstrom", +) + +policy = RadiiPolicy(kind="covalent", base_set=custom) +``` diff --git a/docs/guide/install.md b/docs/guide/install.md new file mode 100644 index 0000000..2e2ae65 --- /dev/null +++ b/docs/guide/install.md @@ -0,0 +1,8 @@ +# Install + +```bash +pip install atomref +``` + +The runtime package is pure Python and has no required runtime dependencies +outside the standard library. diff --git a/docs/guide/non_goals.md b/docs/guide/non_goals.md new file mode 100644 index 0000000..57bca94 --- /dev/null +++ b/docs/guide/non_goals.md @@ -0,0 +1,11 @@ +# Non-goals + +`atomref` does not aim to handle: + +- file parsing, +- crystallographic symmetry, +- structure inference, +- Voronoi or power tessellation, +- chemistry-specific plane-position logic. + +Those concerns belong in higher-level packages. diff --git a/docs/guide/policies.md b/docs/guide/policies.md new file mode 100644 index 0000000..a7e9130 --- /dev/null +++ b/docs/guide/policies.md @@ -0,0 +1,20 @@ +# Policies + +A policy is the ordered rule set for selecting a value. + +Resolution order in v0.1: + +1. override +2. base dataset +3. transfers in order +4. fallback +5. 
missing + +Built-in transfer models: + +- `SubstitutionTransfer` +- `LinearTransfer` + +`LinearTransfer` is intentionally limited to one predictor in v0.1, but the API +already accepts a predictor tuple so later multi-predictor linear models do not +require a redesign. diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md new file mode 100644 index 0000000..62de165 --- /dev/null +++ b/docs/guide/quickstart.md @@ -0,0 +1,16 @@ +# Quickstart + +```python +import atomref as ar + +print(ar.get_covalent_radius("C")) +print(ar.get_vdw_radius("O")) + +m = ar.lookup_vdw_radius("Pm") +print(m.value) +print(m.source) +print(m.resolved_from) +``` + +Use `get_*` when you only need the number, and `lookup_*` when you need +provenance. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..d5b9154 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,66 @@ +# atomref + +`atomref` is a small pure-Python package for curated atomic reference data and +policy-based lookup in geometry and structure-analysis code. + +It is **not** a periodic-table encyclopedia. The package is meant to sit under +higher-level scientific software and provide: + +- stable element metadata, +- named radii sets, +- explicit dataset provenance, +- deterministic lookup policies, +- transfer from broader-support datasets into narrower target sets. + +For v0.1 the public scope is intentionally radii-first. + +## Why this exists + +Many geometry algorithms need a complete reference table, but the scientifically +preferred dataset is often incomplete. `atomref` makes that situation explicit: +choose a target dataset, add one or more transfer steps, and keep provenance on +what was returned. + +The default examples mirror the current `molcryst` behavior: + +- covalent radii: use `cordero2008`, substitute from `csd_legacy_cov` +- van der Waals radii: use `alvarez2013`, linearly transfer from + `atomic_radius:rahm2016` + +## Quick example + +```python +import atomref as ar + +r_c = ar.get_covalent_radius("C") +r_vdw = ar.get_vdw_radius("O") + +lookup = ar.lookup_vdw_radius("Pm") +print(lookup.value, lookup.source, lookup.resolved_from) +``` + +## Public API split: `get_*` vs `lookup_*` + +- `get_*` returns only the selected numeric value, or `None`. +- `lookup_*` returns the provenance-carrying `LookupResult` object. + +This follows the current `molcryst` pattern. + +## Current built-in quantities + +- `covalent_radius` +- `van_der_waals_radius` +- `atomic_radius` (support quantity; currently used for transfer from + `rahm2016`) + +## Relationship to the Delone Commons ecosystem + +`atomref` is intended to be reusable outside the surrounding ecosystem, but it +fits naturally beneath: + +- `molcryst` +- `pyvoro2` +- `pbcgraph` + +Those packages should consume atomic reference data from `atomref` rather than +re-curating such datasets independently. 
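The resolution order listed in the policies page is easiest to see with a small worked example. The sketch below mirrors the default van der Waals policy but adds an override and a fallback so that every source kind shows up; the override value (1.10) and fallback (2.0) are arbitrary illustration values, and the expected sources are inferred from the packaged coverage metadata (Pm is absent from `alvarez2013`, Fm is absent from both `alvarez2013` and `rahm2016`).

```python
import atomref as ar

# Mirrors the default vdW policy, plus an override and a fallback so that each
# resolution source (override, base, transfer_linear, fallback) can be observed.
policy = ar.RadiiPolicy(
    kind="van_der_waals",
    base_set="alvarez2013",
    transfers=(
        ar.LinearTransfer(predictors=(ar.DatasetRef("atomic_radius", "rahm2016"),)),
    ),
    overrides={"H": 1.10},
    fallback=2.0,
)

for symbol in ("H", "O", "Pm", "Fm"):
    res = ar.lookup_vdw_radius(symbol, policy=policy)
    # Expected sources (per the packaged coverage data):
    # H -> override, O -> base, Pm -> transfer_linear, Fm -> fallback.
    print(symbol, res.source, res.value)
```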
diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8b5060c --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,37 @@ +site_name: atomref +site_url: https://delonecommons.github.io/atomref/ +repo_url: https://github.com/DeloneCommons/atomref +repo_name: DeloneCommons/atomref + +theme: + name: material + +plugins: + - search + - mkdocstrings: + handlers: + python: + options: + show_root_heading: true + show_source: false + +nav: + - Home: index.md + - Guide: + - Install: guide/install.md + - Quickstart: guide/quickstart.md + - Policies: guide/policies.md + - Custom sets: guide/custom_sets.md + - Non-goals: guide/non_goals.md + - Datasets: + - Overview: datasets/index.md + - Covalent radius: datasets/covalent_radius.md + - van der Waals radius: datasets/van_der_waals_radius.md + - Atomic radius: datasets/atomic_radius.md + - Development: + - Architecture: dev/architecture.md + - Data curation: dev/data_curation.md + - Development plan: dev/dev_plan.md + - API: + - Overview: api/index.md + - atomref: api/atomref.md diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ea2b569 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,94 @@ +[build-system] +requires = ["hatchling>=1.24"] +build-backend = "hatchling.build" + +[project] +name = "atomref" +dynamic = ["version"] +description = "Curated atomic reference data and transfer policies for geometry and structure-analysis algorithms." +readme = "README.md" +requires-python = ">=3.10" +license = { file = "LICENSE" } +authors = [ + { name = "Ivan Yu. Chernyshov", email = "ivan.chernyshoff@gmail.com" } +] +keywords = ["chemistry", "materials", "crystallography", "reference data", "atomic radii"] +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Chemistry", + "Topic :: Software Development :: Libraries", + "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Operating System :: OS Independent", +] +dependencies = [] + +[project.urls] +Homepage = "https://delonecommons.github.io/atomref/" +Documentation = "https://delonecommons.github.io/atomref/" +Repository = "https://github.com/DeloneCommons/atomref" +Issues = "https://github.com/DeloneCommons/atomref/issues" + +[project.optional-dependencies] +test = [ + "pytest>=7", + "tomli>=2; python_version < '3.11'", +] +docs = [ + "mkdocs>=1.6,<2", + "mkdocs-material>=9.5", + "mkdocstrings[python]>=0.25", + "mkdocs-include-markdown-plugin>=6.2", + "pymdown-extensions>=10.0", + "tomli>=2; python_version < '3.11'", +] +dev = [ + "build>=1.2", + "twine>=5", + "flake8>=7", +] + +[tool.hatch.version] +path = "src/atomref/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/atomref"] +include = [ + "src/atomref/data/*.csv", + "src/atomref/data/*.json", +] + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", + "/docs", + "/tools", + "/mkdocs.yml", + "/README.md", + "/CHANGELOG.md", + "/DEV_PLAN.md", + "/NOTICE.md", + "/LICENSE", + "/COPYING", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-ra --ignore=build --ignore=dist" +norecursedirs = [ + ".git", + ".pytest_cache", + "__pycache__", + ".venv", + ".tox", + "dist", + ".eggs", + "*.egg-info", +] 
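Because the wheel target above explicitly includes the packaged CSV/JSON data files, a quick post-install sanity check is to confirm the tables are reachable via `importlib.resources`. This is only a sketch; it mirrors the packaged-data test added later in this patch.

```python
from importlib import resources

# The wheel build includes src/atomref/data/*.csv and *.json; after an install,
# the packaged tables should be importable as package resources.
data = resources.files("atomref.data")
for name in ("periodic_table.csv", "covalent.csv", "van_der_waals.csv", "registry.json"):
    print(name, data.joinpath(name).is_file())
```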
diff --git a/src/atomref/__about__.py b/src/atomref/__about__.py new file mode 100644 index 0000000..44cdb9a --- /dev/null +++ b/src/atomref/__about__.py @@ -0,0 +1 @@ +__version__ = '0.1.0a0' diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py new file mode 100644 index 0000000..fd07068 --- /dev/null +++ b/src/atomref/__init__.py @@ -0,0 +1,60 @@ +from .__about__ import __version__ +from .elements import Element, canonicalize_element_symbol, get_element, iter_elements, is_valid_element_symbol +from .policy import LookupResult, ValuePolicy +from .radii import ( + DEFAULT_COVALENT_POLICY, + DEFAULT_VDW_POLICY, + RadiiElementAssessment, + RadiiPolicy, + RadiiPolicyAssessment, + assess_radii_policy, + get_covalent_radius, + get_radii_set_info, + get_vdw_radius, + list_radii_sets, + lookup_covalent_radius, + lookup_vdw_radius, +) +from .registry import ( + CoverageInfo, + DatasetInfo, + DatasetRef, + ElementScalarSet, + Reference, + get_dataset_info, + list_dataset_ids, +) +from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer + +__all__ = [ + '__version__', + 'Element', + 'canonicalize_element_symbol', + 'get_element', + 'iter_elements', + 'is_valid_element_symbol', + 'CoverageInfo', + 'DatasetInfo', + 'DatasetRef', + 'ElementScalarSet', + 'Reference', + 'get_dataset_info', + 'list_dataset_ids', + 'LinearFit', + 'LinearTransfer', + 'SubstitutionTransfer', + 'LookupResult', + 'ValuePolicy', + 'RadiiPolicy', + 'RadiiElementAssessment', + 'RadiiPolicyAssessment', + 'DEFAULT_COVALENT_POLICY', + 'DEFAULT_VDW_POLICY', + 'list_radii_sets', + 'get_radii_set_info', + 'lookup_covalent_radius', + 'get_covalent_radius', + 'lookup_vdw_radius', + 'get_vdw_radius', + 'assess_radii_policy', +] diff --git a/src/atomref/data/__init__.py b/src/atomref/data/__init__.py new file mode 100644 index 0000000..835d4e0 --- /dev/null +++ b/src/atomref/data/__init__.py @@ -0,0 +1 @@ +"""Packaged data files for atomref.""" diff --git a/src/atomref/data/covalent.csv b/src/atomref/data/covalent.csv new file mode 100644 index 0000000..053a71a --- /dev/null +++ b/src/atomref/data/covalent.csv @@ -0,0 +1,119 @@ +z,cordero2008,csd_legacy_cov +1,0.31,0.23 +2,0.28,1.5 +3,1.28,1.28 +4,0.96,0.96 +5,0.84,0.83 +6,0.76,0.68 +7,0.71,0.68 +8,0.66,0.68 +9,0.57,0.64 +10,0.58,1.5 +11,1.66,1.66 +12,1.41,1.41 +13,1.21,1.21 +14,1.11,1.2 +15,1.07,1.05 +16,1.05,1.02 +17,1.02,0.99 +18,1.06,1.51 +19,2.03,2.03 +20,1.76,1.76 +21,1.7,1.7 +22,1.6,1.6 +23,1.53,1.53 +24,1.39,1.39 +25,1.61,1.61 +26,1.52,1.52 +27,1.5,1.26 +28,1.24,1.24 +29,1.32,1.32 +30,1.22,1.22 +31,1.22,1.22 +32,1.2,1.17 +33,1.19,1.21 +34,1.2,1.22 +35,1.2,1.21 +36,1.16,1.5 +37,2.2,2.2 +38,1.95,1.95 +39,1.9,1.9 +40,1.75,1.75 +41,1.64,1.64 +42,1.54,1.54 +43,1.47,1.47 +44,1.46,1.46 +45,1.42,1.42 +46,1.39,1.39 +47,1.45,1.45 +48,1.44,1.54 +49,1.42,1.42 +50,1.39,1.39 +51,1.39,1.39 +52,1.38,1.47 +53,1.39,1.4 +54,1.4,1.5 +55,2.44,2.44 +56,2.15,2.15 +57,2.07,2.07 +58,2.04,2.04 +59,2.03,2.03 +60,2.01,2.01 +61,1.99,1.99 +62,1.98,1.98 +63,1.98,1.98 +64,1.96,1.96 +65,1.94,1.94 +66,1.92,1.92 +67,1.92,1.92 +68,1.89,1.89 +69,1.9,1.9 +70,1.87,1.87 +71,1.87,1.87 +72,1.75,1.75 +73,1.7,1.7 +74,1.62,1.62 +75,1.51,1.51 +76,1.44,1.44 +77,1.41,1.41 +78,1.36,1.36 +79,1.36,1.36 +80,1.32,1.32 +81,1.45,1.45 +82,1.46,1.46 +83,1.48,1.48 +84,1.4,1.4 +85,1.5,1.21 +86,1.5,1.5 +87,2.6,2.6 +88,2.21,2.21 +89,2.15,2.15 +90,2.06,2.06 +91,2,2 +92,1.96,1.96 +93,1.9,1.9 +94,1.87,1.87 +95,1.8,1.8 +96,1.69,1.69 +97,,1.54 +98,,1.83 +99,,1.5 +100,,1.5 +101,,1.5 +102,,1.5 +103,,1.5 +104,,1.5 +105,,1.5 
+106,,1.5 +107,,1.5 +108,,1.5 +109,,1.5 +110,,1.5 +111,, +112,, +113,, +114,, +115,, +116,, +117,, +118,, diff --git a/src/atomref/data/periodic_table.csv b/src/atomref/data/periodic_table.csv new file mode 100644 index 0000000..744b4aa --- /dev/null +++ b/src/atomref/data/periodic_table.csv @@ -0,0 +1,119 @@ +z,symbol,name +1,H,Hydrogen +2,He,Helium +3,Li,Lithium +4,Be,Beryllium +5,B,Boron +6,C,Carbon +7,N,Nitrogen +8,O,Oxygen +9,F,Fluorine +10,Ne,Neon +11,Na,Sodium +12,Mg,Magnesium +13,Al,Aluminium +14,Si,Silicon +15,P,Phosphorus +16,S,Sulphur +17,Cl,Chlorine +18,Ar,Argon +19,K,Potassium +20,Ca,Calcium +21,Sc,Scandium +22,Ti,Titanium +23,V,Vanadium +24,Cr,Chromium +25,Mn,Manganese +26,Fe,Iron +27,Co,Cobalt +28,Ni,Nickel +29,Cu,Copper +30,Zn,Zinc +31,Ga,Gallium +32,Ge,Germanium +33,As,Arsenic +34,Se,Selenium +35,Br,Bromine +36,Kr,Krypton +37,Rb,Rubidium +38,Sr,Strontium +39,Y,Yttrium +40,Zr,Zirconium +41,Nb,Niobium +42,Mo,Molybdenum +43,Tc,Technetium +44,Ru,Ruthenium +45,Rh,Rhodium +46,Pd,Palladium +47,Ag,Silver +48,Cd,Cadmium +49,In,Indium +50,Sn,Tin +51,Sb,Antimony +52,Te,Tellurium +53,I,Iodine +54,Xe,Xenon +55,Cs,Caesium +56,Ba,Barium +57,La,Lanthanum +58,Ce,Cerium +59,Pr,Praseodymium +60,Nd,Neodymium +61,Pm,Promethium +62,Sm,Samarium +63,Eu,Europium +64,Gd,Gadolinium +65,Tb,Terbium +66,Dy,Dysprosium +67,Ho,Holmium +68,Er,Erbium +69,Tm,Thulium +70,Yb,Ytterbium +71,Lu,Lutetium +72,Hf,Hafnium +73,Ta,Tantalum +74,W,Tungsten +75,Re,Rhenium +76,Os,Osmium +77,Ir,Iridium +78,Pt,Platinum +79,Au,Gold +80,Hg,Mercury +81,Tl,Thallium +82,Pb,Lead +83,Bi,Bismuth +84,Po,Polonium +85,At,Astatine +86,Rn,Radon +87,Fr,Francium +88,Ra,Radium +89,Ac,Actinium +90,Th,Thorium +91,Pa,Protactinium +92,U,Uranium +93,Np,Neptunium +94,Pu,Plutonium +95,Am,Americium +96,Cm,Curium +97,Bk,Berkelium +98,Cf,Californium +99,Es,Einsteinium +100,Fm,Fermium +101,Md,Mendelevium +102,No,Nobelium +103,Lr,Lawrencium +104,Rf,Rutherfordium +105,Db,Dubnium +106,Sg,Seaborgium +107,Bh,Bohrium +108,Hs,Hassium +109,Mt,Meitnerium +110,Ds,Darmstadtium +111,Rg,Roentgenium +112,Cn,Copernicium +113,Nh,Nihonium +114,Fl,Flerovium +115,Mc,Moscovium +116,Lv,Livermorium +117,Ts,Tennessine +118,Og,Oganesson diff --git a/src/atomref/data/registry.json b/src/atomref/data/registry.json new file mode 100644 index 0000000..2577ab7 --- /dev/null +++ b/src/atomref/data/registry.json @@ -0,0 +1,434 @@ +{ + "schema_version": "0.1", + "created_from": { + "source_project": "molcryst", + "source_schema_version": "0.2", + "notes": [ + "Transformed for the initial atomref v0.1 scaffold.", + "Rahm 2016 is reclassified from van_der_waals to atomic_radius." + ] + }, + "quantities": { + "covalent_radius": { + "domain": "element", + "units": "angstrom", + "description": "Element-indexed covalent radii intended for geometry and bonding heuristics." + }, + "van_der_waals_radius": { + "domain": "element", + "units": "angstrom", + "description": "Element-indexed condensed-phase or contact-derived van der Waals radii." + }, + "atomic_radius": { + "domain": "element", + "units": "angstrom", + "description": "Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data." + } + }, + "datasets": { + "covalent_radius": { + "cordero2008": { + "name": "Cordero et al. covalent radii", + "description": "Covalent radii from Cordero et al. 
(2008) (last author: Alvarez).", + "semantic_class": "covalent_structural", + "origin_class": "compiled_experimental", + "phase_context": "condensed_phase", + "method_summary": "Derived from crystallographic bond distances (primarily single bonds) across the periodic table.", + "storage": { + "format": "dense_by_z_csv", + "filename": "covalent.csv", + "column": "cordero2008" + }, + "coverage": { + "n_values": 96, + "z_min": 1, + "z_max": 96, + "has_placeholders": false + }, + "placeholder_value": null, + "extraction_source": "Table 2 in B. Cordero et al. (2008), column 'r'", + "aliases": [ + "Cordero covalent radii", + "Cordero–Alvarez covalent radii", + "Alvarez covalent radii (2008)" + ], + "references": [ + { + "authors": "B. Cordero et al.", + "doi": "10.1039/B801115J", + "title": "Covalent radii revisited", + "venue": "Dalton Trans. (2008) 2832-2838" + } + ], + "notes": [ + "The source paper provides multiple radii per element for different atom types/environments; this package currently includes C(sp3) value for C and high-spin values for Mn/Fe/Co." + ] + }, + "csd_legacy_cov": { + "name": "CSD legacy covalent radii (bond perception)", + "description": "Legacy covalent radii used in CSD software for bond assignment (Rcov).", + "semantic_class": "covalent_legacy", + "origin_class": "curated_heuristic", + "phase_context": "mixed_or_legacy", + "method_summary": null, + "storage": { + "format": "dense_by_z_csv", + "filename": "covalent.csv", + "column": "csd_legacy_cov" + }, + "coverage": { + "n_values": 110, + "z_min": 1, + "z_max": 110, + "has_placeholders": true + }, + "placeholder_value": 1.5, + "extraction_source": "CCDC Elemental_Radii.xlsx (CSD radii table), column 'Covalent Radius'.", + "aliases": [], + "references": [ + { + "publisher": "Cambridge Crystallographic Data Centre (CCDC)", + "title": "Elemental Data and Radii (Excel)", + "url": "https://www.ccdc.cam.ac.uk/media/Documentation/F8D8439E-30C5-4FA8-B781-D9E65AAB0BF3/Elemental_Radii.xlsx" + }, + { + "authors": "B. Cordero et al.", + "doi": "10.1039/B801115J", + "title": "Covalent radii revisited", + "venue": "Dalton Trans. (2008) 2832-2838" + } + ], + "notes": [ + "CSD bond assignment heuristic: a bond A-B may be inferred if distance d satisfies Rcov(A)+Rcov(B)-t <= d <= Rcov(A)+Rcov(B)+t, with typical t=0.4 Å. (See the CCDC spreadsheet notes.)", + "For Z>=111, csd_legacy values are omitted because the legacy CSD table does not provide radii beyond Darmstadtium (Z=110).", + "Elements not yet encountered in the CSD have Rcov = 1.50 Å." + ] + } + }, + "van_der_waals_radius": { + "bondi1964": { + "name": "Bondi van der Waals radii", + "description": "Classic van der Waals radii compiled by Bondi (1964), available for 38 elements.", + "semantic_class": "vdw_compiled", + "origin_class": "compiled_experimental", + "phase_context": "mixed_or_legacy", + "method_summary": "Bondi compiled van der Waals radii from a combination of experimental sources (e.g., crystal structures, liquid-state properties, gas kinetic data) to reproduce molecular/atomic volumes and sizes. 
This set is widely used as a historical reference and in many computational chemistry defaults.", + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "bondi1964" + }, + "coverage": { + "n_values": 38, + "z_min": 1, + "z_max": 92, + "has_placeholders": false, + "covered_z": [ + 1, + 2, + 3, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 15, + 16, + 17, + 18, + 19, + 28, + 29, + 30, + 31, + 33, + 34, + 35, + 36, + 46, + 47, + 48, + 49, + 50, + 52, + 53, + 54, + 78, + 79, + 80, + 81, + 82, + 92 + ] + }, + "placeholder_value": null, + "extraction_source": "Bondi column in Table 1 of Alvarez (2013) (used as a convenient transcription of Bondi's tabulation).", + "aliases": [ + "Bondi radii", + "Bondi vdW radii" + ], + "references": [ + { + "authors": "A. Bondi", + "title": "van der Waals Volumes and Radii", + "venue": "J. Phys. Chem. 68 (1964) 441-451", + "doi": "10.1021/j100785a001" + }, + { + "authors": "S. Alvarez", + "title": "A cartography of the van der Waals territories", + "venue": "Dalton Trans. 42 (2013) 8617-8636", + "doi": "10.1039/C3DT50599E", + "note": "Table 1 reproduces Bondi radii for 38 elements." + } + ], + "notes": [ + "Coverage is limited (38 elements, including only a few transition metals and uranium).", + "Because Bondi radii were not derived exclusively from crystal nonbonded contact statistics, they can differ slightly from later 'structural' vdW radii." + ] + }, + "rowland_taylor1996": { + "name": "Rowland & Taylor nonbonded contact radii", + "description": "Nonbonded contact radii derived from organic crystal structures (Rowland & Taylor, 1996).", + "semantic_class": "vdw_structural", + "origin_class": "structural", + "phase_context": "condensed_phase", + "method_summary": "Rowland & Taylor analyzed distributions of intermolecular nonbonded contact distances in organic crystal structures from the Cambridge Structural Database (CSD). They fitted/estimated characteristic contact distances and solved for per-element radii by least-squares analysis over many element-pair distance distributions.", + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "rowland_taylor1996" + }, + "coverage": { + "n_values": 9, + "z_min": 1, + "z_max": 53, + "has_placeholders": false, + "covered_z": [ + 1, + 6, + 7, + 8, + 9, + 16, + 17, + 35, + 53 + ] + }, + "placeholder_value": null, + "extraction_source": "Table 3 in Rowland & Taylor (1996), column 'r_c' (least-squares radii, not the normalized R_d column).", + "aliases": [ + "Rowland–Taylor radii", + "Rowland & Taylor vdW radii" + ], + "references": [ + { + "authors": "R. S. Rowland; R. Taylor", + "title": "Intermolecular Nonbonded Contact Distances in Organic Crystal Structures: Comparison with Distances Expected from van der Waals Radii", + "venue": "J. Phys. Chem. 100 (1996) 7384-7391", + "doi": "10.1021/jp953141+" + } + ], + "notes": [ + "Coverage is intentionally limited to common organic-crystal nonmetals (H, C, N, O, F, S, Cl, Br, I).", + "Rowland & Taylor also report a normalized set (R_d) constrained to match the total of Bondi radii; this package uses the raw least-squares r_c values." 
+ ] + }, + "alvarez2013": { + "name": "Alvarez van der Waals radii", + "description": "van der Waals radii from Alvarez (2013).", + "semantic_class": "vdw_structural", + "origin_class": "structural", + "phase_context": "condensed_phase", + "method_summary": null, + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "alvarez2013" + }, + "coverage": { + "n_values": 93, + "z_min": 1, + "z_max": 99, + "has_placeholders": false, + "missing_z": [ + 61, + 84, + 85, + 86, + 87, + 88 + ] + }, + "placeholder_value": null, + "extraction_source": "Table 1 in Alvarez (2013), column 'r_vdW'.", + "aliases": [ + "Alvarez vdW radii", + "Alvarez (2013) r_vdW", + "Dalton Trans. vdW cartography radii" + ], + "references": [ + { + "authors": "S. Alvarez", + "doi": "10.1039/C3DT50599E", + "title": "A cartography of the van der Waals territories", + "venue": "Dalton Trans. 42 (2013) 8617-8636" + } + ], + "notes": [ + "Obtained by statistical analysis of millions of interatomic distances in the Cambridge Structural Database (CSD), locating the vdW peak after the vdW gap." + ] + }, + "chernyshov2020": { + "name": "Chernyshov LoS van der Waals radii", + "description": "van der Waals radii from Chernyshov et al. (ChemPhysChem 2020) using line-of-sight (LoS) classification of direct contacts.", + "semantic_class": "vdw_structural_typed_reduced", + "origin_class": "structural", + "phase_context": "condensed_phase", + "method_summary": "Chernyshov et al. introduce a line-of-sight (LoS) criterion to identify 'direct' interatomic contacts in complex molecular crystals. vdW radii are then inferred from statistically analyzed contact-distance distributions for specific atom types, yielding radii (including R_half and R_max variants) intended to better reflect steric/anistropic effects than simple distance-based heuristics.", + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "chernyshov2020" + }, + "coverage": { + "n_values": 10, + "z_min": 1, + "z_max": 53, + "has_placeholders": false, + "covered_z": [ + 1, + 6, + 7, + 8, + 9, + 16, + 17, + 34, + 35, + 53 + ] + }, + "placeholder_value": null, + "extraction_source": "Table 1 in Chernyshov et al. (2020): R_max values for the 'default' atom types typical for organic compounds.", + "aliases": [ + "LoS vdW radii", + "Chernyshov vdW radii" + ], + "references": [ + { + "authors": "I. Yu. Chernyshov; I. V. Ananyev; E. A. Pidko", + "title": "Revisiting van der Waals Radii: From Comprehensive Structural Analysis to Knowledge-Based Classification of Interatomic Contacts", + "venue": "ChemPhysChem 21 (2020) 1–8", + "doi": "10.1002/cphc.201901083" + } + ], + "notes": [ + "The source paper provides multiple radii per element for different atom types/environments; this package currently includes only the main/default R_max values used in Table 1.", + "Primarily targeted at elements common in organic crystals (H, C, N, O, F, S, Cl, Se, Br, I)." 
+ ] + }, + "csd_legacy_vdw": { + "name": "CSD legacy van der Waals radii (pre-2024.3)", + "description": "Legacy van der Waals radii historically used in CSD tools (pre-2024.3).", + "semantic_class": "vdw_legacy", + "origin_class": "curated_heuristic", + "phase_context": "mixed_or_legacy", + "method_summary": null, + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "csd_legacy_vdw" + }, + "coverage": { + "n_values": 110, + "z_min": 1, + "z_max": 110, + "has_placeholders": true + }, + "placeholder_value": 2.0, + "extraction_source": "CCDC Elemental_Radii.xlsx (CSD radii table), column 'vdW Radius' (Bondi/Rowland-Taylor based with defaults).", + "aliases": [], + "references": [ + { + "authors": "A. Bondi", + "doi": "10.1021/j100785a001", + "title": "van der Waals Volumes and Radii", + "venue": "J. Phys. Chem. 68 (1964) 441-451" + }, + { + "authors": "R. S. Rowland; R. Taylor", + "doi": "10.1021/jp953141+", + "title": "Intermolecular Nonbonded Contact Distances in Organic Crystal Structures: Comparison with Distances Expected from van der Waals Radii", + "venue": "J. Phys. Chem. 100 (1996) 7384-7391" + }, + { + "publisher": "CCDC", + "title": "Elemental Data and Radii (Excel)", + "url": "https://www.ccdc.cam.ac.uk/media/Documentation/F8D8439E-30C5-4FA8-B781-D9E65AAB0BF3/Elemental_Radii.xlsx" + }, + { + "publisher": "CCDC blog", + "title": "Updates to van der Waals radii used in the CSD and Mercury", + "url": "https://www.ccdc.cam.ac.uk/discover/blog/updates-to-van-der-waals-radii-csd-mercury/" + } + ], + "notes": [ + "For Z>=111, csd_legacy values are omitted because the legacy CSD table does not provide radii beyond Darmstadtium (Z=110).", + "Radii that are not available in either Bondi or Rowland & Taylor versions were assigned RvdW of 2.00 Å.", + "The CSD 2024.3 release updated the vdW radii used in CSD and Mercury to Alvarez-derived values (see CCDC blog post)." + ] + } + }, + "atomic_radius": { + "rahm2016": { + "name": "Rahm isodensity atomic radii (ρ=0.001 e/bohr³)", + "description": "Computed atomic radii for neutral atoms (elements 1–96) defined by the ρ=0.001 e/bohr³ electron-density isosurface (Rahm et al., 2016).", + "semantic_class": "atomic_isodensity", + "origin_class": "computational", + "phase_context": "isolated_atom", + "method_summary": "Rahm et al. computed relativistic all-electron DFT electron densities (close to the basis-set limit) for isolated atoms and ions. Radii are defined by an electron-density threshold, producing a consistent, theory-based size measure that correlates well with structural van der Waals radii derived from crystal structures.", + "storage": { + "format": "dense_by_z_csv", + "filename": "van_der_waals.csv", + "column": "rahm2016" + }, + "coverage": { + "n_values": 96, + "z_min": 1, + "z_max": 96, + "has_placeholders": false + }, + "placeholder_value": null, + "extraction_source": "Supporting Information for Rahm et al. (2016), Table S1: neutral-atom radii for elements 1–96.", + "aliases": [ + "Rahm radii", + "Rahm–Hoffmann–Ashcroft atomic radii", + "0.001 e/bohr^3 radii" + ], + "references": [ + { + "authors": "M. Rahm; R. Hoffmann; N. W. Ashcroft", + "title": "Atomic and Ionic Radii of Elements 1–96", + "venue": "Chem. Eur. J. 22 (2016) 14625–14632", + "doi": "10.1002/chem.201602949" + }, + { + "title": "Chem. Eur. J. 2016, 22, 14625–14632 (Rahm et al.) – Misc. 
Information", + "url": "http://dx.doi.org/10.1002/chem.201602949", + "publisher": "Supporting Information", + "note": "Table S1 contains the neutral-atom radii used here." + } + ], + "notes": [ + "The original work also reports cationic radii (+1) for the first 96 elements and selected anionic radii (−1) for some elements; these are not yet included in the current CSV.", + "Despite the fact that in this project this radii are classified as vdW radii for the purpose of simplicity, they should be treated as a correlational/transferable baseline rather than a direct condensed-phase vdW radius since they describe isolated atoms in vacuum." + ] + } + } + } +} diff --git a/src/atomref/data/van_der_waals.csv b/src/atomref/data/van_der_waals.csv new file mode 100644 index 0000000..86e7be3 --- /dev/null +++ b/src/atomref/data/van_der_waals.csv @@ -0,0 +1,119 @@ +z,bondi1964,rowland_taylor1996,alvarez2013,chernyshov2020,csd_legacy_vdw,rahm2016 +1,1.2,1.1,1.2,1.21,1.09,1.54 +2,1.4,,1.43,,1.4,1.34 +3,1.81,,2.12,,1.82,2.2 +4,,,1.98,,2,2.19 +5,,,1.91,,2,2.05 +6,1.7,1.77,1.77,1.91,1.7,1.9 +7,1.55,1.64,1.66,1.76,1.55,1.79 +8,1.52,1.58,1.5,1.74,1.52,1.71 +9,1.47,1.46,1.46,1.55,1.47,1.63 +10,1.54,,1.58,,1.54,1.56 +11,2.27,,2.5,,2.27,2.25 +12,1.73,,2.51,,1.73,2.4 +13,,,2.25,,2,2.39 +14,2.22,,2.19,,2.1,2.32 +15,1.8,,1.9,,1.8,2.23 +16,1.8,1.81,1.89,1.95,1.8,2.14 +17,1.75,1.76,1.82,1.91,1.75,2.06 +18,1.76,,1.83,,1.88,1.97 +19,2.75,,2.73,,2.75,2.34 +20,,,2.62,,2,2.7 +21,,,2.58,,2,2.63 +22,,,2.46,,2,2.57 +23,,,2.42,,2,2.52 +24,,,2.45,,2,2.33 +25,,,2.45,,2,2.42 +26,,,2.44,,2,2.26 +27,,,2.4,,2,2.22 +28,1.63,,2.4,,1.63,2.19 +29,1.4,,2.38,,1.4,2.17 +30,1.39,,2.39,,1.39,2.22 +31,1.87,,2.32,,1.87,2.33 +32,,,2.29,,2,2.34 +33,1.85,,1.88,,1.85,2.31 +34,1.9,,1.82,2.04,1.9,2.24 +35,1.83,1.87,1.86,2,1.85,2.19 +36,2.02,,2.25,,2.02,2.12 +37,,,3.21,,2,2.4 +38,,,2.84,,2,2.79 +39,,,2.75,,2,2.74 +40,,,2.52,,2,2.68 +41,,,2.56,,2,2.51 +42,,,2.45,,2,2.44 +43,,,2.44,,2,2.41 +44,,,2.46,,2,2.37 +45,,,2.44,,2,2.33 +46,1.63,,2.15,,1.63,2.15 +47,1.72,,2.53,,1.72,2.25 +48,1.62,,2.49,,1.58,2.38 +49,1.93,,2.43,,1.93,2.46 +50,2.17,,2.42,,2.17,2.48 +51,,,2.47,,2,2.46 +52,2,,1.99,,2.06,2.42 +53,1.98,2.03,2.04,2.17,1.98,2.38 +54,2.16,,2.06,,2.16,2.32 +55,,,3.48,,2,2.49 +56,,,3.03,,2,2.93 +57,,,2.98,,2,2.84 +58,,,2.88,,2,2.82 +59,,,2.92,,2,2.86 +60,,,2.95,,2,2.84 +61,,,,,2,2.83 +62,,,2.9,,2,2.8 +63,,,2.87,,2,2.8 +64,,,2.83,,2,2.77 +65,,,2.79,,2,2.76 +66,,,2.87,,2,2.75 +67,,,2.81,,2,2.73 +68,,,2.83,,2,2.72 +69,,,2.79,,2,2.71 +70,,,2.8,,2,2.77 +71,,,2.74,,2,2.7 +72,,,2.63,,2,2.64 +73,,,2.53,,2,2.58 +74,,,2.57,,2,2.53 +75,,,2.49,,2,2.49 +76,,,2.48,,2,2.44 +77,,,2.41,,2,2.33 +78,1.72,,2.29,,1.72,2.3 +79,1.66,,2.32,,1.66,2.26 +80,1.7,,2.45,,1.55,2.29 +81,1.96,,2.47,,1.96,2.42 +82,2.02,,2.6,,2.02,2.49 +83,,,2.54,,2,2.5 +84,,,,,2,2.5 +85,,,,,2,2.47 +86,,,,,2,2.43 +87,,,,,2,2.58 +88,,,,,2,2.92 +89,,,2.8,,2,2.93 +90,,,2.93,,2,2.89 +91,,,2.88,,2,2.85 +92,1.86,,2.71,,1.86,2.83 +93,,,2.82,,2,2.8 +94,,,2.81,,2,2.78 +95,,,2.83,,2,2.76 +96,,,3.05,,2,2.76 +97,,,3.4,,2, +98,,,3.05,,2, +99,,,2.7,,2, +100,,,,,2, +101,,,,,2, +102,,,,,2, +103,,,,,2, +104,,,,,2, +105,,,,,2, +106,,,,,2, +107,,,,,2, +108,,,,,2, +109,,,,,2, +110,,,,,2, +111,,,,,, +112,,,,,, +113,,,,,, +114,,,,,, +115,,,,,, +116,,,,,, +117,,,,,, +118,,,,,, diff --git a/src/atomref/elements.py b/src/atomref/elements.py new file mode 100644 index 0000000..42f0598 --- /dev/null +++ b/src/atomref/elements.py @@ -0,0 +1,99 @@ +"""Periodic table access for stable element identity.""" + +from __future__ import 
annotations + +import csv +import re +from dataclasses import dataclass +from functools import lru_cache +from importlib import resources + + +_MISSING_TOKENS = {'', '?', '.'} +_LEADING_ALPHA_RE = re.compile(r'([A-Za-z]{1,3})') + + +@dataclass(frozen=True, slots=True) +class Element: + """Chemical element identity.""" + + z: int + symbol: str + name: str + + +def _normalize_element_token(token: str | None) -> str | None: + if token is None: + return None + + raw = token.strip() + if raw in _MISSING_TOKENS: + return None + + if (raw.startswith("'") and raw.endswith("'")) or ( + raw.startswith('"') and raw.endswith('"') + ): + raw = raw[1:-1].strip() + if raw in _MISSING_TOKENS: + return None + + if not raw: + return None + return raw + + +def canonicalize_element_symbol(token: str | None) -> str | None: + """Canonicalize a free-form element token.""" + + raw = _normalize_element_token(token) + if raw is None: + return None + + match = _LEADING_ALPHA_RE.match(raw) + if match is None: + return None + + letters = match.group(1) + return letters[0].upper() + letters[1:].lower() + + +@lru_cache(maxsize=1) +def _load_elements_by_symbol() -> dict[str, Element]: + table_path = resources.files('atomref.data').joinpath('periodic_table.csv') + with table_path.open('r', encoding='utf-8', newline='') as handle: + reader = csv.DictReader(handle) + out: dict[str, Element] = {} + for row in reader: + z = int(row['z']) + symbol = row['symbol'] + name = row['name'] + out[symbol] = Element(z=z, symbol=symbol, name=name) + return out + + +@lru_cache(maxsize=1) +def _elements_in_z_order() -> tuple[Element, ...]: + return tuple(sorted(_load_elements_by_symbol().values(), key=lambda e: e.z)) + + +def is_valid_element_symbol(symbol: str | None) -> bool: + """Return ``True`` if ``symbol`` is a known element symbol.""" + + if symbol is None: + return False + return symbol in _load_elements_by_symbol() + + +def get_element(symbol: str | None) -> Element | None: + """Look up element identity by symbol or free-form token.""" + + sym = canonicalize_element_symbol(symbol) + if sym is None: + return None + return _load_elements_by_symbol().get(sym) + + +def iter_elements() -> tuple[Element, ...]: + """Return all packaged elements in increasing atomic-number order.""" + + return _elements_in_z_order() diff --git a/src/atomref/errors.py b/src/atomref/errors.py new file mode 100644 index 0000000..1922cf5 --- /dev/null +++ b/src/atomref/errors.py @@ -0,0 +1,14 @@ +class AtomrefError(Exception): + """Base package error.""" + + +class DatasetError(AtomrefError): + """Packaged dataset or registry error.""" + + +class MissingValueError(AtomrefError): + """Raised when a required reference value is unavailable.""" + + +class PolicyError(AtomrefError): + """Raised for invalid policy configuration.""" diff --git a/src/atomref/policy.py b/src/atomref/policy.py new file mode 100644 index 0000000..b7df87b --- /dev/null +++ b/src/atomref/policy.py @@ -0,0 +1,261 @@ +"""Generic value-policy resolution for element-indexed scalar datasets.""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field +from functools import lru_cache +import math +from typing import Generic, Literal, TypeVar + +from .elements import canonicalize_element_symbol, get_element, is_valid_element_symbol +from .errors import PolicyError +from .registry import ( + DatasetLike, + DatasetRef, + ElementScalarSet, + _is_placeholder_value, + get_builtin_set, + resolve_dataset_like, +) +from .transfer import 
LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel + + +K = TypeVar('K') + +LookupSource = Literal[ + 'override', + 'base', + 'transfer_substitution', + 'transfer_linear', + 'fallback', + 'missing', +] + + +@dataclass(frozen=True, slots=True) +class LookupResult: + value: float | None + source: LookupSource + target: DatasetRef + resolved_from: tuple[DatasetRef, ...] = () + is_placeholder: bool = False + fit: LinearFit | None = None + notes: tuple[str, ...] = () + + def __float__(self) -> float: + if self.value is None: + raise TypeError('reference value is missing') + return float(self.value) + + +@dataclass(frozen=True, slots=True) +class ValuePolicy(Generic[K]): + base: DatasetLike + transfers: tuple[TransferModel, ...] = () + overrides: Mapping[K, float] = field(default_factory=dict) + fallback: float | None = None + + +def _normalize_element_symbol(symbol: str | None) -> str | None: + cand = canonicalize_element_symbol(symbol) + if cand in {'D', 'T'}: + cand = 'H' + if cand is None: + return None + if not is_valid_element_symbol(cand): + return None + return cand + + +def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: + return resolve_dataset_like(policy.base).ref + + +def _fit_linear_transfer(base_set: ElementScalarSet, predictor_set: ElementScalarSet, *, min_points: int, exclude_placeholders: bool) -> LinearFit: + xs: list[float] = [] + ys: list[float] = [] + + n_z = min(len(base_set.values_by_z), len(predictor_set.values_by_z)) + for z in range(1, n_z): + y = base_set.values_by_z[z] + x = predictor_set.values_by_z[z] + if y is None or x is None: + continue + y_f = float(y) + x_f = float(x) + if exclude_placeholders and ( + _is_placeholder_value(base_set.info, y_f) + or _is_placeholder_value(predictor_set.info, x_f) + ): + continue + xs.append(x_f) + ys.append(y_f) + + n = len(xs) + if n < min_points: + raise PolicyError('not enough overlapping elements to fit linear transfer') + + x_mean = sum(xs) / n + y_mean = sum(ys) / n + sxx = sum((x - x_mean) ** 2 for x in xs) + if sxx == 0: + raise PolicyError('cannot fit linear transfer: zero predictor variance') + + sxy = sum((x - x_mean) * (y - y_mean) for x, y in zip(xs, ys)) + slope = sxy / sxx + intercept = y_mean - slope * x_mean + + y_hat = [slope * x + intercept for x in xs] + sse = sum((y - yh) ** 2 for y, yh in zip(ys, y_hat)) + sst = sum((y - y_mean) ** 2 for y in ys) + r2 = 1.0 - sse / sst if sst != 0 else 1.0 + rmse = math.sqrt(sse / n) + + return LinearFit( + coefficients=(slope,), + intercept=intercept, + n_points=n, + r2=r2, + rmse=rmse, + ) + + +@lru_cache(maxsize=None) +def _fit_linear_transfer_cached(base_ref: DatasetRef, predictor_ref: DatasetRef, min_points: int, exclude_placeholders: bool) -> LinearFit: + return _fit_linear_transfer( + get_builtin_set(base_ref), + get_builtin_set(predictor_ref), + min_points=min_points, + exclude_placeholders=exclude_placeholders, + ) + + +def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit | None: + if not isinstance(transfer, LinearTransfer): + return None + if len(transfer.predictors) != 1: + raise PolicyError('v0.1 LinearTransfer supports exactly one predictor dataset') + + predictor = transfer.predictors[0] + if isinstance(base, DatasetRef) and isinstance(predictor, DatasetRef): + return _fit_linear_transfer_cached( + base, predictor, transfer.min_points, transfer.exclude_placeholders + ) + return _fit_linear_transfer( + resolve_dataset_like(base), + resolve_dataset_like(predictor), + min_points=transfer.min_points, + 
exclude_placeholders=transfer.exclude_placeholders, + ) + + +def _apply_substitution_transfer(symbol: str, *, target: DatasetRef, transfer: SubstitutionTransfer) -> tuple[LookupResult | None, str | None]: + source_set = resolve_dataset_like(transfer.source) + value = source_set.get(symbol) + if value is None: + return None, f'no substitution value in {source_set.ref.set_id}' + value_f = float(value) + return ( + LookupResult( + value=value_f, + source='transfer_substitution', + target=target, + resolved_from=(source_set.ref,), + is_placeholder=_is_placeholder_value(source_set.info, value_f), + notes=('missing in base set; substituted from transfer source',), + ), + None, + ) + + +def _apply_linear_transfer(symbol: str, *, base: DatasetLike, target: DatasetRef, transfer: LinearTransfer) -> tuple[LookupResult | None, str | None]: + if len(transfer.predictors) != 1: + raise PolicyError('v0.1 LinearTransfer supports exactly one predictor dataset') + + predictor_set = resolve_dataset_like(transfer.predictors[0]) + predictor_value = predictor_set.get(symbol) + if predictor_value is None: + return None, f'no predictor value in {predictor_set.ref.set_id}' + predictor_f = float(predictor_value) + + if transfer.exclude_placeholders and _is_placeholder_value(predictor_set.info, predictor_f): + return None, f'predictor value in {predictor_set.ref.set_id} is a placeholder' + + fit = _fit_transfer_model(base, transfer) + if fit is None: + return None, 'no fit available for linear transfer' + predicted = fit.coefficients[0] * predictor_f + fit.intercept + return ( + LookupResult( + value=float(predicted), + source='transfer_linear', + target=target, + resolved_from=(predictor_set.ref,), + is_placeholder=False, + fit=fit, + notes=('missing in base set; inferred via linear transfer',), + ), + None, + ) + + +def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: + target = _resolve_target_ref(policy) + base_set = resolve_dataset_like(policy.base) + if base_set.info.domain != 'element': + raise PolicyError('v0.1 resolver supports only element-domain datasets') + + sym = _normalize_element_symbol(symbol) + if sym is None: + note = 'unknown element' if symbol is not None else 'missing element symbol' + return LookupResult(value=None, source='missing', target=target, notes=(note,)) + + if sym in policy.overrides: + return LookupResult( + value=float(policy.overrides[sym]), + source='override', + target=target, + notes=('value supplied by policy override',), + ) + + base_value = base_set.get(sym) + if base_value is not None: + base_f = float(base_value) + return LookupResult( + value=base_f, + source='base', + target=target, + resolved_from=(base_set.ref,), + is_placeholder=_is_placeholder_value(base_set.info, base_f), + notes=(), + ) + + transfer_notes: list[str] = ['missing in base set'] + for transfer in policy.transfers: + if isinstance(transfer, SubstitutionTransfer): + result, note = _apply_substitution_transfer(sym, target=target, transfer=transfer) + elif isinstance(transfer, LinearTransfer): + result, note = _apply_linear_transfer(sym, base=policy.base, target=target, transfer=transfer) + else: # pragma: no cover - closed union today + raise PolicyError(f'unsupported transfer model: {type(transfer)!r}') + + if result is not None: + return result + if note: + transfer_notes.append(note) + + if policy.fallback is not None: + return LookupResult( + value=float(policy.fallback), + source='fallback', + target=target, + notes=tuple(transfer_notes + ['using fallback value']), + ) 
+ + return LookupResult( + value=None, + source='missing', + target=target, + notes=tuple(transfer_notes), + ) diff --git a/src/atomref/py.typed b/src/atomref/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/atomref/radii.py b/src/atomref/radii.py new file mode 100644 index 0000000..61cebda --- /dev/null +++ b/src/atomref/radii.py @@ -0,0 +1,233 @@ +"""Radii-specific public API built on the generic policy core.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from dataclasses import dataclass, field +from typing import Literal + +from .elements import canonicalize_element_symbol, get_element, is_valid_element_symbol +from .errors import PolicyError +from .policy import LookupResult, ValuePolicy, _fit_transfer_model, _resolve_value +from .registry import DatasetInfo, DatasetRef, ElementScalarSet, get_dataset_info, list_dataset_ids +from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel + + +RadiiKind = Literal['covalent', 'van_der_waals'] +RadiiSet = ElementScalarSet + + +_KIND_TO_QUANTITY = { + 'covalent': 'covalent_radius', + 'van_der_waals': 'van_der_waals_radius', +} + + +@dataclass(frozen=True, slots=True) +class RadiiPolicy: + kind: RadiiKind + base_set: str | RadiiSet + transfers: tuple[TransferModel, ...] = () + overrides: Mapping[str, float] = field(default_factory=dict) + fallback: float | None = None + + def as_value_policy(self) -> ValuePolicy[str]: + quantity = _quantity_for_kind(self.kind) + if isinstance(self.base_set, ElementScalarSet): + if self.base_set.ref.quantity != quantity: + raise PolicyError( + f'base_set quantity {self.base_set.ref.quantity!r} is incompatible with radii kind {self.kind!r}' + ) + base = self.base_set + else: + base = DatasetRef(quantity, self.base_set) + + normalized_overrides: dict[str, float] = {} + for key, value in self.overrides.items(): + sym = _normalize_radii_symbol(key) + if sym is None or not is_valid_element_symbol(sym): + raise PolicyError(f'invalid override element symbol: {key!r}') + normalized_overrides[sym] = float(value) + + return ValuePolicy( + base=base, + transfers=self.transfers, + overrides=normalized_overrides, + fallback=self.fallback, + ) + + +@dataclass(frozen=True, slots=True) +class RadiiElementAssessment: + symbol: str + lookup: LookupResult + + +@dataclass(frozen=True, slots=True) +class RadiiPolicyAssessment: + kind: RadiiKind + policy: RadiiPolicy + elements: tuple[str, ...] + + n_elements: int + n_override: int + n_base: int + n_transfer_substitution: int + n_transfer_linear: int + n_fallback: int + n_missing: int + n_placeholders: int + + missing_symbols: tuple[str, ...] + placeholder_symbols: tuple[str, ...] + + fits: tuple[LinearFit, ...] = () + warnings: tuple[str, ...] = () + per_element: tuple[RadiiElementAssessment, ...] 
= () + + +def _quantity_for_kind(kind: RadiiKind) -> str: + try: + return _KIND_TO_QUANTITY[kind] + except KeyError as exc: + raise PolicyError(f'unknown radii kind: {kind!r}') from exc + + +def _normalize_radii_symbol(symbol: str | None) -> str | None: + cand = canonicalize_element_symbol(symbol) + if cand in {'D', 'T'}: + cand = 'H' + return cand + + +def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: + symbols: set[str] = set() + for token in elements: + sym = _normalize_radii_symbol(token) + if sym is None: + raise ValueError('missing element symbol') + if not is_valid_element_symbol(sym): + raise ValueError(f'invalid element symbol: {sym!r}') + symbols.add(sym) + return tuple(sorted(symbols, key=lambda s: get_element(s).z if get_element(s) else 0)) + + +def list_radii_sets(kind: RadiiKind) -> tuple[str, ...]: + return list_dataset_ids(_quantity_for_kind(kind)) + + +def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: + return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) + + +def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: + if policy.kind != expected: + raise PolicyError(f'expected a {expected!r} radii policy, got {policy.kind!r}') + + +def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: + return _resolve_value(symbol, policy=policy.as_value_policy()) + + +def lookup_covalent_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> LookupResult: + active = DEFAULT_COVALENT_POLICY if policy is None else policy + _validate_policy_kind(active, expected='covalent') + return _lookup_radius(symbol, policy=active) + + +def get_covalent_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> float | None: + return lookup_covalent_radius(symbol, policy=policy).value + + +def lookup_vdw_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> LookupResult: + active = DEFAULT_VDW_POLICY if policy is None else policy + _validate_policy_kind(active, expected='van_der_waals') + return _lookup_radius(symbol, policy=active) + + +def get_vdw_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> float | None: + return lookup_vdw_radius(symbol, policy=policy).value + + +def assess_radii_policy(elements: Iterable[str], *, policy: RadiiPolicy, detail: bool = False) -> RadiiPolicyAssessment: + elems = _normalize_assessment_elements(elements) + value_policy = policy.as_value_policy() + + n_override = 0 + n_base = 0 + n_transfer_substitution = 0 + n_transfer_linear = 0 + n_fallback = 0 + n_missing = 0 + n_placeholders = 0 + + missing_symbols: list[str] = [] + placeholder_symbols: list[str] = [] + per_element: list[RadiiElementAssessment] = [] + + for symbol in elems: + lookup = _resolve_value(symbol, policy=value_policy) + if lookup.source == 'override': + n_override += 1 + elif lookup.source == 'base': + n_base += 1 + elif lookup.source == 'transfer_substitution': + n_transfer_substitution += 1 + elif lookup.source == 'transfer_linear': + n_transfer_linear += 1 + elif lookup.source == 'fallback': + n_fallback += 1 + elif lookup.source == 'missing': + n_missing += 1 + missing_symbols.append(symbol) + + if lookup.is_placeholder: + n_placeholders += 1 + placeholder_symbols.append(symbol) + + if detail: + per_element.append(RadiiElementAssessment(symbol=symbol, lookup=lookup)) + + fits: list[LinearFit] = [] + warnings: list[str] = [] + for transfer in value_policy.transfers: + if isinstance(transfer, LinearTransfer): + try: + fit = 
_fit_transfer_model(value_policy.base, transfer) + except Exception as exc: # noqa: BLE001 + warnings.append(str(exc)) + else: + if fit is not None: + fits.append(fit) + + return RadiiPolicyAssessment( + kind=policy.kind, + policy=policy, + elements=elems, + n_elements=len(elems), + n_override=n_override, + n_base=n_base, + n_transfer_substitution=n_transfer_substitution, + n_transfer_linear=n_transfer_linear, + n_fallback=n_fallback, + n_missing=n_missing, + n_placeholders=n_placeholders, + missing_symbols=tuple(missing_symbols), + placeholder_symbols=tuple(placeholder_symbols), + fits=tuple(fits), + warnings=tuple(warnings), + per_element=tuple(per_element), + ) + + +DEFAULT_COVALENT_POLICY = RadiiPolicy( + kind='covalent', + base_set='cordero2008', + transfers=(SubstitutionTransfer(source=DatasetRef('covalent_radius', 'csd_legacy_cov')),), +) + +DEFAULT_VDW_POLICY = RadiiPolicy( + kind='van_der_waals', + base_set='alvarez2013', + transfers=(LinearTransfer(predictors=(DatasetRef('atomic_radius', 'rahm2016'),)),), +) diff --git a/src/atomref/registry.py b/src/atomref/registry.py new file mode 100644 index 0000000..196dbc3 --- /dev/null +++ b/src/atomref/registry.py @@ -0,0 +1,343 @@ +"""Dataset registry and packaged element-scalar set loading.""" + +from __future__ import annotations + +from collections.abc import Iterable, Mapping +from dataclasses import dataclass +import csv +import json +from functools import lru_cache +from importlib import resources + +from .elements import canonicalize_element_symbol, get_element, iter_elements +from .errors import DatasetError + + +QuantityId = str +DomainId = str + + +@dataclass(frozen=True, slots=True) +class DatasetRef: + quantity: QuantityId + set_id: str + + +@dataclass(frozen=True, slots=True) +class Reference: + authors: str | None = None + year: int | None = None + title: str | None = None + venue: str | None = None + doi: str | None = None + url: str | None = None + publisher: str | None = None + note: str | None = None + + +@dataclass(frozen=True, slots=True) +class CoverageInfo: + n_values: int + z_min: int | None = None + z_max: int | None = None + has_placeholders: bool = False + covered_z: tuple[int, ...] = () + missing_z: tuple[int, ...] = () + + +@dataclass(frozen=True, slots=True) +class DatasetInfo: + ref: DatasetRef + domain: DomainId + units: str | None + name: str + description: str | None = None + semantic_class: str | None = None + origin_class: str | None = None + phase_context: str | None = None + method_summary: str | None = None + placeholder_value: float | None = None + extraction_source: str | None = None + aliases: tuple[str, ...] = () + references: tuple[Reference, ...] = () + notes: tuple[str, ...] = () + storage: Mapping[str, object] | None = None + coverage: CoverageInfo | None = None + + +@dataclass(frozen=True, slots=True) +class ElementScalarSet: + ref: DatasetRef + info: DatasetInfo + values_by_z: tuple[float | None, ...] 
+ + @classmethod + def from_mapping( + cls, + *, + ref: DatasetRef, + values: Mapping[str, float | None], + name: str, + units: str | None, + description: str | None = None, + semantic_class: str = 'user', + origin_class: str = 'user', + phase_context: str | None = None, + references: Iterable[Reference] = (), + notes: Iterable[str] = (), + placeholder_value: float | None = None, + ) -> 'ElementScalarSet': + n_z = max(e.z for e in iter_elements()) + values_by_z: list[float | None] = [None] * (n_z + 1) + + for key, value in values.items(): + sym = _normalize_element_domain_symbol(key) + elem = get_element(sym) + if elem is None: + raise DatasetError(f'invalid element symbol in custom set: {key!r}') + values_by_z[elem.z] = None if value is None else float(value) + + covered_z = tuple(z for z, value in enumerate(values_by_z) if z > 0 and value is not None) + has_placeholders = False + if placeholder_value is not None: + has_placeholders = any( + value is not None and abs(value - placeholder_value) < 1e-12 + for value in values_by_z[1:] + ) + + info = DatasetInfo( + ref=ref, + domain='element', + units=units, + name=name, + description=description, + semantic_class=semantic_class, + origin_class=origin_class, + phase_context=phase_context, + placeholder_value=placeholder_value, + aliases=(), + references=tuple(references), + notes=tuple(notes), + storage=None, + coverage=CoverageInfo( + n_values=len(covered_z), + z_min=min(covered_z) if covered_z else None, + z_max=max(covered_z) if covered_z else None, + has_placeholders=has_placeholders, + covered_z=covered_z, + missing_z=tuple(z for z in range(1, n_z + 1) if values_by_z[z] is None), + ), + ) + return cls(ref=ref, info=info, values_by_z=tuple(values_by_z)) + + def get(self, symbol: str | None) -> float | None: + sym = _normalize_element_domain_symbol(symbol) + elem = get_element(sym) + if elem is None: + return None + return self.values_by_z[elem.z] + + +DatasetLike = DatasetRef | ElementScalarSet + + +def _normalize_element_domain_symbol(symbol: str | None) -> str | None: + cand = canonicalize_element_symbol(symbol) + if cand in {'D', 'T'}: + return 'H' + return cand + + +@lru_cache(maxsize=1) +def _load_registry_json() -> dict[str, object]: + path = resources.files('atomref.data').joinpath('registry.json') + with path.open('r', encoding='utf-8') as handle: + data = json.load(handle) + if not isinstance(data, dict): + raise DatasetError('invalid registry.json: expected JSON object') + return data + + +def _get_quantities_mapping() -> Mapping[str, object]: + quantities = _load_registry_json().get('quantities') + if not isinstance(quantities, dict): + raise DatasetError('invalid registry.json: missing quantities mapping') + return quantities + + +def _get_datasets_mapping() -> Mapping[str, object]: + datasets = _load_registry_json().get('datasets') + if not isinstance(datasets, dict): + raise DatasetError('invalid registry.json: missing datasets mapping') + return datasets + + +def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: + datasets = _get_datasets_mapping().get(quantity) + if not isinstance(datasets, dict): + raise DatasetError(f'unknown quantity: {quantity!r}') + return datasets + + +def _canonicalize_alias_token(value: str) -> str: + return ' '.join(value.strip().lower().split()) + + +def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: + by_quantity = _datasets_for_quantity(quantity) + if set_id in by_quantity: + return set_id + + wanted = _canonicalize_alias_token(set_id) + for actual_id, raw_entry 
in by_quantity.items(): + if _canonicalize_alias_token(actual_id) == wanted: + return actual_id + if isinstance(raw_entry, dict): + aliases = raw_entry.get('aliases', ()) + if isinstance(aliases, list): + for alias in aliases: + if isinstance(alias, str) and _canonicalize_alias_token(alias) == wanted: + return actual_id + raise DatasetError(f'unknown dataset id for {quantity!r}: {set_id!r}') + + +def list_dataset_ids(quantity: QuantityId) -> tuple[str, ...]: + return tuple(_datasets_for_quantity(quantity).keys()) + + +def _coerce_reference(obj: object) -> Reference: + if not isinstance(obj, dict): + raise DatasetError('invalid reference entry in registry.json') + return Reference( + authors=obj.get('authors') if isinstance(obj.get('authors'), str) else None, + year=obj.get('year') if isinstance(obj.get('year'), int) else None, + title=obj.get('title') if isinstance(obj.get('title'), str) else None, + venue=obj.get('venue') if isinstance(obj.get('venue'), str) else None, + doi=obj.get('doi') if isinstance(obj.get('doi'), str) else None, + url=obj.get('url') if isinstance(obj.get('url'), str) else None, + publisher=obj.get('publisher') if isinstance(obj.get('publisher'), str) else None, + note=obj.get('note') if isinstance(obj.get('note'), str) else None, + ) + + +def _coerce_coverage(obj: object) -> CoverageInfo | None: + if not isinstance(obj, dict): + return None + covered = obj.get('covered_z') + missing = obj.get('missing_z') + covered_z = tuple(int(z) for z in covered) if isinstance(covered, list) else () + missing_z = tuple(int(z) for z in missing) if isinstance(missing, list) else () + return CoverageInfo( + n_values=int(obj['n_values']), + z_min=int(obj['z_min']) if isinstance(obj.get('z_min'), int) else None, + z_max=int(obj['z_max']) if isinstance(obj.get('z_max'), int) else None, + has_placeholders=bool(obj.get('has_placeholders', False)), + covered_z=covered_z, + missing_z=missing_z, + ) + + +def get_dataset_info(ref: DatasetRef) -> DatasetInfo: + actual_set_id = _resolve_set_id(ref.quantity, ref.set_id) + actual_ref = DatasetRef(quantity=ref.quantity, set_id=actual_set_id) + + quantities = _get_quantities_mapping() + quantity_info = quantities.get(actual_ref.quantity) + if not isinstance(quantity_info, dict): + raise DatasetError(f'unknown quantity: {actual_ref.quantity!r}') + + units = quantity_info.get('units') if isinstance(quantity_info.get('units'), str) else None + domain = quantity_info.get('domain') if isinstance(quantity_info.get('domain'), str) else None + if domain is None: + raise DatasetError(f'missing domain for quantity: {actual_ref.quantity!r}') + + raw_entry = _datasets_for_quantity(actual_ref.quantity).get(actual_ref.set_id) + if not isinstance(raw_entry, dict): + raise DatasetError(f'unknown dataset: {actual_ref}') + + refs_raw = raw_entry.get('references', []) + references = tuple(_coerce_reference(item) for item in refs_raw) if isinstance(refs_raw, list) else () + aliases_raw = raw_entry.get('aliases', []) + aliases = tuple(item for item in aliases_raw if isinstance(item, str)) if isinstance(aliases_raw, list) else () + notes_raw = raw_entry.get('notes', []) + notes = tuple(item for item in notes_raw if isinstance(item, str)) if isinstance(notes_raw, list) else () + storage = raw_entry.get('storage') if isinstance(raw_entry.get('storage'), dict) else None + + return DatasetInfo( + ref=actual_ref, + domain=domain, + units=units, + name=raw_entry.get('name') if isinstance(raw_entry.get('name'), str) else actual_ref.set_id, + 
description=raw_entry.get('description') if isinstance(raw_entry.get('description'), str) else None, + semantic_class=raw_entry.get('semantic_class') if isinstance(raw_entry.get('semantic_class'), str) else None, + origin_class=raw_entry.get('origin_class') if isinstance(raw_entry.get('origin_class'), str) else None, + phase_context=raw_entry.get('phase_context') if isinstance(raw_entry.get('phase_context'), str) else None, + method_summary=raw_entry.get('method_summary') if isinstance(raw_entry.get('method_summary'), str) else None, + placeholder_value=( + float(raw_entry['placeholder_value']) + if raw_entry.get('placeholder_value') is not None + else None + ), + extraction_source=raw_entry.get('extraction_source') if isinstance(raw_entry.get('extraction_source'), str) else None, + aliases=aliases, + references=references, + notes=notes, + storage=storage, + coverage=_coerce_coverage(raw_entry.get('coverage')), + ) + + +@lru_cache(maxsize=None) +def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: + path = resources.files('atomref.data').joinpath(filename) + with path.open('r', encoding='utf-8', newline='') as handle: + reader = csv.DictReader(handle) + if reader.fieldnames is None or 'z' not in reader.fieldnames: + raise DatasetError(f'invalid CSV file: {filename!r}') + columns = [name for name in reader.fieldnames if name != 'z'] + values: dict[str, list[float | None]] = {name: [None] * 119 for name in columns} + for row in reader: + z_text = row.get('z') + if z_text is None: + continue + z = int(z_text) + for name in columns: + raw = row.get(name) + if raw is None: + values[name][z] = None + continue + raw = raw.strip() + values[name][z] = float(raw) if raw else None + return {name: tuple(vals) for name, vals in values.items()} + + +@lru_cache(maxsize=None) +def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: + info = get_dataset_info(ref) + if info.domain != 'element': + raise DatasetError(f'only element-domain datasets are supported in v0.1: {info.ref!r}') + if not isinstance(info.storage, Mapping): + raise DatasetError(f'missing storage metadata for dataset: {info.ref!r}') + + filename = info.storage.get('filename') + column = info.storage.get('column') + if not isinstance(filename, str) or not isinstance(column, str): + raise DatasetError(f'invalid storage metadata for dataset: {info.ref!r}') + + table = _load_csv_columns(filename) + if column not in table: + raise DatasetError(f'column {column!r} not found in {filename!r}') + + return ElementScalarSet(ref=info.ref, info=info, values_by_z=table[column]) + + +def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: + if isinstance(dataset, ElementScalarSet): + return dataset + return get_builtin_set(dataset) + + +def _is_placeholder_value(info: DatasetInfo, value: float) -> bool: + if info.placeholder_value is None: + return False + return abs(value - info.placeholder_value) < 1e-12 diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py new file mode 100644 index 0000000..d7f5d5e --- /dev/null +++ b/src/atomref/transfer.py @@ -0,0 +1,31 @@ +"""Transfer model configuration types.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from .registry import DatasetLike + + +@dataclass(frozen=True, slots=True) +class LinearFit: + coefficients: tuple[float, ...] 
+ intercept: float + n_points: int + r2: float + rmse: float + + +@dataclass(frozen=True, slots=True) +class SubstitutionTransfer: + source: DatasetLike + + +@dataclass(frozen=True, slots=True) +class LinearTransfer: + predictors: tuple[DatasetLike, ...] + min_points: int = 2 + exclude_placeholders: bool = True + + +TransferModel = SubstitutionTransfer | LinearTransfer diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..08328a4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC = REPO_ROOT / 'src' +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) diff --git a/tests/elements/test_elements.py b/tests/elements/test_elements.py new file mode 100644 index 0000000..161b420 --- /dev/null +++ b/tests/elements/test_elements.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import atomref as ar + + +def test_element_lookup_and_validation() -> None: + assert ar.is_valid_element_symbol('C') + assert ar.is_valid_element_symbol('cl') is False + assert ar.get_element('cl') is not None + assert ar.get_element('C').z == 6 + assert ar.get_element('Xx') is None + + +def test_iter_elements_is_sorted_and_complete() -> None: + elems = ar.iter_elements() + assert elems[0].symbol == 'H' + assert elems[-1].symbol == 'Og' + assert elems[0].z == 1 + assert elems[-1].z == 118 diff --git a/tests/meta/test_imports.py b/tests/meta/test_imports.py new file mode 100644 index 0000000..374996a --- /dev/null +++ b/tests/meta/test_imports.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import importlib + + +MODULES = [ + 'atomref', + 'atomref.elements', + 'atomref.registry', + 'atomref.transfer', + 'atomref.policy', + 'atomref.radii', +] + + +def test_imports() -> None: + for name in MODULES: + importlib.import_module(name) diff --git a/tests/meta/test_readme_sync.py b/tests/meta/test_readme_sync.py new file mode 100644 index 0000000..fe56ac2 --- /dev/null +++ b/tests/meta/test_readme_sync.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from pathlib import Path +import subprocess +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[2] +README = REPO_ROOT / 'README.md' +SCRIPT = REPO_ROOT / 'tools' / 'gen_readme.py' + + +def test_readme_is_in_sync(tmp_path: Path) -> None: + generated = tmp_path / 'README.generated.md' + subprocess.run( + [sys.executable, str(SCRIPT), '--output', str(generated)], + cwd=REPO_ROOT, + check=True, + ) + assert generated.read_text(encoding='utf-8') == README.read_text(encoding='utf-8') diff --git a/tests/radii/test_assessment.py b/tests/radii/test_assessment.py new file mode 100644 index 0000000..664d867 --- /dev/null +++ b/tests/radii/test_assessment.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import atomref as ar + + +def test_assess_vdw_default_linear_counts() -> None: + rep = ar.assess_radii_policy(['Pm', 'O'], policy=ar.DEFAULT_VDW_POLICY) + assert rep.kind == 'van_der_waals' + assert rep.n_elements == 2 + assert rep.n_base == 1 + assert rep.n_transfer_linear == 1 + assert rep.n_missing == 0 + assert rep.fits + assert rep.fits[0].n_points == 90 + + +def test_assess_vdw_detail_reports_sources() -> None: + rep = ar.assess_radii_policy(['Pm', 'O'], policy=ar.DEFAULT_VDW_POLICY, detail=True) + by_sym = {d.symbol: d for d in rep.per_element} + assert by_sym['O'].lookup.source == 'base' + assert by_sym['Pm'].lookup.source == 'transfer_linear' + + +def 
test_assess_covalent_sub_placeholder_count() -> None: + rep = ar.assess_radii_policy(['Es'], policy=ar.DEFAULT_COVALENT_POLICY) + assert rep.kind == 'covalent' + assert rep.n_elements == 1 + assert rep.n_transfer_substitution == 1 + assert rep.n_placeholders == 1 + assert rep.placeholder_symbols == ('Es',) + assert rep.n_missing == 0 + + +def test_assess_covalent_missing_in_both_sets() -> None: + rep = ar.assess_radii_policy(['Rg'], policy=ar.DEFAULT_COVALENT_POLICY) + assert rep.n_missing == 1 + assert rep.missing_symbols == ('Rg',) diff --git a/tests/radii/test_selection.py b/tests/radii/test_selection.py new file mode 100644 index 0000000..c432d8e --- /dev/null +++ b/tests/radii/test_selection.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import pytest + +import atomref as ar + + +def test_get_covalent_radius_default_prefers_cordero() -> None: + assert ar.get_covalent_radius('C') == pytest.approx(0.76) + + +def test_get_covalent_radius_maps_deuterium_to_hydrogen() -> None: + assert ar.get_covalent_radius('D') == pytest.approx(0.31) + + +def test_get_vdw_radius_default_prefers_alvarez() -> None: + assert ar.get_vdw_radius('C') == pytest.approx(1.77) + + +def test_completion_is_used_for_missing_base_values() -> None: + m = ar.lookup_covalent_radius('Bk') + assert m.value is not None + assert m.source == 'transfer_substitution' + + m2 = ar.lookup_vdw_radius('Pm') + assert m2.value is not None + assert m2.source == 'transfer_linear' + assert m2.value == pytest.approx(2.897226539514835) + + +def test_linear_transfer_rejects_placeholder_values() -> None: + scheme = ar.RadiiPolicy( + kind='van_der_waals', + base_set='bondi1964', + transfers=( + ar.LinearTransfer( + predictors=(ar.DatasetRef('van_der_waals_radius', 'csd_legacy_vdw'),) + ), + ), + ) + m = ar.lookup_vdw_radius('Be', policy=scheme) + assert m.value is None + assert m.source == 'missing' + assert any('placeholder' in s for s in m.notes) + + +def test_lookup_float_conversion() -> None: + m = ar.lookup_covalent_radius('C') + assert float(m) == pytest.approx(0.76) + + m_missing = ar.lookup_covalent_radius('Xx') + with pytest.raises(TypeError): + float(m_missing) diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py new file mode 100644 index 0000000..e8811d1 --- /dev/null +++ b/tests/registry/test_registry.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from importlib import resources + +import atomref as ar +from atomref.registry import get_builtin_set + + +def test_packaged_data_files_exist() -> None: + pkg = 'atomref.data' + assert resources.files(pkg).joinpath('periodic_table.csv').is_file() + assert resources.files(pkg).joinpath('covalent.csv').is_file() + assert resources.files(pkg).joinpath('van_der_waals.csv').is_file() + assert resources.files(pkg).joinpath('registry.json').is_file() + + +def test_registry_lists_vdw_sets_but_not_atomic_support_sets() -> None: + vdw_sets = ar.list_radii_sets('van_der_waals') + assert 'alvarez2013' in vdw_sets + assert 'rahm2016' not in vdw_sets + + +def test_rahm_is_registered_as_atomic_radius() -> None: + info = ar.get_dataset_info(ar.DatasetRef('atomic_radius', 'rahm2016')) + assert info.ref.quantity == 'atomic_radius' + assert info.semantic_class == 'atomic_isodensity' + assert info.phase_context == 'isolated_atom' + + +def test_builtin_set_loading_works() -> None: + ds = get_builtin_set(ar.DatasetRef('covalent_radius', 'cordero2008')) + assert ds.get('C') == 0.76 diff --git a/tests/test_smoke.py b/tests/test_smoke.py new file mode 100644 
index 0000000..6a96b08 --- /dev/null +++ b/tests/test_smoke.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import atomref as ar + + +def test_version_is_present() -> None: + assert isinstance(ar.__version__, str) + assert ar.__version__ + + +def test_basic_smoke_import_and_lookup() -> None: + assert ar.get_covalent_radius('C') == 0.76 + assert ar.get_vdw_radius('C') == 1.77 diff --git a/tools/gen_readme.py b/tools/gen_readme.py new file mode 100644 index 0000000..cad0335 --- /dev/null +++ b/tools/gen_readme.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] +SOURCE = REPO_ROOT / 'docs' / 'index.md' +README = REPO_ROOT / 'README.md' + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument('--output', type=Path, default=README) + args = parser.parse_args() + args.output.write_text(SOURCE.read_text(encoding='utf-8'), encoding='utf-8') + + +if __name__ == '__main__': + main() From c7ee02ceba9c72a241dbfd6f7c06deb4a3a21219 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 15:33:20 +0300 Subject: [PATCH 02/15] Adds quantity introspection --- README.md | 9 +++++++ docs/datasets/atomic_radius.md | 2 ++ docs/datasets/index.md | 2 ++ docs/guide/quickstart.md | 10 +++++++ docs/index.md | 9 +++++++ src/atomref/__init__.py | 6 +++++ src/atomref/data/registry.json | 2 +- src/atomref/registry.py | 24 +++++++++++++++++ tests/radii/test_selection.py | 47 +++++++++++++++++++++++++++++++++ tests/registry/test_registry.py | 19 +++++++++++++ 10 files changed, 129 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d5b9154..efdbd73 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,15 @@ This follows the current `molcryst` pattern. - `atomic_radius` (support quantity; currently used for transfer from `rahm2016`) +You can inspect the packaged quantity layer directly: + +```python +import atomref as ar + +print(ar.list_quantities()) +print(ar.get_quantity_info("atomic_radius")) +``` + ## Relationship to the Delone Commons ecosystem `atomref` is intended to be reusable outside the surrounding ecosystem, but it diff --git a/docs/datasets/atomic_radius.md b/docs/datasets/atomic_radius.md index cbbe61b..00a43cd 100644 --- a/docs/datasets/atomic_radius.md +++ b/docs/datasets/atomic_radius.md @@ -6,3 +6,5 @@ not best described as direct condensed-phase vdW radii. Built-in v0.1 support set: - `rahm2016` + +`rahm2016` is intentionally classified here as atomic support data rather than as a direct vdW target set. diff --git a/docs/datasets/index.md b/docs/datasets/index.md index a58d78b..e9e2565 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -8,3 +8,5 @@ The package distinguishes between: This is what keeps support-only datasets such as `rahm2016` usable without misclassifying them as direct condensed-phase vdW radii. + +For programmatic inspection, use `atomref.list_quantities()` and `atomref.get_quantity_info(...)`. diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index 62de165..5cb1637 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -14,3 +14,13 @@ print(m.resolved_from) Use `get_*` when you only need the number, and `lookup_*` when you need provenance. 
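+
+As a minimal sketch, the two call styles compare as follows; the numbers and
+sources shown in the comments assume the default policies packaged with this
+version:
+
+```python
+import atomref as ar
+
+# get_* returns just the number (or None when the policy cannot resolve it)
+print(ar.get_vdw_radius("C"))   # 1.77 under the default Alvarez-based policy
+
+# lookup_* returns a LookupResult carrying provenance
+m = ar.lookup_vdw_radius("Pm")
+print(m.value)   # value inferred by linear transfer from the rahm2016 support set
+print(m.source)  # "transfer_linear"
+print(m.fit)     # the LinearFit used for the inference, or None
+```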
+ +You can also inspect the packaged quantity layer directly: + +```python +import atomref as ar + +print(ar.list_quantities()) +print(ar.get_quantity_info("atomic_radius")) +``` + diff --git a/docs/index.md b/docs/index.md index d5b9154..efdbd73 100644 --- a/docs/index.md +++ b/docs/index.md @@ -53,6 +53,15 @@ This follows the current `molcryst` pattern. - `atomic_radius` (support quantity; currently used for transfer from `rahm2016`) +You can inspect the packaged quantity layer directly: + +```python +import atomref as ar + +print(ar.list_quantities()) +print(ar.get_quantity_info("atomic_radius")) +``` + ## Relationship to the Delone Commons ecosystem `atomref` is intended to be reusable outside the surrounding ecosystem, but it diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index fd07068..a639619 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -20,9 +20,12 @@ DatasetInfo, DatasetRef, ElementScalarSet, + QuantityInfo, Reference, get_dataset_info, + get_quantity_info, list_dataset_ids, + list_quantities, ) from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer @@ -37,9 +40,12 @@ 'DatasetInfo', 'DatasetRef', 'ElementScalarSet', + 'QuantityInfo', 'Reference', 'get_dataset_info', + 'get_quantity_info', 'list_dataset_ids', + 'list_quantities', 'LinearFit', 'LinearTransfer', 'SubstitutionTransfer', diff --git a/src/atomref/data/registry.json b/src/atomref/data/registry.json index 2577ab7..3d9bf6e 100644 --- a/src/atomref/data/registry.json +++ b/src/atomref/data/registry.json @@ -426,7 +426,7 @@ ], "notes": [ "The original work also reports cationic radii (+1) for the first 96 elements and selected anionic radii (−1) for some elements; these are not yet included in the current CSV.", - "Despite the fact that in this project this radii are classified as vdW radii for the purpose of simplicity, they should be treated as a correlational/transferable baseline rather than a direct condensed-phase vdW radius since they describe isolated atoms in vacuum." + "In atomref this dataset is classified as atomic support data, not as a direct condensed-phase van der Waals-radius set, because it describes isolated atoms in vacuum and is used here primarily as a transferable baseline." ] } } diff --git a/src/atomref/registry.py b/src/atomref/registry.py index 196dbc3..d61dd29 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -45,6 +45,14 @@ class CoverageInfo: missing_z: tuple[int, ...] 
= () +@dataclass(frozen=True, slots=True) +class QuantityInfo: + quantity: QuantityId + domain: DomainId + units: str | None = None + description: str | None = None + + @dataclass(frozen=True, slots=True) class DatasetInfo: ref: DatasetRef @@ -179,6 +187,22 @@ def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: return datasets +def list_quantities() -> tuple[str, ...]: + return tuple(_get_quantities_mapping().keys()) + + +def get_quantity_info(quantity: QuantityId) -> QuantityInfo: + raw = _get_quantities_mapping().get(quantity) + if not isinstance(raw, dict): + raise DatasetError(f'unknown quantity: {quantity!r}') + domain = raw.get('domain') if isinstance(raw.get('domain'), str) else None + if domain is None: + raise DatasetError(f'missing domain for quantity: {quantity!r}') + units = raw.get('units') if isinstance(raw.get('units'), str) else None + description = raw.get('description') if isinstance(raw.get('description'), str) else None + return QuantityInfo(quantity=quantity, domain=domain, units=units, description=description) + + def _canonicalize_alias_token(value: str) -> str: return ' '.join(value.strip().lower().split()) diff --git a/tests/radii/test_selection.py b/tests/radii/test_selection.py index c432d8e..9eb16de 100644 --- a/tests/radii/test_selection.py +++ b/tests/radii/test_selection.py @@ -3,6 +3,7 @@ import pytest import atomref as ar +from atomref.errors import PolicyError def test_get_covalent_radius_default_prefers_cordero() -> None: @@ -51,3 +52,49 @@ def test_lookup_float_conversion() -> None: m_missing = ar.lookup_covalent_radius('Xx') with pytest.raises(TypeError): float(m_missing) + + +def test_override_precedes_base_value() -> None: + policy = ar.RadiiPolicy( + kind='covalent', + base_set='cordero2008', + overrides={'C': 9.99}, + ) + lookup = ar.lookup_covalent_radius('C', policy=policy) + assert lookup.source == 'override' + assert lookup.value == pytest.approx(9.99) + + +def test_fallback_is_used_only_after_transfers_fail() -> None: + policy = ar.RadiiPolicy( + kind='van_der_waals', + base_set='bondi1964', + transfers=( + ar.LinearTransfer( + predictors=(ar.DatasetRef('van_der_waals_radius', 'csd_legacy_vdw'),) + ), + ), + fallback=2.5, + ) + lookup = ar.lookup_vdw_radius('Be', policy=policy) + assert lookup.source == 'fallback' + assert lookup.value == pytest.approx(2.5) + assert any('placeholder' in note for note in lookup.notes) + + +def test_linear_transfer_rejects_multiple_predictors_in_v0_1() -> None: + policy = ar.RadiiPolicy( + kind='van_der_waals', + base_set='alvarez2013', + transfers=( + ar.LinearTransfer( + predictors=( + ar.DatasetRef('atomic_radius', 'rahm2016'), + ar.DatasetRef('covalent_radius', 'cordero2008'), + ) + ), + ), + ) + with pytest.raises(PolicyError): + ar.lookup_vdw_radius('Pm', policy=policy) + diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py index e8811d1..b18327e 100644 --- a/tests/registry/test_registry.py +++ b/tests/registry/test_registry.py @@ -30,3 +30,22 @@ def test_rahm_is_registered_as_atomic_radius() -> None: def test_builtin_set_loading_works() -> None: ds = get_builtin_set(ar.DatasetRef('covalent_radius', 'cordero2008')) assert ds.get('C') == 0.76 + + +def test_list_quantities_and_quantity_info() -> None: + quantities = ar.list_quantities() + assert quantities == ('covalent_radius', 'van_der_waals_radius', 'atomic_radius') + + info = ar.get_quantity_info('atomic_radius') + assert info.quantity == 'atomic_radius' + assert info.domain == 'element' + assert info.units 
== 'angstrom' + assert 'support' in (info.description or '') + + +def test_rahm_note_no_longer_claims_it_is_classified_as_vdw() -> None: + info = ar.get_dataset_info(ar.DatasetRef('atomic_radius', 'rahm2016')) + joined = ' '.join(info.notes).lower() + assert 'classified as vdw' not in joined + assert 'atomic support data' in joined + From 5c9ff264c5ea46cb35693462c3c0431393c54dec Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 17:47:02 +0300 Subject: [PATCH 03/15] Adds usage roles --- README.md | 2 ++ docs/datasets/index.md | 5 +++++ docs/guide/policies.md | 8 ++++++++ docs/index.md | 2 ++ src/atomref/data/registry.json | 24 ++++++++++++++++-------- src/atomref/radii.py | 4 ++-- src/atomref/registry.py | 19 +++++++++++++++++-- tests/registry/test_registry.py | 19 +++++++++++++++++++ 8 files changed, 71 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index efdbd73..fc2a6fd 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,8 @@ import atomref as ar print(ar.list_quantities()) print(ar.get_quantity_info("atomic_radius")) +print(ar.list_dataset_ids("van_der_waals_radius", usage_role="target")) +print(ar.list_dataset_ids("atomic_radius", usage_role="support")) ``` ## Relationship to the Delone Commons ecosystem diff --git a/docs/datasets/index.md b/docs/datasets/index.md index e9e2565..1095225 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -10,3 +10,8 @@ This is what keeps support-only datasets such as `rahm2016` usable without misclassifying them as direct condensed-phase vdW radii. For programmatic inspection, use `atomref.list_quantities()` and `atomref.get_quantity_info(...)`. + +Dataset metadata also carries a package-level `usage_role`, which currently +distinguishes direct target sets from support-only sets used for substitution or +linear transfer. Use `atomref.list_dataset_ids(..., usage_role=...)` to inspect +that layer programmatically. diff --git a/docs/guide/policies.md b/docs/guide/policies.md index a7e9130..a5a5b1b 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -18,3 +18,11 @@ Built-in transfer models: `LinearTransfer` is intentionally limited to one predictor in v0.1, but the API already accepts a predictor tuple so later multi-predictor linear models do not require a redesign. + +## Target vs support sets + +`atomref` keeps the lookup behavior separate from the scientific classification +of a dataset. In addition, each built-in dataset now carries a package-level +`usage_role` such as `target` or `support`. This is how `rahm2016` can remain +available for linear transfer into `alvarez2013`-style vdW values without being +misrepresented as a direct condensed-phase vdW target set. diff --git a/docs/index.md b/docs/index.md index efdbd73..fc2a6fd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -60,6 +60,8 @@ import atomref as ar print(ar.list_quantities()) print(ar.get_quantity_info("atomic_radius")) +print(ar.list_dataset_ids("van_der_waals_radius", usage_role="target")) +print(ar.list_dataset_ids("atomic_radius", usage_role="support")) ``` ## Relationship to the Delone Commons ecosystem diff --git a/src/atomref/data/registry.json b/src/atomref/data/registry.json index 3d9bf6e..a722e1c 100644 --- a/src/atomref/data/registry.json +++ b/src/atomref/data/registry.json @@ -62,7 +62,8 @@ ], "notes": [ "The source paper provides multiple radii per element for different atom types/environments; this package currently includes C(sp3) value for C and high-spin values for Mn/Fe/Co." 
- ] + ], + "usage_role": "target" }, "csd_legacy_cov": { "name": "CSD legacy covalent radii (bond perception)", @@ -102,7 +103,8 @@ "CSD bond assignment heuristic: a bond A-B may be inferred if distance d satisfies Rcov(A)+Rcov(B)-t <= d <= Rcov(A)+Rcov(B)+t, with typical t=0.4 Å. (See the CCDC spreadsheet notes.)", "For Z>=111, csd_legacy values are omitted because the legacy CSD table does not provide radii beyond Darmstadtium (Z=110).", "Elements not yet encountered in the CSD have Rcov = 1.50 Å." - ] + ], + "usage_role": "support" } }, "van_der_waals_radius": { @@ -188,7 +190,8 @@ "notes": [ "Coverage is limited (38 elements, including only a few transition metals and uranium).", "Because Bondi radii were not derived exclusively from crystal nonbonded contact statistics, they can differ slightly from later 'structural' vdW radii." - ] + ], + "usage_role": "target" }, "rowland_taylor1996": { "name": "Rowland & Taylor nonbonded contact radii", @@ -236,7 +239,8 @@ "notes": [ "Coverage is intentionally limited to common organic-crystal nonmetals (H, C, N, O, F, S, Cl, Br, I).", "Rowland & Taylor also report a normalized set (R_d) constrained to match the total of Bondi radii; this package uses the raw least-squares r_c values." - ] + ], + "usage_role": "target" }, "alvarez2013": { "name": "Alvarez van der Waals radii", @@ -281,7 +285,8 @@ ], "notes": [ "Obtained by statistical analysis of millions of interatomic distances in the Cambridge Structural Database (CSD), locating the vdW peak after the vdW gap." - ] + ], + "usage_role": "target" }, "chernyshov2020": { "name": "Chernyshov LoS van der Waals radii", @@ -330,7 +335,8 @@ "notes": [ "The source paper provides multiple radii per element for different atom types/environments; this package currently includes only the main/default R_max values used in Table 1.", "Primarily targeted at elements common in organic crystals (H, C, N, O, F, S, Cl, Se, Br, I)." - ] + ], + "usage_role": "target" }, "csd_legacy_vdw": { "name": "CSD legacy van der Waals radii (pre-2024.3)", @@ -381,7 +387,8 @@ "For Z>=111, csd_legacy values are omitted because the legacy CSD table does not provide radii beyond Darmstadtium (Z=110).", "Radii that are not available in either Bondi or Rowland & Taylor versions were assigned RvdW of 2.00 Å.", "The CSD 2024.3 release updated the vdW radii used in CSD and Mercury to Alvarez-derived values (see CCDC blog post)." - ] + ], + "usage_role": "support" } }, "atomic_radius": { @@ -427,7 +434,8 @@ "notes": [ "The original work also reports cationic radii (+1) for the first 96 elements and selected anionic radii (−1) for some elements; these are not yet included in the current CSV.", "In atomref this dataset is classified as atomic support data, not as a direct condensed-phase van der Waals-radius set, because it describes isolated atoms in vacuum and is used here primarily as a transferable baseline." 
- ] + ], + "usage_role": "support" } } } diff --git a/src/atomref/radii.py b/src/atomref/radii.py index 61cebda..7ce73ed 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -112,8 +112,8 @@ def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: return tuple(sorted(symbols, key=lambda s: get_element(s).z if get_element(s) else 0)) -def list_radii_sets(kind: RadiiKind) -> tuple[str, ...]: - return list_dataset_ids(_quantity_for_kind(kind)) +def list_radii_sets(kind: RadiiKind, *, usage_role: str | None = None) -> tuple[str, ...]: + return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: diff --git a/src/atomref/registry.py b/src/atomref/registry.py index d61dd29..458d202 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -60,6 +60,7 @@ class DatasetInfo: units: str | None name: str description: str | None = None + usage_role: str | None = None semantic_class: str | None = None origin_class: str | None = None phase_context: str | None = None @@ -88,6 +89,7 @@ def from_mapping( name: str, units: str | None, description: str | None = None, + usage_role: str = 'user', semantic_class: str = 'user', origin_class: str = 'user', phase_context: str | None = None, @@ -119,6 +121,7 @@ def from_mapping( units=units, name=name, description=description, + usage_role=usage_role, semantic_class=semantic_class, origin_class=origin_class, phase_context=phase_context, @@ -225,8 +228,19 @@ def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: raise DatasetError(f'unknown dataset id for {quantity!r}: {set_id!r}') -def list_dataset_ids(quantity: QuantityId) -> tuple[str, ...]: - return tuple(_datasets_for_quantity(quantity).keys()) +def list_dataset_ids(quantity: QuantityId, *, usage_role: str | None = None) -> tuple[str, ...]: + dataset_ids = tuple(_datasets_for_quantity(quantity).keys()) + if usage_role is None: + return dataset_ids + + filtered: list[str] = [] + wanted = usage_role.strip().lower() + for set_id in dataset_ids: + info = get_dataset_info(DatasetRef(quantity, set_id)) + role = (info.usage_role or '').strip().lower() + if role == wanted: + filtered.append(set_id) + return tuple(filtered) def _coerce_reference(obj: object) -> Reference: @@ -293,6 +307,7 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: units=units, name=raw_entry.get('name') if isinstance(raw_entry.get('name'), str) else actual_ref.set_id, description=raw_entry.get('description') if isinstance(raw_entry.get('description'), str) else None, + usage_role=raw_entry.get('usage_role') if isinstance(raw_entry.get('usage_role'), str) else None, semantic_class=raw_entry.get('semantic_class') if isinstance(raw_entry.get('semantic_class'), str) else None, origin_class=raw_entry.get('origin_class') if isinstance(raw_entry.get('origin_class'), str) else None, phase_context=raw_entry.get('phase_context') if isinstance(raw_entry.get('phase_context'), str) else None, diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py index b18327e..23b401e 100644 --- a/tests/registry/test_registry.py +++ b/tests/registry/test_registry.py @@ -49,3 +49,22 @@ def test_rahm_note_no_longer_claims_it_is_classified_as_vdw() -> None: assert 'classified as vdw' not in joined assert 'atomic support data' in joined + +def test_usage_role_is_exposed_on_dataset_info() -> None: + info = ar.get_dataset_info(ar.DatasetRef('atomic_radius', 'rahm2016')) + assert info.usage_role == 'support' + + +def 
test_list_dataset_ids_can_filter_by_usage_role() -> None: + assert ar.list_dataset_ids('atomic_radius', usage_role='support') == ('rahm2016',) + assert ar.list_dataset_ids('van_der_waals_radius', usage_role='target') == ( + 'bondi1964', + 'rowland_taylor1996', + 'alvarez2013', + 'chernyshov2020', + ) + + +def test_list_radii_sets_can_filter_by_usage_role() -> None: + assert ar.list_radii_sets('covalent', usage_role='support') == ('csd_legacy_cov',) + assert 'alvarez2013' in ar.list_radii_sets('van_der_waals', usage_role='target') From 8a74c00ea5a28d487e803dd00a19e66a38980034 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 18:37:06 +0300 Subject: [PATCH 04/15] Fixes formatting --- .github/workflows/ci.yml | 14 +++ src/atomref/__init__.py | 74 ++++++----- src/atomref/policy.py | 106 +++++++++------- src/atomref/radii.py | 93 ++++++++------ src/atomref/registry.py | 230 ++++++++++++++++++++++------------ tests/radii/test_selection.py | 63 +++++----- 6 files changed, 355 insertions(+), 225 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dbc7a70..38602c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,20 @@ on: pull_request: jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install lint dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[dev] + - name: Lint + run: flake8 src tests + test: runs-on: ubuntu-latest strategy: diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index a639619..1b0ef6b 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -1,5 +1,11 @@ from .__about__ import __version__ -from .elements import Element, canonicalize_element_symbol, get_element, iter_elements, is_valid_element_symbol +from .elements import ( + Element, + canonicalize_element_symbol, + get_element, + iter_elements, + is_valid_element_symbol, +) from .policy import LookupResult, ValuePolicy from .radii import ( DEFAULT_COVALENT_POLICY, @@ -30,37 +36,37 @@ from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer __all__ = [ - '__version__', - 'Element', - 'canonicalize_element_symbol', - 'get_element', - 'iter_elements', - 'is_valid_element_symbol', - 'CoverageInfo', - 'DatasetInfo', - 'DatasetRef', - 'ElementScalarSet', - 'QuantityInfo', - 'Reference', - 'get_dataset_info', - 'get_quantity_info', - 'list_dataset_ids', - 'list_quantities', - 'LinearFit', - 'LinearTransfer', - 'SubstitutionTransfer', - 'LookupResult', - 'ValuePolicy', - 'RadiiPolicy', - 'RadiiElementAssessment', - 'RadiiPolicyAssessment', - 'DEFAULT_COVALENT_POLICY', - 'DEFAULT_VDW_POLICY', - 'list_radii_sets', - 'get_radii_set_info', - 'lookup_covalent_radius', - 'get_covalent_radius', - 'lookup_vdw_radius', - 'get_vdw_radius', - 'assess_radii_policy', + "__version__", + "Element", + "canonicalize_element_symbol", + "get_element", + "iter_elements", + "is_valid_element_symbol", + "CoverageInfo", + "DatasetInfo", + "DatasetRef", + "ElementScalarSet", + "QuantityInfo", + "Reference", + "get_dataset_info", + "get_quantity_info", + "list_dataset_ids", + "list_quantities", + "LinearFit", + "LinearTransfer", + "SubstitutionTransfer", + "LookupResult", + "ValuePolicy", + "RadiiPolicy", + "RadiiElementAssessment", + "RadiiPolicyAssessment", + "DEFAULT_COVALENT_POLICY", + "DEFAULT_VDW_POLICY", + "list_radii_sets", + "get_radii_set_info", + "lookup_covalent_radius", + 
"get_covalent_radius", + "lookup_vdw_radius", + "get_vdw_radius", + "assess_radii_policy", ] diff --git a/src/atomref/policy.py b/src/atomref/policy.py index b7df87b..5b242e2 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -8,7 +8,7 @@ import math from typing import Generic, Literal, TypeVar -from .elements import canonicalize_element_symbol, get_element, is_valid_element_symbol +from .elements import canonicalize_element_symbol, is_valid_element_symbol from .errors import PolicyError from .registry import ( DatasetLike, @@ -20,16 +20,15 @@ ) from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel - -K = TypeVar('K') +K = TypeVar("K") LookupSource = Literal[ - 'override', - 'base', - 'transfer_substitution', - 'transfer_linear', - 'fallback', - 'missing', + "override", + "base", + "transfer_substitution", + "transfer_linear", + "fallback", + "missing", ] @@ -45,7 +44,7 @@ class LookupResult: def __float__(self) -> float: if self.value is None: - raise TypeError('reference value is missing') + raise TypeError("reference value is missing") return float(self.value) @@ -59,8 +58,8 @@ class ValuePolicy(Generic[K]): def _normalize_element_symbol(symbol: str | None) -> str | None: cand = canonicalize_element_symbol(symbol) - if cand in {'D', 'T'}: - cand = 'H' + if cand in {"D", "T"}: + cand = "H" if cand is None: return None if not is_valid_element_symbol(cand): @@ -72,7 +71,13 @@ def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: return resolve_dataset_like(policy.base).ref -def _fit_linear_transfer(base_set: ElementScalarSet, predictor_set: ElementScalarSet, *, min_points: int, exclude_placeholders: bool) -> LinearFit: +def _fit_linear_transfer( + base_set: ElementScalarSet, + predictor_set: ElementScalarSet, + *, + min_points: int, + exclude_placeholders: bool, +) -> LinearFit: xs: list[float] = [] ys: list[float] = [] @@ -94,13 +99,13 @@ def _fit_linear_transfer(base_set: ElementScalarSet, predictor_set: ElementScala n = len(xs) if n < min_points: - raise PolicyError('not enough overlapping elements to fit linear transfer') + raise PolicyError("not enough overlapping elements to fit linear transfer") x_mean = sum(xs) / n y_mean = sum(ys) / n sxx = sum((x - x_mean) ** 2 for x in xs) if sxx == 0: - raise PolicyError('cannot fit linear transfer: zero predictor variance') + raise PolicyError("cannot fit linear transfer: zero predictor variance") sxy = sum((x - x_mean) * (y - y_mean) for x, y in zip(xs, ys)) slope = sxy / sxx @@ -122,7 +127,12 @@ def _fit_linear_transfer(base_set: ElementScalarSet, predictor_set: ElementScala @lru_cache(maxsize=None) -def _fit_linear_transfer_cached(base_ref: DatasetRef, predictor_ref: DatasetRef, min_points: int, exclude_placeholders: bool) -> LinearFit: +def _fit_linear_transfer_cached( + base_ref: DatasetRef, + predictor_ref: DatasetRef, + min_points: int, + exclude_placeholders: bool, +) -> LinearFit: return _fit_linear_transfer( get_builtin_set(base_ref), get_builtin_set(predictor_ref), @@ -135,7 +145,7 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit if not isinstance(transfer, LinearTransfer): return None if len(transfer.predictors) != 1: - raise PolicyError('v0.1 LinearTransfer supports exactly one predictor dataset') + raise PolicyError("v0.1 LinearTransfer supports exactly one predictor dataset") predictor = transfer.predictors[0] if isinstance(base, DatasetRef) and isinstance(predictor, DatasetRef): @@ -150,51 +160,57 @@ def _fit_transfer_model(base: 
DatasetLike, transfer: TransferModel) -> LinearFit ) -def _apply_substitution_transfer(symbol: str, *, target: DatasetRef, transfer: SubstitutionTransfer) -> tuple[LookupResult | None, str | None]: +def _apply_substitution_transfer( + symbol: str, *, target: DatasetRef, transfer: SubstitutionTransfer +) -> tuple[LookupResult | None, str | None]: source_set = resolve_dataset_like(transfer.source) value = source_set.get(symbol) if value is None: - return None, f'no substitution value in {source_set.ref.set_id}' + return None, f"no substitution value in {source_set.ref.set_id}" value_f = float(value) return ( LookupResult( value=value_f, - source='transfer_substitution', + source="transfer_substitution", target=target, resolved_from=(source_set.ref,), is_placeholder=_is_placeholder_value(source_set.info, value_f), - notes=('missing in base set; substituted from transfer source',), + notes=("missing in base set; substituted from transfer source",), ), None, ) -def _apply_linear_transfer(symbol: str, *, base: DatasetLike, target: DatasetRef, transfer: LinearTransfer) -> tuple[LookupResult | None, str | None]: +def _apply_linear_transfer( + symbol: str, *, base: DatasetLike, target: DatasetRef, transfer: LinearTransfer +) -> tuple[LookupResult | None, str | None]: if len(transfer.predictors) != 1: - raise PolicyError('v0.1 LinearTransfer supports exactly one predictor dataset') + raise PolicyError("v0.1 LinearTransfer supports exactly one predictor dataset") predictor_set = resolve_dataset_like(transfer.predictors[0]) predictor_value = predictor_set.get(symbol) if predictor_value is None: - return None, f'no predictor value in {predictor_set.ref.set_id}' + return None, f"no predictor value in {predictor_set.ref.set_id}" predictor_f = float(predictor_value) - if transfer.exclude_placeholders and _is_placeholder_value(predictor_set.info, predictor_f): - return None, f'predictor value in {predictor_set.ref.set_id} is a placeholder' + if transfer.exclude_placeholders and _is_placeholder_value( + predictor_set.info, predictor_f + ): + return None, f"predictor value in {predictor_set.ref.set_id} is a placeholder" fit = _fit_transfer_model(base, transfer) if fit is None: - return None, 'no fit available for linear transfer' + return None, "no fit available for linear transfer" predicted = fit.coefficients[0] * predictor_f + fit.intercept return ( LookupResult( value=float(predicted), - source='transfer_linear', + source="transfer_linear", target=target, resolved_from=(predictor_set.ref,), is_placeholder=False, fit=fit, - notes=('missing in base set; inferred via linear transfer',), + notes=("missing in base set; inferred via linear transfer",), ), None, ) @@ -203,20 +219,20 @@ def _apply_linear_transfer(symbol: str, *, base: DatasetLike, target: DatasetRef def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: target = _resolve_target_ref(policy) base_set = resolve_dataset_like(policy.base) - if base_set.info.domain != 'element': - raise PolicyError('v0.1 resolver supports only element-domain datasets') + if base_set.info.domain != "element": + raise PolicyError("v0.1 resolver supports only element-domain datasets") sym = _normalize_element_symbol(symbol) if sym is None: - note = 'unknown element' if symbol is not None else 'missing element symbol' - return LookupResult(value=None, source='missing', target=target, notes=(note,)) + note = "unknown element" if symbol is not None else "missing element symbol" + return LookupResult(value=None, source="missing", target=target, 
notes=(note,)) if sym in policy.overrides: return LookupResult( value=float(policy.overrides[sym]), - source='override', + source="override", target=target, - notes=('value supplied by policy override',), + notes=("value supplied by policy override",), ) base_value = base_set.get(sym) @@ -224,21 +240,25 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes base_f = float(base_value) return LookupResult( value=base_f, - source='base', + source="base", target=target, resolved_from=(base_set.ref,), is_placeholder=_is_placeholder_value(base_set.info, base_f), notes=(), ) - transfer_notes: list[str] = ['missing in base set'] + transfer_notes: list[str] = ["missing in base set"] for transfer in policy.transfers: if isinstance(transfer, SubstitutionTransfer): - result, note = _apply_substitution_transfer(sym, target=target, transfer=transfer) + result, note = _apply_substitution_transfer( + sym, target=target, transfer=transfer + ) elif isinstance(transfer, LinearTransfer): - result, note = _apply_linear_transfer(sym, base=policy.base, target=target, transfer=transfer) + result, note = _apply_linear_transfer( + sym, base=policy.base, target=target, transfer=transfer + ) else: # pragma: no cover - closed union today - raise PolicyError(f'unsupported transfer model: {type(transfer)!r}') + raise PolicyError(f"unsupported transfer model: {type(transfer)!r}") if result is not None: return result @@ -248,14 +268,14 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes if policy.fallback is not None: return LookupResult( value=float(policy.fallback), - source='fallback', + source="fallback", target=target, - notes=tuple(transfer_notes + ['using fallback value']), + notes=tuple(transfer_notes + ["using fallback value"]), ) return LookupResult( value=None, - source='missing', + source="missing", target=target, notes=tuple(transfer_notes), ) diff --git a/src/atomref/radii.py b/src/atomref/radii.py index 7ce73ed..1095667 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -9,17 +9,22 @@ from .elements import canonicalize_element_symbol, get_element, is_valid_element_symbol from .errors import PolicyError from .policy import LookupResult, ValuePolicy, _fit_transfer_model, _resolve_value -from .registry import DatasetInfo, DatasetRef, ElementScalarSet, get_dataset_info, list_dataset_ids +from .registry import ( + DatasetInfo, + DatasetRef, + ElementScalarSet, + get_dataset_info, + list_dataset_ids, +) from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel - -RadiiKind = Literal['covalent', 'van_der_waals'] +RadiiKind = Literal["covalent", "van_der_waals"] RadiiSet = ElementScalarSet _KIND_TO_QUANTITY = { - 'covalent': 'covalent_radius', - 'van_der_waals': 'van_der_waals_radius', + "covalent": "covalent_radius", + "van_der_waals": "van_der_waals_radius", } @@ -35,9 +40,11 @@ def as_value_policy(self) -> ValuePolicy[str]: quantity = _quantity_for_kind(self.kind) if isinstance(self.base_set, ElementScalarSet): if self.base_set.ref.quantity != quantity: - raise PolicyError( - f'base_set quantity {self.base_set.ref.quantity!r} is incompatible with radii kind {self.kind!r}' + msg = ( + f"base_set quantity {self.base_set.ref.quantity!r} " + f"is incompatible with radii kind {self.kind!r}" ) + raise PolicyError(msg) base = self.base_set else: base = DatasetRef(quantity, self.base_set) @@ -46,7 +53,7 @@ def as_value_policy(self) -> ValuePolicy[str]: for key, value in self.overrides.items(): sym = 
_normalize_radii_symbol(key) if sym is None or not is_valid_element_symbol(sym): - raise PolicyError(f'invalid override element symbol: {key!r}') + raise PolicyError(f"invalid override element symbol: {key!r}") normalized_overrides[sym] = float(value) return ValuePolicy( @@ -90,13 +97,13 @@ def _quantity_for_kind(kind: RadiiKind) -> str: try: return _KIND_TO_QUANTITY[kind] except KeyError as exc: - raise PolicyError(f'unknown radii kind: {kind!r}') from exc + raise PolicyError(f"unknown radii kind: {kind!r}") from exc def _normalize_radii_symbol(symbol: str | None) -> str | None: cand = canonicalize_element_symbol(symbol) - if cand in {'D', 'T'}: - cand = 'H' + if cand in {"D", "T"}: + cand = "H" return cand @@ -105,14 +112,18 @@ def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: for token in elements: sym = _normalize_radii_symbol(token) if sym is None: - raise ValueError('missing element symbol') + raise ValueError("missing element symbol") if not is_valid_element_symbol(sym): - raise ValueError(f'invalid element symbol: {sym!r}') + raise ValueError(f"invalid element symbol: {sym!r}") symbols.add(sym) - return tuple(sorted(symbols, key=lambda s: get_element(s).z if get_element(s) else 0)) + return tuple( + sorted(symbols, key=lambda s: get_element(s).z if get_element(s) else 0) + ) -def list_radii_sets(kind: RadiiKind, *, usage_role: str | None = None) -> tuple[str, ...]: +def list_radii_sets( + kind: RadiiKind, *, usage_role: str | None = None +) -> tuple[str, ...]: return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) @@ -122,34 +133,44 @@ def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: if policy.kind != expected: - raise PolicyError(f'expected a {expected!r} radii policy, got {policy.kind!r}') + raise PolicyError(f"expected a {expected!r} radii policy, got {policy.kind!r}") def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: return _resolve_value(symbol, policy=policy.as_value_policy()) -def lookup_covalent_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> LookupResult: +def lookup_covalent_radius( + symbol: str | None, *, policy: RadiiPolicy | None = None +) -> LookupResult: active = DEFAULT_COVALENT_POLICY if policy is None else policy - _validate_policy_kind(active, expected='covalent') + _validate_policy_kind(active, expected="covalent") return _lookup_radius(symbol, policy=active) -def get_covalent_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> float | None: +def get_covalent_radius( + symbol: str | None, *, policy: RadiiPolicy | None = None +) -> float | None: return lookup_covalent_radius(symbol, policy=policy).value -def lookup_vdw_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> LookupResult: +def lookup_vdw_radius( + symbol: str | None, *, policy: RadiiPolicy | None = None +) -> LookupResult: active = DEFAULT_VDW_POLICY if policy is None else policy - _validate_policy_kind(active, expected='van_der_waals') + _validate_policy_kind(active, expected="van_der_waals") return _lookup_radius(symbol, policy=active) -def get_vdw_radius(symbol: str | None, *, policy: RadiiPolicy | None = None) -> float | None: +def get_vdw_radius( + symbol: str | None, *, policy: RadiiPolicy | None = None +) -> float | None: return lookup_vdw_radius(symbol, policy=policy).value -def assess_radii_policy(elements: Iterable[str], *, policy: RadiiPolicy, detail: bool = 
False) -> RadiiPolicyAssessment: +def assess_radii_policy( + elements: Iterable[str], *, policy: RadiiPolicy, detail: bool = False +) -> RadiiPolicyAssessment: elems = _normalize_assessment_elements(elements) value_policy = policy.as_value_policy() @@ -167,17 +188,17 @@ def assess_radii_policy(elements: Iterable[str], *, policy: RadiiPolicy, detail: for symbol in elems: lookup = _resolve_value(symbol, policy=value_policy) - if lookup.source == 'override': + if lookup.source == "override": n_override += 1 - elif lookup.source == 'base': + elif lookup.source == "base": n_base += 1 - elif lookup.source == 'transfer_substitution': + elif lookup.source == "transfer_substitution": n_transfer_substitution += 1 - elif lookup.source == 'transfer_linear': + elif lookup.source == "transfer_linear": n_transfer_linear += 1 - elif lookup.source == 'fallback': + elif lookup.source == "fallback": n_fallback += 1 - elif lookup.source == 'missing': + elif lookup.source == "missing": n_missing += 1 missing_symbols.append(symbol) @@ -221,13 +242,15 @@ def assess_radii_policy(elements: Iterable[str], *, policy: RadiiPolicy, detail: DEFAULT_COVALENT_POLICY = RadiiPolicy( - kind='covalent', - base_set='cordero2008', - transfers=(SubstitutionTransfer(source=DatasetRef('covalent_radius', 'csd_legacy_cov')),), + kind="covalent", + base_set="cordero2008", + transfers=( + SubstitutionTransfer(source=DatasetRef("covalent_radius", "csd_legacy_cov")), + ), ) DEFAULT_VDW_POLICY = RadiiPolicy( - kind='van_der_waals', - base_set='alvarez2013', - transfers=(LinearTransfer(predictors=(DatasetRef('atomic_radius', 'rahm2016'),)),), + kind="van_der_waals", + base_set="alvarez2013", + transfers=(LinearTransfer(predictors=(DatasetRef("atomic_radius", "rahm2016"),)),), ) diff --git a/src/atomref/registry.py b/src/atomref/registry.py index 458d202..f84b14b 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -12,7 +12,6 @@ from .elements import canonicalize_element_symbol, get_element, iter_elements from .errors import DatasetError - QuantityId = str DomainId = str @@ -89,14 +88,14 @@ def from_mapping( name: str, units: str | None, description: str | None = None, - usage_role: str = 'user', - semantic_class: str = 'user', - origin_class: str = 'user', + usage_role: str = "user", + semantic_class: str = "user", + origin_class: str = "user", phase_context: str | None = None, references: Iterable[Reference] = (), notes: Iterable[str] = (), placeholder_value: float | None = None, - ) -> 'ElementScalarSet': + ) -> "ElementScalarSet": n_z = max(e.z for e in iter_elements()) values_by_z: list[float | None] = [None] * (n_z + 1) @@ -104,10 +103,12 @@ def from_mapping( sym = _normalize_element_domain_symbol(key) elem = get_element(sym) if elem is None: - raise DatasetError(f'invalid element symbol in custom set: {key!r}') + raise DatasetError(f"invalid element symbol in custom set: {key!r}") values_by_z[elem.z] = None if value is None else float(value) - covered_z = tuple(z for z, value in enumerate(values_by_z) if z > 0 and value is not None) + covered_z = tuple( + z for z, value in enumerate(values_by_z) if z > 0 and value is not None + ) has_placeholders = False if placeholder_value is not None: has_placeholders = any( @@ -117,7 +118,7 @@ def from_mapping( info = DatasetInfo( ref=ref, - domain='element', + domain="element", units=units, name=name, description=description, @@ -154,39 +155,39 @@ def get(self, symbol: str | None) -> float | None: def _normalize_element_domain_symbol(symbol: str | None) -> str | None: cand = 
canonicalize_element_symbol(symbol) - if cand in {'D', 'T'}: - return 'H' + if cand in {"D", "T"}: + return "H" return cand @lru_cache(maxsize=1) def _load_registry_json() -> dict[str, object]: - path = resources.files('atomref.data').joinpath('registry.json') - with path.open('r', encoding='utf-8') as handle: + path = resources.files("atomref.data").joinpath("registry.json") + with path.open("r", encoding="utf-8") as handle: data = json.load(handle) if not isinstance(data, dict): - raise DatasetError('invalid registry.json: expected JSON object') + raise DatasetError("invalid registry.json: expected JSON object") return data def _get_quantities_mapping() -> Mapping[str, object]: - quantities = _load_registry_json().get('quantities') + quantities = _load_registry_json().get("quantities") if not isinstance(quantities, dict): - raise DatasetError('invalid registry.json: missing quantities mapping') + raise DatasetError("invalid registry.json: missing quantities mapping") return quantities def _get_datasets_mapping() -> Mapping[str, object]: - datasets = _load_registry_json().get('datasets') + datasets = _load_registry_json().get("datasets") if not isinstance(datasets, dict): - raise DatasetError('invalid registry.json: missing datasets mapping') + raise DatasetError("invalid registry.json: missing datasets mapping") return datasets def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: datasets = _get_datasets_mapping().get(quantity) if not isinstance(datasets, dict): - raise DatasetError(f'unknown quantity: {quantity!r}') + raise DatasetError(f"unknown quantity: {quantity!r}") return datasets @@ -197,17 +198,21 @@ def list_quantities() -> tuple[str, ...]: def get_quantity_info(quantity: QuantityId) -> QuantityInfo: raw = _get_quantities_mapping().get(quantity) if not isinstance(raw, dict): - raise DatasetError(f'unknown quantity: {quantity!r}') - domain = raw.get('domain') if isinstance(raw.get('domain'), str) else None + raise DatasetError(f"unknown quantity: {quantity!r}") + domain = raw.get("domain") if isinstance(raw.get("domain"), str) else None if domain is None: - raise DatasetError(f'missing domain for quantity: {quantity!r}') - units = raw.get('units') if isinstance(raw.get('units'), str) else None - description = raw.get('description') if isinstance(raw.get('description'), str) else None - return QuantityInfo(quantity=quantity, domain=domain, units=units, description=description) + raise DatasetError(f"missing domain for quantity: {quantity!r}") + units = raw.get("units") if isinstance(raw.get("units"), str) else None + description = ( + raw.get("description") if isinstance(raw.get("description"), str) else None + ) + return QuantityInfo( + quantity=quantity, domain=domain, units=units, description=description + ) def _canonicalize_alias_token(value: str) -> str: - return ' '.join(value.strip().lower().split()) + return " ".join(value.strip().lower().split()) def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: @@ -220,15 +225,20 @@ def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: if _canonicalize_alias_token(actual_id) == wanted: return actual_id if isinstance(raw_entry, dict): - aliases = raw_entry.get('aliases', ()) + aliases = raw_entry.get("aliases", ()) if isinstance(aliases, list): for alias in aliases: - if isinstance(alias, str) and _canonicalize_alias_token(alias) == wanted: + if ( + isinstance(alias, str) + and _canonicalize_alias_token(alias) == wanted + ): return actual_id - raise DatasetError(f'unknown dataset id for 
{quantity!r}: {set_id!r}') + raise DatasetError(f"unknown dataset id for {quantity!r}: {set_id!r}") -def list_dataset_ids(quantity: QuantityId, *, usage_role: str | None = None) -> tuple[str, ...]: +def list_dataset_ids( + quantity: QuantityId, *, usage_role: str | None = None +) -> tuple[str, ...]: dataset_ids = tuple(_datasets_for_quantity(quantity).keys()) if usage_role is None: return dataset_ids @@ -237,7 +247,7 @@ def list_dataset_ids(quantity: QuantityId, *, usage_role: str | None = None) -> wanted = usage_role.strip().lower() for set_id in dataset_ids: info = get_dataset_info(DatasetRef(quantity, set_id)) - role = (info.usage_role or '').strip().lower() + role = (info.usage_role or "").strip().lower() if role == wanted: filtered.append(set_id) return tuple(filtered) @@ -245,31 +255,33 @@ def list_dataset_ids(quantity: QuantityId, *, usage_role: str | None = None) -> def _coerce_reference(obj: object) -> Reference: if not isinstance(obj, dict): - raise DatasetError('invalid reference entry in registry.json') + raise DatasetError("invalid reference entry in registry.json") return Reference( - authors=obj.get('authors') if isinstance(obj.get('authors'), str) else None, - year=obj.get('year') if isinstance(obj.get('year'), int) else None, - title=obj.get('title') if isinstance(obj.get('title'), str) else None, - venue=obj.get('venue') if isinstance(obj.get('venue'), str) else None, - doi=obj.get('doi') if isinstance(obj.get('doi'), str) else None, - url=obj.get('url') if isinstance(obj.get('url'), str) else None, - publisher=obj.get('publisher') if isinstance(obj.get('publisher'), str) else None, - note=obj.get('note') if isinstance(obj.get('note'), str) else None, + authors=obj.get("authors") if isinstance(obj.get("authors"), str) else None, + year=obj.get("year") if isinstance(obj.get("year"), int) else None, + title=obj.get("title") if isinstance(obj.get("title"), str) else None, + venue=obj.get("venue") if isinstance(obj.get("venue"), str) else None, + doi=obj.get("doi") if isinstance(obj.get("doi"), str) else None, + url=obj.get("url") if isinstance(obj.get("url"), str) else None, + publisher=( + obj.get("publisher") if isinstance(obj.get("publisher"), str) else None + ), + note=obj.get("note") if isinstance(obj.get("note"), str) else None, ) def _coerce_coverage(obj: object) -> CoverageInfo | None: if not isinstance(obj, dict): return None - covered = obj.get('covered_z') - missing = obj.get('missing_z') + covered = obj.get("covered_z") + missing = obj.get("missing_z") covered_z = tuple(int(z) for z in covered) if isinstance(covered, list) else () missing_z = tuple(int(z) for z in missing) if isinstance(missing, list) else () return CoverageInfo( - n_values=int(obj['n_values']), - z_min=int(obj['z_min']) if isinstance(obj.get('z_min'), int) else None, - z_max=int(obj['z_max']) if isinstance(obj.get('z_max'), int) else None, - has_placeholders=bool(obj.get('has_placeholders', False)), + n_values=int(obj["n_values"]), + z_min=int(obj["z_min"]) if isinstance(obj.get("z_min"), int) else None, + z_max=int(obj["z_max"]) if isinstance(obj.get("z_max"), int) else None, + has_placeholders=bool(obj.get("has_placeholders", False)), covered_z=covered_z, missing_z=missing_z, ) @@ -282,61 +294,115 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: quantities = _get_quantities_mapping() quantity_info = quantities.get(actual_ref.quantity) if not isinstance(quantity_info, dict): - raise DatasetError(f'unknown quantity: {actual_ref.quantity!r}') + raise DatasetError(f"unknown quantity: 
{actual_ref.quantity!r}") - units = quantity_info.get('units') if isinstance(quantity_info.get('units'), str) else None - domain = quantity_info.get('domain') if isinstance(quantity_info.get('domain'), str) else None + units = ( + quantity_info.get("units") + if isinstance(quantity_info.get("units"), str) + else None + ) + domain = ( + quantity_info.get("domain") + if isinstance(quantity_info.get("domain"), str) + else None + ) if domain is None: - raise DatasetError(f'missing domain for quantity: {actual_ref.quantity!r}') + raise DatasetError(f"missing domain for quantity: {actual_ref.quantity!r}") raw_entry = _datasets_for_quantity(actual_ref.quantity).get(actual_ref.set_id) if not isinstance(raw_entry, dict): - raise DatasetError(f'unknown dataset: {actual_ref}') + raise DatasetError(f"unknown dataset: {actual_ref}") - refs_raw = raw_entry.get('references', []) - references = tuple(_coerce_reference(item) for item in refs_raw) if isinstance(refs_raw, list) else () - aliases_raw = raw_entry.get('aliases', []) - aliases = tuple(item for item in aliases_raw if isinstance(item, str)) if isinstance(aliases_raw, list) else () - notes_raw = raw_entry.get('notes', []) - notes = tuple(item for item in notes_raw if isinstance(item, str)) if isinstance(notes_raw, list) else () - storage = raw_entry.get('storage') if isinstance(raw_entry.get('storage'), dict) else None + refs_raw = raw_entry.get("references", []) + references = ( + tuple(_coerce_reference(item) for item in refs_raw) + if isinstance(refs_raw, list) + else () + ) + aliases_raw = raw_entry.get("aliases", []) + aliases = ( + tuple(item for item in aliases_raw if isinstance(item, str)) + if isinstance(aliases_raw, list) + else () + ) + notes_raw = raw_entry.get("notes", []) + notes = ( + tuple(item for item in notes_raw if isinstance(item, str)) + if isinstance(notes_raw, list) + else () + ) + storage = ( + raw_entry.get("storage") if isinstance(raw_entry.get("storage"), dict) else None + ) return DatasetInfo( ref=actual_ref, domain=domain, units=units, - name=raw_entry.get('name') if isinstance(raw_entry.get('name'), str) else actual_ref.set_id, - description=raw_entry.get('description') if isinstance(raw_entry.get('description'), str) else None, - usage_role=raw_entry.get('usage_role') if isinstance(raw_entry.get('usage_role'), str) else None, - semantic_class=raw_entry.get('semantic_class') if isinstance(raw_entry.get('semantic_class'), str) else None, - origin_class=raw_entry.get('origin_class') if isinstance(raw_entry.get('origin_class'), str) else None, - phase_context=raw_entry.get('phase_context') if isinstance(raw_entry.get('phase_context'), str) else None, - method_summary=raw_entry.get('method_summary') if isinstance(raw_entry.get('method_summary'), str) else None, + name=( + raw_entry.get("name") + if isinstance(raw_entry.get("name"), str) + else actual_ref.set_id + ), + description=( + raw_entry.get("description") + if isinstance(raw_entry.get("description"), str) + else None + ), + usage_role=( + raw_entry.get("usage_role") + if isinstance(raw_entry.get("usage_role"), str) + else None + ), + semantic_class=( + raw_entry.get("semantic_class") + if isinstance(raw_entry.get("semantic_class"), str) + else None + ), + origin_class=( + raw_entry.get("origin_class") + if isinstance(raw_entry.get("origin_class"), str) + else None + ), + phase_context=( + raw_entry.get("phase_context") + if isinstance(raw_entry.get("phase_context"), str) + else None + ), + method_summary=( + raw_entry.get("method_summary") + if 
isinstance(raw_entry.get("method_summary"), str) + else None + ), placeholder_value=( - float(raw_entry['placeholder_value']) - if raw_entry.get('placeholder_value') is not None + float(raw_entry["placeholder_value"]) + if raw_entry.get("placeholder_value") is not None + else None + ), + extraction_source=( + raw_entry.get("extraction_source") + if isinstance(raw_entry.get("extraction_source"), str) else None ), - extraction_source=raw_entry.get('extraction_source') if isinstance(raw_entry.get('extraction_source'), str) else None, aliases=aliases, references=references, notes=notes, storage=storage, - coverage=_coerce_coverage(raw_entry.get('coverage')), + coverage=_coerce_coverage(raw_entry.get("coverage")), ) @lru_cache(maxsize=None) def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: - path = resources.files('atomref.data').joinpath(filename) - with path.open('r', encoding='utf-8', newline='') as handle: + path = resources.files("atomref.data").joinpath(filename) + with path.open("r", encoding="utf-8", newline="") as handle: reader = csv.DictReader(handle) - if reader.fieldnames is None or 'z' not in reader.fieldnames: - raise DatasetError(f'invalid CSV file: {filename!r}') - columns = [name for name in reader.fieldnames if name != 'z'] + if reader.fieldnames is None or "z" not in reader.fieldnames: + raise DatasetError(f"invalid CSV file: {filename!r}") + columns = [name for name in reader.fieldnames if name != "z"] values: dict[str, list[float | None]] = {name: [None] * 119 for name in columns} for row in reader: - z_text = row.get('z') + z_text = row.get("z") if z_text is None: continue z = int(z_text) @@ -353,19 +419,21 @@ def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: @lru_cache(maxsize=None) def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: info = get_dataset_info(ref) - if info.domain != 'element': - raise DatasetError(f'only element-domain datasets are supported in v0.1: {info.ref!r}') + if info.domain != "element": + raise DatasetError( + f"only element-domain datasets are supported in v0.1: {info.ref!r}" + ) if not isinstance(info.storage, Mapping): - raise DatasetError(f'missing storage metadata for dataset: {info.ref!r}') + raise DatasetError(f"missing storage metadata for dataset: {info.ref!r}") - filename = info.storage.get('filename') - column = info.storage.get('column') + filename = info.storage.get("filename") + column = info.storage.get("column") if not isinstance(filename, str) or not isinstance(column, str): - raise DatasetError(f'invalid storage metadata for dataset: {info.ref!r}') + raise DatasetError(f"invalid storage metadata for dataset: {info.ref!r}") table = _load_csv_columns(filename) if column not in table: - raise DatasetError(f'column {column!r} not found in {filename!r}') + raise DatasetError(f"column {column!r} not found in {filename!r}") return ElementScalarSet(ref=info.ref, info=info, values_by_z=table[column]) diff --git a/tests/radii/test_selection.py b/tests/radii/test_selection.py index 9eb16de..e84a4f0 100644 --- a/tests/radii/test_selection.py +++ b/tests/radii/test_selection.py @@ -7,94 +7,93 @@ def test_get_covalent_radius_default_prefers_cordero() -> None: - assert ar.get_covalent_radius('C') == pytest.approx(0.76) + assert ar.get_covalent_radius("C") == pytest.approx(0.76) def test_get_covalent_radius_maps_deuterium_to_hydrogen() -> None: - assert ar.get_covalent_radius('D') == pytest.approx(0.31) + assert ar.get_covalent_radius("D") == pytest.approx(0.31) def 
test_get_vdw_radius_default_prefers_alvarez() -> None: - assert ar.get_vdw_radius('C') == pytest.approx(1.77) + assert ar.get_vdw_radius("C") == pytest.approx(1.77) def test_completion_is_used_for_missing_base_values() -> None: - m = ar.lookup_covalent_radius('Bk') + m = ar.lookup_covalent_radius("Bk") assert m.value is not None - assert m.source == 'transfer_substitution' + assert m.source == "transfer_substitution" - m2 = ar.lookup_vdw_radius('Pm') + m2 = ar.lookup_vdw_radius("Pm") assert m2.value is not None - assert m2.source == 'transfer_linear' + assert m2.source == "transfer_linear" assert m2.value == pytest.approx(2.897226539514835) def test_linear_transfer_rejects_placeholder_values() -> None: scheme = ar.RadiiPolicy( - kind='van_der_waals', - base_set='bondi1964', + kind="van_der_waals", + base_set="bondi1964", transfers=( ar.LinearTransfer( - predictors=(ar.DatasetRef('van_der_waals_radius', 'csd_legacy_vdw'),) + predictors=(ar.DatasetRef("van_der_waals_radius", "csd_legacy_vdw"),) ), ), ) - m = ar.lookup_vdw_radius('Be', policy=scheme) + m = ar.lookup_vdw_radius("Be", policy=scheme) assert m.value is None - assert m.source == 'missing' - assert any('placeholder' in s for s in m.notes) + assert m.source == "missing" + assert any("placeholder" in s for s in m.notes) def test_lookup_float_conversion() -> None: - m = ar.lookup_covalent_radius('C') + m = ar.lookup_covalent_radius("C") assert float(m) == pytest.approx(0.76) - m_missing = ar.lookup_covalent_radius('Xx') + m_missing = ar.lookup_covalent_radius("Xx") with pytest.raises(TypeError): float(m_missing) def test_override_precedes_base_value() -> None: policy = ar.RadiiPolicy( - kind='covalent', - base_set='cordero2008', - overrides={'C': 9.99}, + kind="covalent", + base_set="cordero2008", + overrides={"C": 9.99}, ) - lookup = ar.lookup_covalent_radius('C', policy=policy) - assert lookup.source == 'override' + lookup = ar.lookup_covalent_radius("C", policy=policy) + assert lookup.source == "override" assert lookup.value == pytest.approx(9.99) def test_fallback_is_used_only_after_transfers_fail() -> None: policy = ar.RadiiPolicy( - kind='van_der_waals', - base_set='bondi1964', + kind="van_der_waals", + base_set="bondi1964", transfers=( ar.LinearTransfer( - predictors=(ar.DatasetRef('van_der_waals_radius', 'csd_legacy_vdw'),) + predictors=(ar.DatasetRef("van_der_waals_radius", "csd_legacy_vdw"),) ), ), fallback=2.5, ) - lookup = ar.lookup_vdw_radius('Be', policy=policy) - assert lookup.source == 'fallback' + lookup = ar.lookup_vdw_radius("Be", policy=policy) + assert lookup.source == "fallback" assert lookup.value == pytest.approx(2.5) - assert any('placeholder' in note for note in lookup.notes) + assert any("placeholder" in note for note in lookup.notes) def test_linear_transfer_rejects_multiple_predictors_in_v0_1() -> None: policy = ar.RadiiPolicy( - kind='van_der_waals', - base_set='alvarez2013', + kind="van_der_waals", + base_set="alvarez2013", transfers=( ar.LinearTransfer( predictors=( - ar.DatasetRef('atomic_radius', 'rahm2016'), - ar.DatasetRef('covalent_radius', 'cordero2008'), + ar.DatasetRef("atomic_radius", "rahm2016"), + ar.DatasetRef("covalent_radius", "cordero2008"), ) ), ), ) with pytest.raises(PolicyError): - ar.lookup_vdw_radius('Pm', policy=policy) - + ar.lookup_vdw_radius("Pm", policy=policy) From 85cd280452b086e7cc8a563f0d27c45603e4b32f Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 19:09:48 +0300 Subject: [PATCH 05/15] Tightens CI --- .github/workflows/ci.yml | 45 ++++++++++++++++- 
.github/workflows/docs.yml | 2 +- tests/meta/test_package_data.py | 26 ++++++++++ tests/meta/test_public_api.py | 34 +++++++++++++ tools/check_dist.py | 88 +++++++++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 tests/meta/test_package_data.py create mode 100644 tests/meta/test_public_api.py create mode 100644 tools/check_dist.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 38602c6..942ee01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,21 @@ jobs: python -m pip install --upgrade pip python -m pip install .[dev] - name: Lint - run: flake8 src tests + run: flake8 src tests tools + + docs-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install docs extras + run: | + python -m pip install --upgrade pip + python -m pip install .[docs] + - name: Build docs + run: mkdocs build --strict test: runs-on: ubuntu-latest @@ -35,3 +49,32 @@ jobs: python -m pip install .[test] - name: Test run: pytest + + build-dist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + python -m pip install build twine + - name: Build distributions + run: python -m build + - name: Validate metadata + run: python -m twine check dist/* + - name: Check packaged files + run: python tools/check_dist.py dist + - name: Install built wheel and smoke-test it + run: | + python -m pip install --force-reinstall --no-deps dist/*.whl + python - <<'PY' + import atomref as ar + + assert ar.get_covalent_radius('C') == 0.76 + assert ar.get_vdw_radius('C') == 1.77 + assert 'atomic_radius' in ar.list_quantities() + assert 'rahm2016' in ar.list_dataset_ids('atomic_radius', usage_role='support') + PY diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 590aad5..70396d7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -2,7 +2,7 @@ name: Docs on: push: - branches: [main] + branches: [main, master] workflow_dispatch: jobs: diff --git a/tests/meta/test_package_data.py b/tests/meta/test_package_data.py new file mode 100644 index 0000000..e5c393c --- /dev/null +++ b/tests/meta/test_package_data.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from importlib import resources +import json + + +def test_packaged_data_files_are_available() -> None: + data_root = resources.files('atomref.data') + for name in ( + 'periodic_table.csv', + 'covalent.csv', + 'van_der_waals.csv', + 'registry.json', + ): + assert data_root.joinpath(name).is_file(), name + + +def test_packaged_registry_keeps_atomic_support_classification() -> None: + data_root = resources.files('atomref.data') + raw = json.loads(data_root.joinpath('registry.json').read_text(encoding='utf-8')) + + assert 'atomic_radius' in raw['datasets'] + rahm = raw['datasets']['atomic_radius']['rahm2016'] + assert rahm['usage_role'] == 'support' + assert rahm['semantic_class'] == 'atomic_isodensity' + assert rahm['phase_context'] == 'isolated_atom' diff --git a/tests/meta/test_public_api.py b/tests/meta/test_public_api.py new file mode 100644 index 0000000..b64f77d --- /dev/null +++ b/tests/meta/test_public_api.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import atomref as ar + + +REQUIRED_PUBLIC_NAMES = { + 'Element', + 'DatasetRef', + 'DatasetInfo', + 
'ElementScalarSet', + 'QuantityInfo', + 'LookupResult', + 'RadiiPolicy', + 'DEFAULT_COVALENT_POLICY', + 'DEFAULT_VDW_POLICY', + 'LinearTransfer', + 'SubstitutionTransfer', + 'get_covalent_radius', + 'lookup_covalent_radius', + 'get_vdw_radius', + 'lookup_vdw_radius', + 'list_quantities', + 'list_dataset_ids', + 'list_radii_sets', +} + + +def test___all___exports_existing_objects() -> None: + for name in ar.__all__: + assert hasattr(ar, name), name + + +def test_core_public_api_names_are_exported() -> None: + assert REQUIRED_PUBLIC_NAMES.issubset(set(ar.__all__)) diff --git a/tools/check_dist.py b/tools/check_dist.py new file mode 100644 index 0000000..3eb4c66 --- /dev/null +++ b/tools/check_dist.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +import tarfile +import zipfile + + +REQUIRED_WHEEL_MEMBERS = { + 'atomref/data/periodic_table.csv', + 'atomref/data/covalent.csv', + 'atomref/data/van_der_waals.csv', + 'atomref/data/registry.json', + 'atomref/py.typed', +} + +REQUIRED_SDIST_SUFFIXES = { + 'src/atomref/data/periodic_table.csv', + 'src/atomref/data/covalent.csv', + 'src/atomref/data/van_der_waals.csv', + 'src/atomref/data/registry.json', + 'src/atomref/py.typed', + 'README.md', + 'LICENSE', + 'pyproject.toml', +} + + +class DistCheckError(RuntimeError): + """Raised when a built distribution is missing required members.""" + + +def _assert_members_present( + actual: set[str], required: set[str], *, label: str +) -> None: + missing = sorted(required - actual) + if missing: + joined = ', '.join(missing) + raise DistCheckError(f'{label} is missing required members: {joined}') + + +def _members_matching_suffixes(actual: set[str], suffixes: set[str]) -> set[str]: + matched: set[str] = set() + for suffix in suffixes: + if any(name.endswith(suffix) for name in actual): + matched.add(suffix) + return matched + + +def check_wheel(path: Path) -> None: + with zipfile.ZipFile(path) as zf: + names = set(zf.namelist()) + matched = { + member + for member in REQUIRED_WHEEL_MEMBERS + if any(name.endswith(member) for name in names) + } + _assert_members_present(matched, REQUIRED_WHEEL_MEMBERS, label=path.name) + + +def check_sdist(path: Path) -> None: + with tarfile.open(path, 'r:gz') as tf: + names = {member.name for member in tf.getmembers()} + matched = _members_matching_suffixes(names, REQUIRED_SDIST_SUFFIXES) + _assert_members_present(matched, REQUIRED_SDIST_SUFFIXES, label=path.name) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument('dist_dir', type=Path, nargs='?', default=Path('dist')) + args = parser.parse_args() + + dist_dir = args.dist_dir + wheels = sorted(dist_dir.glob('*.whl')) + sdists = sorted(dist_dir.glob('*.tar.gz')) + if not wheels: + raise DistCheckError(f'no wheel files found in {dist_dir}') + if not sdists: + raise DistCheckError(f'no source distributions found in {dist_dir}') + + for wheel in wheels: + check_wheel(wheel) + for sdist in sdists: + check_sdist(sdist) + + +if __name__ == '__main__': + main() From 5bca61d0e325f2c113ec058401993f0568b656bf Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 19:41:14 +0300 Subject: [PATCH 06/15] Tightens data validation --- .github/workflows/ci.yml | 2 + README.md | 3 + docs/dev/data_curation.md | 19 +++ docs/index.md | 3 + tests/meta/test_registry_integrity.py | 80 ++++++++++++ tools/check_registry.py | 172 ++++++++++++++++++++++++++ 6 files changed, 279 insertions(+) create mode 100644 tests/meta/test_registry_integrity.py create 
mode 100644 tools/check_registry.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 942ee01..3512c2a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,8 @@ jobs: python -m pip install .[dev] - name: Lint run: flake8 src tests tools + - name: Validate packaged registry + run: python tools/check_registry.py docs-check: runs-on: ubuntu-latest diff --git a/README.md b/README.md index fc2a6fd..5514004 100644 --- a/README.md +++ b/README.md @@ -75,3 +75,6 @@ fits naturally beneath: Those packages should consume atomic reference data from `atomref` rather than re-curating such datasets independently. + +For data-curation changes, validate the packaged registry against the bundled +CSV tables with `python tools/check_registry.py`. diff --git a/docs/dev/data_curation.md b/docs/dev/data_curation.md index 02f406b..689ae24 100644 --- a/docs/dev/data_curation.md +++ b/docs/dev/data_curation.md @@ -5,3 +5,22 @@ metadata and provenance live in `src/atomref/data/registry.json`. Placeholder values are modeled as dataset metadata, not as hard-coded Python constants. + +The registry distinguishes several orthogonal concerns: + +- `quantity` — the operational lookup target, such as `covalent_radius` or + `van_der_waals_radius` +- `semantic_class` — what the dataset scientifically represents +- `usage_role` — whether the dataset is intended as a direct target set or as + support data for transfer +- `phase_context` — the physical context of the underlying values + +This matters for support-only datasets such as `atomic_radius:rahm2016`, which +is packaged as atomic support data and then used by the default van der Waals +policy through linear transfer. + +To check that metadata and packaged tables stay synchronized, run: + +```bash +python tools/check_registry.py +``` diff --git a/docs/index.md b/docs/index.md index fc2a6fd..5514004 100644 --- a/docs/index.md +++ b/docs/index.md @@ -75,3 +75,6 @@ fits naturally beneath: Those packages should consume atomic reference data from `atomref` rather than re-curating such datasets independently. + +For data-curation changes, validate the packaged registry against the bundled +CSV tables with `python tools/check_registry.py`. 
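The classification split described in `docs/dev/data_curation.md` above is also visible through the public API, so downstream code does not have to read `registry.json` directly. A minimal sketch; the commented values mirror the `rahm2016` metadata asserted in the packaged-data tests:

```python
import atomref as ar

# Operational side: the quantity that lookups are keyed on.
print(ar.get_quantity_info("atomic_radius"))

# Scientific side: how the support-only rahm2016 set is classified.
rahm = ar.get_dataset_info(ar.DatasetRef("atomic_radius", "rahm2016"))
print(rahm.usage_role)      # 'support'
print(rahm.semantic_class)  # 'atomic_isodensity'
print(rahm.phase_context)   # 'isolated_atom'
```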
diff --git a/tests/meta/test_registry_integrity.py b/tests/meta/test_registry_integrity.py new file mode 100644 index 0000000..853df5c --- /dev/null +++ b/tests/meta/test_registry_integrity.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from collections import defaultdict +from dataclasses import asdict + +import atomref as ar +from atomref.registry import get_builtin_set + +_ALLOWED_USAGE_ROLES = {"target", "support"} + + +def _canonical_token(value: str) -> str: + return " ".join(value.strip().lower().split()) + + +def test_dataset_aliases_are_unique_within_each_quantity() -> None: + for quantity in ar.list_quantities(): + seen: dict[str, str] = {} + for set_id in ar.list_dataset_ids(quantity): + info = ar.get_dataset_info(ar.DatasetRef(quantity, set_id)) + for token in (set_id, *info.aliases): + key = _canonical_token(token) + previous = seen.get(key) + assert previous in (None, set_id) + seen[key] = set_id + + +def test_every_built_in_dataset_loads_and_matches_coverage_metadata() -> None: + for quantity in ar.list_quantities(): + quantity_info = ar.get_quantity_info(quantity) + for set_id in ar.list_dataset_ids(quantity): + ref = ar.DatasetRef(quantity, set_id) + info = ar.get_dataset_info(ref) + dataset = get_builtin_set(ref) + + assert info.domain == quantity_info.domain + assert info.units == quantity_info.units + assert info.usage_role in _ALLOWED_USAGE_ROLES + assert info.references + assert info.coverage is not None + + max_z = ( + info.coverage.z_max + if info.coverage.z_max is not None + else len(dataset.values_by_z) - 1 + ) + covered_z = tuple( + z + for z, value in enumerate(dataset.values_by_z) + if z > 0 and value is not None and z <= max_z + ) + covered_set = set(covered_z) + missing_z = tuple(z for z in range(1, max_z + 1) if z not in covered_set) + has_placeholders = info.placeholder_value is not None and any( + value is not None and abs(value - info.placeholder_value) < 1e-12 + for value in dataset.values_by_z[1 : max_z + 1] + ) + + coverage = asdict(info.coverage) + assert coverage["n_values"] == len(covered_z) + assert coverage["z_min"] == (min(covered_z) if covered_z else None) + assert coverage["z_max"] == (max(covered_z) if covered_z else None) + assert coverage["has_placeholders"] is has_placeholders + if coverage["covered_z"]: + assert tuple(coverage["covered_z"]) == covered_z + if coverage["missing_z"]: + assert tuple(coverage["missing_z"]) == missing_z + + +def test_non_atomic_quantities_have_at_least_one_target_dataset() -> None: + by_role: dict[str, list[str]] = defaultdict(list) + for quantity in ar.list_quantities(): + for set_id in ar.list_dataset_ids(quantity): + role = ar.get_dataset_info(ar.DatasetRef(quantity, set_id)).usage_role + assert role is not None + by_role[role].append(quantity) + + for quantity in ar.list_quantities(): + if quantity != "atomic_radius": + assert quantity in by_role["target"] diff --git a/tools/check_registry.py b/tools/check_registry.py new file mode 100644 index 0000000..a57f49f --- /dev/null +++ b/tools/check_registry.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +"""Validate packaged registry metadata against bundled CSV tables.""" + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import asdict +from pathlib import Path +import sys +from typing import Iterable + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC = REPO_ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +import atomref as ar +from atomref.registry import get_builtin_set + 
+_ALLOWED_USAGE_ROLES = {"target", "support"} + + +def _canonical_token(value: str) -> str: + return " ".join(value.strip().lower().split()) + + +def _iter_dataset_refs() -> Iterable[ar.DatasetRef]: + for quantity in ar.list_quantities(): + for set_id in ar.list_dataset_ids(quantity): + yield ar.DatasetRef(quantity, set_id) + + +def _validate_alias_collisions(errors: list[str]) -> None: + for quantity in ar.list_quantities(): + seen: dict[str, str] = {} + for set_id in ar.list_dataset_ids(quantity): + info = ar.get_dataset_info(ar.DatasetRef(quantity, set_id)) + for token in (set_id, *info.aliases): + key = _canonical_token(token) + previous = seen.get(key) + if previous is not None and previous != set_id: + msg = ( + f"alias collision in {quantity!r}: {token!r} resolves to both " + f"{previous!r} and {set_id!r}" + ) + errors.append(msg) + else: + seen[key] = set_id + + +def _validate_dataset_metadata(errors: list[str]) -> None: + quantities = set(ar.list_quantities()) + by_role: dict[str, list[str]] = defaultdict(list) + + for ref in _iter_dataset_refs(): + quantity_info = ar.get_quantity_info(ref.quantity) + info = ar.get_dataset_info(ref) + dataset = get_builtin_set(ref) + + if info.ref != ref: + errors.append(f"dataset ref mismatch: requested {ref!r}, got {info.ref!r}") + + if info.domain != quantity_info.domain: + msg = ( + f"domain mismatch for {ref!r}: quantity={quantity_info.domain!r}, " + f"dataset={info.domain!r}" + ) + errors.append(msg) + + if info.units != quantity_info.units: + msg = ( + f"units mismatch for {ref!r}: quantity={quantity_info.units!r}, " + f"dataset={info.units!r}" + ) + errors.append(msg) + + if info.usage_role not in _ALLOWED_USAGE_ROLES: + errors.append(f"invalid usage_role for {ref!r}: {info.usage_role!r}") + else: + by_role[info.usage_role].append(ref.quantity) + + if not info.references: + errors.append(f"missing references for {ref!r}") + + if info.storage is None: + errors.append(f"missing storage metadata for {ref!r}") + else: + filename = info.storage.get("filename") + column = info.storage.get("column") + fmt = info.storage.get("format") + if not isinstance(filename, str) or not filename: + errors.append(f"invalid storage filename for {ref!r}: {filename!r}") + if not isinstance(column, str) or not column: + errors.append(f"invalid storage column for {ref!r}: {column!r}") + if fmt != "dense_by_z_csv": + errors.append(f"unsupported storage format for {ref!r}: {fmt!r}") + + coverage = info.coverage + if coverage is None: + errors.append(f"missing coverage metadata for {ref!r}") + max_z = len(dataset.values_by_z) - 1 + else: + max_z = ( + coverage.z_max + if coverage.z_max is not None + else len(dataset.values_by_z) - 1 + ) + + covered_z = tuple( + z + for z, value in enumerate(dataset.values_by_z) + if z > 0 and value is not None and z <= max_z + ) + covered_set = set(covered_z) + missing_z = tuple(z for z in range(1, max_z + 1) if z not in covered_set) + has_placeholders = info.placeholder_value is not None and any( + value is not None and abs(value - info.placeholder_value) < 1e-12 + for value in dataset.values_by_z[1 : max_z + 1] + ) + + if coverage is not None: + expected = { + "n_values": len(covered_z), + "z_min": min(covered_z) if covered_z else None, + "z_max": max(covered_z) if covered_z else None, + "has_placeholders": has_placeholders, + } + actual = asdict(coverage) + for key, value in expected.items(): + if actual[key] != value: + msg = ( + f"coverage mismatch for {ref!r}: {key} is {actual[key]!r}, " + f"expected {value!r}" + ) + 
errors.append(msg) + if actual["covered_z"] and tuple(actual["covered_z"]) != covered_z: + msg = ( + f"coverage mismatch for {ref!r}: covered_z is " + f"{actual['covered_z']!r}, expected {covered_z!r}" + ) + errors.append(msg) + if actual["missing_z"] and tuple(actual["missing_z"]) != missing_z: + msg = ( + f"coverage mismatch for {ref!r}: missing_z is " + f"{actual['missing_z']!r}, expected {missing_z!r}" + ) + errors.append(msg) + + if ref.quantity not in quantities: + errors.append(f"dataset refers to unknown quantity: {ref!r}") + + for quantity in quantities: + if quantity not in by_role.get("target", []) and quantity != "atomic_radius": + errors.append(f"quantity {quantity!r} has no target datasets") + + +def main() -> int: + errors: list[str] = [] + _validate_alias_collisions(errors) + _validate_dataset_metadata(errors) + + if errors: + for error in errors: + print(f"ERROR: {error}") + return 1 + + print("Registry validation passed.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From b84372116c240ee5788b4875c2296be398c12ac9 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 20:27:27 +0300 Subject: [PATCH 07/15] Adds data listing functionality --- README.md | 4 ++-- docs/datasets/index.md | 2 +- docs/guide/quickstart.md | 2 ++ docs/index.md | 4 ++-- src/atomref/__init__.py | 4 ++++ src/atomref/radii.py | 7 +++++++ src/atomref/registry.py | 9 +++++++++ tests/meta/test_public_api.py | 2 ++ tests/registry/test_registry.py | 12 ++++++++++++ tools/check_registry.py | 28 +++++++++++++++++++--------- 10 files changed, 60 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 5514004..d9f0d6c 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,8 @@ import atomref as ar print(ar.list_quantities()) print(ar.get_quantity_info("atomic_radius")) -print(ar.list_dataset_ids("van_der_waals_radius", usage_role="target")) -print(ar.list_dataset_ids("atomic_radius", usage_role="support")) +print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) +print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` ## Relationship to the Delone Commons ecosystem diff --git a/docs/datasets/index.md b/docs/datasets/index.md index 1095225..cbd132e 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -9,7 +9,7 @@ The package distinguishes between: This is what keeps support-only datasets such as `rahm2016` usable without misclassifying them as direct condensed-phase vdW radii. -For programmatic inspection, use `atomref.list_quantities()` and `atomref.get_quantity_info(...)`. +For programmatic inspection, use `atomref.list_quantities()`, `atomref.get_quantity_info(...)`, and `atomref.list_dataset_infos(...)`. 
Dataset metadata also carries a package-level `usage_role`, which currently distinguishes direct target sets from support-only sets used for substitution or diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index 5cb1637..eb55fd3 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -22,5 +22,7 @@ import atomref as ar print(ar.list_quantities()) print(ar.get_quantity_info("atomic_radius")) +print(ar.list_dataset_infos("covalent_radius")) +print(ar.list_radii_set_infos("van_der_waals", usage_role="target")) ``` diff --git a/docs/index.md b/docs/index.md index 5514004..d9f0d6c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -60,8 +60,8 @@ import atomref as ar print(ar.list_quantities()) print(ar.get_quantity_info("atomic_radius")) -print(ar.list_dataset_ids("van_der_waals_radius", usage_role="target")) -print(ar.list_dataset_ids("atomic_radius", usage_role="support")) +print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) +print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` ## Relationship to the Delone Commons ecosystem diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index 1b0ef6b..d08e1ed 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -17,6 +17,7 @@ get_covalent_radius, get_radii_set_info, get_vdw_radius, + list_radii_set_infos, list_radii_sets, lookup_covalent_radius, lookup_vdw_radius, @@ -31,6 +32,7 @@ get_dataset_info, get_quantity_info, list_dataset_ids, + list_dataset_infos, list_quantities, ) from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer @@ -51,6 +53,7 @@ "get_dataset_info", "get_quantity_info", "list_dataset_ids", + "list_dataset_infos", "list_quantities", "LinearFit", "LinearTransfer", @@ -63,6 +66,7 @@ "DEFAULT_COVALENT_POLICY", "DEFAULT_VDW_POLICY", "list_radii_sets", + "list_radii_set_infos", "get_radii_set_info", "lookup_covalent_radius", "get_covalent_radius", diff --git a/src/atomref/radii.py b/src/atomref/radii.py index 1095667..a5ede9e 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -15,6 +15,7 @@ ElementScalarSet, get_dataset_info, list_dataset_ids, + list_dataset_infos, ) from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel @@ -127,6 +128,12 @@ def list_radii_sets( return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) +def list_radii_set_infos( + kind: RadiiKind, *, usage_role: str | None = None +) -> tuple[DatasetInfo, ...]: + return list_dataset_infos(_quantity_for_kind(kind), usage_role=usage_role) + + def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) diff --git a/src/atomref/registry.py b/src/atomref/registry.py index f84b14b..c465786 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -253,6 +253,15 @@ def list_dataset_ids( return tuple(filtered) +def list_dataset_infos( + quantity: QuantityId, *, usage_role: str | None = None +) -> tuple[DatasetInfo, ...]: + return tuple( + get_dataset_info(DatasetRef(quantity, set_id)) + for set_id in list_dataset_ids(quantity, usage_role=usage_role) + ) + + def _coerce_reference(obj: object) -> Reference: if not isinstance(obj, dict): raise DatasetError("invalid reference entry in registry.json") diff --git a/tests/meta/test_public_api.py b/tests/meta/test_public_api.py index b64f77d..a6cb329 100644 --- a/tests/meta/test_public_api.py +++ b/tests/meta/test_public_api.py @@ -21,7 +21,9 @@ 'lookup_vdw_radius', 
'list_quantities', 'list_dataset_ids', + 'list_dataset_infos', 'list_radii_sets', + 'list_radii_set_infos', } diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py index 23b401e..3cfaec5 100644 --- a/tests/registry/test_registry.py +++ b/tests/registry/test_registry.py @@ -68,3 +68,15 @@ def test_list_dataset_ids_can_filter_by_usage_role() -> None: def test_list_radii_sets_can_filter_by_usage_role() -> None: assert ar.list_radii_sets('covalent', usage_role='support') == ('csd_legacy_cov',) assert 'alvarez2013' in ar.list_radii_sets('van_der_waals', usage_role='target') + + +def test_list_dataset_infos_can_filter_by_usage_role() -> None: + infos = ar.list_dataset_infos('atomic_radius', usage_role='support') + assert tuple(info.ref.set_id for info in infos) == ('rahm2016',) + assert all(info.usage_role == 'support' for info in infos) + + +def test_list_radii_set_infos_can_filter_by_usage_role() -> None: + infos = ar.list_radii_set_infos('van_der_waals', usage_role='target') + assert 'alvarez2013' in {info.ref.set_id for info in infos} + assert all(info.ref.quantity == 'van_der_waals_radius' for info in infos) diff --git a/tools/check_registry.py b/tools/check_registry.py index a57f49f..02b1e14 100644 --- a/tools/check_registry.py +++ b/tools/check_registry.py @@ -5,6 +5,7 @@ from collections import defaultdict from dataclasses import asdict +from importlib import import_module from pathlib import Path import sys from typing import Iterable @@ -14,27 +15,35 @@ if str(SRC) not in sys.path: sys.path.insert(0, str(SRC)) -import atomref as ar -from atomref.registry import get_builtin_set - _ALLOWED_USAGE_ROLES = {"target", "support"} +def _load_atomref_module(): + return import_module("atomref") + + +def _get_builtin_set(ref): + registry = import_module("atomref.registry") + return registry.get_builtin_set(ref) + + def _canonical_token(value: str) -> str: return " ".join(value.strip().lower().split()) -def _iter_dataset_refs() -> Iterable[ar.DatasetRef]: +def _iter_dataset_refs() -> Iterable[object]: + ar = _load_atomref_module() for quantity in ar.list_quantities(): - for set_id in ar.list_dataset_ids(quantity): - yield ar.DatasetRef(quantity, set_id) + for info in ar.list_dataset_infos(quantity): + yield info.ref def _validate_alias_collisions(errors: list[str]) -> None: + ar = _load_atomref_module() for quantity in ar.list_quantities(): seen: dict[str, str] = {} - for set_id in ar.list_dataset_ids(quantity): - info = ar.get_dataset_info(ar.DatasetRef(quantity, set_id)) + for info in ar.list_dataset_infos(quantity): + set_id = info.ref.set_id for token in (set_id, *info.aliases): key = _canonical_token(token) previous = seen.get(key) @@ -49,13 +58,14 @@ def _validate_alias_collisions(errors: list[str]) -> None: def _validate_dataset_metadata(errors: list[str]) -> None: + ar = _load_atomref_module() quantities = set(ar.list_quantities()) by_role: dict[str, list[str]] = defaultdict(list) for ref in _iter_dataset_refs(): quantity_info = ar.get_quantity_info(ref.quantity) info = ar.get_dataset_info(ref) - dataset = get_builtin_set(ref) + dataset = _get_builtin_set(ref) if info.ref != ref: errors.append(f"dataset ref mismatch: requested {ref!r}, got {info.ref!r}") From 9cf2b71e17de851e96df5d47943d1b30ecd88983 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 21:54:39 +0300 Subject: [PATCH 08/15] Improves docs --- .github/workflows/ci.yml | 2 + README.md | 12 +++ docs/guide/notebooks.md | 17 ++++ docs/guide/quickstart.md | 6 ++ docs/index.md | 12 +++ 
mkdocs.yml | 1 + notebooks/01-quickstart.ipynb | 77 ++++++++++++++++ notebooks/02-policies-and-assessment.ipynb | 96 ++++++++++++++++++++ notebooks/03-custom-sets-and-discovery.ipynb | 78 ++++++++++++++++ pyproject.toml | 1 + tests/meta/test_notebooks.py | 24 +++++ tools/check_dist.py | 4 + tools/check_notebooks.py | 95 +++++++++++++++++++ 13 files changed, 425 insertions(+) create mode 100644 docs/guide/notebooks.md create mode 100644 notebooks/01-quickstart.ipynb create mode 100644 notebooks/02-policies-and-assessment.ipynb create mode 100644 notebooks/03-custom-sets-and-discovery.ipynb create mode 100644 tests/meta/test_notebooks.py create mode 100644 tools/check_notebooks.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3512c2a..12d3421 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,8 @@ jobs: run: flake8 src tests tools - name: Validate packaged registry run: python tools/check_registry.py + - name: Validate notebooks + run: python tools/check_notebooks.py docs-check: runs-on: ubuntu-latest diff --git a/README.md b/README.md index d9f0d6c..b94dd31 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,18 @@ print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` +## Notebooks + +Hands-on notebooks live in the repository and mirror the main v0.1 workflows: + +- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) +- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) +- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) + +Open them locally in Jupyter or browse them on GitHub for worked examples of +lookup, transfer-backed policies, dataset discovery, and custom element-scalar +sets. + ## Relationship to the Delone Commons ecosystem `atomref` is intended to be reusable outside the surrounding ecosystem, but it diff --git a/docs/guide/notebooks.md b/docs/guide/notebooks.md new file mode 100644 index 0000000..9d39376 --- /dev/null +++ b/docs/guide/notebooks.md @@ -0,0 +1,17 @@ +# Notebook gallery + +`atomref` ships example Jupyter notebooks that mirror the main v0.1 user +workflows. They live in the repository under `notebooks/` and can be opened +locally with JupyterLab, VS Code, or any other notebook frontend. + +Available notebooks: + +- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) + – basic imports, element helpers, `get_*` vs `lookup_*`, quantity discovery. +- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) + – transfer policies, substitution vs linear transfer, policy assessment. +- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) + – custom user-defined sets, catalog inspection, metadata discovery. + +The notebooks are plain JSON files without heavy execution metadata so they stay +diff-friendly in version control. 
diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index eb55fd3..5017079 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -26,3 +26,9 @@ print(ar.list_dataset_infos("covalent_radius")) print(ar.list_radii_set_infos("van_der_waals", usage_role="target")) ``` +Need runnable versions of these examples? See the notebooks page and the +matching notebook files in the repository: + +- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) +- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) +- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) diff --git a/docs/index.md b/docs/index.md index d9f0d6c..b94dd31 100644 --- a/docs/index.md +++ b/docs/index.md @@ -64,6 +64,18 @@ print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` +## Notebooks + +Hands-on notebooks live in the repository and mirror the main v0.1 workflows: + +- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) +- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) +- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) + +Open them locally in Jupyter or browse them on GitHub for worked examples of +lookup, transfer-backed policies, dataset discovery, and custom element-scalar +sets. + ## Relationship to the Delone Commons ecosystem `atomref` is intended to be reusable outside the surrounding ecosystem, but it diff --git a/mkdocs.yml b/mkdocs.yml index 8b5060c..2a97e1a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,6 +20,7 @@ nav: - Guide: - Install: guide/install.md - Quickstart: guide/quickstart.md + - Notebooks: guide/notebooks.md - Policies: guide/policies.md - Custom sets: guide/custom_sets.md - Non-goals: guide/non_goals.md diff --git a/notebooks/01-quickstart.ipynb b/notebooks/01-quickstart.ipynb new file mode 100644 index 0000000..2c09cc0 --- /dev/null +++ b/notebooks/01-quickstart.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# atomref quickstart\n", + "\n", + "This notebook covers the basic public API: element helpers, direct `get_*` calls, provenance-carrying `lookup_*` calls, and quantity / dataset discovery.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import atomref as ar\n", + "\n", + "print(ar.get_element(\"Cl\"))\n", + "print(ar.list_quantities())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r_c = ar.get_covalent_radius(\"C\")\n", + "r_vdw = ar.get_vdw_radius(\"O\")\n", + "print(r_c)\n", + "print(r_vdw)\n", + "assert r_c == 0.76\n", + "assert r_vdw == 1.50\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup = ar.lookup_vdw_radius(\"Pm\")\n", + "print(lookup)\n", + "print(lookup.value)\n", + "print(lookup.source)\n", + "print(lookup.resolved_from)\n", + "assert lookup.source == \"transfer_linear\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "print(ar.get_quantity_info(\"atomic_radius\"))\n", + "for info in ar.list_dataset_infos(\"van_der_waals_radius\", usage_role=\"target\"):\n", + " print(info.ref.set_id, info.semantic_class, info.origin_class)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/02-policies-and-assessment.ipynb b/notebooks/02-policies-and-assessment.ipynb new file mode 100644 index 0000000..7db7e45 --- /dev/null +++ b/notebooks/02-policies-and-assessment.ipynb @@ -0,0 +1,96 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Policies and assessment\n", + "\n", + "This notebook shows how `atomref` resolves missing values through ordered transfer steps and how to inspect policy-level behavior.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import atomref as ar\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "covalent_policy = ar.RadiiPolicy(\n", + " kind=\"covalent\",\n", + " base_set=\"cordero2008\",\n", + " transfers=(\n", + " ar.SubstitutionTransfer(\n", + " source=ar.DatasetRef(\"covalent_radius\", \"csd_legacy_cov\")\n", + " ),\n", + " ),\n", + ")\n", + "\n", + "lookup_bk = ar.lookup_covalent_radius(\"Bk\", policy=covalent_policy)\n", + "print(lookup_bk)\n", + "assert lookup_bk.source == \"transfer_substitution\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vdw_policy = ar.RadiiPolicy(\n", + " kind=\"van_der_waals\",\n", + " base_set=\"alvarez2013\",\n", + " transfers=(\n", + " ar.LinearTransfer(\n", + " predictors=(ar.DatasetRef(\"atomic_radius\", \"rahm2016\"),)\n", + " ),\n", + " ),\n", + ")\n", + "\n", + "lookup_pm = ar.lookup_vdw_radius(\"Pm\", policy=vdw_policy)\n", + "print(lookup_pm.fit)\n", + "print(lookup_pm.value)\n", + "assert lookup_pm.source == \"transfer_linear\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assessment = ar.assess_radii_policy(\n", + " [\"C\", \"Xe\", \"Pm\", \"Bk\"],\n", + " policy=vdw_policy,\n", + " detail=True,\n", + ")\n", + "\n", + "print(assessment)\n", + "print(assessment.n_base, assessment.n_transfer_linear)\n", + "for item in assessment.per_element:\n", + " print(item.symbol, item.lookup.source)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/03-custom-sets-and-discovery.ipynb b/notebooks/03-custom-sets-and-discovery.ipynb new file mode 100644 index 0000000..827c91f --- /dev/null +++ b/notebooks/03-custom-sets-and-discovery.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Custom sets and dataset discovery\n", + "\n", + "This notebook shows how to define a small user-provided set, plug it into a policy, and inspect the packaged dataset catalog.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import atomref as ar\n" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "custom_cov = ar.ElementScalarSet.from_mapping(\n", + " ref=ar.DatasetRef(\"covalent_radius\", \"demo_user_cov\"),\n", + " values={\"C\": 0.77, \"O\": 0.67},\n", + " name=\"Demo user covalent set\",\n", + " units=\"angstrom\",\n", + " description=\"Example custom set for notebook usage.\",\n", + " notes=(\"Notebook example\",),\n", + ")\n", + "\n", + "policy = ar.RadiiPolicy(\n", + " kind=\"covalent\",\n", + " base_set=custom_cov,\n", + " transfers=(\n", + " ar.SubstitutionTransfer(\n", + " source=ar.DatasetRef(\"covalent_radius\", \"cordero2008\")\n", + " ),\n", + " ),\n", + ")\n", + "\n", + "for symbol in (\"C\", \"O\", \"N\"):\n", + " print(symbol, ar.lookup_covalent_radius(symbol, policy=policy))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for info in ar.list_radii_set_infos(\"van_der_waals\", usage_role=\"target\"):\n", + " print(info.ref.set_id, info.semantic_class, info.origin_class, info.phase_context)\n", + "\n", + "rahm = ar.get_dataset_info(ar.DatasetRef(\"atomic_radius\", \"rahm2016\"))\n", + "print(rahm.name)\n", + "print(rahm.semantic_class, rahm.phase_context, rahm.usage_role)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index ea2b569..065faab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ include = [ "/tests", "/docs", "/tools", + "/notebooks", "/mkdocs.yml", "/README.md", "/CHANGELOG.md", diff --git a/tests/meta/test_notebooks.py b/tests/meta/test_notebooks.py new file mode 100644 index 0000000..f49775f --- /dev/null +++ b/tests/meta/test_notebooks.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from pathlib import Path +import subprocess +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[2] +SCRIPT = REPO_ROOT / "tools" / "check_notebooks.py" +NOTEBOOKS = REPO_ROOT / "notebooks" + + +def test_notebook_files_exist() -> None: + expected = { + "01-quickstart.ipynb", + "02-policies-and-assessment.ipynb", + "03-custom-sets-and-discovery.ipynb", + } + actual = {path.name for path in NOTEBOOKS.glob("*.ipynb")} + assert expected.issubset(actual) + + +def test_notebooks_validate_and_execute() -> None: + subprocess.run([sys.executable, str(SCRIPT)], cwd=REPO_ROOT, check=True) diff --git a/tools/check_dist.py b/tools/check_dist.py index 3eb4c66..b9d80b5 100644 --- a/tools/check_dist.py +++ b/tools/check_dist.py @@ -23,6 +23,10 @@ 'README.md', 'LICENSE', 'pyproject.toml', + 'notebooks/01-quickstart.ipynb', + 'notebooks/02-policies-and-assessment.ipynb', + 'notebooks/03-custom-sets-and-discovery.ipynb', + 'tools/check_notebooks.py', } diff --git a/tools/check_notebooks.py b/tools/check_notebooks.py new file mode 100644 index 0000000..830d742 --- /dev/null +++ b/tools/check_notebooks.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +"""Validate notebook JSON structure and execute notebook code cells.""" + +from __future__ import annotations + +from contextlib import redirect_stdout +import io +import json +from pathlib import Path +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC = REPO_ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +NOTEBOOKS = REPO_ROOT / "notebooks" +REQUIRED_NOTEBOOKS = ( + "01-quickstart.ipynb", + 
"02-policies-and-assessment.ipynb", + "03-custom-sets-and-discovery.ipynb", +) + + +class NotebookCheckError(RuntimeError): + """Raised when a notebook is malformed or fails to execute.""" + + +def iter_notebooks() -> tuple[Path, ...]: + return tuple(NOTEBOOKS / name for name in REQUIRED_NOTEBOOKS) + + +def load_notebook(path: Path) -> dict[str, object]: + data = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(data, dict): + raise NotebookCheckError(f"{path.name}: expected top-level JSON object") + return data + + +def iter_code_cells(data: dict[str, object], *, path: Path) -> tuple[str, ...]: + cells = data.get("cells") + if not isinstance(cells, list): + raise NotebookCheckError(f"{path.name}: missing notebook cell list") + + code: list[str] = [] + for index, cell in enumerate(cells): + if not isinstance(cell, dict): + raise NotebookCheckError(f"{path.name}: cell {index} is not an object") + cell_type = cell.get("cell_type") + if cell_type != "code": + continue + source = cell.get("source", []) + if isinstance(source, str): + text = source + elif isinstance(source, list) and all(isinstance(line, str) for line in source): + text = "".join(source) + else: + raise NotebookCheckError( + f"{path.name}: cell {index} has invalid code source" + ) + code.append(text) + if not code: + raise NotebookCheckError(f"{path.name}: contains no code cells") + return tuple(code) + + +def execute_notebook(path: Path) -> None: + if not path.exists(): + raise NotebookCheckError(f"missing notebook: {path}") + data = load_notebook(path) + namespace = {"__name__": "__main__"} + for index, source in enumerate(iter_code_cells(data, path=path), start=1): + if not source.strip(): + continue + try: + code = compile(source, f"{path.name}::cell{index}", "exec") + with redirect_stdout(io.StringIO()): + exec(code, namespace, namespace) + except Exception as exc: # noqa: BLE001 + raise NotebookCheckError( + f"{path.name}: execution failed in code cell {index}: {exc}" + ) from exc + + +def main() -> int: + notebooks = iter_notebooks() + for notebook in notebooks: + execute_notebook(notebook) + print(f"Validated {len(notebooks)} notebook(s).") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From e1f19d6a0eacce7f686e229a7b221c4c89a39365 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sat, 14 Mar 2026 23:11:14 +0300 Subject: [PATCH 09/15] Adds public API --- CHANGELOG.md | 2 ++ README.md | 12 ++++++++++++ docs/guide/quickstart.md | 12 ++++++++++++ docs/index.md | 12 ++++++++++++ src/atomref/__init__.py | 4 ++++ src/atomref/radii.py | 5 +++++ tests/meta/test_public_api.py | 2 ++ tests/registry/test_registry.py | 13 +++++++++++++ 8 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index faca26a..34daba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,3 +7,5 @@ - Added registry design separating operational quantity from scientific classification. - Added radii policies with substitution and linear transfer models. + +- Added public packaged-set retrieval helpers: `get_builtin_set()` and `get_radii_set()`. 
diff --git a/README.md b/README.md index b94dd31..7d7253c 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,18 @@ print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` +You can also retrieve the packaged set object directly: + +```python +import atomref as ar + +vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +print(vdw.get("O")) + +raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +print(raw.get("Pm")) +``` + ## Notebooks Hands-on notebooks live in the repository and mirror the main v0.1 workflows: diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index 5017079..d23e99f 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -26,6 +26,18 @@ print(ar.list_dataset_infos("covalent_radius")) print(ar.list_radii_set_infos("van_der_waals", usage_role="target")) ``` +You can also retrieve the packaged set object directly: + +```python +import atomref as ar + +vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +print(vdw.get("O")) + +raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +print(raw.get("Pm")) +``` + Need runnable versions of these examples? See the notebooks page and the matching notebook files in the repository: diff --git a/docs/index.md b/docs/index.md index b94dd31..7d7253c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -64,6 +64,18 @@ print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) print(ar.list_dataset_infos("atomic_radius", usage_role="support")) ``` +You can also retrieve the packaged set object directly: + +```python +import atomref as ar + +vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +print(vdw.get("O")) + +raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +print(raw.get("Pm")) +``` + ## Notebooks Hands-on notebooks live in the repository and mirror the main v0.1 workflows: diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index d08e1ed..815b42c 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -15,6 +15,7 @@ RadiiPolicyAssessment, assess_radii_policy, get_covalent_radius, + get_radii_set, get_radii_set_info, get_vdw_radius, list_radii_set_infos, @@ -29,6 +30,7 @@ ElementScalarSet, QuantityInfo, Reference, + get_builtin_set, get_dataset_info, get_quantity_info, list_dataset_ids, @@ -50,6 +52,7 @@ "ElementScalarSet", "QuantityInfo", "Reference", + "get_builtin_set", "get_dataset_info", "get_quantity_info", "list_dataset_ids", @@ -67,6 +70,7 @@ "DEFAULT_VDW_POLICY", "list_radii_sets", "list_radii_set_infos", + "get_radii_set", "get_radii_set_info", "lookup_covalent_radius", "get_covalent_radius", diff --git a/src/atomref/radii.py b/src/atomref/radii.py index a5ede9e..cda8a89 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -13,6 +13,7 @@ DatasetInfo, DatasetRef, ElementScalarSet, + get_builtin_set, get_dataset_info, list_dataset_ids, list_dataset_infos, @@ -138,6 +139,10 @@ def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) +def get_radii_set(kind: RadiiKind, set_id: str) -> RadiiSet: + return get_builtin_set(DatasetRef(_quantity_for_kind(kind), set_id)) + + def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: if policy.kind != expected: raise PolicyError(f"expected a {expected!r} radii policy, got {policy.kind!r}") diff --git a/tests/meta/test_public_api.py 
b/tests/meta/test_public_api.py index a6cb329..8f191bf 100644 --- a/tests/meta/test_public_api.py +++ b/tests/meta/test_public_api.py @@ -15,6 +15,8 @@ 'DEFAULT_VDW_POLICY', 'LinearTransfer', 'SubstitutionTransfer', + 'get_builtin_set', + 'get_radii_set', 'get_covalent_radius', 'lookup_covalent_radius', 'get_vdw_radius', diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py index 3cfaec5..48afbae 100644 --- a/tests/registry/test_registry.py +++ b/tests/registry/test_registry.py @@ -80,3 +80,16 @@ def test_list_radii_set_infos_can_filter_by_usage_role() -> None: infos = ar.list_radii_set_infos('van_der_waals', usage_role='target') assert 'alvarez2013' in {info.ref.set_id for info in infos} assert all(info.ref.quantity == 'van_der_waals_radius' for info in infos) + + +def test_public_builtin_set_helper_is_exported() -> None: + ds = ar.get_builtin_set(ar.DatasetRef('covalent_radius', 'cordero2008')) + assert ds.info.ref.quantity == 'covalent_radius' + assert ds.get('C') == 0.76 + + +def test_public_radii_set_helper_returns_packaged_radii_set() -> None: + ds = ar.get_radii_set('van_der_waals', 'alvarez2013') + assert ds.info.ref.quantity == 'van_der_waals_radius' + assert ds.info.ref.set_id == 'alvarez2013' + assert ds.get('O') == 1.5 From b64972838970af2951d0ef5eb272ea93172f5e9e Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 08:44:16 +0300 Subject: [PATCH 10/15] Improves docs --- .github/workflows/ci.yml | 8 + .github/workflows/docs.yml | 4 + CHANGELOG.md | 10 +- README.md | 165 +++++++++------- docs/api/atomref.md | 3 + docs/api/elements.md | 7 + docs/api/index.md | 28 ++- docs/api/policy.md | 9 + docs/api/radii.md | 8 + docs/api/registry.md | 9 + docs/api/transfer.md | 9 + docs/datasets/atomic_radius.md | 22 ++- docs/datasets/covalent_radius.md | 37 +++- docs/datasets/index.md | 35 +++- docs/datasets/van_der_waals_radius.md | 60 +++++- docs/guide/custom_sets.md | 19 +- docs/guide/install.md | 19 +- docs/guide/non_goals.md | 26 ++- docs/guide/notebooks.md | 28 ++- docs/guide/policies.md | 112 +++++++++-- docs/guide/quickstart.md | 78 ++++---- docs/index.md | 161 ++++++++------- docs/notebooks/01-quickstart.md | 72 +++++++ docs/notebooks/02-policies-and-assessment.md | 73 +++++++ .../notebooks/03-custom-sets-and-discovery.md | 56 ++++++ mkdocs.yml | 11 +- notebooks/01-quickstart.ipynb | 164 ++++++++------- notebooks/02-policies-and-assessment.ipynb | 187 +++++++++--------- notebooks/03-custom-sets-and-discovery.ipynb | 3 +- src/atomref/__init__.py | 2 + src/atomref/elements.py | 35 ++-- src/atomref/errors.py | 9 +- src/atomref/policy.py | 57 +++++- src/atomref/radii.py | 71 ++++++- src/atomref/registry.py | 72 ++++++- src/atomref/transfer.py | 19 +- tests/meta/test_notebooks.py | 21 +- tests/meta/test_text_generation_tools.py | 34 ++++ tools/README.md | 27 +++ tools/check_dist.py | 74 ++++--- tools/check_notebooks.py | 10 + tools/export_notebooks.py | 146 ++++++++++++++ tools/gen_readme.py | 55 +++++- 43 files changed, 1576 insertions(+), 479 deletions(-) create mode 100644 docs/api/elements.md create mode 100644 docs/api/policy.md create mode 100644 docs/api/radii.md create mode 100644 docs/api/registry.md create mode 100644 docs/api/transfer.md create mode 100644 docs/notebooks/01-quickstart.md create mode 100644 docs/notebooks/02-policies-and-assessment.md create mode 100644 docs/notebooks/03-custom-sets-and-discovery.md create mode 100644 tests/meta/test_text_generation_tools.py create mode 100644 tools/README.md create mode 100644 
tools/export_notebooks.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12d3421..6f00ac0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,10 @@ jobs: run: python tools/check_registry.py - name: Validate notebooks run: python tools/check_notebooks.py + - name: Check notebook exports + run: python tools/export_notebooks.py --check + - name: Check README sync + run: python tools/gen_readme.py --check docs-check: runs-on: ubuntu-latest @@ -34,6 +38,10 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install .[docs] + - name: Export notebooks and README + run: | + python tools/export_notebooks.py --check + python tools/gen_readme.py --check - name: Build docs run: mkdocs build --strict diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 70396d7..418ce0d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,5 +17,9 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install .[docs] + - name: Check generated files + run: | + python tools/export_notebooks.py --check + python tools/gen_readme.py --check - name: Build docs run: mkdocs build --strict diff --git a/CHANGELOG.md b/CHANGELOG.md index 34daba8..dcfa24a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,5 +7,11 @@ - Added registry design separating operational quantity from scientific classification. - Added radii policies with substitution and linear transfer models. - -- Added public packaged-set retrieval helpers: `get_builtin_set()` and `get_radii_set()`. +- Added public packaged-set retrieval helpers: `get_builtin_set()` and + `get_radii_set()`. +- Added runnable notebooks together with generated Markdown notebook pages in + the docs. +- Expanded the docs with dataset guidance, module-level API pages, and a tools + overview. +- Added docstrings across the main importable modules, including important + internal helpers used across modules. diff --git a/README.md b/README.md index 7d7253c..9d5eb79 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,133 @@ # atomref -`atomref` is a small pure-Python package for curated atomic reference data and -policy-based lookup in geometry and structure-analysis code. +[![CI](https://github.com/DeloneCommons/atomref/actions/workflows/ci.yml/badge.svg)](https://github.com/DeloneCommons/atomref/actions/workflows/ci.yml) +[![Docs](https://github.com/DeloneCommons/atomref/actions/workflows/docs.yml/badge.svg)](https://github.com/DeloneCommons/atomref/actions/workflows/docs.yml) +[![PyPI](https://img.shields.io/pypi/v/atomref.svg)](https://pypi.org/project/atomref/) +[![Python Versions](https://img.shields.io/pypi/pyversions/atomref.svg)](https://pypi.org/project/atomref/) +[![License](https://img.shields.io/pypi/l/atomref.svg)](https://github.com/DeloneCommons/atomref/blob/main/LICENSE) -It is **not** a periodic-table encyclopedia. The package is meant to sit under -higher-level scientific software and provide: +`atomref` is a small pure-Python package for **curated atomic reference data** +and **provenance-aware lookup policies** used by geometry and +structure-analysis algorithms. + +It is not meant to be yet another periodic-table encyclopedia. The package is +for code that needs stable atomic reference values with explicit provenance, +clear fallback behavior, and honest handling of incomplete preferred datasets. 
+ +What you get in v0.1: - stable element metadata, -- named radii sets, -- explicit dataset provenance, +- curated named radii sets, +- dataset provenance and coverage metadata, - deterministic lookup policies, -- transfer from broader-support datasets into narrower target sets. - -For v0.1 the public scope is intentionally radii-first. +- substitution and linear transfer from support datasets into target datasets, +- user-defined custom element-indexed scalar sets. ## Why this exists -Many geometry algorithms need a complete reference table, but the scientifically -preferred dataset is often incomplete. `atomref` makes that situation explicit: -choose a target dataset, add one or more transfer steps, and keep provenance on -what was returned. +Scientific software often wants a complete lookup table, but the best dataset +for the job is rarely complete. `atomref` makes that situation explicit. +Instead of hiding ad hoc defaults inside algorithm code, you choose a target +set, describe how missing values may be restored, and keep provenance on what +was actually returned. -The default examples mirror the current `molcryst` behavior: +The default v0.1 behavior is intentionally simple and practical: -- covalent radii: use `cordero2008`, substitute from `csd_legacy_cov` -- van der Waals radii: use `alvarez2013`, linearly transfer from - `atomic_radius:rahm2016` +- **Cordero covalent radii** (`cordero2008`) are the preferred covalent target + set, with missing values substituted from the **legacy CSD covalent radii** + (`csd_legacy_cov`). +- **Alvarez van der Waals radii** (`alvarez2013`) are the preferred vdW target + set, with missing values restored from the **Rahm isodensity atomic radii** + (`rahm2016`) through a fitted linear transfer. ## Quick example -```python -import atomref as ar - -r_c = ar.get_covalent_radius("C") -r_vdw = ar.get_vdw_radius("O") - -lookup = ar.lookup_vdw_radius("Pm") -print(lookup.value, lookup.source, lookup.resolved_from) +```pycon +>>> import atomref as ar +>>> ar.get_covalent_radius("C") +0.76 +>>> ar.get_vdw_radius("O") +1.5 +>>> lookup = ar.lookup_vdw_radius("Pm") +>>> lookup.value +2.8972265395148358 +>>> lookup.source +'transfer_linear' +>>> lookup.resolved_from +(DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) ``` -## Public API split: `get_*` vs `lookup_*` +`get_*` returns only the number. `lookup_*` returns a `LookupResult` that also +records where the value came from and whether a transfer model was involved. -- `get_*` returns only the selected numeric value, or `None`. -- `lookup_*` returns the provenance-carrying `LookupResult` object. +You can inspect the packaged quantity and dataset catalog directly: -This follows the current `molcryst` pattern. 
+```pycon +>>> import atomref as ar +>>> ar.list_quantities() +('covalent_radius', 'van_der_waals_radius', 'atomic_radius') +>>> ar.get_quantity_info("atomic_radius") +QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +>>> [info.ref.set_id for info in ar.list_dataset_infos("van_der_waals_radius", usage_role="target")] +['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] +``` -## Current built-in quantities +You can also load a packaged set directly: -- `covalent_radius` -- `van_der_waals_radius` -- `atomic_radius` (support quantity; currently used for transfer from - `rahm2016`) +```pycon +>>> import atomref as ar +>>> vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +>>> vdw.get("O") +1.5 +>>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +>>> raw.get("Pm") +2.83 +``` -You can inspect the packaged quantity layer directly: +## Notebook walkthroughs -```python -import atomref as ar +The repository ships example notebooks for the main v0.1 workflows. In the +documentation they are also available as rendered Markdown pages, so users can +read them without opening Jupyter first. -print(ar.list_quantities()) -print(ar.get_quantity_info("atomic_radius")) -print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) -print(ar.list_dataset_infos("atomic_radius", usage_role="support")) -``` +- [Notebook overview](https://delonecommons.github.io/atomref/guide/notebooks/) +- [Quickstart notebook](https://delonecommons.github.io/atomref/notebooks/01-quickstart/) +- [Policies and assessment notebook](https://delonecommons.github.io/atomref/notebooks/02-policies-and-assessment/) +- [Custom sets and discovery notebook](https://delonecommons.github.io/atomref/notebooks/03-custom-sets-and-discovery/) -You can also retrieve the packaged set object directly: +## Relationship to Delone Commons -```python -import atomref as ar +`atomref` is designed as a standalone package, but within Delone Commons it is +primarily intended to support chemistry-aware packages such as: -vdw = ar.get_radii_set("van_der_waals", "alvarez2013") -print(vdw.get("O")) +- `molcryst`, for covalent-bond detection and contact analysis, +- future `chemvoro`, for chemistry-aware contact and hydrogen workflows. -raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) -print(raw.get("Pm")) -``` +By contrast, `pyvoro2` and `pbcgraph` are intentionally general mathematical +packages and are not direct consumers of `atomref`. -## Notebooks +## Data curation and developer tools -Hands-on notebooks live in the repository and mirror the main v0.1 workflows: +The repository also ships small maintenance tools. The most important ones are: -- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) -- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) -- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) +- `python tools/check_registry.py` — validate curated registry metadata against + packaged CSV tables, +- `python tools/check_notebooks.py` — execute notebook code cells, +- `python tools/export_notebooks.py` — turn notebooks into Markdown pages for + the docs, +- `python tools/gen_readme.py` — regenerate `README.md` from this page. 
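+
+Both generator scripts also take a `--check` flag, which the CI workflows use
+to verify that the exported notebook pages and the README are up to date
+without rewriting them:
+
+```bash
+python tools/export_notebooks.py --check
+python tools/gen_readme.py --check
+```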
-Open them locally in Jupyter or browse them on GitHub for worked examples of -lookup, transfer-backed policies, dataset discovery, and custom element-scalar -sets. +See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) +for a short description of each script. -## Relationship to the Delone Commons ecosystem +--- -`atomref` is intended to be reusable outside the surrounding ecosystem, but it -fits naturally beneath: +This README is generated from `docs/index.md`. -- `molcryst` -- `pyvoro2` -- `pbcgraph` +To regenerate it: -Those packages should consume atomic reference data from `atomref` rather than -re-curating such datasets independently. +```bash +python tools/gen_readme.py +``` -For data-curation changes, validate the packaged registry against the bundled -CSV tables with `python tools/check_registry.py`. +Edit the documentation sources instead of editing `README.md` directly. diff --git a/docs/api/atomref.md b/docs/api/atomref.md index dcbc5e0..3536e34 100644 --- a/docs/api/atomref.md +++ b/docs/api/atomref.md @@ -1,3 +1,6 @@ # atomref +The top-level package re-exports the main user-facing API so that most code can +simply do `import atomref as ar`. + ::: atomref diff --git a/docs/api/elements.md b/docs/api/elements.md new file mode 100644 index 0000000..c4275a0 --- /dev/null +++ b/docs/api/elements.md @@ -0,0 +1,7 @@ +# atomref.elements + +Element identity is intentionally minimal in v0.1: atomic number, symbol, and +name. The module also contains the canonicalization helpers used throughout the +package. + +::: atomref.elements diff --git a/docs/api/index.md b/docs/api/index.md index da15dbf..e69e719 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -1,4 +1,28 @@ # API -The top-level package exports the main radii helpers together with the registry, -policy, and transfer data structures. +The public API is small on purpose. + +Most users will spend most of their time in the top-level package namespace and +in the radii helpers. The lower-level modules are still documented because they +expose the actual data model behind the package. + +## Common tasks + +- get a single value: use `get_covalent_radius(...)` or `get_vdw_radius(...)` +- inspect provenance: use `lookup_covalent_radius(...)` or + `lookup_vdw_radius(...)` +- browse packaged datasets: use `list_quantities()`, `get_quantity_info(...)`, + `list_dataset_infos(...)`, or `list_radii_set_infos(...)` +- load a packaged set directly: use `get_builtin_set(...)` or `get_radii_set(...)` +- define a custom set: use `ElementScalarSet.from_mapping(...)` +- define transfer-backed lookup behavior: use `RadiiPolicy`, + `SubstitutionTransfer`, and `LinearTransfer` + +## Module reference + +- [Top-level package](atomref.md) +- [Elements](elements.md) +- [Registry and packaged datasets](registry.md) +- [Transfer models](transfer.md) +- [Generic policy core](policy.md) +- [Radii API](radii.md) diff --git a/docs/api/policy.md b/docs/api/policy.md new file mode 100644 index 0000000..99d51d9 --- /dev/null +++ b/docs/api/policy.md @@ -0,0 +1,9 @@ +# atomref.policy + +This module contains the generic resolver that sits below the radii-specific +API. + +It is useful when you want to understand exactly how overrides, base datasets, +transfers, fallbacks, and missing values are ordered and reported. 
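+
+A minimal sketch of that ordering, using the public radii API and the packaged
+v0.1 datasets (the override value below is arbitrary and only for
+illustration):
+
+```python
+import atomref as ar
+
+policy = ar.RadiiPolicy(
+    kind="covalent",
+    base_set="cordero2008",
+    transfers=(
+        ar.SubstitutionTransfer(
+            source=ar.DatasetRef("covalent_radius", "csd_legacy_cov")
+        ),
+    ),
+    overrides={"C": 0.70},  # arbitrary override, wins over every dataset
+)
+
+# Carbon resolves from the override, oxygen from the Cordero base set, and
+# berkelium (missing in cordero2008) from the substitution transfer.
+for symbol in ("C", "O", "Bk"):
+    lookup = ar.lookup_covalent_radius(symbol, policy=policy)
+    print(symbol, lookup.source, lookup.value, lookup.resolved_from)
+```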
+ +::: atomref.policy diff --git a/docs/api/radii.md b/docs/api/radii.md new file mode 100644 index 0000000..05617a4 --- /dev/null +++ b/docs/api/radii.md @@ -0,0 +1,8 @@ +# atomref.radii + +This is the main user-facing module in v0.1. + +It provides radii policies, packaged radii-set discovery, lookup helpers, and +policy-assessment reports. + +::: atomref.radii diff --git a/docs/api/registry.md b/docs/api/registry.md new file mode 100644 index 0000000..4f664e6 --- /dev/null +++ b/docs/api/registry.md @@ -0,0 +1,9 @@ +# atomref.registry + +This module contains the packaged data model. + +If you want to understand how `atomref` classifies datasets, how aliases are +resolved, or how built-in CSV tables are turned into typed in-memory objects, +this is the key module to read. + +::: atomref.registry diff --git a/docs/api/transfer.md b/docs/api/transfer.md new file mode 100644 index 0000000..eab5672 --- /dev/null +++ b/docs/api/transfer.md @@ -0,0 +1,9 @@ +# atomref.transfer + +Transfer models describe how missing target values may be restored from other +datasets. + +In v0.1 the core built-in models are direct substitution and one-predictor +linear transfer. + +::: atomref.transfer diff --git a/docs/datasets/atomic_radius.md b/docs/datasets/atomic_radius.md index 00a43cd..1704980 100644 --- a/docs/datasets/atomic_radius.md +++ b/docs/datasets/atomic_radius.md @@ -1,10 +1,22 @@ # Atomic radius -This quantity currently exists to hold transferable support datasets that are -not best described as direct condensed-phase vdW radii. +The `atomic_radius` quantity exists in v0.1 to hold support datasets that are +scientifically useful but should not be presented as direct condensed-phase vdW +radii. -Built-in v0.1 support set: +## Rahm isodensity atomic radii (`rahm2016`) -- `rahm2016` +This is currently the only built-in atomic-radius dataset. -`rahm2016` is intentionally classified here as atomic support data rather than as a direct vdW target set. +- **What it is:** radii for isolated neutral atoms defined by the + ρ = 0.001 e/bohr³ electron-density isosurface. +- **Source idea:** a consistent theory-based atomic size measure derived from + computed electron densities. +- **Coverage:** broad, but not complete for the full periodic table. +- **Why it matters here:** it correlates well with structural vdW radii and is a + useful support baseline when a condensed-phase target set is incomplete. +- **How `atomref` uses it:** support-only dataset for linear transfer into + target vdW values such as `alvarez2013`. + +This is an important example of the package philosophy: a dataset can be very +useful algorithmically without being mislabeled as something it is not. diff --git a/docs/datasets/covalent_radius.md b/docs/datasets/covalent_radius.md index f298635..d2e2251 100644 --- a/docs/datasets/covalent_radius.md +++ b/docs/datasets/covalent_radius.md @@ -1,6 +1,37 @@ # Covalent radius -Built-in v0.1 sets: +The covalent-radius quantity in v0.1 is aimed at bond-detection and related +geometry workflows. It currently ships one preferred target dataset and one +legacy support dataset. -- `cordero2008` -- `csd_legacy_cov` +## Cordero covalent radii (`cordero2008`) + +This is the main covalent-radius target set in `atomref` v0.1. + +- **What it is:** a broad covalent-radius compilation based mainly on + crystallographic bond distances. +- **Why it matters:** it is a modern, widely used reference set for element-wise + covalent radii. 
+- **Coverage:** broad coverage across the periodic table, but not complete for + every element. +- **How `atomref` uses it:** direct target dataset for covalent-radius lookup. + +If you want one covalent set to start with, this is usually the right first +choice. + +## Legacy CSD covalent radii (`csd_legacy_cov`) + +This set reflects the older covalent radii historically used in CSD software for +bond perception. + +- **What it is:** a practical, legacy-oriented bond-assignment table. +- **Why it matters:** it has long been used in chemistry software and contains + placeholder conventions that are still relevant for compatibility work. +- **Coverage:** broad practical coverage, with explicit placeholder values for + elements not covered by the historical table. +- **How `atomref` uses it:** support dataset for substitution when the preferred + Cordero target set is missing a value. + +Because it contains legacy placeholders, it is not the preferred scientific +starting point. It is mainly useful as a support layer and for compatibility +with older workflows. diff --git a/docs/datasets/index.md b/docs/datasets/index.md index cbd132e..20d4c3e 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -1,17 +1,34 @@ # Datasets -The package distinguishes between: +`atomref` does not treat all datasets as interchangeable lookup tables. +Instead, the package records several layers of classification: - **quantity** — the operational property being requested, - **semantic class** — what the dataset scientifically represents, -- **origin / phase context** — how and where it was derived. +- **origin class** — how the values were obtained, +- **phase context** — what physical context they describe, +- **usage role** — whether the package treats the dataset as a direct target set + or as support data for transfer. -This is what keeps support-only datasets such as `rahm2016` usable without -misclassifying them as direct condensed-phase vdW radii. +This is what allows a dataset such as **Rahm isodensity atomic radii** +(`rahm2016`) to be useful in van der Waals workflows without pretending that it +is itself a condensed-phase structural vdW-radius set. -For programmatic inspection, use `atomref.list_quantities()`, `atomref.get_quantity_info(...)`, and `atomref.list_dataset_infos(...)`. +## Programmatic inspection -Dataset metadata also carries a package-level `usage_role`, which currently -distinguishes direct target sets from support-only sets used for substitution or -linear transfer. Use `atomref.list_dataset_ids(..., usage_role=...)` to inspect -that layer programmatically. +The most useful catalog helpers are: + +- `atomref.list_quantities()` +- `atomref.get_quantity_info(...)` +- `atomref.list_dataset_infos(...)` +- `atomref.list_radii_set_infos(...)` + +If you only need dataset ids, use `list_dataset_ids(...)` or `list_radii_sets(...)`. +If you want the packaged values themselves, use `get_builtin_set(...)` or +`get_radii_set(...)`. 
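+
+A short sketch of walking the catalog (output omitted; the exact ids reflect
+the packaged v0.1 registry):
+
+```python
+import atomref as ar
+
+# From quantities down to the dataset ids that act as direct target sets.
+for quantity in ar.list_quantities():
+    info = ar.get_quantity_info(quantity)
+    print(quantity, info.units, ar.list_dataset_ids(quantity, usage_role="target"))
+
+# Load one packaged set and read a single value from it.
+alvarez = ar.get_builtin_set(ar.DatasetRef("van_der_waals_radius", "alvarez2013"))
+print(alvarez.info.usage_role, alvarez.get("O"))
+```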
+ +## Built-in quantity families in v0.1 + +- [Covalent radius](covalent_radius.md) +- [van der Waals radius](van_der_waals_radius.md) +- [Atomic radius](atomic_radius.md) diff --git a/docs/datasets/van_der_waals_radius.md b/docs/datasets/van_der_waals_radius.md index d757bab..c678639 100644 --- a/docs/datasets/van_der_waals_radius.md +++ b/docs/datasets/van_der_waals_radius.md @@ -1,11 +1,57 @@ # van der Waals radius -Built-in v0.1 target sets: +The van der Waals quantity in v0.1 intentionally includes several target sets +with different scientific backgrounds. This lets users choose between a classic +historical compilation, structural contact-derived sets, and compatibility-only +legacy tables. -- `bondi1964` -- `rowland_taylor1996` -- `alvarez2013` -- `chernyshov2020` -- `csd_legacy_vdw` +## Bondi van der Waals radii (`bondi1964`) -Support-only sets may live under other quantities. +A classic historical reference set compiled from mixed experimental sources. + +- **What it is:** the traditional Bondi vdW table used throughout chemistry. +- **Coverage:** limited, especially for transition metals and heavier elements. +- **Why you might use it:** historical consistency or comparison with older + literature and software defaults. + +## Rowland & Taylor nonbonded-contact radii (`rowland_taylor1996`) + +A small but influential structural set derived from organic-crystal nonbonded +contacts. + +- **What it is:** a condensed-phase structural vdW set focused on common organic + elements. +- **Coverage:** intentionally narrow. +- **Why you might use it:** organic-crystal contact analysis and comparisons to + classic contact-distance literature. + +## Alvarez van der Waals radii (`alvarez2013`) + +This is the main van der Waals target set in `atomref` v0.1. + +- **What it is:** a broad structural vdW set derived from statistical analysis + of many interatomic distances in the Cambridge Structural Database. +- **Coverage:** broad, but still incomplete for some elements. +- **Why you might use it:** it is a strong default for general condensed-phase + geometry and contact work. +- **How `atomref` uses it:** direct target set for vdW lookup, with missing + values restored from support data when requested by policy. + +## Chernyshov line-of-sight vdW radii (`chernyshov2020`) + +A reduced element-wise view of a more atom-type-aware structural analysis. + +- **What it is:** vdW radii inferred from line-of-sight contact classification. +- **Coverage:** focused on elements common in molecular crystals. +- **Why you might use it:** you want a contact-derived set informed by the LoS + idea while still using a simple element-wise API. + +## Legacy CSD van der Waals radii (`csd_legacy_vdw`) + +A compatibility-oriented table used historically in CSD tools. + +- **What it is:** an older practical vdW table with placeholder conventions. +- **Coverage:** broad practical coverage, but not a modern scientific target + set. +- **How `atomref` uses it:** support-only data for legacy compatibility and + future migration work. diff --git a/docs/guide/custom_sets.md b/docs/guide/custom_sets.md index bfc55cb..ed4d664 100644 --- a/docs/guide/custom_sets.md +++ b/docs/guide/custom_sets.md @@ -1,8 +1,10 @@ # Custom sets -Custom element-indexed scalar datasets can be built with -`ElementScalarSet.from_mapping(...)` and then used directly in a `RadiiPolicy` -or a transfer model. +`atomref` is not limited to the packaged tables. 
You can build a small +user-defined element-indexed scalar dataset and use it as a base dataset or as a +support dataset inside a transfer-backed policy. + +The simplest entry point is `ElementScalarSet.from_mapping(...)`. ```python from atomref import DatasetRef, ElementScalarSet, RadiiPolicy @@ -16,3 +18,14 @@ custom = ElementScalarSet.from_mapping( policy = RadiiPolicy(kind="covalent", base_set=custom) ``` + +This is useful when you want to: + +- test an alternative reference table, +- pin a small project-specific dataset without creating a full package fork, +- combine a user dataset with built-in support data through substitution or + linear transfer. + +In v0.1 custom sets are element-domain scalar datasets, which keeps the data +model small and stable. Later versions may add more specialized domains, but +custom element-wise sets are already enough for many geometry workflows. diff --git a/docs/guide/install.md b/docs/guide/install.md index 2e2ae65..00a4f22 100644 --- a/docs/guide/install.md +++ b/docs/guide/install.md @@ -1,8 +1,23 @@ # Install +For normal use, install the runtime package: + ```bash pip install atomref ``` -The runtime package is pure Python and has no required runtime dependencies -outside the standard library. +`atomref` is pure Python and has no required runtime dependencies outside the +standard library. + +For local development, documentation work, and tests, install the editable +package together with the main extras: + +```bash +pip install -e ".[test,docs,dev]" +``` + +Those extras currently cover: + +- `test` — pytest and test-only compatibility helpers, +- `docs` — MkDocs and API documentation tooling, +- `dev` — flake8, build, and release metadata checks. diff --git a/docs/guide/non_goals.md b/docs/guide/non_goals.md index 57bca94..b38aa68 100644 --- a/docs/guide/non_goals.md +++ b/docs/guide/non_goals.md @@ -1,11 +1,23 @@ # Non-goals -`atomref` does not aim to handle: +`atomref` is intentionally narrow. -- file parsing, -- crystallographic symmetry, -- structure inference, -- Voronoi or power tessellation, -- chemistry-specific plane-position logic. +It is **not** trying to be: -Those concerns belong in higher-level packages. +- a general periodic-table encyclopedia, +- a home for arbitrary atomic or chemical properties, +- a structure parser, +- a crystallographic symmetry package, +- a structure-inference engine, +- a Voronoi / tessellation library, +- an environment-specific chemistry model, +- a machine-learning framework for extrapolating unseen chemistry. + +The package is about **curated reference data and explicit lookup policies**. +That includes provenance, transfer from broader support datasets, and stable API +surfaces that higher-level scientific code can rely on. + +Future versions may widen the range of supported *reference-data families* — for +example X–H distances or radial atomic reference functions — but the package +should still remain a small reference-data layer rather than a full chemistry +platform. diff --git a/docs/guide/notebooks.md b/docs/guide/notebooks.md index 9d39376..cdd1721 100644 --- a/docs/guide/notebooks.md +++ b/docs/guide/notebooks.md @@ -1,17 +1,25 @@ # Notebook gallery -`atomref` ships example Jupyter notebooks that mirror the main v0.1 user -workflows. They live in the repository under `notebooks/` and can be opened -locally with JupyterLab, VS Code, or any other notebook frontend. +`atomref` ships example Jupyter notebooks that cover the main v0.1 workflows. 
+Each notebook is available in two forms: -Available notebooks: +- the original `.ipynb` file in the repository, +- a rendered Markdown copy included in these docs. + +That way users can either run the notebooks locally or read them directly on the +documentation site. + +## Available notebooks + +- [Quickstart notebook](../notebooks/01-quickstart.md) — basic imports, + `get_*` vs `lookup_*`, quantity discovery, and packaged-set access. +- [Policies and assessment notebook](../notebooks/02-policies-and-assessment.md) + — overrides, transfer-backed policies, and policy summaries. +- [Custom sets and discovery notebook](../notebooks/03-custom-sets-and-discovery.md) + — user-defined sets, catalog inspection, and metadata exploration. + +The original notebook files are also in the repository: - [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) - – basic imports, element helpers, `get_*` vs `lookup_*`, quantity discovery. - [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) - – transfer policies, substitution vs linear transfer, policy assessment. - [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) - – custom user-defined sets, catalog inspection, metadata discovery. - -The notebooks are plain JSON files without heavy execution metadata so they stay -diff-friendly in version control. diff --git a/docs/guide/policies.md b/docs/guide/policies.md index a5a5b1b..62663ae 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -1,28 +1,102 @@ # Policies -A policy is the ordered rule set for selecting a value. +A policy tells `atomref` how to answer the question “what value should I use for +this element?” -Resolution order in v0.1: +That may sound simple, but in practice scientific datasets are often +incomplete. A policy makes the decision process explicit instead of hiding it in +algorithm code. -1. override -2. base dataset -3. transfers in order -4. fallback -5. missing +## Resolution order -Built-in transfer models: +In v0.1 every lookup follows the same ordered path: -- `SubstitutionTransfer` -- `LinearTransfer` +1. **Override** +2. **Base dataset** +3. **Transfer models**, in the order you listed them +4. **Fallback** +5. **Missing** -`LinearTransfer` is intentionally limited to one predictor in v0.1, but the API -already accepts a predictor tuple so later multi-predictor linear models do not -require a redesign. +Each step has a specific meaning. -## Target vs support sets +### Override -`atomref` keeps the lookup behavior separate from the scientific classification -of a dataset. In addition, each built-in dataset now carries a package-level -`usage_role` such as `target` or `support`. This is how `rahm2016` can remain -available for linear transfer into `alvarez2013`-style vdW values without being -misrepresented as a direct condensed-phase vdW target set. +An override is a value you provide directly for a specific element. It wins over +everything else and is useful when you want to pin one or two elements without +changing the whole dataset. + +### Base dataset + +The base dataset is the preferred source. For example, the default covalent +policy starts from the **Cordero covalent radii** (`cordero2008`), and the +default vdW policy starts from the **Alvarez van der Waals radii** +(`alvarez2013`). 
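+
+For an element that the base dataset covers, resolution stops at this step. A
+small example with the default vdW policy (the numbers in the comments assume
+the packaged `alvarez2013` table):
+
+```python
+import atomref as ar
+
+lookup = ar.lookup_vdw_radius("C")
+print(lookup.source)         # 'base'
+print(lookup.value)          # 1.77 from alvarez2013
+print(lookup.resolved_from)  # the alvarez2013 dataset reference
+```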
+ +### Transfer + +A transfer model is used only when the base dataset has no value for the +requested element. + +Built-in transfer models in v0.1 are: + +- `SubstitutionTransfer` — take a value directly from another dataset, +- `LinearTransfer` — infer a target-equivalent value from a support dataset + through a fitted linear model. + +`LinearTransfer` already accepts a tuple of predictors in the API, but the v0.1 +runtime intentionally supports exactly one predictor dataset. That keeps the +implementation simple now while leaving room for later multi-predictor linear +models. + +### Fallback + +A fallback is a constant last-resort value. It is useful when an algorithm must +receive *some* number even if both the base dataset and transfer sources are +missing a value. + +### Missing + +If nothing above can produce a value and no fallback was configured, the result +is simply missing. In that case `get_*` returns `None`, while `lookup_*` +returns a `LookupResult` with `source="missing"` and explanatory notes. + +## Target datasets and support datasets + +`atomref` separates **what a dataset is used for** from **what it scientifically +represents**. + +That is why the package stores: + +- the operational **quantity**, +- the scientific **semantic class**, +- the package-level **usage role**. + +This distinction matters for datasets such as **Rahm isodensity atomic radii** +(`rahm2016`). They are useful support data for restoring missing van der Waals +radii, but they are not the same thing as a condensed-phase structural vdW +radius set. In `atomref`, that difference is recorded in the metadata instead of +being hidden. + +## Example + +```python +import atomref as ar + +policy = ar.RadiiPolicy( + kind="van_der_waals", + base_set="alvarez2013", + transfers=( + ar.LinearTransfer( + predictors=(ar.DatasetRef("atomic_radius", "rahm2016"),), + ), + ), + overrides={"Xe": 2.10}, +) +``` + +With that policy: + +- xenon uses the explicit override, +- elements present in `alvarez2013` use the base vdW value, +- missing elements may be restored from `rahm2016`, +- anything still unresolved remains missing unless you also set a fallback. diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index d23e99f..3649653 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -1,46 +1,56 @@ # Quickstart -```python -import atomref as ar - -print(ar.get_covalent_radius("C")) -print(ar.get_vdw_radius("O")) - -m = ar.lookup_vdw_radius("Pm") -print(m.value) -print(m.source) -print(m.resolved_from) +The two most important user-facing ideas in `atomref` are: + +- `get_*` returns only the selected number, +- `lookup_*` returns the number **and** provenance metadata. + +```pycon +>>> import atomref as ar +>>> ar.get_covalent_radius("C") +0.76 +>>> ar.get_vdw_radius("O") +1.5 +>>> lookup = ar.lookup_vdw_radius("Pm") +>>> lookup.value +2.8972265395148358 +>>> lookup.source +'transfer_linear' +>>> lookup.resolved_from +(DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) ``` -Use `get_*` when you only need the number, and `lookup_*` when you need -provenance. - -You can also inspect the packaged quantity layer directly: +Use `get_*` when you only need the value. Use `lookup_*` when you want to know +whether the result came from the preferred dataset, a support dataset, a policy +override, or a fallback. 
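+
+When nothing can produce a value, the two call styles differ only in how much
+they report: `get_*` returns `None`, while `lookup_*` returns a `LookupResult`
+whose `source` is `"missing"`. A small sketch (which elements actually end up
+missing depends on the policy and the packaged tables, so treat the symbol
+below as a placeholder):
+
+```python
+import atomref as ar
+
+# get_* gives only the number, or None when nothing could be resolved.
+value = ar.get_vdw_radius("Og")
+
+# lookup_* still returns a LookupResult with the source and explanatory notes.
+lookup = ar.lookup_vdw_radius("Og")
+print(value, lookup.source, lookup.notes)
+```
+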
-```python -import atomref as ar +You can inspect the packaged quantity layer directly: -print(ar.list_quantities()) -print(ar.get_quantity_info("atomic_radius")) -print(ar.list_dataset_infos("covalent_radius")) -print(ar.list_radii_set_infos("van_der_waals", usage_role="target")) +```pycon +>>> import atomref as ar +>>> ar.list_quantities() +('covalent_radius', 'van_der_waals_radius', 'atomic_radius') +>>> ar.get_quantity_info("atomic_radius") +QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +>>> [info.ref.set_id for info in ar.list_radii_set_infos("van_der_waals", usage_role="target")] +['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] ``` -You can also retrieve the packaged set object directly: - -```python -import atomref as ar - -vdw = ar.get_radii_set("van_der_waals", "alvarez2013") -print(vdw.get("O")) +And you can load a packaged set object directly: -raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) -print(raw.get("Pm")) +```pycon +>>> import atomref as ar +>>> vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +>>> vdw.get("O") +1.5 +>>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +>>> raw.get("Pm") +2.83 ``` -Need runnable versions of these examples? See the notebooks page and the -matching notebook files in the repository: +For longer, runnable examples see: -- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) -- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) -- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) +- the [notebook overview](notebooks.md), +- the [quickstart notebook page](../notebooks/01-quickstart.md), +- the [policies notebook page](../notebooks/02-policies-and-assessment.md), +- the [custom sets notebook page](../notebooks/03-custom-sets-and-discovery.md). diff --git a/docs/index.md b/docs/index.md index 7d7253c..c59777e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,104 +1,121 @@ # atomref -`atomref` is a small pure-Python package for curated atomic reference data and -policy-based lookup in geometry and structure-analysis code. +[![CI](https://github.com/DeloneCommons/atomref/actions/workflows/ci.yml/badge.svg)](https://github.com/DeloneCommons/atomref/actions/workflows/ci.yml) +[![Docs](https://github.com/DeloneCommons/atomref/actions/workflows/docs.yml/badge.svg)](https://github.com/DeloneCommons/atomref/actions/workflows/docs.yml) +[![PyPI](https://img.shields.io/pypi/v/atomref.svg)](https://pypi.org/project/atomref/) +[![Python Versions](https://img.shields.io/pypi/pyversions/atomref.svg)](https://pypi.org/project/atomref/) +[![License](https://img.shields.io/pypi/l/atomref.svg)](https://github.com/DeloneCommons/atomref/blob/main/LICENSE) -It is **not** a periodic-table encyclopedia. The package is meant to sit under -higher-level scientific software and provide: +`atomref` is a small pure-Python package for **curated atomic reference data** +and **provenance-aware lookup policies** used by geometry and +structure-analysis algorithms. + +It is not meant to be yet another periodic-table encyclopedia. 
The package is +for code that needs stable atomic reference values with explicit provenance, +clear fallback behavior, and honest handling of incomplete preferred datasets. + +What you get in v0.1: - stable element metadata, -- named radii sets, -- explicit dataset provenance, +- curated named radii sets, +- dataset provenance and coverage metadata, - deterministic lookup policies, -- transfer from broader-support datasets into narrower target sets. - -For v0.1 the public scope is intentionally radii-first. +- substitution and linear transfer from support datasets into target datasets, +- user-defined custom element-indexed scalar sets. ## Why this exists -Many geometry algorithms need a complete reference table, but the scientifically -preferred dataset is often incomplete. `atomref` makes that situation explicit: -choose a target dataset, add one or more transfer steps, and keep provenance on -what was returned. +Scientific software often wants a complete lookup table, but the best dataset +for the job is rarely complete. `atomref` makes that situation explicit. +Instead of hiding ad hoc defaults inside algorithm code, you choose a target +set, describe how missing values may be restored, and keep provenance on what +was actually returned. -The default examples mirror the current `molcryst` behavior: +The default v0.1 behavior is intentionally simple and practical: -- covalent radii: use `cordero2008`, substitute from `csd_legacy_cov` -- van der Waals radii: use `alvarez2013`, linearly transfer from - `atomic_radius:rahm2016` +- **Cordero covalent radii** (`cordero2008`) are the preferred covalent target + set, with missing values substituted from the **legacy CSD covalent radii** + (`csd_legacy_cov`). +- **Alvarez van der Waals radii** (`alvarez2013`) are the preferred vdW target + set, with missing values restored from the **Rahm isodensity atomic radii** + (`rahm2016`) through a fitted linear transfer. ## Quick example -```python -import atomref as ar - -r_c = ar.get_covalent_radius("C") -r_vdw = ar.get_vdw_radius("O") - -lookup = ar.lookup_vdw_radius("Pm") -print(lookup.value, lookup.source, lookup.resolved_from) +```pycon +>>> import atomref as ar +>>> ar.get_covalent_radius("C") +0.76 +>>> ar.get_vdw_radius("O") +1.5 +>>> lookup = ar.lookup_vdw_radius("Pm") +>>> lookup.value +2.8972265395148358 +>>> lookup.source +'transfer_linear' +>>> lookup.resolved_from +(DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) ``` -## Public API split: `get_*` vs `lookup_*` - -- `get_*` returns only the selected numeric value, or `None`. -- `lookup_*` returns the provenance-carrying `LookupResult` object. +`get_*` returns only the number. `lookup_*` returns a `LookupResult` that also +records where the value came from and whether a transfer model was involved. -This follows the current `molcryst` pattern. 
+You can inspect the packaged quantity and dataset catalog directly: -## Current built-in quantities - -- `covalent_radius` -- `van_der_waals_radius` -- `atomic_radius` (support quantity; currently used for transfer from - `rahm2016`) - -You can inspect the packaged quantity layer directly: - -```python -import atomref as ar - -print(ar.list_quantities()) -print(ar.get_quantity_info("atomic_radius")) -print(ar.list_dataset_infos("van_der_waals_radius", usage_role="target")) -print(ar.list_dataset_infos("atomic_radius", usage_role="support")) +```pycon +>>> import atomref as ar +>>> ar.list_quantities() +('covalent_radius', 'van_der_waals_radius', 'atomic_radius') +>>> ar.get_quantity_info("atomic_radius") +QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +>>> [info.ref.set_id for info in ar.list_dataset_infos("van_der_waals_radius", usage_role="target")] +['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] ``` -You can also retrieve the packaged set object directly: +You can also load a packaged set directly: -```python -import atomref as ar +```pycon +>>> import atomref as ar +>>> vdw = ar.get_radii_set("van_der_waals", "alvarez2013") +>>> vdw.get("O") +1.5 +>>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) +>>> raw.get("Pm") +2.83 +``` -vdw = ar.get_radii_set("van_der_waals", "alvarez2013") -print(vdw.get("O")) +## Notebook walkthroughs -raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) -print(raw.get("Pm")) -``` +The repository ships example notebooks for the main v0.1 workflows. In the +documentation they are also available as rendered Markdown pages, so users can +read them without opening Jupyter first. -## Notebooks +- [Notebook overview](https://delonecommons.github.io/atomref/guide/notebooks/) +- [Quickstart notebook](https://delonecommons.github.io/atomref/notebooks/01-quickstart/) +- [Policies and assessment notebook](https://delonecommons.github.io/atomref/notebooks/02-policies-and-assessment/) +- [Custom sets and discovery notebook](https://delonecommons.github.io/atomref/notebooks/03-custom-sets-and-discovery/) -Hands-on notebooks live in the repository and mirror the main v0.1 workflows: +## Relationship to Delone Commons -- [`01-quickstart.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) -- [`02-policies-and-assessment.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) -- [`03-custom-sets-and-discovery.ipynb`](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) +`atomref` is designed as a standalone package, but within Delone Commons it is +primarily intended to support chemistry-aware packages such as: -Open them locally in Jupyter or browse them on GitHub for worked examples of -lookup, transfer-backed policies, dataset discovery, and custom element-scalar -sets. +- `molcryst`, for covalent-bond detection and contact analysis, +- future `chemvoro`, for chemistry-aware contact and hydrogen workflows. -## Relationship to the Delone Commons ecosystem +By contrast, `pyvoro2` and `pbcgraph` are intentionally general mathematical +packages and are not direct consumers of `atomref`. 
-`atomref` is intended to be reusable outside the surrounding ecosystem, but it -fits naturally beneath: +## Data curation and developer tools -- `molcryst` -- `pyvoro2` -- `pbcgraph` +The repository also ships small maintenance tools. The most important ones are: -Those packages should consume atomic reference data from `atomref` rather than -re-curating such datasets independently. +- `python tools/check_registry.py` — validate curated registry metadata against + packaged CSV tables, +- `python tools/check_notebooks.py` — execute notebook code cells, +- `python tools/export_notebooks.py` — turn notebooks into Markdown pages for + the docs, +- `python tools/gen_readme.py` — regenerate `README.md` from this page. -For data-curation changes, validate the packaged registry against the bundled -CSV tables with `python tools/check_registry.py`. +See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) +for a short description of each script. diff --git a/docs/notebooks/01-quickstart.md b/docs/notebooks/01-quickstart.md new file mode 100644 index 0000000..3a9f22b --- /dev/null +++ b/docs/notebooks/01-quickstart.md @@ -0,0 +1,72 @@ + + +[Open the original notebook on GitHub](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) +# atomref quickstart + +This notebook covers the main public API in v0.1: element helpers, direct +`get_*` calls, provenance-carrying `lookup_*` calls, and packaged dataset +discovery. +```python +import atomref as ar + +print(ar.get_element('Cl')) +print(ar.list_quantities()) +``` +**Output** +```text +Element(z=17, symbol='Cl', name='Chlorine') +('covalent_radius', 'van_der_waals_radius', 'atomic_radius') +``` +```python +r_c = ar.get_covalent_radius('C') +r_vdw = ar.get_vdw_radius('O') +print(r_c) +print(r_vdw) +assert r_c == 0.76 +assert r_vdw == 1.50 +``` +**Output** +```text +0.76 +1.5 +``` +```python +lookup = ar.lookup_vdw_radius('Pm') +print(f"{lookup.value:.12f}") +print(lookup.source) +print(lookup.resolved_from) +assert lookup.source == 'transfer_linear' +``` +**Output** +```text +2.897226539515 +transfer_linear +(DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) +``` +```python +quantity = ar.get_quantity_info('atomic_radius') +print(quantity.quantity, quantity.domain, quantity.units) + +for info in ar.list_dataset_infos('van_der_waals_radius', usage_role='target'): + print(info.ref.set_id, info.name, info.usage_role) +``` +**Output** +```text +atomic_radius element angstrom +bondi1964 Bondi van der Waals radii target +rowland_taylor1996 Rowland & Taylor nonbonded contact radii target +alvarez2013 Alvarez van der Waals radii target +chernyshov2020 Chernyshov LoS van der Waals radii target +``` +```python +vdw = ar.get_radii_set('van_der_waals', 'alvarez2013') +print(vdw.get('O')) + +support = ar.get_builtin_set(ar.DatasetRef('atomic_radius', 'rahm2016')) +print(support.get('Pm')) +``` +**Output** +```text +1.5 +2.83 +``` diff --git a/docs/notebooks/02-policies-and-assessment.md b/docs/notebooks/02-policies-and-assessment.md new file mode 100644 index 0000000..4f6baf6 --- /dev/null +++ b/docs/notebooks/02-policies-and-assessment.md @@ -0,0 +1,73 @@ + + +[Open the original notebook on GitHub](https://github.com/DeloneCommons/atomref/blob/main/notebooks/02-policies-and-assessment.ipynb) +# Policies and assessment + +This notebook shows how `atomref` resolves missing values through ordered +policy steps and how to inspect policy-level behavior. 
+```python +import atomref as ar +``` +```python +covalent_policy = ar.RadiiPolicy( + kind='covalent', + base_set='cordero2008', + transfers=( + ar.SubstitutionTransfer( + source=ar.DatasetRef('covalent_radius', 'csd_legacy_cov') + ), + ), +) +lookup = ar.lookup_covalent_radius('Bk', policy=covalent_policy) +print(lookup.source) +print(f"{lookup.value:.12f}") +print(lookup.resolved_from) +``` +**Output** +```text +transfer_substitution +1.540000000000 +(DatasetRef(quantity='covalent_radius', set_id='csd_legacy_cov'),) +``` +```python +vdw_policy = ar.RadiiPolicy( + kind='van_der_waals', + base_set='alvarez2013', + transfers=( + ar.LinearTransfer( + predictors=(ar.DatasetRef('atomic_radius', 'rahm2016'),) + ), + ), +) +lookup = ar.lookup_vdw_radius('Pm', policy=vdw_policy) +print(f"{lookup.value:.12f}") +print(lookup.source) +print( + f"slope={lookup.fit.coefficients[0]:.12f} intercept={lookup.fit.intercept:.12f} n={lookup.fit.n_points}" +) +``` +**Output** +```text +2.897226539515 +transfer_linear +slope=1.135336645553 intercept=-0.315776167399 n=90 +``` +```python +assessment = ar.assess_radii_policy( + ['C', 'Xe', 'Pm', 'Bk'], + policy=vdw_policy, + detail=True, +) +print(assessment.n_base, assessment.n_transfer_linear, assessment.n_missing) +for row in assessment.per_element: + value = 'None' if row.lookup.value is None else f"{row.lookup.value:.12f}" + print(row.symbol, row.lookup.source, value) +``` +**Output** +```text +3 1 0 +C base 1.770000000000 +Xe base 2.060000000000 +Pm transfer_linear 2.897226539515 +Bk base 3.400000000000 +``` diff --git a/docs/notebooks/03-custom-sets-and-discovery.md b/docs/notebooks/03-custom-sets-and-discovery.md new file mode 100644 index 0000000..51dc5e2 --- /dev/null +++ b/docs/notebooks/03-custom-sets-and-discovery.md @@ -0,0 +1,56 @@ + + +[Open the original notebook on GitHub](https://github.com/DeloneCommons/atomref/blob/main/notebooks/03-custom-sets-and-discovery.ipynb) +# Custom sets and dataset discovery + +This notebook shows how to define a small user-provided set, plug it into a +policy, and inspect the packaged dataset catalog. 
+```python +import atomref as ar +``` +```python +custom_cov = ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef("covalent_radius", "demo_user_cov"), + values={"C": 0.77, "O": 0.67}, + name="Demo user covalent set", + units="angstrom", + description="Example custom set for notebook usage.", + notes=("Notebook example",), +) + +policy = ar.RadiiPolicy( + kind="covalent", + base_set=custom_cov, + transfers=( + ar.SubstitutionTransfer( + source=ar.DatasetRef("covalent_radius", "cordero2008") + ), + ), +) + +for symbol in ("C", "O", "N"): + print(symbol, ar.lookup_covalent_radius(symbol, policy=policy)) +``` +**Output** +```text +C LookupResult(value=0.77, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=()) +O LookupResult(value=0.67, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=()) +N LookupResult(value=0.71, source='transfer_substitution', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='cordero2008'),), is_placeholder=False, fit=None, notes=('missing in base set; substituted from transfer source',)) +``` +```python +for info in ar.list_radii_set_infos("van_der_waals", usage_role="target"): + print(info.ref.set_id, info.semantic_class, info.origin_class, info.phase_context) + +rahm = ar.get_dataset_info(ar.DatasetRef("atomic_radius", "rahm2016")) +print(rahm.name) +print(rahm.semantic_class, rahm.phase_context, rahm.usage_role) +``` +**Output** +```text +bondi1964 vdw_compiled compiled_experimental mixed_or_legacy +rowland_taylor1996 vdw_structural structural condensed_phase +alvarez2013 vdw_structural structural condensed_phase +chernyshov2020 vdw_structural_typed_reduced structural condensed_phase +Rahm isodensity atomic radii (ρ=0.001 e/bohr³) +atomic_isodensity isolated_atom support +``` diff --git a/mkdocs.yml b/mkdocs.yml index 2a97e1a..c3e560c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,7 +20,6 @@ nav: - Guide: - Install: guide/install.md - Quickstart: guide/quickstart.md - - Notebooks: guide/notebooks.md - Policies: guide/policies.md - Custom sets: guide/custom_sets.md - Non-goals: guide/non_goals.md @@ -29,6 +28,11 @@ nav: - Covalent radius: datasets/covalent_radius.md - van der Waals radius: datasets/van_der_waals_radius.md - Atomic radius: datasets/atomic_radius.md + - Notebooks: + - Overview: guide/notebooks.md + - Quickstart notebook: notebooks/01-quickstart.md + - Policies and assessment notebook: notebooks/02-policies-and-assessment.md + - Custom sets and discovery notebook: notebooks/03-custom-sets-and-discovery.md - Development: - Architecture: dev/architecture.md - Data curation: dev/data_curation.md @@ -36,3 +40,8 @@ nav: - API: - Overview: api/index.md - atomref: api/atomref.md + - atomref.elements: api/elements.md + - atomref.registry: api/registry.md + - atomref.transfer: api/transfer.md + - atomref.policy: api/policy.md + - atomref.radii: api/radii.md diff --git a/notebooks/01-quickstart.ipynb b/notebooks/01-quickstart.ipynb index 2c09cc0..6d6d16f 100644 --- a/notebooks/01-quickstart.ipynb +++ b/notebooks/01-quickstart.ipynb @@ -1,77 +1,93 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# atomref quickstart\n", - "\n", - "This 
notebook covers the basic public API: element helpers, direct `get_*` calls, provenance-carrying `lookup_*` calls, and quantity / dataset discovery.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import atomref as ar\n", - "\n", - "print(ar.get_element(\"Cl\"))\n", - "print(ar.list_quantities())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "r_c = ar.get_covalent_radius(\"C\")\n", - "r_vdw = ar.get_vdw_radius(\"O\")\n", - "print(r_c)\n", - "print(r_vdw)\n", - "assert r_c == 0.76\n", - "assert r_vdw == 1.50\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lookup = ar.lookup_vdw_radius(\"Pm\")\n", - "print(lookup)\n", - "print(lookup.value)\n", - "print(lookup.source)\n", - "print(lookup.resolved_from)\n", - "assert lookup.source == \"transfer_linear\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(ar.get_quantity_info(\"atomic_radius\"))\n", - "for info in ar.list_dataset_infos(\"van_der_waals_radius\", usage_role=\"target\"):\n", - " print(info.ref.set_id, info.semantic_class, info.origin_class)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# atomref quickstart\n", + "\n", + "This notebook covers the main public API in v0.1: element helpers, direct\n", + "`get_*` calls, provenance-carrying `lookup_*` calls, and packaged dataset\n", + "discovery.\n" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import atomref as ar\n", + "\n", + "print(ar.get_element('Cl'))\n", + "print(ar.list_quantities())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r_c = ar.get_covalent_radius('C')\n", + "r_vdw = ar.get_vdw_radius('O')\n", + "print(r_c)\n", + "print(r_vdw)\n", + "assert r_c == 0.76\n", + "assert r_vdw == 1.50\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup = ar.lookup_vdw_radius('Pm')\n", + "print(f\"{lookup.value:.12f}\")\n", + "print(lookup.source)\n", + "print(lookup.resolved_from)\n", + "assert lookup.source == 'transfer_linear'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quantity = ar.get_quantity_info('atomic_radius')\n", + "print(quantity.quantity, quantity.domain, quantity.units)\n", + "\n", + "for info in ar.list_dataset_infos('van_der_waals_radius', usage_role='target'):\n", + " print(info.ref.set_id, info.name, info.usage_role)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vdw = ar.get_radii_set('van_der_waals', 'alvarez2013')\n", + "print(vdw.get('O'))\n", + "\n", + "support = ar.get_builtin_set(ar.DatasetRef('atomic_radius', 'rahm2016'))\n", + "print(support.get('Pm'))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 } diff --git a/notebooks/02-policies-and-assessment.ipynb b/notebooks/02-policies-and-assessment.ipynb index 7db7e45..dfe2678 100644 --- a/notebooks/02-policies-and-assessment.ipynb +++ b/notebooks/02-policies-and-assessment.ipynb @@ -1,96 +1,97 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Policies and assessment\n", - "\n", - "This notebook shows how `atomref` resolves missing values through ordered transfer steps and how to inspect policy-level behavior.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import atomref as ar\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "covalent_policy = ar.RadiiPolicy(\n", - " kind=\"covalent\",\n", - " base_set=\"cordero2008\",\n", - " transfers=(\n", - " ar.SubstitutionTransfer(\n", - " source=ar.DatasetRef(\"covalent_radius\", \"csd_legacy_cov\")\n", - " ),\n", - " ),\n", - ")\n", - "\n", - "lookup_bk = ar.lookup_covalent_radius(\"Bk\", policy=covalent_policy)\n", - "print(lookup_bk)\n", - "assert lookup_bk.source == \"transfer_substitution\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vdw_policy = ar.RadiiPolicy(\n", - " kind=\"van_der_waals\",\n", - " base_set=\"alvarez2013\",\n", - " transfers=(\n", - " ar.LinearTransfer(\n", - " predictors=(ar.DatasetRef(\"atomic_radius\", \"rahm2016\"),)\n", - " ),\n", - " ),\n", - ")\n", - "\n", - "lookup_pm = ar.lookup_vdw_radius(\"Pm\", policy=vdw_policy)\n", - "print(lookup_pm.fit)\n", - "print(lookup_pm.value)\n", - "assert lookup_pm.source == \"transfer_linear\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assessment = ar.assess_radii_policy(\n", - " [\"C\", \"Xe\", \"Pm\", \"Bk\"],\n", - " policy=vdw_policy,\n", - " detail=True,\n", - ")\n", - "\n", - "print(assessment)\n", - "print(assessment.n_base, assessment.n_transfer_linear)\n", - "for item in assessment.per_element:\n", - " print(item.symbol, item.lookup.source)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.11" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Policies and assessment\n", + "\n", + "This notebook shows how `atomref` resolves missing values through ordered\n", + "policy steps and how to inspect policy-level behavior.\n" + ] }, - "nbformat": 4, - "nbformat_minor": 5 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import atomref as ar\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "covalent_policy = ar.RadiiPolicy(\n", + " kind='covalent',\n", + " base_set='cordero2008',\n", + " transfers=(\n", + " ar.SubstitutionTransfer(\n", + " source=ar.DatasetRef('covalent_radius', 'csd_legacy_cov')\n", + " ),\n", + " ),\n", + ")\n", + "lookup = ar.lookup_covalent_radius('Bk', policy=covalent_policy)\n", + "print(lookup.source)\n", + "print(f\"{lookup.value:.12f}\")\n", + "print(lookup.resolved_from)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vdw_policy = ar.RadiiPolicy(\n", + " kind='van_der_waals',\n", + " base_set='alvarez2013',\n", 
+ " transfers=(\n", + " ar.LinearTransfer(\n", + " predictors=(ar.DatasetRef('atomic_radius', 'rahm2016'),)\n", + " ),\n", + " ),\n", + ")\n", + "lookup = ar.lookup_vdw_radius('Pm', policy=vdw_policy)\n", + "print(f\"{lookup.value:.12f}\")\n", + "print(lookup.source)\n", + "print(\n", + " f\"slope={lookup.fit.coefficients[0]:.12f} intercept={lookup.fit.intercept:.12f} n={lookup.fit.n_points}\"\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assessment = ar.assess_radii_policy(\n", + " ['C', 'Xe', 'Pm', 'Bk'],\n", + " policy=vdw_policy,\n", + " detail=True,\n", + ")\n", + "print(assessment.n_base, assessment.n_transfer_linear, assessment.n_missing)\n", + "for row in assessment.per_element:\n", + " value = 'None' if row.lookup.value is None else f\"{row.lookup.value:.12f}\"\n", + " print(row.symbol, row.lookup.source, value)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/notebooks/03-custom-sets-and-discovery.ipynb b/notebooks/03-custom-sets-and-discovery.ipynb index 827c91f..58f9d92 100644 --- a/notebooks/03-custom-sets-and-discovery.ipynb +++ b/notebooks/03-custom-sets-and-discovery.ipynb @@ -6,7 +6,8 @@ "source": [ "# Custom sets and dataset discovery\n", "\n", - "This notebook shows how to define a small user-provided set, plug it into a policy, and inspect the packaged dataset catalog.\n" + "This notebook shows how to define a small user-provided set, plug it into a\n", + "policy, and inspect the packaged dataset catalog.\n" ] }, { diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index 815b42c..6104c9d 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -1,3 +1,5 @@ +"""Public package exports for :mod:`atomref`.""" + from .__about__ import __version__ from .elements import ( Element, diff --git a/src/atomref/elements.py b/src/atomref/elements.py index 42f0598..5245b80 100644 --- a/src/atomref/elements.py +++ b/src/atomref/elements.py @@ -1,4 +1,4 @@ -"""Periodic table access for stable element identity.""" +"""Periodic-table access for stable element identity.""" from __future__ import annotations @@ -9,13 +9,13 @@ from importlib import resources -_MISSING_TOKENS = {'', '?', '.'} -_LEADING_ALPHA_RE = re.compile(r'([A-Za-z]{1,3})') +_MISSING_TOKENS = {"", "?", "."} +_LEADING_ALPHA_RE = re.compile(r"([A-Za-z]{1,3})") @dataclass(frozen=True, slots=True) class Element: - """Chemical element identity.""" + """Chemical element identity keyed by atomic number and symbol.""" z: int symbol: str @@ -23,6 +23,8 @@ class Element: def _normalize_element_token(token: str | None) -> str | None: + """Strip quotes and obvious missing-value markers from a token.""" + if token is None: return None @@ -43,7 +45,12 @@ def _normalize_element_token(token: str | None) -> str | None: def canonicalize_element_symbol(token: str | None) -> str | None: - """Canonicalize a free-form element token.""" + """Canonicalize a free-form token to a conventional element symbol. + + The function accepts strings such as ``"cl"``, ``" Cl "`` or + ``"Cl12"`` and returns ``"Cl"`` when a leading element-like token can be + identified. Missing-value markers and non-element strings return ``None``. 
+ """ raw = _normalize_element_token(token) if raw is None: @@ -59,25 +66,29 @@ def canonicalize_element_symbol(token: str | None) -> str | None: @lru_cache(maxsize=1) def _load_elements_by_symbol() -> dict[str, Element]: - table_path = resources.files('atomref.data').joinpath('periodic_table.csv') - with table_path.open('r', encoding='utf-8', newline='') as handle: + """Load the packaged periodic table into a symbol-keyed mapping.""" + + table_path = resources.files("atomref.data").joinpath("periodic_table.csv") + with table_path.open("r", encoding="utf-8", newline="") as handle: reader = csv.DictReader(handle) out: dict[str, Element] = {} for row in reader: - z = int(row['z']) - symbol = row['symbol'] - name = row['name'] + z = int(row["z"]) + symbol = row["symbol"] + name = row["name"] out[symbol] = Element(z=z, symbol=symbol, name=name) return out @lru_cache(maxsize=1) def _elements_in_z_order() -> tuple[Element, ...]: + """Return packaged elements sorted by increasing atomic number.""" + return tuple(sorted(_load_elements_by_symbol().values(), key=lambda e: e.z)) def is_valid_element_symbol(symbol: str | None) -> bool: - """Return ``True`` if ``symbol`` is a known element symbol.""" + """Return ``True`` if ``symbol`` is a known packaged element symbol.""" if symbol is None: return False @@ -85,7 +96,7 @@ def is_valid_element_symbol(symbol: str | None) -> bool: def get_element(symbol: str | None) -> Element | None: - """Look up element identity by symbol or free-form token.""" + """Look up packaged element identity from a symbol-like token.""" sym = canonicalize_element_symbol(symbol) if sym is None: diff --git a/src/atomref/errors.py b/src/atomref/errors.py index 1922cf5..d31660a 100644 --- a/src/atomref/errors.py +++ b/src/atomref/errors.py @@ -1,9 +1,12 @@ +"""Package-local exceptions used across :mod:`atomref`.""" + + class AtomrefError(Exception): - """Base package error.""" + """Base class for package-defined errors.""" class DatasetError(AtomrefError): - """Packaged dataset or registry error.""" + """Raised when packaged data or registry metadata are invalid.""" class MissingValueError(AtomrefError): @@ -11,4 +14,4 @@ class MissingValueError(AtomrefError): class PolicyError(AtomrefError): - """Raised for invalid policy configuration.""" + """Raised for invalid policy configuration or transfer resolution.""" diff --git a/src/atomref/policy.py b/src/atomref/policy.py index 5b242e2..36741fe 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -34,6 +34,12 @@ @dataclass(frozen=True, slots=True) class LookupResult: + """Result of resolving one value through a policy. + + ``value`` carries the final scalar value when one could be produced, while + ``source`` and the remaining metadata explain how that value was obtained. + """ + value: float | None source: LookupSource target: DatasetRef @@ -43,6 +49,8 @@ class LookupResult: notes: tuple[str, ...] = () def __float__(self) -> float: + """Coerce the resolved value to ``float`` or raise if it is missing.""" + if self.value is None: raise TypeError("reference value is missing") return float(self.value) @@ -50,6 +58,8 @@ def __float__(self) -> float: @dataclass(frozen=True, slots=True) class ValuePolicy(Generic[K]): + """Ordered rule set for resolving element-domain scalar values.""" + base: DatasetLike transfers: tuple[TransferModel, ...] 
= () overrides: Mapping[K, float] = field(default_factory=dict) @@ -57,6 +67,11 @@ class ValuePolicy(Generic[K]): def _normalize_element_symbol(symbol: str | None) -> str | None: + """Normalize user input to a packaged element symbol. + + The current resolver treats ``D`` and ``T`` as hydrogen aliases. + """ + cand = canonicalize_element_symbol(symbol) if cand in {"D", "T"}: cand = "H" @@ -68,6 +83,8 @@ def _normalize_element_symbol(symbol: str | None) -> str | None: def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: + """Return the target dataset reference implied by a policy base.""" + return resolve_dataset_like(policy.base).ref @@ -78,6 +95,8 @@ def _fit_linear_transfer( min_points: int, exclude_placeholders: bool, ) -> LinearFit: + """Fit a one-predictor linear transfer model between two datasets.""" + xs: list[float] = [] ys: list[float] = [] @@ -133,6 +152,8 @@ def _fit_linear_transfer_cached( min_points: int, exclude_placeholders: bool, ) -> LinearFit: + """Cache fits between two packaged datasets for repeated reuse.""" + return _fit_linear_transfer( get_builtin_set(base_ref), get_builtin_set(predictor_ref), @@ -142,6 +163,8 @@ def _fit_linear_transfer_cached( def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit | None: + """Return the fit object for a transfer model when it needs one.""" + if not isinstance(transfer, LinearTransfer): return None if len(transfer.predictors) != 1: @@ -150,7 +173,10 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit predictor = transfer.predictors[0] if isinstance(base, DatasetRef) and isinstance(predictor, DatasetRef): return _fit_linear_transfer_cached( - base, predictor, transfer.min_points, transfer.exclude_placeholders + base, + predictor, + transfer.min_points, + transfer.exclude_placeholders, ) return _fit_linear_transfer( resolve_dataset_like(base), @@ -161,8 +187,13 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit def _apply_substitution_transfer( - symbol: str, *, target: DatasetRef, transfer: SubstitutionTransfer + symbol: str, + *, + target: DatasetRef, + transfer: SubstitutionTransfer, ) -> tuple[LookupResult | None, str | None]: + """Try to resolve ``symbol`` by direct substitution from another dataset.""" + source_set = resolve_dataset_like(transfer.source) value = source_set.get(symbol) if value is None: @@ -182,8 +213,14 @@ def _apply_substitution_transfer( def _apply_linear_transfer( - symbol: str, *, base: DatasetLike, target: DatasetRef, transfer: LinearTransfer + symbol: str, + *, + base: DatasetLike, + target: DatasetRef, + transfer: LinearTransfer, ) -> tuple[LookupResult | None, str | None]: + """Try to resolve ``symbol`` through linear transfer from predictor data.""" + if len(transfer.predictors) != 1: raise PolicyError("v0.1 LinearTransfer supports exactly one predictor dataset") @@ -194,7 +231,8 @@ def _apply_linear_transfer( predictor_f = float(predictor_value) if transfer.exclude_placeholders and _is_placeholder_value( - predictor_set.info, predictor_f + predictor_set.info, + predictor_f, ): return None, f"predictor value in {predictor_set.ref.set_id} is a placeholder" @@ -217,6 +255,8 @@ def _apply_linear_transfer( def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: + """Resolve a value through override, base, transfer, and fallback steps.""" + target = _resolve_target_ref(policy) base_set = resolve_dataset_like(policy.base) if base_set.info.domain != "element": @@ -251,11 
+291,16 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes for transfer in policy.transfers: if isinstance(transfer, SubstitutionTransfer): result, note = _apply_substitution_transfer( - sym, target=target, transfer=transfer + sym, + target=target, + transfer=transfer, ) elif isinstance(transfer, LinearTransfer): result, note = _apply_linear_transfer( - sym, base=policy.base, target=target, transfer=transfer + sym, + base=policy.base, + target=target, + transfer=transfer, ) else: # pragma: no cover - closed union today raise PolicyError(f"unsupported transfer model: {type(transfer)!r}") diff --git a/src/atomref/radii.py b/src/atomref/radii.py index cda8a89..01f13f4 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -32,6 +32,12 @@ @dataclass(frozen=True, slots=True) class RadiiPolicy: + """Policy wrapper specialized for radii lookup. + + ``kind`` determines the target quantity, while the remaining fields mirror + the generic :class:`atomref.policy.ValuePolicy` interface. + """ + kind: RadiiKind base_set: str | RadiiSet transfers: tuple[TransferModel, ...] = () @@ -39,6 +45,8 @@ class RadiiPolicy: fallback: float | None = None def as_value_policy(self) -> ValuePolicy[str]: + """Convert the radii policy into the generic scalar-value policy.""" + quantity = _quantity_for_kind(self.kind) if isinstance(self.base_set, ElementScalarSet): if self.base_set.ref.quantity != quantity: @@ -68,12 +76,16 @@ def as_value_policy(self) -> ValuePolicy[str]: @dataclass(frozen=True, slots=True) class RadiiElementAssessment: + """Per-element row in a radii policy assessment report.""" + symbol: str lookup: LookupResult @dataclass(frozen=True, slots=True) class RadiiPolicyAssessment: + """Summary of how a radii policy behaved over a set of elements.""" + kind: RadiiKind policy: RadiiPolicy elements: tuple[str, ...] 
@@ -96,6 +108,8 @@ class RadiiPolicyAssessment: def _quantity_for_kind(kind: RadiiKind) -> str: + """Translate public radii kind names into registry quantity ids.""" + try: return _KIND_TO_QUANTITY[kind] except KeyError as exc: @@ -103,6 +117,8 @@ def _quantity_for_kind(kind: RadiiKind) -> str: def _normalize_radii_symbol(symbol: str | None) -> str | None: + """Normalize symbols accepted by the radii convenience layer.""" + cand = canonicalize_element_symbol(symbol) if cand in {"D", "T"}: cand = "H" @@ -110,6 +126,8 @@ def _normalize_radii_symbol(symbol: str | None) -> str | None: def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: + """Normalize, validate, deduplicate, and sort assessment element labels.""" + symbols: set[str] = set() for token in elements: sym = _normalize_radii_symbol(token) @@ -124,65 +142,102 @@ def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: def list_radii_sets( - kind: RadiiKind, *, usage_role: str | None = None + kind: RadiiKind, + *, + usage_role: str | None = None, ) -> tuple[str, ...]: + """List packaged radii-set ids for one radii kind.""" + return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) def list_radii_set_infos( - kind: RadiiKind, *, usage_role: str | None = None + kind: RadiiKind, + *, + usage_role: str | None = None, ) -> tuple[DatasetInfo, ...]: + """Return packaged metadata objects for radii sets of one kind.""" + return list_dataset_infos(_quantity_for_kind(kind), usage_role=usage_role) def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: + """Return metadata for one packaged radii set.""" + return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) def get_radii_set(kind: RadiiKind, set_id: str) -> RadiiSet: + """Load one packaged radii set as an :class:`ElementScalarSet`.""" + return get_builtin_set(DatasetRef(_quantity_for_kind(kind), set_id)) def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: + """Raise when a policy is used with the wrong public radii helper.""" + if policy.kind != expected: raise PolicyError(f"expected a {expected!r} radii policy, got {policy.kind!r}") def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: + """Shared implementation for radii lookup helpers.""" + return _resolve_value(symbol, policy=policy.as_value_policy()) def lookup_covalent_radius( - symbol: str | None, *, policy: RadiiPolicy | None = None + symbol: str | None, + *, + policy: RadiiPolicy | None = None, ) -> LookupResult: + """Resolve a covalent radius together with provenance information.""" + active = DEFAULT_COVALENT_POLICY if policy is None else policy _validate_policy_kind(active, expected="covalent") return _lookup_radius(symbol, policy=active) def get_covalent_radius( - symbol: str | None, *, policy: RadiiPolicy | None = None + symbol: str | None, + *, + policy: RadiiPolicy | None = None, ) -> float | None: + """Return only the selected covalent-radius value, without provenance.""" + return lookup_covalent_radius(symbol, policy=policy).value def lookup_vdw_radius( - symbol: str | None, *, policy: RadiiPolicy | None = None + symbol: str | None, + *, + policy: RadiiPolicy | None = None, ) -> LookupResult: + """Resolve a van der Waals radius together with provenance information.""" + active = DEFAULT_VDW_POLICY if policy is None else policy _validate_policy_kind(active, expected="van_der_waals") return _lookup_radius(symbol, policy=active) def get_vdw_radius( - symbol: str | None, *, policy: 
RadiiPolicy | None = None + symbol: str | None, + *, + policy: RadiiPolicy | None = None, ) -> float | None: + """Return only the selected van der Waals radius, without provenance.""" + return lookup_vdw_radius(symbol, policy=policy).value def assess_radii_policy( - elements: Iterable[str], *, policy: RadiiPolicy, detail: bool = False + elements: Iterable[str], + *, + policy: RadiiPolicy, + detail: bool = False, ) -> RadiiPolicyAssessment: + """Assess how a radii policy resolves values over a set of elements.""" + elems = _normalize_assessment_elements(elements) value_policy = policy.as_value_policy() @@ -260,9 +315,11 @@ def assess_radii_policy( SubstitutionTransfer(source=DatasetRef("covalent_radius", "csd_legacy_cov")), ), ) +"""Default covalent-radii policy used by the convenience helpers.""" DEFAULT_VDW_POLICY = RadiiPolicy( kind="van_der_waals", base_set="alvarez2013", transfers=(LinearTransfer(predictors=(DatasetRef("atomic_radius", "rahm2016"),)),), ) +"""Default vdW-radii policy used by the convenience helpers.""" diff --git a/src/atomref/registry.py b/src/atomref/registry.py index c465786..594e98e 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -5,9 +5,9 @@ from collections.abc import Iterable, Mapping from dataclasses import dataclass import csv -import json from functools import lru_cache from importlib import resources +import json from .elements import canonicalize_element_symbol, get_element, iter_elements from .errors import DatasetError @@ -18,12 +18,20 @@ @dataclass(frozen=True, slots=True) class DatasetRef: + """Stable reference to a packaged dataset. + + The ``quantity`` identifies the operational property family, while + ``set_id`` names a specific curated dataset within that family. + """ + quantity: QuantityId set_id: str @dataclass(frozen=True, slots=True) class Reference: + """Bibliographic record attached to packaged dataset metadata.""" + authors: str | None = None year: int | None = None title: str | None = None @@ -36,6 +44,8 @@ class Reference: @dataclass(frozen=True, slots=True) class CoverageInfo: + """Coverage summary for an element-indexed scalar dataset.""" + n_values: int z_min: int | None = None z_max: int | None = None @@ -46,6 +56,8 @@ class CoverageInfo: @dataclass(frozen=True, slots=True) class QuantityInfo: + """Metadata shared by all datasets that belong to one quantity.""" + quantity: QuantityId domain: DomainId units: str | None = None @@ -54,6 +66,13 @@ class QuantityInfo: @dataclass(frozen=True, slots=True) class DatasetInfo: + """Curated metadata for one packaged dataset. + + This object keeps operational classification such as ``ref.quantity`` and + ``usage_role`` separate from scientific classification such as + ``semantic_class`` and ``phase_context``. + """ + ref: DatasetRef domain: DomainId units: str | None @@ -75,6 +94,8 @@ class DatasetInfo: @dataclass(frozen=True, slots=True) class ElementScalarSet: + """Element-indexed scalar dataset stored densely by atomic number.""" + ref: DatasetRef info: DatasetInfo values_by_z: tuple[float | None, ...] 
@@ -96,6 +117,8 @@ def from_mapping( notes: Iterable[str] = (), placeholder_value: float | None = None, ) -> "ElementScalarSet": + """Build a custom element-domain dataset from a symbol-keyed mapping.""" + n_z = max(e.z for e in iter_elements()) values_by_z: list[float | None] = [None] * (n_z + 1) @@ -143,6 +166,8 @@ def from_mapping( return cls(ref=ref, info=info, values_by_z=tuple(values_by_z)) def get(self, symbol: str | None) -> float | None: + """Return the scalar value for ``symbol`` or ``None`` if absent.""" + sym = _normalize_element_domain_symbol(symbol) elem = get_element(sym) if elem is None: @@ -154,6 +179,8 @@ def get(self, symbol: str | None) -> float | None: def _normalize_element_domain_symbol(symbol: str | None) -> str | None: + """Normalize element-domain symbols and fold D/T onto hydrogen.""" + cand = canonicalize_element_symbol(symbol) if cand in {"D", "T"}: return "H" @@ -162,6 +189,8 @@ def _normalize_element_domain_symbol(symbol: str | None) -> str | None: @lru_cache(maxsize=1) def _load_registry_json() -> dict[str, object]: + """Load the packaged registry JSON as a validated top-level mapping.""" + path = resources.files("atomref.data").joinpath("registry.json") with path.open("r", encoding="utf-8") as handle: data = json.load(handle) @@ -171,6 +200,8 @@ def _load_registry_json() -> dict[str, object]: def _get_quantities_mapping() -> Mapping[str, object]: + """Return the raw ``quantities`` mapping from ``registry.json``.""" + quantities = _load_registry_json().get("quantities") if not isinstance(quantities, dict): raise DatasetError("invalid registry.json: missing quantities mapping") @@ -178,6 +209,8 @@ def _get_quantities_mapping() -> Mapping[str, object]: def _get_datasets_mapping() -> Mapping[str, object]: + """Return the raw ``datasets`` mapping from ``registry.json``.""" + datasets = _load_registry_json().get("datasets") if not isinstance(datasets, dict): raise DatasetError("invalid registry.json: missing datasets mapping") @@ -185,6 +218,8 @@ def _get_datasets_mapping() -> Mapping[str, object]: def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: + """Return the dataset table for one quantity or raise on unknown input.""" + datasets = _get_datasets_mapping().get(quantity) if not isinstance(datasets, dict): raise DatasetError(f"unknown quantity: {quantity!r}") @@ -192,10 +227,14 @@ def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: def list_quantities() -> tuple[str, ...]: + """List packaged quantity identifiers in registry order.""" + return tuple(_get_quantities_mapping().keys()) def get_quantity_info(quantity: QuantityId) -> QuantityInfo: + """Return quantity-level metadata for a packaged quantity.""" + raw = _get_quantities_mapping().get(quantity) if not isinstance(raw, dict): raise DatasetError(f"unknown quantity: {quantity!r}") @@ -207,15 +246,22 @@ def get_quantity_info(quantity: QuantityId) -> QuantityInfo: raw.get("description") if isinstance(raw.get("description"), str) else None ) return QuantityInfo( - quantity=quantity, domain=domain, units=units, description=description + quantity=quantity, + domain=domain, + units=units, + description=description, ) def _canonicalize_alias_token(value: str) -> str: + """Normalize a dataset id or alias for case-insensitive comparison.""" + return " ".join(value.strip().lower().split()) def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: + """Resolve a dataset id or alias to its canonical packaged set id.""" + by_quantity = _datasets_for_quantity(quantity) if 
set_id in by_quantity: return set_id @@ -239,6 +285,12 @@ def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: def list_dataset_ids( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[str, ...]: + """List packaged dataset identifiers for a quantity. + + When ``usage_role`` is provided, only datasets with a matching normalized + role such as ``"target"`` or ``"support"`` are returned. + """ + dataset_ids = tuple(_datasets_for_quantity(quantity).keys()) if usage_role is None: return dataset_ids @@ -256,6 +308,8 @@ def list_dataset_ids( def list_dataset_infos( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[DatasetInfo, ...]: + """Return packaged dataset metadata objects for a quantity.""" + return tuple( get_dataset_info(DatasetRef(quantity, set_id)) for set_id in list_dataset_ids(quantity, usage_role=usage_role) @@ -263,6 +317,8 @@ def list_dataset_infos( def _coerce_reference(obj: object) -> Reference: + """Coerce a raw registry reference entry into :class:`Reference`.""" + if not isinstance(obj, dict): raise DatasetError("invalid reference entry in registry.json") return Reference( @@ -280,6 +336,8 @@ def _coerce_reference(obj: object) -> Reference: def _coerce_coverage(obj: object) -> CoverageInfo | None: + """Coerce raw coverage metadata into :class:`CoverageInfo`.""" + if not isinstance(obj, dict): return None covered = obj.get("covered_z") @@ -297,6 +355,8 @@ def _coerce_coverage(obj: object) -> CoverageInfo | None: def get_dataset_info(ref: DatasetRef) -> DatasetInfo: + """Return curated metadata for a packaged dataset reference.""" + actual_set_id = _resolve_set_id(ref.quantity, ref.set_id) actual_ref = DatasetRef(quantity=ref.quantity, set_id=actual_set_id) @@ -403,6 +463,8 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: @lru_cache(maxsize=None) def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: + """Load all value columns from one packaged dense-by-Z CSV table.""" + path = resources.files("atomref.data").joinpath(filename) with path.open("r", encoding="utf-8", newline="") as handle: reader = csv.DictReader(handle) @@ -427,6 +489,8 @@ def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: @lru_cache(maxsize=None) def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: + """Load a packaged dataset as an :class:`ElementScalarSet`.""" + info = get_dataset_info(ref) if info.domain != "element": raise DatasetError( @@ -448,12 +512,16 @@ def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: + """Resolve either a packaged reference or a custom set to a loaded set.""" + if isinstance(dataset, ElementScalarSet): return dataset return get_builtin_set(dataset) def _is_placeholder_value(info: DatasetInfo, value: float) -> bool: + """Return ``True`` when ``value`` equals the dataset's placeholder value.""" + if info.placeholder_value is None: return False return abs(value - info.placeholder_value) < 1e-12 diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py index d7f5d5e..14362db 100644 --- a/src/atomref/transfer.py +++ b/src/atomref/transfer.py @@ -1,4 +1,4 @@ -"""Transfer model configuration types.""" +"""Transfer-model configuration types for policy-based lookup.""" from __future__ import annotations @@ -9,6 +9,13 @@ @dataclass(frozen=True, slots=True) class LinearFit: + """Summary statistics for a fitted linear transfer model. 
+ + Parameters are stored in a compact, serializable form so they can be + attached to :class:`atomref.policy.LookupResult` objects and reused in + reporting code. + """ + coefficients: tuple[float, ...] intercept: float n_points: int @@ -18,14 +25,24 @@ class LinearFit: @dataclass(frozen=True, slots=True) class SubstitutionTransfer: + """Use another dataset directly when the base dataset is missing a value.""" + source: DatasetLike @dataclass(frozen=True, slots=True) class LinearTransfer: + """Infer missing target values from one or more predictor datasets. + + In v0.1 the public API stores predictors as a tuple for forward + compatibility, but the runtime implementation intentionally accepts exactly + one predictor dataset. + """ + predictors: tuple[DatasetLike, ...] min_points: int = 2 exclude_placeholders: bool = True TransferModel = SubstitutionTransfer | LinearTransfer +"""Closed union of transfer models supported by the core resolver.""" diff --git a/tests/meta/test_notebooks.py b/tests/meta/test_notebooks.py index f49775f..d420476 100644 --- a/tests/meta/test_notebooks.py +++ b/tests/meta/test_notebooks.py @@ -6,8 +6,10 @@ REPO_ROOT = Path(__file__).resolve().parents[2] -SCRIPT = REPO_ROOT / "tools" / "check_notebooks.py" +CHECK_SCRIPT = REPO_ROOT / "tools" / "check_notebooks.py" +EXPORT_SCRIPT = REPO_ROOT / "tools" / "export_notebooks.py" NOTEBOOKS = REPO_ROOT / "notebooks" +EXPORTED_NOTEBOOKS = REPO_ROOT / "docs" / "notebooks" def test_notebook_files_exist() -> None: @@ -21,4 +23,19 @@ def test_notebook_files_exist() -> None: def test_notebooks_validate_and_execute() -> None: - subprocess.run([sys.executable, str(SCRIPT)], cwd=REPO_ROOT, check=True) + subprocess.run([sys.executable, str(CHECK_SCRIPT)], cwd=REPO_ROOT, check=True) + + +def test_exported_notebook_pages_are_in_sync() -> None: + expected = { + "01-quickstart.md", + "02-policies-and-assessment.md", + "03-custom-sets-and-discovery.md", + } + actual = {path.name for path in EXPORTED_NOTEBOOKS.glob("*.md")} + assert expected.issubset(actual) + subprocess.run( + [sys.executable, str(EXPORT_SCRIPT), "--check"], + cwd=REPO_ROOT, + check=True, + ) diff --git a/tests/meta/test_text_generation_tools.py b/tests/meta/test_text_generation_tools.py new file mode 100644 index 0000000..b6203a7 --- /dev/null +++ b/tests/meta/test_text_generation_tools.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[2] +MODULE_PATH = REPO_ROOT / "tools" / "export_notebooks.py" + +spec = importlib.util.spec_from_file_location("export_notebooks_tool", MODULE_PATH) +assert spec is not None and spec.loader is not None +export_notebooks = importlib.util.module_from_spec(spec) +sys.modules[spec.name] = export_notebooks +spec.loader.exec_module(export_notebooks) + + +def test_export_notebooks_check_ignores_crlf(tmp_path: Path) -> None: + """Notebook export checks should ignore Windows vs Unix newline differences.""" + + output_dir = tmp_path / "docs" + output_dir.mkdir() + + for notebook_name, output_name in export_notebooks.NOTEBOOK_OUTPUTS.items(): + rendered = export_notebooks._export_markdown( + export_notebooks.NOTEBOOKS / notebook_name + ) + (output_dir / output_name).write_text( + rendered.replace("\n", "\r\n"), + encoding="utf-8", + newline="", + ) + + assert export_notebooks.export_notebooks(output_dir, check=True) == 0 diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..8074430 --- /dev/null +++ 
b/tools/README.md @@ -0,0 +1,27 @@ +# tools + +This directory contains small maintenance scripts used during development and +release preparation. + +## Scripts + +- `check_dist.py` — verify that wheel and source-distribution artifacts contain + the key files expected by the project. +- `check_notebooks.py` — validate notebook JSON and execute notebook code cells. +- `check_registry.py` — validate curated registry metadata against packaged CSV + tables. +- `export_notebooks.py` — render the bundled notebooks into Markdown pages under + `docs/notebooks/`. +- `gen_readme.py` — regenerate `README.md` from `docs/index.md`. + +## Typical commands + +```bash +python tools/check_registry.py +python tools/check_notebooks.py +python tools/export_notebooks.py +python tools/gen_readme.py +``` + +The main project README is generated from the documentation home page. To change +`README.md`, edit `docs/index.md` and then run `python tools/gen_readme.py`. diff --git a/tools/check_dist.py b/tools/check_dist.py index b9d80b5..92cef29 100644 --- a/tools/check_dist.py +++ b/tools/check_dist.py @@ -1,3 +1,5 @@ +"""Verify that built distributions contain the project's key files.""" + from __future__ import annotations import argparse @@ -7,26 +9,31 @@ REQUIRED_WHEEL_MEMBERS = { - 'atomref/data/periodic_table.csv', - 'atomref/data/covalent.csv', - 'atomref/data/van_der_waals.csv', - 'atomref/data/registry.json', - 'atomref/py.typed', + "atomref/data/periodic_table.csv", + "atomref/data/covalent.csv", + "atomref/data/van_der_waals.csv", + "atomref/data/registry.json", + "atomref/py.typed", } REQUIRED_SDIST_SUFFIXES = { - 'src/atomref/data/periodic_table.csv', - 'src/atomref/data/covalent.csv', - 'src/atomref/data/van_der_waals.csv', - 'src/atomref/data/registry.json', - 'src/atomref/py.typed', - 'README.md', - 'LICENSE', - 'pyproject.toml', - 'notebooks/01-quickstart.ipynb', - 'notebooks/02-policies-and-assessment.ipynb', - 'notebooks/03-custom-sets-and-discovery.ipynb', - 'tools/check_notebooks.py', + "src/atomref/data/periodic_table.csv", + "src/atomref/data/covalent.csv", + "src/atomref/data/van_der_waals.csv", + "src/atomref/data/registry.json", + "src/atomref/py.typed", + "README.md", + "LICENSE", + "pyproject.toml", + "notebooks/01-quickstart.ipynb", + "notebooks/02-policies-and-assessment.ipynb", + "notebooks/03-custom-sets-and-discovery.ipynb", + "docs/notebooks/01-quickstart.md", + "docs/notebooks/02-policies-and-assessment.md", + "docs/notebooks/03-custom-sets-and-discovery.md", + "tools/check_notebooks.py", + "tools/export_notebooks.py", + "tools/README.md", } @@ -35,15 +42,22 @@ class DistCheckError(RuntimeError): def _assert_members_present( - actual: set[str], required: set[str], *, label: str + actual: set[str], + required: set[str], + *, + label: str, ) -> None: + """Raise when ``required`` contains members not present in ``actual``.""" + missing = sorted(required - actual) if missing: - joined = ', '.join(missing) - raise DistCheckError(f'{label} is missing required members: {joined}') + joined = ", ".join(missing) + raise DistCheckError(f"{label} is missing required members: {joined}") def _members_matching_suffixes(actual: set[str], suffixes: set[str]) -> set[str]: + """Return suffixes that match at least one member name from ``actual``.""" + matched: set[str] = set() for suffix in suffixes: if any(name.endswith(suffix) for name in actual): @@ -52,6 +66,8 @@ def _members_matching_suffixes(actual: set[str], suffixes: set[str]) -> set[str] def check_wheel(path: Path) -> None: + """Validate the 
contents of one built wheel.""" + with zipfile.ZipFile(path) as zf: names = set(zf.namelist()) matched = { @@ -63,24 +79,28 @@ def check_wheel(path: Path) -> None: def check_sdist(path: Path) -> None: - with tarfile.open(path, 'r:gz') as tf: + """Validate the contents of one built source distribution.""" + + with tarfile.open(path, "r:gz") as tf: names = {member.name for member in tf.getmembers()} matched = _members_matching_suffixes(names, REQUIRED_SDIST_SUFFIXES) _assert_members_present(matched, REQUIRED_SDIST_SUFFIXES, label=path.name) def main() -> None: + """Validate wheel and sdist artifacts found in a distribution directory.""" + parser = argparse.ArgumentParser() - parser.add_argument('dist_dir', type=Path, nargs='?', default=Path('dist')) + parser.add_argument("dist_dir", type=Path, nargs="?", default=Path("dist")) args = parser.parse_args() dist_dir = args.dist_dir - wheels = sorted(dist_dir.glob('*.whl')) - sdists = sorted(dist_dir.glob('*.tar.gz')) + wheels = sorted(dist_dir.glob("*.whl")) + sdists = sorted(dist_dir.glob("*.tar.gz")) if not wheels: - raise DistCheckError(f'no wheel files found in {dist_dir}') + raise DistCheckError(f"no wheel files found in {dist_dir}") if not sdists: - raise DistCheckError(f'no source distributions found in {dist_dir}') + raise DistCheckError(f"no source distributions found in {dist_dir}") for wheel in wheels: check_wheel(wheel) @@ -88,5 +108,5 @@ def main() -> None: check_sdist(sdist) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tools/check_notebooks.py b/tools/check_notebooks.py index 830d742..51d9dfa 100644 --- a/tools/check_notebooks.py +++ b/tools/check_notebooks.py @@ -28,10 +28,14 @@ class NotebookCheckError(RuntimeError): def iter_notebooks() -> tuple[Path, ...]: + """Return the notebooks that are expected to ship with the project.""" + return tuple(NOTEBOOKS / name for name in REQUIRED_NOTEBOOKS) def load_notebook(path: Path) -> dict[str, object]: + """Load one notebook JSON document.""" + data = json.loads(path.read_text(encoding="utf-8")) if not isinstance(data, dict): raise NotebookCheckError(f"{path.name}: expected top-level JSON object") @@ -39,6 +43,8 @@ def load_notebook(path: Path) -> dict[str, object]: def iter_code_cells(data: dict[str, object], *, path: Path) -> tuple[str, ...]: + """Return notebook code-cell sources in order.""" + cells = data.get("cells") if not isinstance(cells, list): raise NotebookCheckError(f"{path.name}: missing notebook cell list") @@ -66,6 +72,8 @@ def iter_code_cells(data: dict[str, object], *, path: Path) -> tuple[str, ...]: def execute_notebook(path: Path) -> None: + """Execute all code cells from one notebook in a shared namespace.""" + if not path.exists(): raise NotebookCheckError(f"missing notebook: {path}") data = load_notebook(path) @@ -84,6 +92,8 @@ def execute_notebook(path: Path) -> None: def main() -> int: + """Validate and execute every required notebook.""" + notebooks = iter_notebooks() for notebook in notebooks: execute_notebook(notebook) diff --git a/tools/export_notebooks.py b/tools/export_notebooks.py new file mode 100644 index 0000000..aa6761d --- /dev/null +++ b/tools/export_notebooks.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""Export bundled notebooks to Markdown pages for the docs.""" + +from __future__ import annotations + +from contextlib import redirect_stdout +import argparse +import io +import json +from pathlib import Path +import sys + + +REPO_ROOT = Path(__file__).resolve().parents[1] +SRC = REPO_ROOT / "src" +if str(SRC) not in 
sys.path: + sys.path.insert(0, str(SRC)) + +NOTEBOOKS = REPO_ROOT / "notebooks" +DEFAULT_OUTPUT_DIR = REPO_ROOT / "docs" / "notebooks" +NOTEBOOK_OUTPUTS = { + "01-quickstart.ipynb": "01-quickstart.md", + "02-policies-and-assessment.ipynb": "02-policies-and-assessment.md", + "03-custom-sets-and-discovery.ipynb": "03-custom-sets-and-discovery.md", +} +HEADER = ( + "\n" + "\n\n" +) + + +class NotebookExportError(RuntimeError): + """Raised when notebook export fails.""" + + +def _load_notebook(path: Path) -> dict[str, object]: + """Load one notebook JSON document.""" + + data = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(data, dict): + raise NotebookExportError(f"{path.name}: expected top-level JSON object") + return data + + +def _cell_source(cell: dict[str, object], *, path: Path, index: int) -> str: + """Return normalized source text for one notebook cell.""" + + source = cell.get("source", []) + if isinstance(source, str): + return source + if isinstance(source, list) and all(isinstance(line, str) for line in source): + return "".join(source) + raise NotebookExportError(f"{path.name}: invalid source in cell {index}") + + +def _export_markdown(path: Path) -> str: + """Render one notebook as Markdown, executing code cells for output.""" + + data = _load_notebook(path) + cells = data.get("cells") + if not isinstance(cells, list): + raise NotebookExportError(f"{path.name}: missing notebook cell list") + + namespace = {"__name__": "__main__"} + parts: list[str] = [HEADER] + parts.append( + f"[Open the original notebook on GitHub]" + f"(https://github.com/DeloneCommons/atomref/blob/main/notebooks/{path.name})\n" + ) + + for index, cell in enumerate(cells, start=1): + if not isinstance(cell, dict): + raise NotebookExportError(f"{path.name}: cell {index} is not an object") + source = _cell_source(cell, path=path, index=index) + cell_type = cell.get("cell_type") + if cell_type == "markdown": + text = source.strip() + if text: + parts.append(f"{text}\n") + continue + if cell_type != "code": + continue + code_text = source.rstrip() + parts.append("```python\n") + parts.append(f"{code_text}\n") + parts.append("```\n") + if not code_text.strip(): + continue + + stdout = io.StringIO() + try: + code = compile(code_text, f"{path.name}::cell{index}", "exec") + with redirect_stdout(stdout): + exec(code, namespace, namespace) + except Exception as exc: # noqa: BLE001 + raise NotebookExportError( + f"{path.name}: execution failed in code cell {index}: {exc}" + ) from exc + + output = stdout.getvalue().rstrip() + if output: + parts.append("**Output**\n\n") + parts.append("```text\n") + parts.append(f"{output}\n") + parts.append("```\n") + + return "\n".join(part.rstrip() for part in parts if part).rstrip() + "\n" + + +def export_notebooks(output_dir: Path, *, check: bool = False) -> int: + """Export bundled notebooks or verify that exported pages are in sync.""" + + output_dir.mkdir(parents=True, exist_ok=True) + for notebook_name, output_name in NOTEBOOK_OUTPUTS.items(): + notebook_path = NOTEBOOKS / notebook_name + rendered = _export_markdown(notebook_path) + output_path = output_dir / output_name + if check: + current = output_path.read_text(encoding="utf-8").replace("\r\n", "\n") + if current != rendered: + print( + f"{output_path} is out of sync with {notebook_path.name}", + file=sys.stderr, + ) + return 1 + else: + output_path.write_text(rendered, encoding="utf-8", newline="\n") + return 0 + + +def main() -> int: + """Export notebook Markdown pages or check that they are current.""" 
+ + parser = argparse.ArgumentParser() + parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR) + parser.add_argument( + "--check", + action="store_true", + help="exit with status 1 when exported pages are out of sync", + ) + args = parser.parse_args() + return export_notebooks(args.output_dir, check=args.check) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/gen_readme.py b/tools/gen_readme.py index cad0335..71b954d 100644 --- a/tools/gen_readme.py +++ b/tools/gen_readme.py @@ -1,20 +1,61 @@ +"""Generate ``README.md`` from the documentation home page.""" + from __future__ import annotations import argparse from pathlib import Path +import sys REPO_ROOT = Path(__file__).resolve().parents[1] -SOURCE = REPO_ROOT / 'docs' / 'index.md' -README = REPO_ROOT / 'README.md' +SOURCE = REPO_ROOT / "docs" / "index.md" +README = REPO_ROOT / "README.md" +FOOTER = """ + +--- + +This README is generated from `docs/index.md`. + +To regenerate it: + +```bash +python tools/gen_readme.py +``` + +Edit the documentation sources instead of editing `README.md` directly. +""" -def main() -> None: +def render_readme() -> str: + """Return the generated README text.""" + + body = SOURCE.read_text(encoding="utf-8").rstrip() + return f"{body}{FOOTER}" + + +def main() -> int: + """Generate or verify the repository README file.""" + parser = argparse.ArgumentParser() - parser.add_argument('--output', type=Path, default=README) + parser.add_argument("--output", type=Path, default=README) + parser.add_argument( + "--check", + action="store_true", + help="exit with status 1 when the target file is out of sync", + ) args = parser.parse_args() - args.output.write_text(SOURCE.read_text(encoding='utf-8'), encoding='utf-8') + + rendered = render_readme() + if args.check: + current = args.output.read_text(encoding="utf-8") + if current != rendered: + print(f"{args.output} is out of sync with docs/index.md", file=sys.stderr) + return 1 + return 0 + + args.output.write_text(rendered, encoding="utf-8") + return 0 -if __name__ == '__main__': - main() +if __name__ == "__main__": + raise SystemExit(main()) From 169ae719e5d9b78610f9406fa72066ce37434a61 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 11:31:59 +0300 Subject: [PATCH 11/15] Adds pre-release check --- CHANGELOG.md | 45 ++++++++----- README.md | 1 + docs/guide/install.md | 7 ++ docs/index.md | 1 + mkdocs.yml | 1 + pyproject.toml | 4 +- src/atomref/__about__.py | 2 +- tests/meta/test_release_tools.py | 22 +++++++ tools/README.md | 3 + tools/check_dist.py | 4 ++ tools/release_check.py | 106 +++++++++++++++++++++++++++++++ 11 files changed, 179 insertions(+), 17 deletions(-) create mode 100644 tests/meta/test_release_tools.py create mode 100644 tools/release_check.py diff --git a/CHANGELOG.md b/CHANGELOG.md index dcfa24a..cac0707 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,32 @@ # Changelog -## 0.1.0a0 - -- Initial scaffold extracted from the `molcryst` chemistry data layer. -- Added packaged element metadata and radii tables. -- Added registry design separating operational quantity from scientific - classification. -- Added radii policies with substitution and linear transfer models. -- Added public packaged-set retrieval helpers: `get_builtin_set()` and - `get_radii_set()`. -- Added runnable notebooks together with generated Markdown notebook pages in - the docs. -- Expanded the docs with dataset guidance, module-level API pages, and a tools - overview. 
-- Added docstrings across the main importable modules, including important - internal helpers used across modules. +## 0.1.0 - 2026-03-15 + +First public release. + +### Added + +- Packaged element metadata and curated radii tables. +- Quantity-aware registry metadata that separates operational lookup quantity + from scientific classification and dataset usage role. +- Provenance-aware radii policies with deterministic resolution order. +- Substitution and linear-transfer support for restoring missing values from + curated support datasets. +- Public helpers for inspecting quantities, dataset metadata, and packaged + built-in sets. +- Runnable notebooks together with generated Markdown notebook pages in the + documentation. +- Validation and maintenance tools for registry checks, notebook export, README + generation, and distribution-artifact inspection. + +### Documentation + +- Expanded dataset guides with citations and selection-oriented descriptions. +- Added module-level API pages and notebook walkthroughs. +- Added developer-facing curation and tooling notes. + +### Packaging + +- Built and validated wheel and source-distribution artifacts. +- Added CI coverage for linting, tests, docs builds, notebook sync, and + distribution checks. diff --git a/README.md b/README.md index 9d5eb79..d9d90bd 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ The repository also ships small maintenance tools. The most important ones are: - `python tools/export_notebooks.py` — turn notebooks into Markdown pages for the docs, - `python tools/gen_readme.py` — regenerate `README.md` from this page. +- `python tools/release_check.py` — run the full release-preparation checklist, including linting, tests, docs, builds, and artifact validation. See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) for a short description of each script. diff --git a/docs/guide/install.md b/docs/guide/install.md index 00a4f22..e7e0697 100644 --- a/docs/guide/install.md +++ b/docs/guide/install.md @@ -21,3 +21,10 @@ Those extras currently cover: - `test` — pytest and test-only compatibility helpers, - `docs` — MkDocs and API documentation tooling, - `dev` — flake8, build, and release metadata checks. + + +For a full local pre-release validation pass after installing those extras, run: + +```bash +python tools/release_check.py +``` diff --git a/docs/index.md b/docs/index.md index c59777e..3bc7495 100644 --- a/docs/index.md +++ b/docs/index.md @@ -116,6 +116,7 @@ The repository also ships small maintenance tools. The most important ones are: - `python tools/export_notebooks.py` — turn notebooks into Markdown pages for the docs, - `python tools/gen_readme.py` — regenerate `README.md` from this page. +- `python tools/release_check.py` — run the full release-preparation checklist, including linting, tests, docs, builds, and artifact validation. See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) for a short description of each script. 
diff --git a/mkdocs.yml b/mkdocs.yml index c3e560c..2658174 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -11,6 +11,7 @@ plugins: - mkdocstrings: handlers: python: + paths: [src] options: show_root_heading: true show_source: false diff --git a/pyproject.toml b/pyproject.toml index 065faab..b712101 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ authors = [ ] keywords = ["chemistry", "materials", "crystallography", "reference data", "atomic radii"] classifiers = [ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Chemistry", "Topic :: Software Development :: Libraries", @@ -26,6 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", + "Typing :: Typed", ] dependencies = [] @@ -34,6 +35,7 @@ Homepage = "https://delonecommons.github.io/atomref/" Documentation = "https://delonecommons.github.io/atomref/" Repository = "https://github.com/DeloneCommons/atomref" Issues = "https://github.com/DeloneCommons/atomref/issues" +Changelog = "https://github.com/DeloneCommons/atomref/blob/main/CHANGELOG.md" [project.optional-dependencies] test = [ diff --git a/src/atomref/__about__.py b/src/atomref/__about__.py index 44cdb9a..3dc1f76 100644 --- a/src/atomref/__about__.py +++ b/src/atomref/__about__.py @@ -1 +1 @@ -__version__ = '0.1.0a0' +__version__ = "0.1.0" diff --git a/tests/meta/test_release_tools.py b/tests/meta/test_release_tools.py new file mode 100644 index 0000000..7cbff90 --- /dev/null +++ b/tests/meta/test_release_tools.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +# Keeping this as a subprocess test ensures the helper stays importable and +# exposes a stable CLI entry point without running the expensive full release +# workflow inside the unit test suite. +def test_release_check_help() -> None: + result = subprocess.run( + [sys.executable, "tools/release_check.py", "--help"], + cwd=REPO_ROOT, + check=True, + capture_output=True, + text=True, + ) + assert "release-preparation checks" in result.stdout diff --git a/tools/README.md b/tools/README.md index 8074430..943900d 100644 --- a/tools/README.md +++ b/tools/README.md @@ -13,6 +13,8 @@ release preparation. - `export_notebooks.py` — render the bundled notebooks into Markdown pages under `docs/notebooks/`. - `gen_readme.py` — regenerate `README.md` from `docs/index.md`. +- `release_check.py` — run the full release-preparation checklist, + including linting, tests, docs, builds, and artifact validation. ## Typical commands @@ -21,6 +23,7 @@ python tools/check_registry.py python tools/check_notebooks.py python tools/export_notebooks.py python tools/gen_readme.py +python tools/release_check.py ``` The main project README is generated from the documentation home page. 
To change diff --git a/tools/check_dist.py b/tools/check_dist.py index 92cef29..df70910 100644 --- a/tools/check_dist.py +++ b/tools/check_dist.py @@ -23,6 +23,8 @@ "src/atomref/data/registry.json", "src/atomref/py.typed", "README.md", + "CHANGELOG.md", + "DEV_PLAN.md", "LICENSE", "pyproject.toml", "notebooks/01-quickstart.ipynb", @@ -33,6 +35,8 @@ "docs/notebooks/03-custom-sets-and-discovery.md", "tools/check_notebooks.py", "tools/export_notebooks.py", + "tools/gen_readme.py", + "tools/release_check.py", "tools/README.md", } diff --git a/tools/release_check.py b/tools/release_check.py new file mode 100644 index 0000000..a357a18 --- /dev/null +++ b/tools/release_check.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +"""Run the full release-preparation checks for the repository. + +This helper is intended for local release preparation. It runs the same checks +that are exercised separately in CI, then builds source and wheel artifacts, +validates them, and smoke-tests the built wheel in an isolated virtual +environment. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +import shutil +import subprocess +import sys +import tempfile +import venv + + +REPO_ROOT = Path(__file__).resolve().parents[1] +DIST_DIR = REPO_ROOT / "dist" +BUILD_DIR = REPO_ROOT / "build" + + +def _run(*args: str, env: dict[str, str] | None = None) -> None: + """Run one subprocess command in the repository root.""" + + print("+", " ".join(args)) + subprocess.run(args, cwd=REPO_ROOT, check=True, env=env) + + +def _fresh_build_dirs() -> None: + """Remove build artifacts from previous runs.""" + + shutil.rmtree(DIST_DIR, ignore_errors=True) + shutil.rmtree(BUILD_DIR, ignore_errors=True) + + +def _smoke_test_wheel() -> None: + """Install the built wheel into a temporary virtualenv and import it.""" + + wheels = sorted(DIST_DIR.glob("*.whl")) + if not wheels: + raise RuntimeError("no wheel found in dist/") + wheel = wheels[-1] + + with tempfile.TemporaryDirectory(prefix="atomref-release-check-") as tmp: + env_dir = Path(tmp) / "venv" + builder = venv.EnvBuilder(with_pip=True) + builder.create(env_dir) + bindir = "Scripts" if sys.platform.startswith("win") else "bin" + python = env_dir / bindir / "python" + _run(str(python), "-m", "pip", "install", "--no-deps", str(wheel)) + _run( + str(python), + "-c", + ( + "import atomref as ar; " + "assert ar.get_covalent_radius('C') == 0.76; " + "assert ar.get_vdw_radius('C') == 1.77; " + "assert 'atomic_radius' in ar.list_quantities(); " + "assert 'rahm2016' in ar.list_dataset_ids(" + "'atomic_radius', usage_role='support')" + ), + ) + + +def main() -> int: + """Run lint, tests, docs, build, metadata, and wheel smoke checks.""" + + parser = argparse.ArgumentParser( + description="Run the full release-preparation checks for the repository.", + ) + parser.add_argument( + "--skip-docs", + action="store_true", + help="skip the strict MkDocs build step", + ) + parser.add_argument( + "--skip-smoke-test", + action="store_true", + help="skip the temporary-virtualenv wheel import smoke test", + ) + args = parser.parse_args() + + _run("flake8", "src", "tests", "tools") + _run(sys.executable, "tools/check_registry.py") + _run(sys.executable, "tools/check_notebooks.py") + _run(sys.executable, "tools/export_notebooks.py", "--check") + _run(sys.executable, "tools/gen_readme.py", "--check") + _run(sys.executable, "-m", "pytest", "-q") + if not args.skip_docs: + _run("mkdocs", "build", "--strict") + + _fresh_build_dirs() + _run(sys.executable, "-m", "build") + 
_run(sys.executable, "-m", "twine", "check", "dist/*") + _run(sys.executable, "tools/check_dist.py", "dist") + if not args.skip_smoke_test: + _smoke_test_wheel() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From a9900821f4d696f014787bf943546ed3f9aea851 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 19:01:49 +0300 Subject: [PATCH 12/15] Adds functionality for X-H bonds --- CHANGELOG.md | 59 ++++ README.md | 57 +++- docs/api/index.md | 23 +- docs/api/policy.md | 21 +- docs/api/registry.md | 10 + docs/api/transfer.md | 18 +- docs/api/xh.md | 22 ++ docs/datasets/index.md | 12 +- docs/datasets/xh_bond_length.md | 39 +++ docs/dev/architecture.md | 84 ++++- docs/guide/policies.md | 113 +++++-- docs/guide/quickstart.md | 11 +- docs/index.md | 57 +++- docs/notebooks/01-quickstart.md | 2 +- mkdocs.yml | 2 + src/atomref/__about__.py | 2 +- src/atomref/__init__.py | 22 +- src/atomref/data/registry.json | 64 ++++ src/atomref/data/xh_bond_length.csv | 119 +++++++ src/atomref/policy.py | 463 +++++++++++++++++++++----- src/atomref/radii.py | 81 ++++- src/atomref/registry.py | 113 ++++++- src/atomref/transfer.py | 31 +- src/atomref/xh.py | 168 ++++++++++ tests/meta/test_imports.py | 1 + tests/meta/test_package_data.py | 2 + tests/meta/test_public_api.py | 7 + tests/meta/test_registry_integrity.py | 8 +- tests/policy/test_policy.py | 69 ++++ tests/radii/test_selection.py | 50 +++ tests/registry/test_registry.py | 50 ++- tests/xh/test_xh.py | 67 ++++ tools/check_registry.py | 3 +- 33 files changed, 1645 insertions(+), 205 deletions(-) create mode 100644 docs/api/xh.md create mode 100644 docs/datasets/xh_bond_length.md create mode 100644 src/atomref/data/xh_bond_length.csv create mode 100644 src/atomref/xh.py create mode 100644 tests/policy/test_policy.py create mode 100644 tests/xh/test_xh.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cac0707..fbb2887 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,64 @@ # Changelog +## 0.1.3 - 2026-03-15 + +### Added + +- Support for using generic policies and wrapper policies as transfer sources in + `SubstitutionTransfer` and `LinearTransfer`. +- Public `atomref.xh` module docs and examples for policy-backed predictor + workflows. + +### Changed + +- `LinearTransfer` now treats predictors as **sources** rather than only raw + datasets, while still keeping the current runtime to one predictor at a time. +- Generic policy resolution now supports blocked element keys, which is used by + the X–H helper to prevent invalid `H` parent-element lookups. +- Transfer results now preserve nested-policy provenance through + `resolved_from` and explanatory notes when a policy source is involved. + +## 0.1.2 - 2026-03-15 + +### Added + +- New `xh_bond_length` quantity family. +- Packaged provisional X–H dataset `csd_legacy_xh_cno` with ConQuest/CSD + hydrogen-normalisation targets for `C`, `N`, and `O`. +- New `atomref.xh` convenience layer with `XHPolicy`, `DEFAULT_XH_POLICY`, set + listing helpers, and X–H lookup helpers. + +### Documentation + +- Added X–H dataset and API pages. +- Documented the provisional scope of X–H support in `0.1.x` and the planned + broader follow-up in `0.2.x`. + +## 0.1.1 - 2026-03-15 + +### Added + +- Public generic lookup helpers `lookup_value(...)` and `get_value(...)`. +- Tests for alias normalization, immutable metadata, non-finite-value rejection, + collision detection, and explicit placeholder notes. 
+ +### Changed + +- Registry metadata returned by `get_dataset_info(...)` is now frozen so callers + cannot mutate the cached registry state. +- Dataset-alias resolution now normalizes Unicode and dash variants more + robustly. +- Custom-set construction and policy configuration now reject normalized-key + collisions and non-finite numeric values. +- Radii-specific wrappers now reject negative override and fallback values. +- Base and substitution lookups now emit explicit placeholder notes when the + returned numeric value is a dataset placeholder. +- `LinearTransfer` now validates empty-predictor and invalid-`min_points` + configurations eagerly. +- The docs now explain the distinction between quantity, domain, dataset, and + policy, and clarify that the current runtime supports only the `element` + domain. + ## 0.1.0 - 2026-03-15 First public release. diff --git a/README.md b/README.md index d9d90bd..0d784fc 100644 --- a/README.md +++ b/README.md @@ -14,15 +14,36 @@ It is not meant to be yet another periodic-table encyclopedia. The package is for code that needs stable atomic reference values with explicit provenance, clear fallback behavior, and honest handling of incomplete preferred datasets. -What you get in v0.1: +What you get in the current `0.1.x` line: - stable element metadata, - curated named radii sets, +- provisional X–H bond-length support for hydrogen-normalisation workflows, - dataset provenance and coverage metadata, - deterministic lookup policies, -- substitution and linear transfer from support datasets into target datasets, +- substitution and linear transfer from support datasets or policies into target datasets, - user-defined custom element-indexed scalar sets. +## Core terms + +`atomref` uses a small vocabulary on purpose. + +- **quantity** — the operational property family being requested, such as + `covalent_radius`, `van_der_waals_radius`, `atomic_radius`, or + `xh_bond_length`. +- **domain** — the key space used to index that quantity. In the current + runtime, the supported domain is `element`, meaning lookups are keyed by an + element symbol. +- **dataset** — one curated named table inside a quantity, such as + `cordero2008`, `alvarez2013`, or `csd_legacy_xh_cno`. +- **policy** — the ordered rule set that decides what value to return when the + preferred dataset is incomplete. + +The metadata layer already records `domain` explicitly because the package is +built for later extension, but the current runtime intentionally keeps the +implementation narrow and stable: **v0.1 resolves only element-domain scalar +values**. + ## Why this exists Scientific software often wants a complete lookup table, but the best dataset @@ -31,7 +52,7 @@ Instead of hiding ad hoc defaults inside algorithm code, you choose a target set, describe how missing values may be restored, and keep provenance on what was actually returned. -The default v0.1 behavior is intentionally simple and practical: +The default `0.1.x` behavior is intentionally simple and practical: - **Cordero covalent radii** (`cordero2008`) are the preferred covalent target set, with missing values substituted from the **legacy CSD covalent radii** @@ -39,6 +60,10 @@ The default v0.1 behavior is intentionally simple and practical: - **Alvarez van der Waals radii** (`alvarez2013`) are the preferred vdW target set, with missing values restored from the **Rahm isodensity atomic radii** (`rahm2016`) through a fitted linear transfer. 
+- **CSD/ConQuest hydrogen-normalisation defaults** (`csd_legacy_xh_cno`) are a + provisional sparse X–H target set for `C`, `N`, and `O`, with other parent + elements inferred from **Cordero covalent radii** through a fitted linear + policy. ## Quick example @@ -48,6 +73,8 @@ The default v0.1 behavior is intentionally simple and practical: 0.76 >>> ar.get_vdw_radius("O") 1.5 +>>> ar.get_xh_bond_length("N") +1.015 >>> lookup = ar.lookup_vdw_radius("Pm") >>> lookup.value 2.8972265395148358 @@ -58,16 +85,17 @@ The default v0.1 behavior is intentionally simple and practical: ``` `get_*` returns only the number. `lookup_*` returns a `LookupResult` that also -records where the value came from and whether a transfer model was involved. +records where the value came from and whether a transfer model or policy source +was involved. You can inspect the packaged quantity and dataset catalog directly: ```pycon >>> import atomref as ar >>> ar.list_quantities() -('covalent_radius', 'van_der_waals_radius', 'atomic_radius') ->>> ar.get_quantity_info("atomic_radius") -QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +('covalent_radius', 'van_der_waals_radius', 'atomic_radius', 'xh_bond_length') +>>> ar.get_quantity_info("xh_bond_length") +QuantityInfo(quantity='xh_bond_length', domain='element', units='angstrom', description='Element-indexed reference X-H bond lengths keyed by parent element X and intended for hydrogen-position normalisation or related geometry workflows.') >>> [info.ref.set_id for info in ar.list_dataset_infos("van_der_waals_radius", usage_role="target")] ['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] ``` @@ -79,14 +107,14 @@ You can also load a packaged set directly: >>> vdw = ar.get_radii_set("van_der_waals", "alvarez2013") >>> vdw.get("O") 1.5 ->>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) ->>> raw.get("Pm") -2.83 +>>> xh = ar.get_xh_set("csd_legacy_xh_cno") +>>> xh.get("C") +1.089 ``` ## Notebook walkthroughs -The repository ships example notebooks for the main v0.1 workflows. In the +The repository ships example notebooks for the main `0.1.x` workflows. In the documentation they are also available as rendered Markdown pages, so users can read them without opening Jupyter first. @@ -100,7 +128,7 @@ read them without opening Jupyter first. `atomref` is designed as a standalone package, but within Delone Commons it is primarily intended to support chemistry-aware packages such as: -- `molcryst`, for covalent-bond detection and contact analysis, +- `molcryst`, for covalent-bond detection, contact analysis, and hydrogen workflows, - future `chemvoro`, for chemistry-aware contact and hydrogen workflows. By contrast, `pyvoro2` and `pbcgraph` are intentionally general mathematical @@ -115,8 +143,9 @@ The repository also ships small maintenance tools. The most important ones are: - `python tools/check_notebooks.py` — execute notebook code cells, - `python tools/export_notebooks.py` — turn notebooks into Markdown pages for the docs, -- `python tools/gen_readme.py` — regenerate `README.md` from this page. -- `python tools/release_check.py` — run the full release-preparation checklist, including linting, tests, docs, builds, and artifact validation. 
+- `python tools/gen_readme.py` — regenerate `README.md` from this page, +- `python tools/release_check.py` — run the full release-preparation checklist, + including linting, tests, docs, builds, and artifact validation. See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) for a short description of each script. diff --git a/docs/api/index.md b/docs/api/index.md index e69e719..f56eb7c 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -3,20 +3,24 @@ The public API is small on purpose. Most users will spend most of their time in the top-level package namespace and -in the radii helpers. The lower-level modules are still documented because they -expose the actual data model behind the package. +in the quantity-specific convenience helpers. The lower-level modules are still +documented because they expose the actual data model behind the package. ## Common tasks -- get a single value: use `get_covalent_radius(...)` or `get_vdw_radius(...)` -- inspect provenance: use `lookup_covalent_radius(...)` or - `lookup_vdw_radius(...)` +- get a single value: use `get_covalent_radius(...)`, `get_vdw_radius(...)`, or + `get_xh_bond_length(...)` +- inspect provenance: use `lookup_covalent_radius(...)`, + `lookup_vdw_radius(...)`, `lookup_xh_bond_length(...)`, or the generic + `lookup_value(...)` - browse packaged datasets: use `list_quantities()`, `get_quantity_info(...)`, - `list_dataset_infos(...)`, or `list_radii_set_infos(...)` -- load a packaged set directly: use `get_builtin_set(...)` or `get_radii_set(...)` + `list_dataset_infos(...)`, `list_radii_set_infos(...)`, or + `list_xh_set_infos(...)` +- load a packaged set directly: use `get_builtin_set(...)`, `get_radii_set(...)`, + or `get_xh_set(...)` - define a custom set: use `ElementScalarSet.from_mapping(...)` -- define transfer-backed lookup behavior: use `RadiiPolicy`, - `SubstitutionTransfer`, and `LinearTransfer` +- define transfer-backed lookup behavior: use `ValuePolicy`, `RadiiPolicy`, + `XHPolicy`, `SubstitutionTransfer`, and `LinearTransfer` ## Module reference @@ -26,3 +30,4 @@ expose the actual data model behind the package. - [Transfer models](transfer.md) - [Generic policy core](policy.md) - [Radii API](radii.md) +- [X–H API](xh.md) diff --git a/docs/api/policy.md b/docs/api/policy.md index 99d51d9..5b68440 100644 --- a/docs/api/policy.md +++ b/docs/api/policy.md @@ -1,9 +1,22 @@ # atomref.policy -This module contains the generic resolver that sits below the radii-specific -API. +This module contains the generic resolver that sits below the radii-specific and +X–H-specific convenience APIs. -It is useful when you want to understand exactly how overrides, base datasets, -transfers, fallbacks, and missing values are ordered and reported. +Use it when you want to work directly with the common value-selection engine: + +- `ValuePolicy` — generic element-domain policy configuration, +- `lookup_value(...)` — resolve one value together with provenance, +- `get_value(...)` — resolve only the numeric value, +- `LookupResult` — the structured result object returned by the resolver. + +A few practical notes: + +- The current runtime supports **element-domain** scalar policies. +- `ValuePolicy` normalizes element-symbol overrides eagerly. +- Transfer sources may be packaged datasets, custom sets, generic policies, or + wrapper policies that expose `as_value_policy()`. +- `LookupResult.is_placeholder` refers to the returned numeric value itself, not + to whether any transfer happened. 
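+
+For orientation, here is a minimal sketch of the generic layer over one packaged
+dataset (the xenon override value is illustrative, not a recommendation):
+
+```python
+import atomref as ar
+
+# a generic element-domain policy over one packaged covalent-radius dataset
+policy = ar.ValuePolicy(
+    base=ar.DatasetRef("covalent_radius", "cordero2008"),
+    overrides={"Xe": 1.40},
+)
+
+result = ar.lookup_value("Xe", policy=policy)
+print(result.value, result.source)       # the override wins: source == "override"
+
+print(ar.get_value("C", policy=policy))  # only the number (0.76 from cordero2008)
+```
+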
::: atomref.policy diff --git a/docs/api/registry.md b/docs/api/registry.md index 4f664e6..9c41653 100644 --- a/docs/api/registry.md +++ b/docs/api/registry.md @@ -6,4 +6,14 @@ If you want to understand how `atomref` classifies datasets, how aliases are resolved, or how built-in CSV tables are turned into typed in-memory objects, this is the key module to read. +The most important registry ideas are: + +- **quantity** — the operational property family, +- **domain** — the key space used to index that quantity, +- **dataset** — one curated named table inside the quantity. + +In the current runtime, the implemented lookup domain is `element`. +The registry still stores `domain` explicitly because the metadata design is +meant to stay reusable as the package grows. + ::: atomref.registry diff --git a/docs/api/transfer.md b/docs/api/transfer.md index eab5672..797626e 100644 --- a/docs/api/transfer.md +++ b/docs/api/transfer.md @@ -1,9 +1,21 @@ # atomref.transfer Transfer models describe how missing target values may be restored from other -datasets. +sources. -In v0.1 the core built-in models are direct substitution and one-predictor -linear transfer. +In the current runtime the built-in models are: + +- direct substitution (`SubstitutionTransfer`), +- one-predictor linear transfer (`LinearTransfer`). + +A transfer source may be: + +- a packaged dataset reference, +- a custom `ElementScalarSet`, +- a generic `ValuePolicy`, +- a wrapper policy that exposes `as_value_policy()`. + +`LinearTransfer` currently accepts exactly one predictor source at runtime, even +though the public API stores predictors as a tuple for forward compatibility. ::: atomref.transfer diff --git a/docs/api/xh.md b/docs/api/xh.md new file mode 100644 index 0000000..cca073e --- /dev/null +++ b/docs/api/xh.md @@ -0,0 +1,22 @@ +# atomref.xh + +This module provides the provisional X–H bond-length helpers introduced in the +`0.1.x` line. + +It is intentionally narrow: + +- one packaged sparse target dataset, `csd_legacy_xh_cno`, +- one wrapper policy, `XHPolicy`, +- convenience helpers for listing packaged X–H sets and resolving X–H values. + +The built-in quantity is keyed by the **parent element `X`** in `X–H` and is +currently aimed at hydrogen-position normalisation or related geometry +workflows. + +In the default policy: + +- `C`, `N`, and `O` use curated ConQuest/CSD defaults, +- other parent elements may be inferred from `cordero2008`, +- fuller X–H literature support is planned for `0.2.x`. + +::: atomref.xh diff --git a/docs/datasets/index.md b/docs/datasets/index.md index 20d4c3e..d699ff0 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -4,6 +4,7 @@ Instead, the package records several layers of classification: - **quantity** — the operational property being requested, +- **domain** — the key space used to index that quantity, - **semantic class** — what the dataset scientifically represents, - **origin class** — how the values were obtained, - **phase context** — what physical context they describe, @@ -22,13 +23,16 @@ The most useful catalog helpers are: - `atomref.get_quantity_info(...)` - `atomref.list_dataset_infos(...)` - `atomref.list_radii_set_infos(...)` +- `atomref.list_xh_set_infos(...)` -If you only need dataset ids, use `list_dataset_ids(...)` or `list_radii_sets(...)`. -If you want the packaged values themselves, use `get_builtin_set(...)` or -`get_radii_set(...)`. +If you only need dataset ids, use `list_dataset_ids(...)`, `list_radii_sets(...)`, +or `list_xh_sets(...)`. 
+If you want the packaged values themselves, use `get_builtin_set(...)`, +`get_radii_set(...)`, or `get_xh_set(...)`. -## Built-in quantity families in v0.1 +## Built-in quantity families in `0.1.x` - [Covalent radius](covalent_radius.md) - [van der Waals radius](van_der_waals_radius.md) - [Atomic radius](atomic_radius.md) +- [X–H bond length](xh_bond_length.md) diff --git a/docs/datasets/xh_bond_length.md b/docs/datasets/xh_bond_length.md new file mode 100644 index 0000000..2bef656 --- /dev/null +++ b/docs/datasets/xh_bond_length.md @@ -0,0 +1,39 @@ +# X–H bond length + +The `xh_bond_length` quantity is a small provisional addition in the `0.1.x` +line. + +Its purpose is not to claim a complete literature survey of X–H bond lengths. +Instead, it provides a stable, provenance-aware starting point for +hydrogen-normalisation workflows and related geometry code. + +## Packaged target dataset + +### CSD legacy X–H neutron-normalisation targets (`csd_legacy_xh_cno`) + +- **What it is:** the fixed `C–H`, `N–H`, and `O–H` target lengths used by + ConQuest for terminal-hydrogen normalisation. +- **Coverage:** only parent elements `C`, `N`, and `O`. +- **Values:** `C–H = 1.089 Å`, `N–H = 1.015 Å`, `O–H = 0.993 Å`. +- **Primary provenance:** the ConQuest user guide section *Hydrogen Atom + Location in Crystal Structure Analyses*. +- **Secondary provenance:** Allen & Bruno (2010), which the ConQuest guide cites + for these defaults. + +## How `atomref` uses it + +The built-in `DEFAULT_XH_POLICY` treats `csd_legacy_xh_cno` as a sparse target +set and restores missing parent elements through a fitted linear transfer from +`cordero2008` covalent radii. + +That means the package draws a sharp line between: + +- **curated dataset values** — currently only `C`, `N`, and `O`, and +- **policy-generated values** — inferred for other parent elements when the + predictor policy can supply a covalent radius. + +## Scope note + +This is intentionally a small addendum rather than full X–H support. +Broader X–H datasets, richer policies, and more complete literature treatment +are planned for `0.2.x`. diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index 7dd08b4..cbdf743 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -1,7 +1,83 @@ # Architecture -Publicly, v0.1 is radii-first. +Publicly, `atomref` is still radii-first, with a small provisional X–H helper. -Internally, the package is built around element-indexed scalar datasets plus a -small transfer layer. That keeps the public API simple while leaving a clean -path to later quantities such as X-H bond lengths. +Internally, the package is built around four layers: + +1. **elements** — stable element metadata and symbol canonicalization, +2. **registry** — curated quantity and dataset metadata plus packaged data + loading, +3. **policy core** — generic value selection with overrides, transfers, + fallbacks, blocked keys, and provenance, +4. **quantity wrappers** — convenience APIs such as `atomref.radii` and + `atomref.xh`. + +## Core terminology + +A few terms are deliberately separated in the design: + +- **quantity** — the operational property family being requested, +- **domain** — the key space used to index that quantity, +- **dataset** — one curated source table inside the quantity, +- **policy** — the ordered rule set used to select a final value. 
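+
+The same four terms surface directly in the public API. A minimal sketch, with
+outputs omitted:
+
+```python
+import atomref as ar
+
+# quantity: the property family, together with its lookup domain and units
+print(ar.get_quantity_info("van_der_waals_radius"))
+
+# dataset: one curated named table inside that quantity
+print(ar.get_dataset_info(ar.DatasetRef("van_der_waals_radius", "alvarez2013")).ref)
+
+# policy: the ordered rule set that selects the returned value, with provenance
+print(ar.lookup_vdw_radius("Pm").source)
+```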
+ +This separation is what allows the package to say, for example, that +`rahm2016` belongs to the `atomic_radius` quantity but can still act as support +data in a van der Waals policy. + +## Domain support in the current runtime + +The registry schema is domain-aware, but the current resolver intentionally +implements only one domain: + +- `element` + +That means: + +- packaged built-in sets are currently element-indexed scalar tables, +- `ValuePolicy` resolves element symbols, +- transfer fitting is performed over element-wise overlap. + +The metadata keeps `domain` explicit now so later versions can extend the data +model without having to reinterpret existing registry entries. + +## Policy resolution and transfer sources + +The generic resolver works in a fixed order: + +1. blocked keys, +2. overrides, +3. base dataset, +4. transfer models, +5. fallback, +6. missing. + +Transfer sources can be: + +- packaged datasets, +- custom `ElementScalarSet` objects, +- generic `ValuePolicy` objects, +- wrapper policies exposing `as_value_policy()`. + +That last point is important. It means higher-level code can express +"infer values from my chosen covalent-radii policy" instead of being forced to +refer to one hard-coded predictor dataset. + +## Placeholder handling + +Placeholder semantics stay attached to the value that was actually returned. +This means `LookupResult.is_placeholder` can be true for: + +- a base lookup, +- a substitution transfer, +- a nested policy used as a transfer source. + +A linear transfer normally returns a computed value and therefore does not carry +placeholder status itself. + +## Why the design stays small + +The package deliberately avoids a large object graph or a chemistry-specific DSL. +A quantity wrapper is usually only a thin adapter over the generic policy core. +That keeps the internals easy to test and lets other scientific packages reuse +`atomref` without bringing in the rest of the Delone Commons stack. diff --git a/docs/guide/policies.md b/docs/guide/policies.md index 62663ae..fd53047 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -7,18 +7,39 @@ That may sound simple, but in practice scientific datasets are often incomplete. A policy makes the decision process explicit instead of hiding it in algorithm code. +## Terms used in the policy layer + +A few terms appear repeatedly in the API and docs: + +- **quantity** — the operational property family being requested. +- **domain** — the lookup key space. In the current runtime that means + `element`, so lookups are keyed by element symbol. +- **dataset** — a curated named table inside one quantity. +- **policy** — the ordered rule set used to resolve missing values. + +The quantity and dataset live in the curated registry. The policy is the +selection logic that sits on top of them. + ## Resolution order -In v0.1 every lookup follows the same ordered path: +In `0.1.x` every lookup follows the same ordered path: -1. **Override** -2. **Base dataset** -3. **Transfer models**, in the order you listed them -4. **Fallback** -5. **Missing** +1. **Blocked key** (optional) +2. **Override** +3. **Base dataset** +4. **Transfer models**, in the order you listed them +5. **Fallback** +6. **Missing** Each step has a specific meaning. +### Blocked key + +Some quantity wrappers need to declare that certain domain keys should never be +resolved, even if a transfer model could otherwise invent a number. 
The current +X–H helper uses this for `H`, because `xh_bond_length` is keyed by the parent +atom `X` in `X–H`, not by hydrogen itself. + ### Override An override is a value you provide directly for a specific element. It wins over @@ -37,16 +58,28 @@ default vdW policy starts from the **Alvarez van der Waals radii** A transfer model is used only when the base dataset has no value for the requested element. -Built-in transfer models in v0.1 are: +Built-in transfer models in `0.1.x` are: + +- `SubstitutionTransfer` — take a value directly from another dataset or policy, +- `LinearTransfer` — infer a target-equivalent value from another dataset or + policy through a fitted linear model. -- `SubstitutionTransfer` — take a value directly from another dataset, -- `LinearTransfer` — infer a target-equivalent value from a support dataset - through a fitted linear model. +`LinearTransfer` already accepts a tuple of predictors in the API, but the +current runtime intentionally supports exactly one predictor source. That keeps +the implementation simple now while leaving room for later multi-predictor +linear models. -`LinearTransfer` already accepts a tuple of predictors in the API, but the v0.1 -runtime intentionally supports exactly one predictor dataset. That keeps the -implementation simple now while leaving room for later multi-predictor linear -models. +Transfer sources can now be: + +- a packaged dataset reference (`DatasetRef`), +- a custom `ElementScalarSet`, +- a generic `ValuePolicy`, +- a wrapper policy such as `RadiiPolicy` or `XHPolicy`. + +When a transfer source is itself a policy, `atomref` uses the values selected by +that policy. This lets higher-level workflows express things like “infer X–H +lengths from my chosen covalent-radii policy” instead of hard-coding a specific +support dataset. ### Fallback @@ -60,6 +93,25 @@ If nothing above can produce a value and no fallback was configured, the result is simply missing. In that case `get_*` returns `None`, while `lookup_*` returns a `LookupResult` with `source="missing"` and explanatory notes. +## Placeholder values and `is_placeholder` + +Some support datasets use placeholder numbers to stand in for “unknown but keep +this legacy table dense enough for downstream heuristics”. + +`LookupResult.is_placeholder` answers one narrow question: + +> Is the **returned numeric value itself** marked as a placeholder by the source +> that supplied it? + +It does **not** mean “a transfer happened”. Examples: + +- a base lookup can have `is_placeholder=True` if the base dataset contains a + placeholder value, +- a substitution transfer can also have `is_placeholder=True` if it copied a + placeholder from the transfer source, +- a linear transfer is computed, not copied, so `is_placeholder` is normally + `False`. + ## Target datasets and support datasets `atomref` separates **what a dataset is used for** from **what it scientifically @@ -68,6 +120,7 @@ represents**. That is why the package stores: - the operational **quantity**, +- the lookup **domain**, - the scientific **semantic class**, - the package-level **usage role**. @@ -77,7 +130,9 @@ radii, but they are not the same thing as a condensed-phase structural vdW radius set. In `atomref`, that difference is recorded in the metadata instead of being hidden. 
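+
+The effect is easiest to see in the provenance of a resolved value. A minimal
+sketch against the default X–H policy (the exact number is whatever the fitted
+transfer produces, so only the provenance fields are printed):
+
+```python
+import atomref as ar
+
+# sulfur is outside the curated C/N/O defaults, so the default X-H policy may
+# resolve it through a linear transfer whose predictor is the covalent-radii
+# policy rather than a single hard-coded dataset
+lookup = ar.lookup_xh_bond_length("S")
+
+print(lookup.source)         # e.g. "transfer_linear" when the transfer applies
+print(lookup.resolved_from)  # dataset references behind the predictor policy
+print(lookup.notes)          # the notes record the policy-backed predictor
+```
+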
-## Example +## Examples + +A standard dataset-backed transfer: ```python import atomref as ar @@ -94,9 +149,27 @@ policy = ar.RadiiPolicy( ) ``` -With that policy: +A policy-backed transfer source: + +```python +import atomref as ar + +xh_policy = ar.XHPolicy( + base_set="csd_legacy_xh_cno", + transfers=( + ar.LinearTransfer( + predictors=(ar.DEFAULT_COVALENT_POLICY,), + min_points=3, + ), + ), +) +``` + +With that X–H policy: -- xenon uses the explicit override, -- elements present in `alvarez2013` use the base vdW value, -- missing elements may be restored from `rahm2016`, -- anything still unresolved remains missing unless you also set a fallback. +- `C`, `N`, and `O` use the curated ConQuest defaults, +- missing parent elements may be inferred from the **selected covalent-radii + policy**, not just from one hard-coded support dataset, +- if the predictor policy itself needed a transfer to produce a covalent radius, + the resulting `LookupResult` still records that provenance in `resolved_from` + and `notes`. diff --git a/docs/guide/quickstart.md b/docs/guide/quickstart.md index 3649653..72e6858 100644 --- a/docs/guide/quickstart.md +++ b/docs/guide/quickstart.md @@ -11,6 +11,8 @@ The two most important user-facing ideas in `atomref` are: 0.76 >>> ar.get_vdw_radius("O") 1.5 +>>> ar.get_xh_bond_length("N") +1.015 >>> lookup = ar.lookup_vdw_radius("Pm") >>> lookup.value 2.8972265395148358 @@ -29,9 +31,9 @@ You can inspect the packaged quantity layer directly: ```pycon >>> import atomref as ar >>> ar.list_quantities() -('covalent_radius', 'van_der_waals_radius', 'atomic_radius') ->>> ar.get_quantity_info("atomic_radius") -QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +('covalent_radius', 'van_der_waals_radius', 'atomic_radius', 'xh_bond_length') +>>> ar.get_quantity_info("xh_bond_length") +QuantityInfo(quantity='xh_bond_length', domain='element', units='angstrom', description='Element-indexed reference X-H bond lengths keyed by parent element X and intended for hydrogen-position normalisation or related geometry workflows.') >>> [info.ref.set_id for info in ar.list_radii_set_infos("van_der_waals", usage_role="target")] ['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] ``` @@ -46,6 +48,9 @@ And you can load a packaged set object directly: >>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) >>> raw.get("Pm") 2.83 +>>> xh = ar.get_xh_set("csd_legacy_xh_cno") +>>> xh.get("C") +1.089 ``` For longer, runnable examples see: diff --git a/docs/index.md b/docs/index.md index 3bc7495..17c5481 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,15 +14,36 @@ It is not meant to be yet another periodic-table encyclopedia. The package is for code that needs stable atomic reference values with explicit provenance, clear fallback behavior, and honest handling of incomplete preferred datasets. -What you get in v0.1: +What you get in the current `0.1.x` line: - stable element metadata, - curated named radii sets, +- provisional X–H bond-length support for hydrogen-normalisation workflows, - dataset provenance and coverage metadata, - deterministic lookup policies, -- substitution and linear transfer from support datasets into target datasets, +- substitution and linear transfer from support datasets or policies into target datasets, - user-defined custom element-indexed scalar sets. 
+## Core terms + +`atomref` uses a small vocabulary on purpose. + +- **quantity** — the operational property family being requested, such as + `covalent_radius`, `van_der_waals_radius`, `atomic_radius`, or + `xh_bond_length`. +- **domain** — the key space used to index that quantity. In the current + runtime, the supported domain is `element`, meaning lookups are keyed by an + element symbol. +- **dataset** — one curated named table inside a quantity, such as + `cordero2008`, `alvarez2013`, or `csd_legacy_xh_cno`. +- **policy** — the ordered rule set that decides what value to return when the + preferred dataset is incomplete. + +The metadata layer already records `domain` explicitly because the package is +built for later extension, but the current runtime intentionally keeps the +implementation narrow and stable: **v0.1 resolves only element-domain scalar +values**. + ## Why this exists Scientific software often wants a complete lookup table, but the best dataset @@ -31,7 +52,7 @@ Instead of hiding ad hoc defaults inside algorithm code, you choose a target set, describe how missing values may be restored, and keep provenance on what was actually returned. -The default v0.1 behavior is intentionally simple and practical: +The default `0.1.x` behavior is intentionally simple and practical: - **Cordero covalent radii** (`cordero2008`) are the preferred covalent target set, with missing values substituted from the **legacy CSD covalent radii** @@ -39,6 +60,10 @@ The default v0.1 behavior is intentionally simple and practical: - **Alvarez van der Waals radii** (`alvarez2013`) are the preferred vdW target set, with missing values restored from the **Rahm isodensity atomic radii** (`rahm2016`) through a fitted linear transfer. +- **CSD/ConQuest hydrogen-normalisation defaults** (`csd_legacy_xh_cno`) are a + provisional sparse X–H target set for `C`, `N`, and `O`, with other parent + elements inferred from **Cordero covalent radii** through a fitted linear + policy. ## Quick example @@ -48,6 +73,8 @@ The default v0.1 behavior is intentionally simple and practical: 0.76 >>> ar.get_vdw_radius("O") 1.5 +>>> ar.get_xh_bond_length("N") +1.015 >>> lookup = ar.lookup_vdw_radius("Pm") >>> lookup.value 2.8972265395148358 @@ -58,16 +85,17 @@ The default v0.1 behavior is intentionally simple and practical: ``` `get_*` returns only the number. `lookup_*` returns a `LookupResult` that also -records where the value came from and whether a transfer model was involved. +records where the value came from and whether a transfer model or policy source +was involved. 
You can inspect the packaged quantity and dataset catalog directly: ```pycon >>> import atomref as ar >>> ar.list_quantities() -('covalent_radius', 'van_der_waals_radius', 'atomic_radius') ->>> ar.get_quantity_info("atomic_radius") -QuantityInfo(quantity='atomic_radius', domain='element', units='angstrom', description='Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data.') +('covalent_radius', 'van_der_waals_radius', 'atomic_radius', 'xh_bond_length') +>>> ar.get_quantity_info("xh_bond_length") +QuantityInfo(quantity='xh_bond_length', domain='element', units='angstrom', description='Element-indexed reference X-H bond lengths keyed by parent element X and intended for hydrogen-position normalisation or related geometry workflows.') >>> [info.ref.set_id for info in ar.list_dataset_infos("van_der_waals_radius", usage_role="target")] ['bondi1964', 'rowland_taylor1996', 'alvarez2013', 'chernyshov2020'] ``` @@ -79,14 +107,14 @@ You can also load a packaged set directly: >>> vdw = ar.get_radii_set("van_der_waals", "alvarez2013") >>> vdw.get("O") 1.5 ->>> raw = ar.get_builtin_set(ar.DatasetRef("atomic_radius", "rahm2016")) ->>> raw.get("Pm") -2.83 +>>> xh = ar.get_xh_set("csd_legacy_xh_cno") +>>> xh.get("C") +1.089 ``` ## Notebook walkthroughs -The repository ships example notebooks for the main v0.1 workflows. In the +The repository ships example notebooks for the main `0.1.x` workflows. In the documentation they are also available as rendered Markdown pages, so users can read them without opening Jupyter first. @@ -100,7 +128,7 @@ read them without opening Jupyter first. `atomref` is designed as a standalone package, but within Delone Commons it is primarily intended to support chemistry-aware packages such as: -- `molcryst`, for covalent-bond detection and contact analysis, +- `molcryst`, for covalent-bond detection, contact analysis, and hydrogen workflows, - future `chemvoro`, for chemistry-aware contact and hydrogen workflows. By contrast, `pyvoro2` and `pbcgraph` are intentionally general mathematical @@ -115,8 +143,9 @@ The repository also ships small maintenance tools. The most important ones are: - `python tools/check_notebooks.py` — execute notebook code cells, - `python tools/export_notebooks.py` — turn notebooks into Markdown pages for the docs, -- `python tools/gen_readme.py` — regenerate `README.md` from this page. -- `python tools/release_check.py` — run the full release-preparation checklist, including linting, tests, docs, builds, and artifact validation. +- `python tools/gen_readme.py` — regenerate `README.md` from this page, +- `python tools/release_check.py` — run the full release-preparation checklist, + including linting, tests, docs, builds, and artifact validation. See the [tools README](https://github.com/DeloneCommons/atomref/blob/main/tools/README.md) for a short description of each script. 
diff --git a/docs/notebooks/01-quickstart.md b/docs/notebooks/01-quickstart.md index 3a9f22b..475e218 100644 --- a/docs/notebooks/01-quickstart.md +++ b/docs/notebooks/01-quickstart.md @@ -15,7 +15,7 @@ print(ar.list_quantities()) **Output** ```text Element(z=17, symbol='Cl', name='Chlorine') -('covalent_radius', 'van_der_waals_radius', 'atomic_radius') +('covalent_radius', 'van_der_waals_radius', 'atomic_radius', 'xh_bond_length') ``` ```python r_c = ar.get_covalent_radius('C') diff --git a/mkdocs.yml b/mkdocs.yml index 2658174..e0952f2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,7 @@ nav: - Covalent radius: datasets/covalent_radius.md - van der Waals radius: datasets/van_der_waals_radius.md - Atomic radius: datasets/atomic_radius.md + - X–H bond length: datasets/xh_bond_length.md - Notebooks: - Overview: guide/notebooks.md - Quickstart notebook: notebooks/01-quickstart.md @@ -46,3 +47,4 @@ nav: - atomref.transfer: api/transfer.md - atomref.policy: api/policy.md - atomref.radii: api/radii.md + - atomref.xh: api/xh.md diff --git a/src/atomref/__about__.py b/src/atomref/__about__.py index 3dc1f76..ae73625 100644 --- a/src/atomref/__about__.py +++ b/src/atomref/__about__.py @@ -1 +1 @@ -__version__ = "0.1.0" +__version__ = "0.1.3" diff --git a/src/atomref/__init__.py b/src/atomref/__init__.py index 6104c9d..fb569b3 100644 --- a/src/atomref/__init__.py +++ b/src/atomref/__init__.py @@ -8,7 +8,7 @@ iter_elements, is_valid_element_symbol, ) -from .policy import LookupResult, ValuePolicy +from .policy import LookupResult, ValuePolicy, get_value, lookup_value from .radii import ( DEFAULT_COVALENT_POLICY, DEFAULT_VDW_POLICY, @@ -25,6 +25,16 @@ lookup_covalent_radius, lookup_vdw_radius, ) +from .xh import ( + DEFAULT_XH_POLICY, + XHPolicy, + get_xh_bond_length, + get_xh_set, + get_xh_set_info, + list_xh_set_infos, + list_xh_sets, + lookup_xh_bond_length, +) from .registry import ( CoverageInfo, DatasetInfo, @@ -65,6 +75,8 @@ "SubstitutionTransfer", "LookupResult", "ValuePolicy", + "lookup_value", + "get_value", "RadiiPolicy", "RadiiElementAssessment", "RadiiPolicyAssessment", @@ -79,4 +91,12 @@ "lookup_vdw_radius", "get_vdw_radius", "assess_radii_policy", + "XHPolicy", + "DEFAULT_XH_POLICY", + "list_xh_sets", + "list_xh_set_infos", + "get_xh_set", + "get_xh_set_info", + "lookup_xh_bond_length", + "get_xh_bond_length", ] diff --git a/src/atomref/data/registry.json b/src/atomref/data/registry.json index a722e1c..e6e4469 100644 --- a/src/atomref/data/registry.json +++ b/src/atomref/data/registry.json @@ -23,6 +23,11 @@ "domain": "element", "units": "angstrom", "description": "Element-indexed isolated-atom or theory-defined atomic radii used as transferable support data." + }, + "xh_bond_length": { + "domain": "element", + "units": "angstrom", + "description": "Element-indexed reference X-H bond lengths keyed by parent element X and intended for hydrogen-position normalisation or related geometry workflows." } }, "datasets": { @@ -437,6 +442,65 @@ ], "usage_role": "support" } + }, + "xh_bond_length": { + "csd_legacy_xh_cno": { + "name": "CSD legacy X-H neutron-normalisation targets (C/N/O)", + "description": "Fixed C-H, N-H, and O-H target bond lengths used by ConQuest for hydrogen-position normalisation.", + "semantic_class": "xh_neutron_normalisation", + "origin_class": "compiled_experimental", + "phase_context": "condensed_phase", + "method_summary": "Sparse parent-element target set for hydrogen normalisation. 
ConQuest moves H along the experimentally determined X-H vector to these neutron-derived distances.", + "storage": { + "format": "dense_by_z_csv", + "filename": "xh_bond_length.csv", + "column": "csd_legacy_xh_cno" + }, + "coverage": { + "n_values": 3, + "z_min": 6, + "z_max": 8, + "has_placeholders": false, + "covered_z": [ + 6, + 7, + 8 + ], + "missing_z": [ + 1, + 2, + 3, + 4, + 5 + ] + }, + "placeholder_value": null, + "extraction_source": "ConQuest User Guide and Tutorials, section 'Hydrogen Atom Location in Crystal Structure Analyses'.", + "aliases": [ + "CSD X-H normalisation defaults", + "ConQuest X-H normalisation", + "CSD legacy X-H" + ], + "references": [ + { + "publisher": "Cambridge Crystallographic Data Centre (CCDC)", + "title": "ConQuest User Guide and Tutorials", + "url": "https://www.ccdc.cam.ac.uk/media/Documentation/C82017ED-FAE4-4D93-BA5A-8D841F1E4314/ConQuest-UserGuide_2020_1.pdf", + "note": "Hydrogen Atom Location in Crystal Structure Analyses; ConQuest normalises terminal C-H, N-H, and O-H distances to 1.089 Å, 1.015 Å, and 0.993 Å, respectively." + }, + { + "authors": "F. H. Allen; I. J. Bruno", + "title": "Bond lengths in organic and metal-organic compounds revisited: X-H bond lengths from neutron diffraction data", + "venue": "Acta Cryst. B66 (2010) 380-386" + } + ], + "notes": [ + "Sparse provisional target set for parent elements C, N, and O only.", + "In atomref v0.1.x this dataset seeds transfer-based inference for other parent elements rather than claiming direct curated coverage beyond C/N/O.", + "Fuller X-H dataset and policy support is planned for atomref 0.2.x." + ], + "usage_role": "target" + } } } } diff --git a/src/atomref/data/xh_bond_length.csv b/src/atomref/data/xh_bond_length.csv new file mode 100644 index 0000000..4ae4bca --- /dev/null +++ b/src/atomref/data/xh_bond_length.csv @@ -0,0 +1,119 @@ +z,csd_legacy_xh_cno +1, +2, +3, +4, +5, +6,1.089 +7,1.015 +8,0.993 +9, +10, +11, +12, +13, +14, +15, +16, +17, +18, +19, +20, +21, +22, +23, +24, +25, +26, +27, +28, +29, +30, +31, +32, +33, +34, +35, +36, +37, +38, +39, +40, +41, +42, +43, +44, +45, +46, +47, +48, +49, +50, +51, +52, +53, +54, +55, +56, +57, +58, +59, +60, +61, +62, +63, +64, +65, +66, +67, +68, +69, +70, +71, +72, +73, +74, +75, +76, +77, +78, +79, +80, +81, +82, +83, +84, +85, +86, +87, +88, +89, +90, +91, +92, +93, +94, +95, +96, +97, +98, +99, +100, +101, +102, +103, +104, +105, +106, +107, +108, +109, +110, +111, +112, +113, +114, +115, +116, +117, +118, diff --git a/src/atomref/policy.py b/src/atomref/policy.py index 36741fe..235bbe3 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -6,9 +6,14 @@ from dataclasses import dataclass, field from functools import lru_cache import math +from types import MappingProxyType from typing import Generic, Literal, TypeVar -from .elements import canonicalize_element_symbol, is_valid_element_symbol +from .elements import ( + canonicalize_element_symbol, + is_valid_element_symbol, + iter_elements, +) from .errors import PolicyError from .registry import ( DatasetLike, @@ -18,7 +23,13 @@ get_builtin_set, resolve_dataset_like, ) -from .transfer import LinearFit, LinearTransfer, SubstitutionTransfer, TransferModel +from .transfer import ( + LinearFit, + LinearTransfer, + SubstitutionTransfer, + SupportsValuePolicy, + TransferModel, +) K = TypeVar("K") @@ -31,6 +42,8 @@ "missing", ] +_ACTIVE_POLICY_IDS: list[int] = [] + @dataclass(frozen=True, slots=True) class LookupResult: @@ -58,12 +71,114 @@ def __float__(self) -> float: 
@dataclass(frozen=True, slots=True) class ValuePolicy(Generic[K]): - """Ordered rule set for resolving element-domain scalar values.""" + """Ordered rule set for resolving element-domain scalar values. + + The v0.1 runtime resolves only element-domain policies even though the + metadata layer already records a more general ``domain`` concept. During + construction, element-domain override keys are normalized to canonical + element symbols and validated as finite floats. + """ base: DatasetLike transfers: tuple[TransferModel, ...] = () overrides: Mapping[K, float] = field(default_factory=dict) fallback: float | None = None + blocked: tuple[str, ...] = () + + def __post_init__(self) -> None: + """Validate and normalize policy configuration eagerly.""" + + if self.fallback is not None: + object.__setattr__( + self, + "fallback", + _coerce_policy_float(self.fallback, what="policy fallback"), + ) + + base_set = resolve_dataset_like(self.base) + if base_set.info.domain != "element": + return + + normalized_blocked: list[str] = [] + seen_blocked: set[str] = set() + for key in self.blocked: + if not isinstance(key, str): + raise PolicyError( + "element-domain blocked keys must be element-symbol strings" + ) + sym = _normalize_element_symbol(key) + if sym is None: + raise PolicyError(f"invalid blocked element symbol: {key!r}") + if sym not in seen_blocked: + normalized_blocked.append(sym) + seen_blocked.add(sym) + object.__setattr__(self, "blocked", tuple(normalized_blocked)) + + normalized_overrides: dict[str, float] = {} + seen_original_keys: dict[str, str] = {} + for key, value in self.overrides.items(): + if not isinstance(key, str): + raise PolicyError( + "element-domain policy overrides must be keyed by element symbols" + ) + sym = _normalize_element_symbol(key) + if sym is None: + raise PolicyError(f"invalid override element symbol: {key!r}") + if sym in seen_blocked: + raise PolicyError(f"override key {key!r} is blocked in this policy") + previous = seen_original_keys.get(sym) + if previous is not None and previous != key: + raise PolicyError( + f"override keys {previous!r} and {key!r} both normalize to {sym!r}" + ) + seen_original_keys[sym] = key + normalized_overrides[sym] = _coerce_policy_float( + value, + what=f"override value for {key!r}", + ) + + object.__setattr__( + self, + "overrides", + MappingProxyType(normalized_overrides), + ) + + +@dataclass(frozen=True, slots=True) +class _ResolvedElementSource: + """Internal representation of an element-domain transfer source.""" + + ref: DatasetRef + values_by_z: tuple[float | None, ...] + placeholder_by_z: tuple[bool, ...] + via_policy: bool = False + + +@dataclass(frozen=True, slots=True) +class _TransferSourceValue: + """Internal representation of one value obtained from a transfer source.""" + + value: float + ref: DatasetRef + resolved_from: tuple[DatasetRef, ...] + is_placeholder: bool + via_policy: bool = False + lookup_source: LookupSource | None = None + notes: tuple[str, ...] 
= () + + + +def _coerce_policy_float(value: object, *, what: str) -> float: + """Return a finite float for policy configuration values.""" + + try: + out = float(value) + except (TypeError, ValueError) as exc: + raise PolicyError(f"{what} must be a finite float") from exc + if not math.isfinite(out): + raise PolicyError(f"{what} must be a finite float") + return out + def _normalize_element_symbol(symbol: str | None) -> str | None: @@ -82,35 +197,139 @@ def _normalize_element_symbol(symbol: str | None) -> str | None: return cand + def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: """Return the target dataset reference implied by a policy base.""" return resolve_dataset_like(policy.base).ref + +def _coerce_nested_policy(source: object) -> ValuePolicy[str] | None: + """Return ``source`` as a generic value policy when possible.""" + + if isinstance(source, ValuePolicy): + return source + if isinstance(source, SupportsValuePolicy): + nested = source.as_value_policy() + if not isinstance(nested, ValuePolicy): + raise PolicyError("policy-like transfer sources must return ValuePolicy") + return nested + return None + + + +def _materialize_transfer_source( + source: DatasetLike | SupportsValuePolicy | ValuePolicy[str], +) -> _ResolvedElementSource: + """Materialize any element-domain transfer source into dense by-Z arrays.""" + + nested_policy = _coerce_nested_policy(source) + if nested_policy is None: + dataset = resolve_dataset_like(source) + placeholders = tuple( + False + if value is None + else _is_placeholder_value(dataset.info, float(value)) + for value in dataset.values_by_z + ) + return _ResolvedElementSource( + ref=dataset.ref, + values_by_z=dataset.values_by_z, + placeholder_by_z=placeholders, + via_policy=False, + ) + + target = _resolve_target_ref(nested_policy) + n_z = max(elem.z for elem in iter_elements()) + values: list[float | None] = [None] * (n_z + 1) + placeholders: list[bool] = [False] * (n_z + 1) + for elem in iter_elements(): + lookup = lookup_value(elem.symbol, policy=nested_policy) + values[elem.z] = lookup.value + placeholders[elem.z] = lookup.is_placeholder if lookup.value is not None else False + return _ResolvedElementSource( + ref=target, + values_by_z=tuple(values), + placeholder_by_z=tuple(placeholders), + via_policy=True, + ) + + + +def _lookup_transfer_source_value( + symbol: str, + source: DatasetLike | SupportsValuePolicy | ValuePolicy[str], +) -> tuple[_TransferSourceValue | None, str | None]: + """Resolve one element value from a transfer source or nested policy.""" + + nested_policy = _coerce_nested_policy(source) + if nested_policy is None: + source_set = resolve_dataset_like(source) + value = source_set.get(symbol) + if value is None: + return None, f"no value in {source_set.ref.set_id}" + value_f = float(value) + return ( + _TransferSourceValue( + value=value_f, + ref=source_set.ref, + resolved_from=(source_set.ref,), + is_placeholder=_is_placeholder_value(source_set.info, value_f), + via_policy=False, + lookup_source="base", + notes=(), + ), + None, + ) + + lookup = lookup_value(symbol, policy=nested_policy) + if lookup.value is None: + if lookup.notes: + return ( + None, + "policy source returned no value: " + "; ".join(lookup.notes), + ) + return None, "policy source returned no value" + + return ( + _TransferSourceValue( + value=float(lookup.value), + ref=_resolve_target_ref(nested_policy), + resolved_from=lookup.resolved_from, + is_placeholder=lookup.is_placeholder, + via_policy=True, + lookup_source=lookup.source, + 
notes=lookup.notes, + ), + None, + ) + + + def _fit_linear_transfer( base_set: ElementScalarSet, - predictor_set: ElementScalarSet, + predictor_source: _ResolvedElementSource, *, min_points: int, exclude_placeholders: bool, ) -> LinearFit: - """Fit a one-predictor linear transfer model between two datasets.""" + """Fit a one-predictor linear transfer model between two sources.""" xs: list[float] = [] ys: list[float] = [] - n_z = min(len(base_set.values_by_z), len(predictor_set.values_by_z)) + n_z = min(len(base_set.values_by_z), len(predictor_source.values_by_z)) for z in range(1, n_z): y = base_set.values_by_z[z] - x = predictor_set.values_by_z[z] + x = predictor_source.values_by_z[z] if y is None or x is None: continue y_f = float(y) x_f = float(x) if exclude_placeholders and ( _is_placeholder_value(base_set.info, y_f) - or _is_placeholder_value(predictor_set.info, x_f) + or predictor_source.placeholder_by_z[z] ): continue xs.append(x_f) @@ -156,19 +375,20 @@ def _fit_linear_transfer_cached( return _fit_linear_transfer( get_builtin_set(base_ref), - get_builtin_set(predictor_ref), + _materialize_transfer_source(predictor_ref), min_points=min_points, exclude_placeholders=exclude_placeholders, ) + def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit | None: """Return the fit object for a transfer model when it needs one.""" if not isinstance(transfer, LinearTransfer): return None if len(transfer.predictors) != 1: - raise PolicyError("v0.1 LinearTransfer supports exactly one predictor dataset") + raise PolicyError("v0.1 LinearTransfer supports exactly one predictor source") predictor = transfer.predictors[0] if isinstance(base, DatasetRef) and isinstance(predictor, DatasetRef): @@ -180,38 +400,50 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit ) return _fit_linear_transfer( resolve_dataset_like(base), - resolve_dataset_like(predictor), + _materialize_transfer_source(predictor), min_points=transfer.min_points, exclude_placeholders=transfer.exclude_placeholders, ) + def _apply_substitution_transfer( symbol: str, *, target: DatasetRef, transfer: SubstitutionTransfer, ) -> tuple[LookupResult | None, str | None]: - """Try to resolve ``symbol`` by direct substitution from another dataset.""" - - source_set = resolve_dataset_like(transfer.source) - value = source_set.get(symbol) - if value is None: - return None, f"no substitution value in {source_set.ref.set_id}" - value_f = float(value) + """Try to resolve ``symbol`` by direct substitution from another source.""" + + source_value, note = _lookup_transfer_source_value(symbol, transfer.source) + if source_value is None: + return None, note + + notes = [ + "missing in base set; substituted from policy source" + if source_value.via_policy + else "missing in base set; substituted from transfer source" + ] + if source_value.via_policy and source_value.lookup_source not in (None, "base"): + notes.append( + f"policy source resolved the value via {source_value.lookup_source}" + ) + if source_value.is_placeholder: + notes.append("transfer source value is marked as a placeholder") return ( LookupResult( - value=value_f, + value=source_value.value, source="transfer_substitution", target=target, - resolved_from=(source_set.ref,), - is_placeholder=_is_placeholder_value(source_set.info, value_f), - notes=("missing in base set; substituted from transfer source",), + resolved_from=source_value.resolved_from, + is_placeholder=source_value.is_placeholder, + notes=tuple(notes), ), None, ) + def 
_apply_linear_transfer( symbol: str, *, @@ -222,105 +454,154 @@ def _apply_linear_transfer( """Try to resolve ``symbol`` through linear transfer from predictor data.""" if len(transfer.predictors) != 1: - raise PolicyError("v0.1 LinearTransfer supports exactly one predictor dataset") + raise PolicyError("v0.1 LinearTransfer supports exactly one predictor source") - predictor_set = resolve_dataset_like(transfer.predictors[0]) - predictor_value = predictor_set.get(symbol) + predictor_value, note = _lookup_transfer_source_value(symbol, transfer.predictors[0]) if predictor_value is None: - return None, f"no predictor value in {predictor_set.ref.set_id}" - predictor_f = float(predictor_value) + return None, note - if transfer.exclude_placeholders and _is_placeholder_value( - predictor_set.info, - predictor_f, - ): - return None, f"predictor value in {predictor_set.ref.set_id} is a placeholder" + if transfer.exclude_placeholders and predictor_value.is_placeholder: + if predictor_value.via_policy: + return None, "predictor value from policy source is a placeholder" + return None, f"predictor value in {predictor_value.ref.set_id} is a placeholder" fit = _fit_transfer_model(base, transfer) if fit is None: return None, "no fit available for linear transfer" - predicted = fit.coefficients[0] * predictor_f + fit.intercept + predicted = fit.coefficients[0] * predictor_value.value + fit.intercept + + notes = ["missing in base set; inferred via linear transfer"] + if predictor_value.via_policy: + notes.append("predictor value supplied by policy source") + notes.append("linear fit used policy-materialized predictor values") + if predictor_value.lookup_source not in (None, "base"): + notes.append( + f"policy predictor resolved the value via {predictor_value.lookup_source}" + ) + return ( LookupResult( value=float(predicted), source="transfer_linear", target=target, - resolved_from=(predictor_set.ref,), + resolved_from=predictor_value.resolved_from, is_placeholder=False, fit=fit, - notes=("missing in base set; inferred via linear transfer",), + notes=tuple(notes), ), None, ) + def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: """Resolve a value through override, base, transfer, and fallback steps.""" - target = _resolve_target_ref(policy) - base_set = resolve_dataset_like(policy.base) - if base_set.info.domain != "element": - raise PolicyError("v0.1 resolver supports only element-domain datasets") - - sym = _normalize_element_symbol(symbol) - if sym is None: - note = "unknown element" if symbol is not None else "missing element symbol" - return LookupResult(value=None, source="missing", target=target, notes=(note,)) - - if sym in policy.overrides: - return LookupResult( - value=float(policy.overrides[sym]), - source="override", - target=target, - notes=("value supplied by policy override",), - ) - - base_value = base_set.get(sym) - if base_value is not None: - base_f = float(base_value) - return LookupResult( - value=base_f, - source="base", - target=target, - resolved_from=(base_set.ref,), - is_placeholder=_is_placeholder_value(base_set.info, base_f), - notes=(), - ) + policy_id = id(policy) + if policy_id in _ACTIVE_POLICY_IDS: + raise PolicyError("cyclic policy resolution detected") + + _ACTIVE_POLICY_IDS.append(policy_id) + try: + target = _resolve_target_ref(policy) + base_set = resolve_dataset_like(policy.base) + if base_set.info.domain != "element": + raise PolicyError("v0.1 resolver supports only element-domain datasets") + + sym = _normalize_element_symbol(symbol) 
+ if sym is None: + note = "unknown element" if symbol is not None else "missing element symbol" + return LookupResult(value=None, source="missing", target=target, notes=(note,)) + + if sym in policy.blocked: + return LookupResult( + value=None, + source="missing", + target=target, + notes=(f"{sym} is blocked by this policy",), + ) - transfer_notes: list[str] = ["missing in base set"] - for transfer in policy.transfers: - if isinstance(transfer, SubstitutionTransfer): - result, note = _apply_substitution_transfer( - sym, + if sym in policy.overrides: + return LookupResult( + value=float(policy.overrides[sym]), + source="override", target=target, - transfer=transfer, + notes=("value supplied by policy override",), + ) + + base_value = base_set.get(sym) + if base_value is not None: + base_f = float(base_value) + is_placeholder = _is_placeholder_value(base_set.info, base_f) + notes = ( + ("base dataset value is marked as a placeholder",) + if is_placeholder + else () ) - elif isinstance(transfer, LinearTransfer): - result, note = _apply_linear_transfer( - sym, - base=policy.base, + return LookupResult( + value=base_f, + source="base", target=target, - transfer=transfer, + resolved_from=(base_set.ref,), + is_placeholder=is_placeholder, + notes=notes, ) - else: # pragma: no cover - closed union today - raise PolicyError(f"unsupported transfer model: {type(transfer)!r}") - if result is not None: - return result - if note: - transfer_notes.append(note) + transfer_notes: list[str] = ["missing in base set"] + for transfer in policy.transfers: + if isinstance(transfer, SubstitutionTransfer): + result, note = _apply_substitution_transfer( + sym, + target=target, + transfer=transfer, + ) + elif isinstance(transfer, LinearTransfer): + result, note = _apply_linear_transfer( + sym, + base=policy.base, + target=target, + transfer=transfer, + ) + else: # pragma: no cover - closed union today + raise PolicyError(f"unsupported transfer model: {type(transfer)!r}") + + if result is not None: + return result + if note: + transfer_notes.append(note) + + if policy.fallback is not None: + return LookupResult( + value=float(policy.fallback), + source="fallback", + target=target, + notes=tuple(transfer_notes + ["using fallback value"]), + ) - if policy.fallback is not None: return LookupResult( - value=float(policy.fallback), - source="fallback", + value=None, + source="missing", target=target, - notes=tuple(transfer_notes + ["using fallback value"]), + notes=tuple(transfer_notes), ) + finally: + popped = _ACTIVE_POLICY_IDS.pop() + assert popped == policy_id # internal stack discipline - return LookupResult( - value=None, - source="missing", - target=target, - notes=tuple(transfer_notes), - ) + + +def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: + """Public entry point for generic element-domain scalar lookup. + + This is the same resolver used internally by the radii convenience layer. + In v0.1 the runtime supports only element-domain policies. 
+ """ + + return _resolve_value(symbol, policy=policy) + + + +def get_value(symbol: str | None, *, policy: ValuePolicy[str]) -> float | None: + """Return only the resolved scalar value for an element-domain policy.""" + + return lookup_value(symbol, policy=policy).value diff --git a/src/atomref/radii.py b/src/atomref/radii.py index 01f13f4..de7ff36 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -4,11 +4,18 @@ from collections.abc import Iterable, Mapping from dataclasses import dataclass, field +import math from typing import Literal from .elements import canonicalize_element_symbol, get_element, is_valid_element_symbol from .errors import PolicyError -from .policy import LookupResult, ValuePolicy, _fit_transfer_model, _resolve_value +from .policy import ( + LookupResult, + ValuePolicy, + _fit_transfer_model, + get_value, + lookup_value, +) from .registry import ( DatasetInfo, DatasetRef, @@ -59,18 +66,27 @@ def as_value_policy(self) -> ValuePolicy[str]: else: base = DatasetRef(quantity, self.base_set) - normalized_overrides: dict[str, float] = {} - for key, value in self.overrides.items(): - sym = _normalize_radii_symbol(key) - if sym is None or not is_valid_element_symbol(sym): - raise PolicyError(f"invalid override element symbol: {key!r}") - normalized_overrides[sym] = float(value) + checked_overrides = { + key: _coerce_non_negative_radii_value( + value, + what=f"radii override value for {key!r}", + ) + for key, value in self.overrides.items() + } + checked_fallback = ( + None + if self.fallback is None + else _coerce_non_negative_radii_value( + self.fallback, + what="radii fallback", + ) + ) return ValuePolicy( base=base, transfers=self.transfers, - overrides=normalized_overrides, - fallback=self.fallback, + overrides=checked_overrides, + fallback=checked_fallback, ) @@ -107,6 +123,26 @@ class RadiiPolicyAssessment: per_element: tuple[RadiiElementAssessment, ...] = () + +def _coerce_non_negative_radii_value(value: object, *, what: str) -> float: + """Validate a radii-like policy number. + + The generic :class:`atomref.policy.ValuePolicy` accepts any finite scalar. + Radii-specific convenience helpers are stricter and reject negative values. 
+ """ + + try: + out = float(value) + except (TypeError, ValueError) as exc: + raise PolicyError(f"{what} must be a finite float") from exc + if not math.isfinite(out): + raise PolicyError(f"{what} must be a finite float") + if out < 0: + raise PolicyError(f"{what} must be non-negative") + return out + + + def _quantity_for_kind(kind: RadiiKind) -> str: """Translate public radii kind names into registry quantity ids.""" @@ -116,6 +152,7 @@ def _quantity_for_kind(kind: RadiiKind) -> str: raise PolicyError(f"unknown radii kind: {kind!r}") from exc + def _normalize_radii_symbol(symbol: str | None) -> str | None: """Normalize symbols accepted by the radii convenience layer.""" @@ -125,6 +162,7 @@ def _normalize_radii_symbol(symbol: str | None) -> str | None: return cand + def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: """Normalize, validate, deduplicate, and sort assessment element labels.""" @@ -141,6 +179,7 @@ def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: ) + def list_radii_sets( kind: RadiiKind, *, @@ -151,6 +190,7 @@ def list_radii_sets( return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) + def list_radii_set_infos( kind: RadiiKind, *, @@ -161,18 +201,21 @@ def list_radii_set_infos( return list_dataset_infos(_quantity_for_kind(kind), usage_role=usage_role) + def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: """Return metadata for one packaged radii set.""" return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) + def get_radii_set(kind: RadiiKind, set_id: str) -> RadiiSet: """Load one packaged radii set as an :class:`ElementScalarSet`.""" return get_builtin_set(DatasetRef(_quantity_for_kind(kind), set_id)) + def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: """Raise when a policy is used with the wrong public radii helper.""" @@ -180,10 +223,12 @@ def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: raise PolicyError(f"expected a {expected!r} radii policy, got {policy.kind!r}") + def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: """Shared implementation for radii lookup helpers.""" - return _resolve_value(symbol, policy=policy.as_value_policy()) + return lookup_value(symbol, policy=policy.as_value_policy()) + def lookup_covalent_radius( @@ -198,6 +243,7 @@ def lookup_covalent_radius( return _lookup_radius(symbol, policy=active) + def get_covalent_radius( symbol: str | None, *, @@ -205,7 +251,10 @@ def get_covalent_radius( ) -> float | None: """Return only the selected covalent-radius value, without provenance.""" - return lookup_covalent_radius(symbol, policy=policy).value + active = DEFAULT_COVALENT_POLICY if policy is None else policy + _validate_policy_kind(active, expected="covalent") + return get_value(symbol, policy=active.as_value_policy()) + def lookup_vdw_radius( @@ -220,14 +269,18 @@ def lookup_vdw_radius( return _lookup_radius(symbol, policy=active) + def get_vdw_radius( symbol: str | None, *, policy: RadiiPolicy | None = None, ) -> float | None: - """Return only the selected van der Waals radius, without provenance.""" + """Return only the selected van der Waals-radius value, without provenance.""" + + active = DEFAULT_VDW_POLICY if policy is None else policy + _validate_policy_kind(active, expected="van_der_waals") + return get_value(symbol, policy=active.as_value_policy()) - return lookup_vdw_radius(symbol, policy=policy).value def assess_radii_policy( @@ -254,7 +307,7 @@ 
def assess_radii_policy( per_element: list[RadiiElementAssessment] = [] for symbol in elems: - lookup = _resolve_value(symbol, policy=value_policy) + lookup = lookup_value(symbol, policy=value_policy) if lookup.source == "override": n_override += 1 elif lookup.source == "base": diff --git a/src/atomref/registry.py b/src/atomref/registry.py index 594e98e..b9f2730 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -8,6 +8,9 @@ from functools import lru_cache from importlib import resources import json +import math +from types import MappingProxyType +import unicodedata from .elements import canonicalize_element_symbol, get_element, iter_elements from .errors import DatasetError @@ -121,21 +124,44 @@ def from_mapping( n_z = max(e.z for e in iter_elements()) values_by_z: list[float | None] = [None] * (n_z + 1) + seen_keys: dict[str, str] = {} + + placeholder_f = ( + None + if placeholder_value is None + else _coerce_finite_float( + placeholder_value, + what=f"placeholder value for custom dataset {ref.set_id!r}", + ) + ) for key, value in values.items(): sym = _normalize_element_domain_symbol(key) elem = get_element(sym) if elem is None: raise DatasetError(f"invalid element symbol in custom set: {key!r}") - values_by_z[elem.z] = None if value is None else float(value) + previous = seen_keys.get(sym) + if previous is not None and previous != key: + raise DatasetError( + f"custom-set keys {previous!r} and {key!r} both normalize to {sym!r}" + ) + seen_keys[sym] = key + values_by_z[elem.z] = ( + None + if value is None + else _coerce_finite_float( + value, + what=f"value for element {key!r} in custom dataset {ref.set_id!r}", + ) + ) covered_z = tuple( z for z, value in enumerate(values_by_z) if z > 0 and value is not None ) has_placeholders = False - if placeholder_value is not None: + if placeholder_f is not None: has_placeholders = any( - value is not None and abs(value - placeholder_value) < 1e-12 + value is not None and abs(value - placeholder_f) < 1e-12 for value in values_by_z[1:] ) @@ -149,7 +175,7 @@ def from_mapping( semantic_class=semantic_class, origin_class=origin_class, phase_context=phase_context, - placeholder_value=placeholder_value, + placeholder_value=placeholder_f, aliases=(), references=tuple(references), notes=tuple(notes), @@ -178,6 +204,19 @@ def get(self, symbol: str | None) -> float | None: DatasetLike = DatasetRef | ElementScalarSet +_DASH_TRANSLATION = str.maketrans( + { + "‐": "-", + "‑": "-", + "‒": "-", + "–": "-", + "—": "-", + "―": "-", + "−": "-", + } +) + + def _normalize_element_domain_symbol(symbol: str | None) -> str | None: """Normalize element-domain symbols and fold D/T onto hydrogen.""" @@ -199,6 +238,35 @@ def _load_registry_json() -> dict[str, object]: return data +def _freeze_json_like(value: object) -> object: + """Recursively freeze JSON-like metadata structures. + + Registry metadata is cached globally. Returning raw dicts or lists from that + cache would let callers mutate shared package state through the metadata + objects returned by :func:`get_dataset_info`. 
+ """ + + if isinstance(value, dict): + frozen = {str(key): _freeze_json_like(item) for key, item in value.items()} + return MappingProxyType(frozen) + if isinstance(value, list): + return tuple(_freeze_json_like(item) for item in value) + return value + + +def _coerce_finite_float(value: object, *, what: str) -> float: + """Return ``value`` as a finite float or raise :class:`DatasetError`.""" + + try: + out = float(value) + except (TypeError, ValueError) as exc: + raise DatasetError(f"{what} must be a finite float") from exc + if not math.isfinite(out): + raise DatasetError(f"{what} must be a finite float") + return out + + + def _get_quantities_mapping() -> Mapping[str, object]: """Return the raw ``quantities`` mapping from ``registry.json``.""" @@ -208,6 +276,7 @@ def _get_quantities_mapping() -> Mapping[str, object]: return quantities + def _get_datasets_mapping() -> Mapping[str, object]: """Return the raw ``datasets`` mapping from ``registry.json``.""" @@ -217,6 +286,7 @@ def _get_datasets_mapping() -> Mapping[str, object]: return datasets + def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: """Return the dataset table for one quantity or raise on unknown input.""" @@ -226,12 +296,14 @@ def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: return datasets + def list_quantities() -> tuple[str, ...]: """List packaged quantity identifiers in registry order.""" return tuple(_get_quantities_mapping().keys()) + def get_quantity_info(quantity: QuantityId) -> QuantityInfo: """Return quantity-level metadata for a packaged quantity.""" @@ -253,10 +325,14 @@ def get_quantity_info(quantity: QuantityId) -> QuantityInfo: ) + def _canonicalize_alias_token(value: str) -> str: """Normalize a dataset id or alias for case-insensitive comparison.""" - return " ".join(value.strip().lower().split()) + normalized = unicodedata.normalize("NFKC", value) + normalized = normalized.translate(_DASH_TRANSLATION) + return " ".join(normalized.strip().lower().split()) + def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: @@ -282,6 +358,7 @@ def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: raise DatasetError(f"unknown dataset id for {quantity!r}: {set_id!r}") + def list_dataset_ids( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[str, ...]: @@ -305,6 +382,7 @@ def list_dataset_ids( return tuple(filtered) + def list_dataset_infos( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[DatasetInfo, ...]: @@ -316,6 +394,7 @@ def list_dataset_infos( ) + def _coerce_reference(obj: object) -> Reference: """Coerce a raw registry reference entry into :class:`Reference`.""" @@ -335,6 +414,7 @@ def _coerce_reference(obj: object) -> Reference: ) + def _coerce_coverage(obj: object) -> CoverageInfo | None: """Coerce raw coverage metadata into :class:`CoverageInfo`.""" @@ -354,6 +434,7 @@ def _coerce_coverage(obj: object) -> CoverageInfo | None: ) + def get_dataset_info(ref: DatasetRef) -> DatasetInfo: """Return curated metadata for a packaged dataset reference.""" @@ -401,7 +482,9 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: else () ) storage = ( - raw_entry.get("storage") if isinstance(raw_entry.get("storage"), dict) else None + _freeze_json_like(raw_entry.get("storage")) + if isinstance(raw_entry.get("storage"), dict) + else None ) return DatasetInfo( @@ -444,7 +527,10 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: else None ), placeholder_value=( - float(raw_entry["placeholder_value"]) + _coerce_finite_float( + 
raw_entry["placeholder_value"], + what=f"placeholder value for packaged dataset {actual_ref!r}", + ) if raw_entry.get("placeholder_value") is not None else None ), @@ -456,7 +542,7 @@ def get_dataset_info(ref: DatasetRef) -> DatasetInfo: aliases=aliases, references=references, notes=notes, - storage=storage, + storage=storage if isinstance(storage, Mapping) else None, coverage=_coerce_coverage(raw_entry.get("coverage")), ) @@ -483,7 +569,14 @@ def _load_csv_columns(filename: str) -> dict[str, tuple[float | None, ...]]: values[name][z] = None continue raw = raw.strip() - values[name][z] = float(raw) if raw else None + values[name][z] = ( + _coerce_finite_float( + raw, + what=f"value in {filename!r} column {name!r} for Z={z}", + ) + if raw + else None + ) return {name: tuple(vals) for name, vals in values.items()} @@ -511,6 +604,7 @@ def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: return ElementScalarSet(ref=info.ref, info=info, values_by_z=table[column]) + def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: """Resolve either a packaged reference or a custom set to a loaded set.""" @@ -519,6 +613,7 @@ def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: return get_builtin_set(dataset) + def _is_placeholder_value(info: DatasetInfo, value: float) -> bool: """Return ``True`` when ``value`` equals the dataset's placeholder value.""" diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py index 14362db..9e071db 100644 --- a/src/atomref/transfer.py +++ b/src/atomref/transfer.py @@ -3,9 +3,22 @@ from __future__ import annotations from dataclasses import dataclass +from typing import TYPE_CHECKING, Protocol, runtime_checkable +from .errors import PolicyError from .registry import DatasetLike +if TYPE_CHECKING: # pragma: no cover - typing only + from .policy import ValuePolicy + + +@runtime_checkable +class SupportsValuePolicy(Protocol): + """Protocol for wrapper objects that can expose a generic value policy.""" + + def as_value_policy(self) -> "ValuePolicy[str]": + """Return the generic element-domain value policy.""" + @dataclass(frozen=True, slots=True) class LinearFit: @@ -25,24 +38,32 @@ class LinearFit: @dataclass(frozen=True, slots=True) class SubstitutionTransfer: - """Use another dataset directly when the base dataset is missing a value.""" + """Use another dataset or policy directly when the base dataset is missing a value.""" - source: DatasetLike + source: DatasetLike | SupportsValuePolicy | ValuePolicy[str] @dataclass(frozen=True, slots=True) class LinearTransfer: - """Infer missing target values from one or more predictor datasets. + """Infer missing target values from one or more predictor datasets or policies. In v0.1 the public API stores predictors as a tuple for forward compatibility, but the runtime implementation intentionally accepts exactly - one predictor dataset. + one predictor source. """ - predictors: tuple[DatasetLike, ...] + predictors: tuple[DatasetLike | SupportsValuePolicy | ValuePolicy[str], ...] 
min_points: int = 2 exclude_placeholders: bool = True + def __post_init__(self) -> None: + """Validate obvious configuration errors eagerly.""" + + if not self.predictors: + raise PolicyError("LinearTransfer requires at least one predictor") + if self.min_points < 2: + raise PolicyError("LinearTransfer min_points must be at least 2") + TransferModel = SubstitutionTransfer | LinearTransfer """Closed union of transfer models supported by the core resolver.""" diff --git a/src/atomref/xh.py b/src/atomref/xh.py new file mode 100644 index 0000000..c2a87be --- /dev/null +++ b/src/atomref/xh.py @@ -0,0 +1,168 @@ +"""X-H bond-length helpers built on the generic policy core.""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field +import math + +from .elements import canonicalize_element_symbol, is_valid_element_symbol +from .errors import PolicyError +from .policy import LookupResult, ValuePolicy, get_value, lookup_value +from .registry import DatasetInfo, DatasetRef, ElementScalarSet, get_builtin_set, get_dataset_info, list_dataset_ids, list_dataset_infos +from .transfer import LinearTransfer, TransferModel + +XHSet = ElementScalarSet + +_QUANTITY = "xh_bond_length" + + +@dataclass(frozen=True, slots=True) +class XHPolicy: + """Policy wrapper specialized for parent-element X-H bond lengths. + + The quantity key is fixed to ``"xh_bond_length"`` and uses the parent + element ``X`` as the lookup key. ``H`` itself is not considered a valid + parent element for this quantity. + """ + + base_set: str | XHSet + transfers: tuple[TransferModel, ...] = () + overrides: Mapping[str, float] = field(default_factory=dict) + fallback: float | None = None + + def as_value_policy(self) -> ValuePolicy[str]: + """Convert the X-H policy into the generic scalar-value policy.""" + + if isinstance(self.base_set, ElementScalarSet): + if self.base_set.ref.quantity != _QUANTITY: + raise PolicyError( + f"base_set quantity {self.base_set.ref.quantity!r} is incompatible with X-H lookup" + ) + base = self.base_set + else: + base = DatasetRef(_QUANTITY, self.base_set) + + checked_overrides: dict[str, float] = {} + for key, value in self.overrides.items(): + sym = _normalize_xh_symbol(key) + if sym is None or not is_valid_element_symbol(sym): + raise PolicyError(f"invalid X-H parent element symbol: {key!r}") + if sym == "H": + raise PolicyError("H is not a valid parent element for xh_bond_length") + checked_overrides[key] = _coerce_non_negative_xh_value( + value, + what=f"X-H override value for {key!r}", + ) + + checked_fallback = ( + None + if self.fallback is None + else _coerce_non_negative_xh_value(self.fallback, what="X-H fallback") + ) + + return ValuePolicy( + base=base, + transfers=self.transfers, + overrides=checked_overrides, + fallback=checked_fallback, + blocked=("H",), + ) + + + +def _coerce_non_negative_xh_value(value: object, *, what: str) -> float: + """Validate an X-H-like policy number.""" + + try: + out = float(value) + except (TypeError, ValueError) as exc: + raise PolicyError(f"{what} must be a finite float") from exc + if not math.isfinite(out): + raise PolicyError(f"{what} must be a finite float") + if out < 0: + raise PolicyError(f"{what} must be non-negative") + return out + + + +def _normalize_xh_symbol(symbol: str | None) -> str | None: + """Normalize symbols accepted by the X-H convenience layer.""" + + cand = canonicalize_element_symbol(symbol) + if cand in {"D", "T"}: + cand = "H" + return cand + + + +def list_xh_sets(*, usage_role: 
str | None = None) -> tuple[str, ...]: + """List packaged X-H set ids.""" + + return list_dataset_ids(_QUANTITY, usage_role=usage_role) + + + +def list_xh_set_infos(*, usage_role: str | None = None) -> tuple[DatasetInfo, ...]: + """Return packaged metadata objects for X-H sets.""" + + return list_dataset_infos(_QUANTITY, usage_role=usage_role) + + + +def get_xh_set_info(set_id: str) -> DatasetInfo: + """Return metadata for one packaged X-H set.""" + + return get_dataset_info(DatasetRef(_QUANTITY, set_id)) + + + +def get_xh_set(set_id: str) -> XHSet: + """Load one packaged X-H set as an :class:`ElementScalarSet`.""" + + return get_builtin_set(DatasetRef(_QUANTITY, set_id)) + + + +def lookup_xh_bond_length( + symbol: str | None, + *, + policy: XHPolicy | None = None, +) -> LookupResult: + """Resolve a parent-element X-H bond length with provenance.""" + + active = DEFAULT_XH_POLICY if policy is None else policy + lookup = lookup_value(symbol, policy=active.as_value_policy()) + if lookup.value is None and _normalize_xh_symbol(symbol) == "H": + return LookupResult( + value=None, + source="missing", + target=lookup.target, + notes=("H is not a valid parent element for xh_bond_length",), + ) + return lookup + + + +def get_xh_bond_length( + symbol: str | None, + *, + policy: XHPolicy | None = None, +) -> float | None: + """Return only the selected X-H bond-length value, without provenance.""" + + active = DEFAULT_XH_POLICY if policy is None else policy + return get_value(symbol, policy=active.as_value_policy()) + + +DEFAULT_XH_POLICY = XHPolicy( + base_set="csd_legacy_xh_cno", + transfers=( + LinearTransfer( + predictors=(DatasetRef("covalent_radius", "cordero2008"),), + min_points=3, + exclude_placeholders=True, + ), + ), +) +"""Default X-H policy used by the convenience helpers.""" diff --git a/tests/meta/test_imports.py b/tests/meta/test_imports.py index 374996a..66210e7 100644 --- a/tests/meta/test_imports.py +++ b/tests/meta/test_imports.py @@ -10,6 +10,7 @@ 'atomref.transfer', 'atomref.policy', 'atomref.radii', + 'atomref.xh', ] diff --git a/tests/meta/test_package_data.py b/tests/meta/test_package_data.py index e5c393c..a9a7e61 100644 --- a/tests/meta/test_package_data.py +++ b/tests/meta/test_package_data.py @@ -11,6 +11,7 @@ def test_packaged_data_files_are_available() -> None: 'covalent.csv', 'van_der_waals.csv', 'registry.json', + 'xh_bond_length.csv', ): assert data_root.joinpath(name).is_file(), name @@ -20,6 +21,7 @@ def test_packaged_registry_keeps_atomic_support_classification() -> None: raw = json.loads(data_root.joinpath('registry.json').read_text(encoding='utf-8')) assert 'atomic_radius' in raw['datasets'] + assert 'xh_bond_length' in raw['datasets'] rahm = raw['datasets']['atomic_radius']['rahm2016'] assert rahm['usage_role'] == 'support' assert rahm['semantic_class'] == 'atomic_isodensity' diff --git a/tests/meta/test_public_api.py b/tests/meta/test_public_api.py index 8f191bf..f3583a1 100644 --- a/tests/meta/test_public_api.py +++ b/tests/meta/test_public_api.py @@ -21,6 +21,13 @@ 'lookup_covalent_radius', 'get_vdw_radius', 'lookup_vdw_radius', + 'XHPolicy', + 'DEFAULT_XH_POLICY', + 'get_xh_set', + 'get_xh_bond_length', + 'lookup_xh_bond_length', + 'list_xh_sets', + 'list_xh_set_infos', 'list_quantities', 'list_dataset_ids', 'list_dataset_infos', diff --git a/tests/meta/test_registry_integrity.py b/tests/meta/test_registry_integrity.py index 853df5c..a32b44c 100644 --- a/tests/meta/test_registry_integrity.py +++ b/tests/meta/test_registry_integrity.py @@ -4,22 +4,18 @@ from 
dataclasses import asdict import atomref as ar -from atomref.registry import get_builtin_set +from atomref.registry import _canonicalize_alias_token, get_builtin_set _ALLOWED_USAGE_ROLES = {"target", "support"} -def _canonical_token(value: str) -> str: - return " ".join(value.strip().lower().split()) - - def test_dataset_aliases_are_unique_within_each_quantity() -> None: for quantity in ar.list_quantities(): seen: dict[str, str] = {} for set_id in ar.list_dataset_ids(quantity): info = ar.get_dataset_info(ar.DatasetRef(quantity, set_id)) for token in (set_id, *info.aliases): - key = _canonical_token(token) + key = _canonicalize_alias_token(token) previous = seen.get(key) assert previous in (None, set_id) seen[key] = set_id diff --git a/tests/policy/test_policy.py b/tests/policy/test_policy.py new file mode 100644 index 0000000..3b38717 --- /dev/null +++ b/tests/policy/test_policy.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import pytest + +import atomref as ar +from atomref.errors import PolicyError + + +def test_lookup_value_is_public_generic_entry_point() -> None: + policy = ar.ValuePolicy( + base=ar.DatasetRef('covalent_radius', 'cordero2008'), + overrides={'d': 0.5}, + ) + lookup = ar.lookup_value('H', policy=policy) + assert lookup.source == 'override' + assert lookup.value == pytest.approx(0.5) + + +def test_get_value_returns_only_scalar() -> None: + policy = ar.ValuePolicy(base=ar.DatasetRef('covalent_radius', 'cordero2008')) + assert ar.get_value('C', policy=policy) == pytest.approx(0.76) + + +def test_value_policy_rejects_normalized_override_collisions() -> None: + with pytest.raises(PolicyError): + ar.ValuePolicy( + base=ar.DatasetRef('covalent_radius', 'cordero2008'), + overrides={'H': 0.31, 'D': 0.4}, + ) + + +def test_value_policy_rejects_non_finite_fallback() -> None: + with pytest.raises(PolicyError): + ar.ValuePolicy( + base=ar.DatasetRef('covalent_radius', 'cordero2008'), + fallback=float('nan'), + ) + + +def test_substitution_transfer_accepts_policy_source() -> None: + custom = ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef('covalent_radius', 'demo_user_cov'), + values={'C': 0.77}, + name='Demo covalent set', + units='angstrom', + ) + policy = ar.ValuePolicy( + base=custom, + transfers=(ar.SubstitutionTransfer(source=ar.DEFAULT_COVALENT_POLICY),), + ) + lookup = ar.lookup_value('Bk', policy=policy) + assert lookup.source == 'transfer_substitution' + assert lookup.value == pytest.approx(1.54) + assert lookup.resolved_from == (ar.DatasetRef('covalent_radius', 'csd_legacy_cov'),) + assert any('policy source' in note for note in lookup.notes) + + +def test_linear_transfer_accepts_policy_predictor() -> None: + predictor_policy = ar.ValuePolicy(base=ar.DatasetRef('atomic_radius', 'rahm2016')) + policy = ar.RadiiPolicy( + kind='van_der_waals', + base_set='alvarez2013', + transfers=(ar.LinearTransfer(predictors=(predictor_policy,),),), + ) + lookup = ar.lookup_vdw_radius('Pm', policy=policy) + assert lookup.source == 'transfer_linear' + assert lookup.value == pytest.approx(ar.lookup_vdw_radius('Pm').value) + assert lookup.fit is not None + assert any('policy source' in note for note in lookup.notes) diff --git a/tests/radii/test_selection.py b/tests/radii/test_selection.py index e84a4f0..8977363 100644 --- a/tests/radii/test_selection.py +++ b/tests/radii/test_selection.py @@ -97,3 +97,53 @@ def test_linear_transfer_rejects_multiple_predictors_in_v0_1() -> None: ) with pytest.raises(PolicyError): ar.lookup_vdw_radius("Pm", policy=policy) + + +def 
test_base_placeholder_note_is_explicit() -> None: + policy = ar.RadiiPolicy(kind='covalent', base_set='csd_legacy_cov') + lookup = ar.lookup_covalent_radius('Es', policy=policy) + assert lookup.source == 'base' + assert lookup.is_placeholder is True + assert any('placeholder' in note for note in lookup.notes) + + +def test_substitution_placeholder_note_is_explicit() -> None: + lookup = ar.lookup_covalent_radius('Es') + assert lookup.source == 'transfer_substitution' + assert lookup.is_placeholder is True + assert any('placeholder' in note for note in lookup.notes) + + +def test_radii_policy_rejects_normalized_override_collisions() -> None: + policy = ar.RadiiPolicy( + kind='covalent', + base_set='cordero2008', + overrides={'H': 0.31, 'D': 0.4}, + ) + with pytest.raises(PolicyError): + ar.lookup_covalent_radius('H', policy=policy) + + +def test_radii_policy_rejects_non_finite_override() -> None: + policy = ar.RadiiPolicy( + kind='covalent', + base_set='cordero2008', + overrides={'C': float('nan')}, + ) + with pytest.raises(PolicyError): + ar.lookup_covalent_radius('C', policy=policy) + + +def test_radii_policy_rejects_negative_fallback() -> None: + policy = ar.RadiiPolicy( + kind='van_der_waals', + base_set='bondi1964', + fallback=-1.0, + ) + with pytest.raises(PolicyError): + ar.lookup_vdw_radius('Be', policy=policy) + + +def test_linear_transfer_validates_empty_predictors() -> None: + with pytest.raises(PolicyError): + ar.LinearTransfer(predictors=()) diff --git a/tests/registry/test_registry.py b/tests/registry/test_registry.py index 48afbae..d497d9f 100644 --- a/tests/registry/test_registry.py +++ b/tests/registry/test_registry.py @@ -1,8 +1,12 @@ from __future__ import annotations from importlib import resources +from types import MappingProxyType + +import pytest import atomref as ar +from atomref.errors import DatasetError from atomref.registry import get_builtin_set @@ -34,7 +38,12 @@ def test_builtin_set_loading_works() -> None: def test_list_quantities_and_quantity_info() -> None: quantities = ar.list_quantities() - assert quantities == ('covalent_radius', 'van_der_waals_radius', 'atomic_radius') + assert quantities == ( + 'covalent_radius', + 'van_der_waals_radius', + 'atomic_radius', + 'xh_bond_length', + ) info = ar.get_quantity_info('atomic_radius') assert info.quantity == 'atomic_radius' @@ -93,3 +102,42 @@ def test_public_radii_set_helper_returns_packaged_radii_set() -> None: assert ds.info.ref.quantity == 'van_der_waals_radius' assert ds.info.ref.set_id == 'alvarez2013' assert ds.get('O') == 1.5 + + +def test_dataset_info_storage_is_frozen() -> None: + info = ar.get_dataset_info(ar.DatasetRef('covalent_radius', 'cordero2008')) + assert isinstance(info.storage, MappingProxyType) + assert info.storage['column'] == 'cordero2008' + with pytest.raises(TypeError): + info.storage['column'] = 'broken' + + fresh = ar.get_dataset_info(ar.DatasetRef('covalent_radius', 'cordero2008')) + assert fresh.storage is not None + assert fresh.storage['column'] == 'cordero2008' + + +def test_dataset_alias_resolution_normalizes_dash_variants() -> None: + info = ar.get_dataset_info( + ar.DatasetRef('covalent_radius', 'Cordero-Alvarez covalent radii') + ) + assert info.ref.set_id == 'cordero2008' + + +def test_custom_set_rejects_normalized_key_collisions() -> None: + with pytest.raises(DatasetError): + ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef('covalent_radius', 'demo'), + values={'H': 0.31, 'D': 0.5}, + name='Demo', + units='angstrom', + ) + + +def 
test_custom_set_rejects_non_finite_values() -> None: + with pytest.raises(DatasetError): + ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef('covalent_radius', 'demo'), + values={'C': float('nan')}, + name='Demo', + units='angstrom', + ) diff --git a/tests/xh/test_xh.py b/tests/xh/test_xh.py new file mode 100644 index 0000000..3cffe15 --- /dev/null +++ b/tests/xh/test_xh.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import pytest + +import atomref as ar +from atomref.errors import PolicyError + + +def test_get_xh_bond_length_returns_curated_cno_values() -> None: + assert ar.get_xh_bond_length('C') == pytest.approx(1.089) + assert ar.get_xh_bond_length('N') == pytest.approx(1.015) + assert ar.get_xh_bond_length('O') == pytest.approx(0.993) + + +def test_lookup_xh_bond_length_infers_other_elements_from_cordero() -> None: + lookup = ar.lookup_xh_bond_length('S') + assert lookup.source == 'transfer_linear' + assert lookup.resolved_from == (ar.DatasetRef('covalent_radius', 'cordero2008'),) + assert lookup.fit is not None + assert lookup.fit.n_points == 3 + assert lookup.value == pytest.approx(1.3587333333333333) + + +def test_lookup_xh_bond_length_rejects_h_as_parent_element() -> None: + lookup = ar.lookup_xh_bond_length('H') + assert lookup.value is None + assert lookup.source == 'missing' + assert any('not a valid parent element' in note for note in lookup.notes) + + +def test_list_xh_sets_and_metadata() -> None: + assert ar.list_xh_sets() == ('csd_legacy_xh_cno',) + info = ar.get_xh_set_info('csd_legacy_xh_cno') + assert info.ref.quantity == 'xh_bond_length' + assert info.usage_role == 'target' + assert info.coverage is not None + assert info.coverage.n_values == 3 + + +def test_xh_policy_rejects_h_override_key() -> None: + policy = ar.XHPolicy(base_set='csd_legacy_xh_cno', overrides={'H': 1.0}) + with pytest.raises(PolicyError): + policy.as_value_policy() + + +def test_xh_policy_rejects_negative_fallback() -> None: + policy = ar.XHPolicy(base_set='csd_legacy_xh_cno', fallback=-1.0) + with pytest.raises(PolicyError): + policy.as_value_policy() + + +def test_xh_policy_accepts_wrapper_policy_predictor() -> None: + policy = ar.XHPolicy( + base_set='csd_legacy_xh_cno', + transfers=( + ar.LinearTransfer( + predictors=(ar.DEFAULT_COVALENT_POLICY,), + min_points=3, + exclude_placeholders=True, + ), + ), + ) + lookup = ar.lookup_xh_bond_length('Bk', policy=policy) + assert lookup.source == 'transfer_linear' + assert lookup.value == pytest.approx(1.8291333333333335) + assert lookup.resolved_from == (ar.DatasetRef('covalent_radius', 'csd_legacy_cov'),) + assert any('policy source' in note for note in lookup.notes) diff --git a/tools/check_registry.py b/tools/check_registry.py index 02b1e14..3af6025 100644 --- a/tools/check_registry.py +++ b/tools/check_registry.py @@ -28,7 +28,8 @@ def _get_builtin_set(ref): def _canonical_token(value: str) -> str: - return " ".join(value.strip().lower().split()) + registry = import_module("atomref.registry") + return registry._canonicalize_alias_token(value) def _iter_dataset_refs() -> Iterable[object]: From d593e5a5c479034a502c226e71ef45461a9335a1 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 19:55:46 +0300 Subject: [PATCH 13/15] Fixes formatting --- src/atomref/policy.py | 32 +++++++++++++++----------------- src/atomref/radii.py | 15 --------------- src/atomref/registry.py | 17 ++--------------- src/atomref/transfer.py | 5 ++++- src/atomref/xh.py | 22 ++++++++++++---------- 5 files changed, 33 insertions(+), 58 deletions(-) 
diff --git a/src/atomref/policy.py b/src/atomref/policy.py index 235bbe3..a8c8616 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -167,7 +167,6 @@ class _TransferSourceValue: notes: tuple[str, ...] = () - def _coerce_policy_float(value: object, *, what: str) -> float: """Return a finite float for policy configuration values.""" @@ -180,7 +179,6 @@ def _coerce_policy_float(value: object, *, what: str) -> float: return out - def _normalize_element_symbol(symbol: str | None) -> str | None: """Normalize user input to a packaged element symbol. @@ -197,14 +195,12 @@ def _normalize_element_symbol(symbol: str | None) -> str | None: return cand - def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: """Return the target dataset reference implied by a policy base.""" return resolve_dataset_like(policy.base).ref - def _coerce_nested_policy(source: object) -> ValuePolicy[str] | None: """Return ``source`` as a generic value policy when possible.""" @@ -218,7 +214,6 @@ def _coerce_nested_policy(source: object) -> ValuePolicy[str] | None: return None - def _materialize_transfer_source( source: DatasetLike | SupportsValuePolicy | ValuePolicy[str], ) -> _ResolvedElementSource: @@ -247,7 +242,9 @@ def _materialize_transfer_source( for elem in iter_elements(): lookup = lookup_value(elem.symbol, policy=nested_policy) values[elem.z] = lookup.value - placeholders[elem.z] = lookup.is_placeholder if lookup.value is not None else False + placeholders[elem.z] = ( + lookup.is_placeholder if lookup.value is not None else False + ) return _ResolvedElementSource( ref=target, values_by_z=tuple(values), @@ -256,7 +253,6 @@ def _materialize_transfer_source( ) - def _lookup_transfer_source_value( symbol: str, source: DatasetLike | SupportsValuePolicy | ValuePolicy[str], @@ -306,7 +302,6 @@ def _lookup_transfer_source_value( ) - def _fit_linear_transfer( base_set: ElementScalarSet, predictor_source: _ResolvedElementSource, @@ -381,7 +376,6 @@ def _fit_linear_transfer_cached( ) - def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit | None: """Return the fit object for a transfer model when it needs one.""" @@ -406,7 +400,6 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit ) - def _apply_substitution_transfer( symbol: str, *, @@ -443,7 +436,6 @@ def _apply_substitution_transfer( ) - def _apply_linear_transfer( symbol: str, *, @@ -456,7 +448,10 @@ def _apply_linear_transfer( if len(transfer.predictors) != 1: raise PolicyError("v0.1 LinearTransfer supports exactly one predictor source") - predictor_value, note = _lookup_transfer_source_value(symbol, transfer.predictors[0]) + predictor_value, note = _lookup_transfer_source_value( + symbol, + transfer.predictors[0], + ) if predictor_value is None: return None, note @@ -476,7 +471,8 @@ def _apply_linear_transfer( notes.append("linear fit used policy-materialized predictor values") if predictor_value.lookup_source not in (None, "base"): notes.append( - f"policy predictor resolved the value via {predictor_value.lookup_source}" + "policy predictor resolved the value via " + f"{predictor_value.lookup_source}" ) return ( @@ -493,7 +489,6 @@ def _apply_linear_transfer( ) - def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: """Resolve a value through override, base, transfer, and fallback steps.""" @@ -511,7 +506,12 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes sym = _normalize_element_symbol(symbol) if sym is None: note = 
"unknown element" if symbol is not None else "missing element symbol" - return LookupResult(value=None, source="missing", target=target, notes=(note,)) + return LookupResult( + value=None, + source="missing", + target=target, + notes=(note,), + ) if sym in policy.blocked: return LookupResult( @@ -589,7 +589,6 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes assert popped == policy_id # internal stack discipline - def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: """Public entry point for generic element-domain scalar lookup. @@ -600,7 +599,6 @@ def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResul return _resolve_value(symbol, policy=policy) - def get_value(symbol: str | None, *, policy: ValuePolicy[str]) -> float | None: """Return only the resolved scalar value for an element-domain policy.""" diff --git a/src/atomref/radii.py b/src/atomref/radii.py index de7ff36..449de58 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -123,7 +123,6 @@ class RadiiPolicyAssessment: per_element: tuple[RadiiElementAssessment, ...] = () - def _coerce_non_negative_radii_value(value: object, *, what: str) -> float: """Validate a radii-like policy number. @@ -142,7 +141,6 @@ def _coerce_non_negative_radii_value(value: object, *, what: str) -> float: return out - def _quantity_for_kind(kind: RadiiKind) -> str: """Translate public radii kind names into registry quantity ids.""" @@ -152,7 +150,6 @@ def _quantity_for_kind(kind: RadiiKind) -> str: raise PolicyError(f"unknown radii kind: {kind!r}") from exc - def _normalize_radii_symbol(symbol: str | None) -> str | None: """Normalize symbols accepted by the radii convenience layer.""" @@ -162,7 +159,6 @@ def _normalize_radii_symbol(symbol: str | None) -> str | None: return cand - def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: """Normalize, validate, deduplicate, and sort assessment element labels.""" @@ -179,7 +175,6 @@ def _normalize_assessment_elements(elements: Iterable[str]) -> tuple[str, ...]: ) - def list_radii_sets( kind: RadiiKind, *, @@ -190,7 +185,6 @@ def list_radii_sets( return list_dataset_ids(_quantity_for_kind(kind), usage_role=usage_role) - def list_radii_set_infos( kind: RadiiKind, *, @@ -201,21 +195,18 @@ def list_radii_set_infos( return list_dataset_infos(_quantity_for_kind(kind), usage_role=usage_role) - def get_radii_set_info(kind: RadiiKind, set_id: str) -> DatasetInfo: """Return metadata for one packaged radii set.""" return get_dataset_info(DatasetRef(_quantity_for_kind(kind), set_id)) - def get_radii_set(kind: RadiiKind, set_id: str) -> RadiiSet: """Load one packaged radii set as an :class:`ElementScalarSet`.""" return get_builtin_set(DatasetRef(_quantity_for_kind(kind), set_id)) - def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: """Raise when a policy is used with the wrong public radii helper.""" @@ -223,14 +214,12 @@ def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: raise PolicyError(f"expected a {expected!r} radii policy, got {policy.kind!r}") - def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: """Shared implementation for radii lookup helpers.""" return lookup_value(symbol, policy=policy.as_value_policy()) - def lookup_covalent_radius( symbol: str | None, *, @@ -243,7 +232,6 @@ def lookup_covalent_radius( return _lookup_radius(symbol, policy=active) - def get_covalent_radius( symbol: str | None, *, @@ -256,7 
+244,6 @@ def get_covalent_radius( return get_value(symbol, policy=active.as_value_policy()) - def lookup_vdw_radius( symbol: str | None, *, @@ -269,7 +256,6 @@ def lookup_vdw_radius( return _lookup_radius(symbol, policy=active) - def get_vdw_radius( symbol: str | None, *, @@ -282,7 +268,6 @@ def get_vdw_radius( return get_value(symbol, policy=active.as_value_policy()) - def assess_radii_policy( elements: Iterable[str], *, diff --git a/src/atomref/registry.py b/src/atomref/registry.py index b9f2730..479ff97 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -143,7 +143,8 @@ def from_mapping( previous = seen_keys.get(sym) if previous is not None and previous != key: raise DatasetError( - f"custom-set keys {previous!r} and {key!r} both normalize to {sym!r}" + "custom-set keys " + f"{previous!r} and {key!r} both normalize to {sym!r}" ) seen_keys[sym] = key values_by_z[elem.z] = ( @@ -266,7 +267,6 @@ def _coerce_finite_float(value: object, *, what: str) -> float: return out - def _get_quantities_mapping() -> Mapping[str, object]: """Return the raw ``quantities`` mapping from ``registry.json``.""" @@ -276,7 +276,6 @@ def _get_quantities_mapping() -> Mapping[str, object]: return quantities - def _get_datasets_mapping() -> Mapping[str, object]: """Return the raw ``datasets`` mapping from ``registry.json``.""" @@ -286,7 +285,6 @@ def _get_datasets_mapping() -> Mapping[str, object]: return datasets - def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: """Return the dataset table for one quantity or raise on unknown input.""" @@ -296,14 +294,12 @@ def _datasets_for_quantity(quantity: QuantityId) -> Mapping[str, object]: return datasets - def list_quantities() -> tuple[str, ...]: """List packaged quantity identifiers in registry order.""" return tuple(_get_quantities_mapping().keys()) - def get_quantity_info(quantity: QuantityId) -> QuantityInfo: """Return quantity-level metadata for a packaged quantity.""" @@ -325,7 +321,6 @@ def get_quantity_info(quantity: QuantityId) -> QuantityInfo: ) - def _canonicalize_alias_token(value: str) -> str: """Normalize a dataset id or alias for case-insensitive comparison.""" @@ -334,7 +329,6 @@ def _canonicalize_alias_token(value: str) -> str: return " ".join(normalized.strip().lower().split()) - def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: """Resolve a dataset id or alias to its canonical packaged set id.""" @@ -358,7 +352,6 @@ def _resolve_set_id(quantity: QuantityId, set_id: str) -> str: raise DatasetError(f"unknown dataset id for {quantity!r}: {set_id!r}") - def list_dataset_ids( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[str, ...]: @@ -382,7 +375,6 @@ def list_dataset_ids( return tuple(filtered) - def list_dataset_infos( quantity: QuantityId, *, usage_role: str | None = None ) -> tuple[DatasetInfo, ...]: @@ -394,7 +386,6 @@ def list_dataset_infos( ) - def _coerce_reference(obj: object) -> Reference: """Coerce a raw registry reference entry into :class:`Reference`.""" @@ -414,7 +405,6 @@ def _coerce_reference(obj: object) -> Reference: ) - def _coerce_coverage(obj: object) -> CoverageInfo | None: """Coerce raw coverage metadata into :class:`CoverageInfo`.""" @@ -434,7 +424,6 @@ def _coerce_coverage(obj: object) -> CoverageInfo | None: ) - def get_dataset_info(ref: DatasetRef) -> DatasetInfo: """Return curated metadata for a packaged dataset reference.""" @@ -604,7 +593,6 @@ def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: return ElementScalarSet(ref=info.ref, info=info, 
values_by_z=table[column]) - def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: """Resolve either a packaged reference or a custom set to a loaded set.""" @@ -613,7 +601,6 @@ def resolve_dataset_like(dataset: DatasetLike) -> ElementScalarSet: return get_builtin_set(dataset) - def _is_placeholder_value(info: DatasetInfo, value: float) -> bool: """Return ``True`` when ``value`` equals the dataset's placeholder value.""" diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py index 9e071db..909d136 100644 --- a/src/atomref/transfer.py +++ b/src/atomref/transfer.py @@ -38,7 +38,10 @@ class LinearFit: @dataclass(frozen=True, slots=True) class SubstitutionTransfer: - """Use another dataset or policy directly when the base dataset is missing a value.""" + """Use another dataset or policy directly when the base dataset is missing. + + The selected value is copied from the source rather than inferred. + """ source: DatasetLike | SupportsValuePolicy | ValuePolicy[str] diff --git a/src/atomref/xh.py b/src/atomref/xh.py index c2a87be..e445f11 100644 --- a/src/atomref/xh.py +++ b/src/atomref/xh.py @@ -9,7 +9,15 @@ from .elements import canonicalize_element_symbol, is_valid_element_symbol from .errors import PolicyError from .policy import LookupResult, ValuePolicy, get_value, lookup_value -from .registry import DatasetInfo, DatasetRef, ElementScalarSet, get_builtin_set, get_dataset_info, list_dataset_ids, list_dataset_infos +from .registry import ( + DatasetInfo, + DatasetRef, + ElementScalarSet, + get_builtin_set, + get_dataset_info, + list_dataset_ids, + list_dataset_infos, +) from .transfer import LinearTransfer, TransferModel XHSet = ElementScalarSet @@ -37,7 +45,9 @@ def as_value_policy(self) -> ValuePolicy[str]: if isinstance(self.base_set, ElementScalarSet): if self.base_set.ref.quantity != _QUANTITY: raise PolicyError( - f"base_set quantity {self.base_set.ref.quantity!r} is incompatible with X-H lookup" + "base_set quantity " + f"{self.base_set.ref.quantity!r} is incompatible " + "with X-H lookup" ) base = self.base_set else: @@ -70,7 +80,6 @@ def as_value_policy(self) -> ValuePolicy[str]: ) - def _coerce_non_negative_xh_value(value: object, *, what: str) -> float: """Validate an X-H-like policy number.""" @@ -85,7 +94,6 @@ def _coerce_non_negative_xh_value(value: object, *, what: str) -> float: return out - def _normalize_xh_symbol(symbol: str | None) -> str | None: """Normalize symbols accepted by the X-H convenience layer.""" @@ -95,35 +103,30 @@ def _normalize_xh_symbol(symbol: str | None) -> str | None: return cand - def list_xh_sets(*, usage_role: str | None = None) -> tuple[str, ...]: """List packaged X-H set ids.""" return list_dataset_ids(_QUANTITY, usage_role=usage_role) - def list_xh_set_infos(*, usage_role: str | None = None) -> tuple[DatasetInfo, ...]: """Return packaged metadata objects for X-H sets.""" return list_dataset_infos(_QUANTITY, usage_role=usage_role) - def get_xh_set_info(set_id: str) -> DatasetInfo: """Return metadata for one packaged X-H set.""" return get_dataset_info(DatasetRef(_QUANTITY, set_id)) - def get_xh_set(set_id: str) -> XHSet: """Load one packaged X-H set as an :class:`ElementScalarSet`.""" return get_builtin_set(DatasetRef(_QUANTITY, set_id)) - def lookup_xh_bond_length( symbol: str | None, *, @@ -143,7 +146,6 @@ def lookup_xh_bond_length( return lookup - def get_xh_bond_length( symbol: str | None, *, From 1370a025a98c248ca806c786006e2a13019aea10 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 21:24:29 +0300 
Subject: [PATCH 14/15] Improves transfer policies --- CHANGELOG.md | 82 +++++++ README.md | 15 +- docs/api/policy.md | 6 +- docs/api/transfer.md | 18 ++ docs/api/xh.md | 2 + docs/dev/architecture.md | 28 ++- docs/guide/policies.md | 53 +++++- docs/index.md | 15 +- .../notebooks/03-custom-sets-and-discovery.md | 6 +- src/atomref/__about__.py | 2 +- src/atomref/policy.py | 229 ++++++++++++++++-- src/atomref/radii.py | 12 +- src/atomref/transfer.py | 98 +++++++- src/atomref/xh.py | 11 +- tests/policy/test_policy.py | 181 +++++++++++++- 15 files changed, 711 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbb2887..8650d50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,87 @@ # Changelog
+## 0.1.4 - 2026-03-15
+
+### Added
+
+- `LookupResult.transfer_depth`, which records how many transfer steps were
+  involved in the returned numeric value.
+- Source/depth controls for nested linear-transfer workflows via
+  `LinearTransfer.fit_sources`, `LinearTransfer.fit_max_depth`,
+  `LinearTransfer.prediction_sources`, and `LinearTransfer.prediction_max_depth`.
+- Regression tests covering generic-policy cycles, wrapper-policy cycles,
+  conservative nested-fit defaults, and explicit opt-in for deeper nested
+  linear workflows.
+- Expanded transfer and policy docs explaining nested-policy safeguards,
+  `transfer_depth`, and cycle detection, with guidance on when chained
+  correlations are scientifically reasonable and how to opt in deliberately
+  when broader fit training is desired.
+ +### Changed + +- `LinearTransfer` now distinguishes between values that may participate in + fitting (`fit_sources`, `fit_max_depth`) and values that may be used for the + final element-specific predictor lookup (`prediction_sources`, + `prediction_max_depth`). +- The default linear-transfer behavior is now conservative for fitting + (direct predictor values only) while still allowing one nested completion + step during final prediction. +- Policy-resolution cycle detection now tracks wrapper-policy identities as + well as generic `ValuePolicy` objects and is stored in a context-local stack + instead of a process-global mutable list. +- Quantity wrappers continue to use the generic policy core, but now route + through wrapper-aware lookup helpers so cycle checks remain effective for + `RadiiPolicy` and `XHPolicy`. + ## 0.1.3 - 2026-03-15 ### Added diff --git a/README.md b/README.md index 0d784fc..52a3e8c 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ What you get in the current `0.1.x` line: - dataset provenance and coverage metadata, - deterministic lookup policies, - substitution and linear transfer from support datasets or policies into target datasets, +- guarded nested policy-backed transfers with explicit transfer depth, + conservative fit/prediction controls, and cycle detection, - user-defined custom element-indexed scalar sets. ## Core terms @@ -65,6 +67,13 @@ The default `0.1.x` behavior is intentionally simple and practical: elements inferred from **Cordero covalent radii** through a fitted linear policy. +Nested policy predictors are supported too. In `0.1.4`, `LinearTransfer` +separates **fit-time** use of nested predictor values from +**prediction-time** use. By default, the fit may use only direct nested +values, while the final requested element may still use one additional +nested completion step. That is a useful compromise for workflows such as +provisional X–H inference from a chosen covalent-radii policy. + ## Quick example ```pycon @@ -80,13 +89,15 @@ The default `0.1.x` behavior is intentionally simple and practical: 2.8972265395148358 >>> lookup.source 'transfer_linear' +>>> lookup.transfer_depth +1 >>> lookup.resolved_from (DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) ``` `get_*` returns only the number. `lookup_*` returns a `LookupResult` that also -records where the value came from and whether a transfer model or policy source -was involved. +records where the value came from, whether a transfer model or policy source was +involved, and how many transfer steps were needed (`transfer_depth`). You can inspect the packaged quantity and dataset catalog directly: diff --git a/docs/api/policy.md b/docs/api/policy.md index 5b68440..29b4142 100644 --- a/docs/api/policy.md +++ b/docs/api/policy.md @@ -3,7 +3,7 @@ This module contains the generic resolver that sits below the radii-specific and X–H-specific convenience APIs. -Use it when you want to work directly with the common value-selection engine: +Use it when you want to work directly with the shared value-selection engine: - `ValuePolicy` — generic element-domain policy configuration, - `lookup_value(...)` — resolve one value together with provenance, @@ -18,5 +18,9 @@ A few practical notes: wrapper policies that expose `as_value_policy()`. - `LookupResult.is_placeholder` refers to the returned numeric value itself, not to whether any transfer happened. +- `LookupResult.transfer_depth` counts how many transfer steps were involved in + the returned numeric value. 
+- Nested lookup is cycle-checked across both generic `ValuePolicy` objects and + wrapper policies such as `RadiiPolicy` and `XHPolicy`. ::: atomref.policy diff --git a/docs/api/transfer.md b/docs/api/transfer.md index 797626e..17e07ad 100644 --- a/docs/api/transfer.md +++ b/docs/api/transfer.md @@ -18,4 +18,22 @@ A transfer source may be: `LinearTransfer` currently accepts exactly one predictor source at runtime, even though the public API stores predictors as a tuple for forward compatibility. +For policy-backed linear predictors, `LinearTransfer` separates two questions: + +- which nested predictor values may be used to **fit** the linear model + (`fit_sources`, `fit_max_depth`), and +- which nested predictor values may be used to **predict** the final requested + element (`prediction_sources`, `prediction_max_depth`). + +The defaults are intentionally conservative: + +- fit only on nested predictor values that came directly from `base` or + `override`, +- but allow one additional nested transfer step when evaluating the predictor + for the requested element. + +That default is meant for workflows such as a sparse X–H target set correlated +against a partial covalent-radii policy that is itself completed from a broader +support set. + ::: atomref.transfer diff --git a/docs/api/xh.md b/docs/api/xh.md index cca073e..cbc1465 100644 --- a/docs/api/xh.md +++ b/docs/api/xh.md @@ -17,6 +17,8 @@ In the default policy: - `C`, `N`, and `O` use curated ConQuest/CSD defaults, - other parent elements may be inferred from `cordero2008`, +- policy-backed predictors are supported as well, with conservative nested-fit + defaults and one additional nested prediction step allowed by default, - fuller X–H literature support is planned for `0.2.x`. ::: atomref.xh diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index cbdf743..680b755 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -63,6 +63,31 @@ That last point is important. It means higher-level code can express "infer values from my chosen covalent-radii policy" instead of being forced to refer to one hard-coded predictor dataset. +## Nested-policy safeguards and cycle detection + +Policy-backed transfer sources are materialized with more than just raw numeric +values. The resolver also tracks, per element: + +- whether the value came from `base`, `override`, substitution, linear transfer, + or fallback, +- the nested transfer depth that was required to produce it, +- placeholder status. + +`LinearTransfer` uses that information twice: + +- once when fitting the linear relation (`fit_sources` / `fit_max_depth`), +- again when deciding whether the predictor value for the requested element is + admissible (`prediction_sources` / `prediction_max_depth`). + +The default policy is intentionally conservative: fit only on direct nested +predictor values, but allow one additional nested completion step when +predicting the final requested element. This keeps the common two-stage use case +possible without silently training on arbitrarily long inference chains. + +Cycle detection is handled with a context-local activation stack. Both generic +`ValuePolicy` objects and wrapper policies are tracked, so recursion through a +freshly materialized wrapper policy is still detected reliably and safely. + ## Placeholder handling Placeholder semantics stay attached to the value that was actually returned. @@ -73,7 +98,8 @@ This means `LookupResult.is_placeholder` can be true for: - a nested policy used as a transfer source. 
A linear transfer normally returns a computed value and therefore does not carry -placeholder status itself. +placeholder status itself. Instead, its provenance is carried by +`resolved_from`, explanatory notes, and `transfer_depth`. ## Why the design stays small diff --git a/docs/guide/policies.md b/docs/guide/policies.md index fd53047..912563b 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -69,7 +69,7 @@ current runtime intentionally supports exactly one predictor source. That keeps the implementation simple now while leaving room for later multi-predictor linear models. -Transfer sources can now be: +Transfer sources can be: - a packaged dataset reference (`DatasetRef`), - a custom `ElementScalarSet`, @@ -81,6 +81,35 @@ that policy. This lets higher-level workflows express things like “infer X–H lengths from my chosen covalent-radii policy” instead of hard-coding a specific support dataset. +#### Nested policy safeguards for `LinearTransfer` + +When a predictor source is itself a policy, two different questions matter: + +1. Which nested predictor values are trustworthy enough to train the linear fit? +2. Which nested predictor value is acceptable for the final requested element? + +`atomref` keeps those two decisions separate. By default: + +- `fit_sources=("base", "override")` and `fit_max_depth=0`, +- `prediction_sources=("base", "override", "transfer_substitution", "transfer_linear")` + and `prediction_max_depth=1`. + +That means the fitted relationship is trained only on direct predictor values by +default, while one additional nested completion step is still allowed at +prediction time. + +This is a good default for workflows such as: + +- sparse target X–H data from `csd_legacy_xh_cno`, +- a partial covalent-radii predictor policy with direct `s,p` values, +- one inner transfer from a broader support set such as `cordero2008` to make + the predictor usable for `d` or `f` elements. + +In that setup, the outer X–H fit still uses direct predictor anchors, while the +final requested element may use one nested predictor transfer. If you really do +want fit training to use nested predictor values as well, you can opt in +explicitly by widening `fit_sources` and/or increasing `fit_max_depth`. + ### Fallback A fallback is a constant last-resort value. It is useful when an algorithm must @@ -112,6 +141,24 @@ It does **not** mean “a transfer happened”. Examples: - a linear transfer is computed, not copied, so `is_placeholder` is normally `False`. +## Transfer depth and cycle detection + +`LookupResult.transfer_depth` counts how many transfer steps were needed to +produce the returned value: + +- direct base and override values have depth `0`, +- one substitution or linear restoration has depth `1`, +- nested transfer chains increase the depth further. + +This makes nested-policy behavior inspectable without trying to infer it from +notes alone. + +Because policies may now depend on other policies, the resolver also performs +cycle detection. A cyclic reference such as policy A depending on policy B while +policy B depends back on policy A raises `PolicyError` instead of recurring +indefinitely. The same protection applies when recursion goes through wrapper +policies such as `RadiiPolicy` or `XHPolicy`. 
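+
+For example, with the packaged default policies (a quick sketch; a direct
+base hit reports depth `0`, while a single linear transfer, as for `Og`
+under the default van der Waals policy, reports depth `1`):
+
+```pycon
+>>> import atomref as ar
+>>> ar.lookup_covalent_radius("C").transfer_depth
+0
+>>> lookup = ar.lookup_vdw_radius("Og")
+>>> lookup.source
+'transfer_linear'
+>>> lookup.transfer_depth
+1
+```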
+ ## Target datasets and support datasets `atomref` separates **what a dataset is used for** from **what it scientifically @@ -171,5 +218,5 @@ With that X–H policy: - missing parent elements may be inferred from the **selected covalent-radii policy**, not just from one hard-coded support dataset, - if the predictor policy itself needed a transfer to produce a covalent radius, - the resulting `LookupResult` still records that provenance in `resolved_from` - and `notes`. + the resulting `LookupResult` still records that provenance in `resolved_from`, + `notes`, and `transfer_depth`. diff --git a/docs/index.md b/docs/index.md index 17c5481..71babb9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,6 +22,8 @@ What you get in the current `0.1.x` line: - dataset provenance and coverage metadata, - deterministic lookup policies, - substitution and linear transfer from support datasets or policies into target datasets, +- guarded nested policy-backed transfers with explicit transfer depth, + conservative fit/prediction controls, and cycle detection, - user-defined custom element-indexed scalar sets. ## Core terms @@ -65,6 +67,13 @@ The default `0.1.x` behavior is intentionally simple and practical: elements inferred from **Cordero covalent radii** through a fitted linear policy. +Nested policy predictors are supported too. In `0.1.4`, `LinearTransfer` +separates **fit-time** use of nested predictor values from +**prediction-time** use. By default, the fit may use only direct nested +values, while the final requested element may still use one additional +nested completion step. That is a useful compromise for workflows such as +provisional X–H inference from a chosen covalent-radii policy. + ## Quick example ```pycon @@ -80,13 +89,15 @@ The default `0.1.x` behavior is intentionally simple and practical: 2.8972265395148358 >>> lookup.source 'transfer_linear' +>>> lookup.transfer_depth +1 >>> lookup.resolved_from (DatasetRef(quantity='atomic_radius', set_id='rahm2016'),) ``` `get_*` returns only the number. `lookup_*` returns a `LookupResult` that also -records where the value came from and whether a transfer model or policy source -was involved. +records where the value came from, whether a transfer model or policy source was +involved, and how many transfer steps were needed (`transfer_depth`). 
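+
+The fit-time versus prediction-time split described above can also be spelled
+out explicitly when building a policy. A sketch (the keyword values shown
+mirror the documented `0.1.4` defaults):
+
+```python
+import atomref as ar
+
+# Infer missing X-H parent elements from the default covalent-radii policy
+# instead of from one fixed support dataset.
+policy = ar.XHPolicy(
+    base_set="csd_legacy_xh_cno",
+    transfers=(
+        ar.LinearTransfer(
+            predictors=(ar.DEFAULT_COVALENT_POLICY,),
+            min_points=3,
+            exclude_placeholders=True,
+            # Nested-predictor safeguards; these are the documented defaults.
+            fit_sources=("base", "override"),
+            fit_max_depth=0,
+            prediction_sources=(
+                "base",
+                "override",
+                "transfer_substitution",
+                "transfer_linear",
+            ),
+            prediction_max_depth=1,
+        ),
+    ),
+)
+
+lookup = ar.lookup_xh_bond_length("S", policy=policy)
+print(lookup.source, lookup.transfer_depth)
+```
+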
You can inspect the packaged quantity and dataset catalog directly: diff --git a/docs/notebooks/03-custom-sets-and-discovery.md b/docs/notebooks/03-custom-sets-and-discovery.md index 51dc5e2..47138bf 100644 --- a/docs/notebooks/03-custom-sets-and-discovery.md +++ b/docs/notebooks/03-custom-sets-and-discovery.md @@ -33,9 +33,9 @@ for symbol in ("C", "O", "N"): ``` **Output** ```text -C LookupResult(value=0.77, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=()) -O LookupResult(value=0.67, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=()) -N LookupResult(value=0.71, source='transfer_substitution', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='cordero2008'),), is_placeholder=False, fit=None, notes=('missing in base set; substituted from transfer source',)) +C LookupResult(value=0.77, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=(), transfer_depth=0) +O LookupResult(value=0.67, source='base', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'),), is_placeholder=False, fit=None, notes=(), transfer_depth=0) +N LookupResult(value=0.71, source='transfer_substitution', target=DatasetRef(quantity='covalent_radius', set_id='demo_user_cov'), resolved_from=(DatasetRef(quantity='covalent_radius', set_id='cordero2008'),), is_placeholder=False, fit=None, notes=('missing in base set; substituted from transfer source',), transfer_depth=1) ``` ```python for info in ar.list_radii_set_infos("van_der_waals", usage_role="target"): diff --git a/src/atomref/__about__.py b/src/atomref/__about__.py index ae73625..bbab024 100644 --- a/src/atomref/__about__.py +++ b/src/atomref/__about__.py @@ -1 +1 @@ -__version__ = "0.1.3" +__version__ = "0.1.4" diff --git a/src/atomref/policy.py b/src/atomref/policy.py index a8c8616..a2f922f 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -3,6 +3,7 @@ from __future__ import annotations from collections.abc import Mapping +import contextvars from dataclasses import dataclass, field from functools import lru_cache import math @@ -42,7 +43,10 @@ "missing", ] -_ACTIVE_POLICY_IDS: list[int] = [] +PolicyToken = tuple[str, int] +_ACTIVE_POLICY_TOKENS: contextvars.ContextVar[tuple[PolicyToken, ...]] = ( + contextvars.ContextVar("atomref_active_policy_tokens", default=()) +) @dataclass(frozen=True, slots=True) @@ -51,6 +55,8 @@ class LookupResult: ``value`` carries the final scalar value when one could be produced, while ``source`` and the remaining metadata explain how that value was obtained. + ``transfer_depth`` counts how many transfer steps were involved in producing + the returned value. Direct base and override values therefore have depth 0. """ value: float | None @@ -60,6 +66,7 @@ class LookupResult: is_placeholder: bool = False fit: LinearFit | None = None notes: tuple[str, ...] 
= () + transfer_depth: int = 0 def __float__(self) -> float: """Coerce the resolved value to ``float`` or raise if it is missing.""" @@ -119,7 +126,8 @@ def __post_init__(self) -> None: for key, value in self.overrides.items(): if not isinstance(key, str): raise PolicyError( - "element-domain policy overrides must be keyed by element symbols" + "element-domain policy overrides must be keyed by element " + "symbols" ) sym = _normalize_element_symbol(key) if sym is None: @@ -129,7 +137,8 @@ def __post_init__(self) -> None: previous = seen_original_keys.get(sym) if previous is not None and previous != key: raise PolicyError( - f"override keys {previous!r} and {key!r} both normalize to {sym!r}" + f"override keys {previous!r} and {key!r} both normalize to " + f"{sym!r}" ) seen_original_keys[sym] = key normalized_overrides[sym] = _coerce_policy_float( @@ -151,6 +160,8 @@ class _ResolvedElementSource: ref: DatasetRef values_by_z: tuple[float | None, ...] placeholder_by_z: tuple[bool, ...] + lookup_source_by_z: tuple[LookupSource | None, ...] + transfer_depth_by_z: tuple[int | None, ...] via_policy: bool = False @@ -165,6 +176,7 @@ class _TransferSourceValue: via_policy: bool = False lookup_source: LookupSource | None = None notes: tuple[str, ...] = () + transfer_depth: int = 0 def _coerce_policy_float(value: object, *, what: str) -> float: @@ -201,17 +213,50 @@ def _resolve_target_ref(policy: ValuePolicy[object]) -> DatasetRef: return resolve_dataset_like(policy.base).ref -def _coerce_nested_policy(source: object) -> ValuePolicy[str] | None: - """Return ``source`` as a generic value policy when possible.""" +def _policy_resolution_tokens( + policy: ValuePolicy[object], + *, + owner: object | None = None, +) -> tuple[PolicyToken, ...]: + """Return all tokens that should be considered active for one resolution. + + We always track the concrete :class:`ValuePolicy` object identity. When a + wrapper object such as :class:`atomref.radii.RadiiPolicy` or + :class:`atomref.xh.XHPolicy` is the logical source, we also track the + wrapper identity so recursion through freshly materialized generic policies + is still detected. 
+ """ + + tokens: list[PolicyToken] = [("policy", id(policy))] + if owner is not None: + tokens.append((f"owner:{type(owner).__qualname__}", id(owner))) + return tuple(tokens) + + +def _lookup_value_with_owner( + symbol: str | None, + *, + policy: ValuePolicy[str], + owner: object | None, +) -> LookupResult: + """Internal lookup helper that carries wrapper identity for cycle checks.""" + + return _resolve_value(symbol, policy=policy, resolution_owner=owner) + + +def _coerce_nested_policy( + source: object, +) -> tuple[ValuePolicy[str] | None, object | None]: + """Return ``source`` as a generic value policy and its logical owner.""" if isinstance(source, ValuePolicy): - return source + return source, None if isinstance(source, SupportsValuePolicy): nested = source.as_value_policy() if not isinstance(nested, ValuePolicy): raise PolicyError("policy-like transfer sources must return ValuePolicy") - return nested - return None + return nested, source + return None, None def _materialize_transfer_source( @@ -219,7 +264,7 @@ def _materialize_transfer_source( ) -> _ResolvedElementSource: """Materialize any element-domain transfer source into dense by-Z arrays.""" - nested_policy = _coerce_nested_policy(source) + nested_policy, nested_owner = _coerce_nested_policy(source) if nested_policy is None: dataset = resolve_dataset_like(source) placeholders = tuple( @@ -228,10 +273,18 @@ def _materialize_transfer_source( else _is_placeholder_value(dataset.info, float(value)) for value in dataset.values_by_z ) + lookup_sources = tuple( + "base" if value is not None else None for value in dataset.values_by_z + ) + transfer_depths = tuple( + 0 if value is not None else None for value in dataset.values_by_z + ) return _ResolvedElementSource( ref=dataset.ref, values_by_z=dataset.values_by_z, placeholder_by_z=placeholders, + lookup_source_by_z=lookup_sources, + transfer_depth_by_z=transfer_depths, via_policy=False, ) @@ -239,16 +292,25 @@ def _materialize_transfer_source( n_z = max(elem.z for elem in iter_elements()) values: list[float | None] = [None] * (n_z + 1) placeholders: list[bool] = [False] * (n_z + 1) + lookup_sources: list[LookupSource | None] = [None] * (n_z + 1) + transfer_depths: list[int | None] = [None] * (n_z + 1) for elem in iter_elements(): - lookup = lookup_value(elem.symbol, policy=nested_policy) - values[elem.z] = lookup.value - placeholders[elem.z] = ( - lookup.is_placeholder if lookup.value is not None else False + lookup = _lookup_value_with_owner( + elem.symbol, + policy=nested_policy, + owner=nested_owner, ) + values[elem.z] = lookup.value + if lookup.value is not None: + placeholders[elem.z] = lookup.is_placeholder + lookup_sources[elem.z] = lookup.source + transfer_depths[elem.z] = lookup.transfer_depth return _ResolvedElementSource( ref=target, values_by_z=tuple(values), placeholder_by_z=tuple(placeholders), + lookup_source_by_z=tuple(lookup_sources), + transfer_depth_by_z=tuple(transfer_depths), via_policy=True, ) @@ -259,7 +321,7 @@ def _lookup_transfer_source_value( ) -> tuple[_TransferSourceValue | None, str | None]: """Resolve one element value from a transfer source or nested policy.""" - nested_policy = _coerce_nested_policy(source) + nested_policy, nested_owner = _coerce_nested_policy(source) if nested_policy is None: source_set = resolve_dataset_like(source) value = source_set.get(symbol) @@ -275,11 +337,16 @@ def _lookup_transfer_source_value( via_policy=False, lookup_source="base", notes=(), + transfer_depth=0, ), None, ) - lookup = lookup_value(symbol, 
policy=nested_policy) + lookup = _lookup_value_with_owner( + symbol, + policy=nested_policy, + owner=nested_owner, + ) if lookup.value is None: if lookup.notes: return ( @@ -297,22 +364,64 @@ def _lookup_transfer_source_value( via_policy=True, lookup_source=lookup.source, notes=lookup.notes, + transfer_depth=lookup.transfer_depth, ), None, ) +def _transfer_source_is_allowed( + lookup_source: LookupSource | None, + transfer_depth: int | None, + *, + allowed_sources: tuple[str, ...], + max_depth: int, +) -> bool: + """Return whether a nested predictor value may participate downstream.""" + + if lookup_source is None or transfer_depth is None: + return False + return lookup_source in allowed_sources and transfer_depth <= max_depth + + +def _explain_rejected_transfer_source( + *, + source_role: str, + lookup_source: LookupSource | None, + transfer_depth: int | None, + allowed_sources: tuple[str, ...], + max_depth: int, +) -> str: + """Return a human-readable explanation for a rejected nested source.""" + + if lookup_source is None or transfer_depth is None: + return f"{source_role} policy source did not return a usable value" + if lookup_source not in allowed_sources: + allowed = ", ".join(allowed_sources) + return ( + f"{source_role} policy source resolved via {lookup_source}, which is " + f"excluded by {source_role}_sources=({allowed})" + ) + return ( + f"{source_role} policy source transfer depth {transfer_depth} exceeds " + f"allowed maximum {max_depth} ({source_role}_max_depth)" + ) + + def _fit_linear_transfer( base_set: ElementScalarSet, predictor_source: _ResolvedElementSource, *, min_points: int, exclude_placeholders: bool, + fit_sources: tuple[str, ...], + fit_max_depth: int, ) -> LinearFit: """Fit a one-predictor linear transfer model between two sources.""" xs: list[float] = [] ys: list[float] = [] + filtered_by_fit_restrictions = 0 n_z = min(len(base_set.values_by_z), len(predictor_source.values_by_z)) for z in range(1, n_z): @@ -320,6 +429,14 @@ def _fit_linear_transfer( x = predictor_source.values_by_z[z] if y is None or x is None: continue + if not _transfer_source_is_allowed( + predictor_source.lookup_source_by_z[z], + predictor_source.transfer_depth_by_z[z], + allowed_sources=fit_sources, + max_depth=fit_max_depth, + ): + filtered_by_fit_restrictions += 1 + continue y_f = float(y) x_f = float(x) if exclude_placeholders and ( @@ -332,6 +449,11 @@ def _fit_linear_transfer( n = len(xs) if n < min_points: + if predictor_source.via_policy and filtered_by_fit_restrictions > 0: + raise PolicyError( + "not enough overlapping elements to fit linear transfer after " + "applying fit source constraints (fit-source restrictions)" + ) raise PolicyError("not enough overlapping elements to fit linear transfer") x_mean = sum(xs) / n @@ -365,6 +487,8 @@ def _fit_linear_transfer_cached( predictor_ref: DatasetRef, min_points: int, exclude_placeholders: bool, + fit_sources: tuple[str, ...], + fit_max_depth: int, ) -> LinearFit: """Cache fits between two packaged datasets for repeated reuse.""" @@ -373,6 +497,8 @@ def _fit_linear_transfer_cached( _materialize_transfer_source(predictor_ref), min_points=min_points, exclude_placeholders=exclude_placeholders, + fit_sources=fit_sources, + fit_max_depth=fit_max_depth, ) @@ -391,12 +517,16 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit predictor, transfer.min_points, transfer.exclude_placeholders, + transfer.fit_sources, + transfer.fit_max_depth, ) return _fit_linear_transfer( resolve_dataset_like(base), 
_materialize_transfer_source(predictor), min_points=transfer.min_points, exclude_placeholders=transfer.exclude_placeholders, + fit_sources=transfer.fit_sources, + fit_max_depth=transfer.fit_max_depth, ) @@ -431,6 +561,7 @@ def _apply_substitution_transfer( resolved_from=source_value.resolved_from, is_placeholder=source_value.is_placeholder, notes=tuple(notes), + transfer_depth=source_value.transfer_depth + 1, ), None, ) @@ -455,6 +586,23 @@ def _apply_linear_transfer( if predictor_value is None: return None, note + if not _transfer_source_is_allowed( + predictor_value.lookup_source, + predictor_value.transfer_depth, + allowed_sources=transfer.prediction_sources, + max_depth=transfer.prediction_max_depth, + ): + return ( + None, + _explain_rejected_transfer_source( + source_role="prediction", + lookup_source=predictor_value.lookup_source, + transfer_depth=predictor_value.transfer_depth, + allowed_sources=transfer.prediction_sources, + max_depth=transfer.prediction_max_depth, + ), + ) + if transfer.exclude_placeholders and predictor_value.is_placeholder: if predictor_value.via_policy: return None, "predictor value from policy source is a placeholder" @@ -468,7 +616,10 @@ def _apply_linear_transfer( notes = ["missing in base set; inferred via linear transfer"] if predictor_value.via_policy: notes.append("predictor value supplied by policy source") - notes.append("linear fit used policy-materialized predictor values") + notes.append( + "linear fit applied fit-source and transfer-depth limits to " + "policy-materialized predictor values" + ) if predictor_value.lookup_source not in (None, "base"): notes.append( "policy predictor resolved the value via " @@ -484,19 +635,26 @@ def _apply_linear_transfer( is_placeholder=False, fit=fit, notes=tuple(notes), + transfer_depth=predictor_value.transfer_depth + 1, ), None, ) -def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: +def _resolve_value( + symbol: str | None, + *, + policy: ValuePolicy[str], + resolution_owner: object | None = None, +) -> LookupResult: """Resolve a value through override, base, transfer, and fallback steps.""" - policy_id = id(policy) - if policy_id in _ACTIVE_POLICY_IDS: + active_tokens = _ACTIVE_POLICY_TOKENS.get() + resolution_tokens = _policy_resolution_tokens(policy, owner=resolution_owner) + if any(token in active_tokens for token in resolution_tokens): raise PolicyError("cyclic policy resolution detected") - _ACTIVE_POLICY_IDS.append(policy_id) + stack_token = _ACTIVE_POLICY_TOKENS.set(active_tokens + resolution_tokens) try: target = _resolve_target_ref(policy) base_set = resolve_dataset_like(policy.base) @@ -527,6 +685,7 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes source="override", target=target, notes=("value supplied by policy override",), + transfer_depth=0, ) base_value = base_set.get(sym) @@ -545,6 +704,7 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes resolved_from=(base_set.ref,), is_placeholder=is_placeholder, notes=notes, + transfer_depth=0, ) transfer_notes: list[str] = ["missing in base set"] @@ -576,6 +736,7 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes source="fallback", target=target, notes=tuple(transfer_notes + ["using fallback value"]), + transfer_depth=0, ) return LookupResult( @@ -585,8 +746,30 @@ def _resolve_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupRes notes=tuple(transfer_notes), ) finally: - popped = _ACTIVE_POLICY_IDS.pop() - 
assert popped == policy_id # internal stack discipline + _ACTIVE_POLICY_TOKENS.reset(stack_token) + + +def _lookup_value_from_policy_source( + symbol: str | None, + *, + source: ValuePolicy[str] | SupportsValuePolicy, +) -> LookupResult: + """Resolve a value from either a generic policy or a wrapper policy.""" + + if isinstance(source, ValuePolicy): + return _lookup_value_with_owner(symbol, policy=source, owner=None) + policy = source.as_value_policy() + return _lookup_value_with_owner(symbol, policy=policy, owner=source) + + +def _get_value_from_policy_source( + symbol: str | None, + *, + source: ValuePolicy[str] | SupportsValuePolicy, +) -> float | None: + """Return only the scalar selected by a generic or wrapper policy.""" + + return _lookup_value_from_policy_source(symbol, source=source).value def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResult: @@ -596,7 +779,7 @@ def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResul In v0.1 the runtime supports only element-domain policies. """ - return _resolve_value(symbol, policy=policy) + return _lookup_value_with_owner(symbol, policy=policy, owner=None) def get_value(symbol: str | None, *, policy: ValuePolicy[str]) -> float | None: diff --git a/src/atomref/radii.py b/src/atomref/radii.py index 449de58..b33877f 100644 --- a/src/atomref/radii.py +++ b/src/atomref/radii.py @@ -13,8 +13,8 @@ LookupResult, ValuePolicy, _fit_transfer_model, - get_value, - lookup_value, + _get_value_from_policy_source, + _lookup_value_from_policy_source, ) from .registry import ( DatasetInfo, @@ -217,7 +217,7 @@ def _validate_policy_kind(policy: RadiiPolicy, *, expected: RadiiKind) -> None: def _lookup_radius(symbol: str | None, *, policy: RadiiPolicy) -> LookupResult: """Shared implementation for radii lookup helpers.""" - return lookup_value(symbol, policy=policy.as_value_policy()) + return _lookup_value_from_policy_source(symbol, source=policy) def lookup_covalent_radius( @@ -241,7 +241,7 @@ def get_covalent_radius( active = DEFAULT_COVALENT_POLICY if policy is None else policy _validate_policy_kind(active, expected="covalent") - return get_value(symbol, policy=active.as_value_policy()) + return _get_value_from_policy_source(symbol, source=active) def lookup_vdw_radius( @@ -265,7 +265,7 @@ def get_vdw_radius( active = DEFAULT_VDW_POLICY if policy is None else policy _validate_policy_kind(active, expected="van_der_waals") - return get_value(symbol, policy=active.as_value_policy()) + return _get_value_from_policy_source(symbol, source=active) def assess_radii_policy( @@ -292,7 +292,7 @@ def assess_radii_policy( per_element: list[RadiiElementAssessment] = [] for symbol in elems: - lookup = lookup_value(symbol, policy=value_policy) + lookup = _lookup_value_from_policy_source(symbol, source=policy) if lookup.source == "override": n_override += 1 elif lookup.source == "base": diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py index 909d136..54eb724 100644 --- a/src/atomref/transfer.py +++ b/src/atomref/transfer.py @@ -3,7 +3,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable from .errors import PolicyError from .registry import DatasetLike @@ -12,6 +12,37 @@ from .policy import ValuePolicy +TransferValueSource = Literal[ + "override", + "base", + "transfer_substitution", + "transfer_linear", + "fallback", +] +"""Source labels that may be admitted 
into nested linear-transfer workflows.""" + +_ALLOWED_TRANSFER_VALUE_SOURCES = frozenset( + { + "override", + "base", + "transfer_substitution", + "transfer_linear", + "fallback", + } +) + +_DEFAULT_LINEAR_FIT_SOURCES: tuple[TransferValueSource, ...] = ( + "base", + "override", +) +_DEFAULT_LINEAR_PREDICTION_SOURCES: tuple[TransferValueSource, ...] = ( + "base", + "override", + "transfer_substitution", + "transfer_linear", +) + + @runtime_checkable class SupportsValuePolicy(Protocol): """Protocol for wrapper objects that can expose a generic value policy.""" @@ -53,11 +84,27 @@ class LinearTransfer: In v0.1 the public API stores predictors as a tuple for forward compatibility, but the runtime implementation intentionally accepts exactly one predictor source. + + For nested policy predictors, two safeguards apply: + + - ``fit_sources`` / ``fit_max_depth`` control which predictor values may be + used when fitting the linear model itself; + - ``prediction_sources`` / ``prediction_max_depth`` control which nested + predictor values may be used for the final requested element. + + The defaults are intentionally conservative for fitting and permissive only + enough to allow one additional completion step at prediction time. """ predictors: tuple[DatasetLike | SupportsValuePolicy | ValuePolicy[str], ...] min_points: int = 2 exclude_placeholders: bool = True + fit_sources: tuple[TransferValueSource, ...] = _DEFAULT_LINEAR_FIT_SOURCES + prediction_sources: tuple[TransferValueSource, ...] = ( + _DEFAULT_LINEAR_PREDICTION_SOURCES + ) + fit_max_depth: int = 0 + prediction_max_depth: int = 1 def __post_init__(self) -> None: """Validate obvious configuration errors eagerly.""" @@ -67,6 +114,55 @@ def __post_init__(self) -> None: if self.min_points < 2: raise PolicyError("LinearTransfer min_points must be at least 2") + object.__setattr__( + self, + "fit_sources", + _normalize_transfer_value_sources( + self.fit_sources, + field_name="fit_sources", + ), + ) + object.__setattr__( + self, + "prediction_sources", + _normalize_transfer_value_sources( + self.prediction_sources, + field_name="prediction_sources", + ), + ) + + if self.fit_max_depth < 0: + raise PolicyError("LinearTransfer fit_max_depth must be non-negative") + if self.prediction_max_depth < 0: + raise PolicyError( + "LinearTransfer prediction_max_depth must be non-negative" + ) + TransferModel = SubstitutionTransfer | LinearTransfer """Closed union of transfer models supported by the core resolver.""" + + +def _normalize_transfer_value_sources( + sources: tuple[str, ...], + *, + field_name: str, +) -> tuple[TransferValueSource, ...]: + """Validate and deduplicate source-label controls for linear transfers.""" + + if not sources: + raise PolicyError(f"LinearTransfer {field_name} may not be empty") + + normalized: list[TransferValueSource] = [] + seen: set[str] = set() + for source in sources: + if source not in _ALLOWED_TRANSFER_VALUE_SOURCES: + allowed = ", ".join(sorted(_ALLOWED_TRANSFER_VALUE_SOURCES)) + raise PolicyError( + f"LinearTransfer {field_name} contains unsupported source " + f"{source!r}; allowed values are: {allowed}" + ) + if source not in seen: + normalized.append(source) + seen.add(source) + return tuple(normalized) diff --git a/src/atomref/xh.py b/src/atomref/xh.py index e445f11..5018d99 100644 --- a/src/atomref/xh.py +++ b/src/atomref/xh.py @@ -8,7 +8,12 @@ from .elements import canonicalize_element_symbol, is_valid_element_symbol from .errors import PolicyError -from .policy import LookupResult, ValuePolicy, get_value, 
lookup_value +from .policy import ( + LookupResult, + ValuePolicy, + _get_value_from_policy_source, + _lookup_value_from_policy_source, +) from .registry import ( DatasetInfo, DatasetRef, @@ -135,7 +140,7 @@ def lookup_xh_bond_length( """Resolve a parent-element X-H bond length with provenance.""" active = DEFAULT_XH_POLICY if policy is None else policy - lookup = lookup_value(symbol, policy=active.as_value_policy()) + lookup = _lookup_value_from_policy_source(symbol, source=active) if lookup.value is None and _normalize_xh_symbol(symbol) == "H": return LookupResult( value=None, @@ -154,7 +159,7 @@ def get_xh_bond_length( """Return only the selected X-H bond-length value, without provenance.""" active = DEFAULT_XH_POLICY if policy is None else policy - return get_value(symbol, policy=active.as_value_policy()) + return _get_value_from_policy_source(symbol, source=active) DEFAULT_XH_POLICY = XHPolicy( diff --git a/tests/policy/test_policy.py b/tests/policy/test_policy.py index 3b38717..618829a 100644 --- a/tests/policy/test_policy.py +++ b/tests/policy/test_policy.py @@ -1,11 +1,64 @@ from __future__ import annotations +from dataclasses import dataclass + import pytest import atomref as ar from atomref.errors import PolicyError +def _make_custom_set( + quantity: str, + set_id: str, + values: dict[str, float | None], +) -> ar.ElementScalarSet: + return ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef(quantity, set_id), + values=values, + name=set_id, + units='angstrom', + ) + + +def _make_partial_covalent_policy(*, include_o: bool) -> ar.RadiiPolicy: + values = { + 'C': 0.76, + 'N': 0.71, + } + if include_o: + values['O'] = 0.66 + custom = ar.ElementScalarSet.from_mapping( + ref=ar.DatasetRef('covalent_radius', 'demo_partial_cov'), + values=values, + name='Demo partial covalent set', + units='angstrom', + ) + return ar.RadiiPolicy( + kind='covalent', + base_set=custom, + transfers=( + ar.LinearTransfer( + predictors=(ar.DatasetRef('covalent_radius', 'cordero2008'),), + min_points=2, + exclude_placeholders=True, + ), + ), + ) + + +@dataclass +class _DemoPolicyWrapper: + base: ar.ElementScalarSet + source: object | None = None + + def as_value_policy(self) -> ar.ValuePolicy[str]: + transfers = () + if self.source is not None: + transfers = (ar.SubstitutionTransfer(source=self.source),) + return ar.ValuePolicy(base=self.base, transfers=transfers) + + def test_lookup_value_is_public_generic_entry_point() -> None: policy = ar.ValuePolicy( base=ar.DatasetRef('covalent_radius', 'cordero2008'), @@ -14,6 +67,7 @@ def test_lookup_value_is_public_generic_entry_point() -> None: lookup = ar.lookup_value('H', policy=policy) assert lookup.source == 'override' assert lookup.value == pytest.approx(0.5) + assert lookup.transfer_depth == 0 def test_get_value_returns_only_scalar() -> None: @@ -51,7 +105,10 @@ def test_substitution_transfer_accepts_policy_source() -> None: lookup = ar.lookup_value('Bk', policy=policy) assert lookup.source == 'transfer_substitution' assert lookup.value == pytest.approx(1.54) - assert lookup.resolved_from == (ar.DatasetRef('covalent_radius', 'csd_legacy_cov'),) + assert lookup.transfer_depth == 2 + assert lookup.resolved_from == ( + ar.DatasetRef('covalent_radius', 'csd_legacy_cov'), + ) assert any('policy source' in note for note in lookup.notes) @@ -65,5 +122,127 @@ def test_linear_transfer_accepts_policy_predictor() -> None: lookup = ar.lookup_vdw_radius('Pm', policy=policy) assert lookup.source == 'transfer_linear' assert lookup.value == 
pytest.approx(ar.lookup_vdw_radius('Pm').value) + assert lookup.transfer_depth == 1 assert lookup.fit is not None assert any('policy source' in note for note in lookup.notes) + + +def test_linear_transfer_defaults_allow_direct_fit_and_one_nested_prediction() -> None: + predictor_policy = _make_partial_covalent_policy(include_o=True) + policy = ar.XHPolicy( + base_set='csd_legacy_xh_cno', + transfers=( + ar.LinearTransfer( + predictors=(predictor_policy,), + min_points=3, + exclude_placeholders=True, + ), + ), + ) + lookup = ar.lookup_xh_bond_length('S', policy=policy) + assert lookup.source == 'transfer_linear' + assert lookup.transfer_depth == 2 + assert lookup.fit is not None + assert lookup.fit.n_points == 3 + assert lookup.value == pytest.approx(ar.lookup_xh_bond_length('S').value) + + +def test_linear_transfer_fit_restrictions_block_inference_on_inference_by_default( +) -> None: + predictor_policy = _make_partial_covalent_policy(include_o=False) + policy = ar.XHPolicy( + base_set='csd_legacy_xh_cno', + transfers=( + ar.LinearTransfer( + predictors=(predictor_policy,), + min_points=3, + exclude_placeholders=True, + ), + ), + ) + with pytest.raises(PolicyError, match='fit-source restrictions'): + ar.lookup_xh_bond_length('S', policy=policy) + + +def test_linear_transfer_fit_restrictions_can_be_relaxed_explicitly() -> None: + predictor_policy = _make_partial_covalent_policy(include_o=False) + policy = ar.XHPolicy( + base_set='csd_legacy_xh_cno', + transfers=( + ar.LinearTransfer( + predictors=(predictor_policy,), + min_points=3, + exclude_placeholders=True, + fit_sources=('base', 'override', 'transfer_linear'), + fit_max_depth=1, + ), + ), + ) + lookup = ar.lookup_xh_bond_length('S', policy=policy) + assert lookup.source == 'transfer_linear' + assert lookup.fit is not None + assert lookup.fit.n_points == 3 + + +def test_linear_transfer_prediction_depth_can_be_tightened() -> None: + predictor_policy = _make_partial_covalent_policy(include_o=True) + policy = ar.XHPolicy( + base_set='csd_legacy_xh_cno', + transfers=( + ar.LinearTransfer( + predictors=(predictor_policy,), + min_points=3, + exclude_placeholders=True, + prediction_max_depth=0, + ), + ), + ) + lookup = ar.lookup_xh_bond_length('S', policy=policy) + assert lookup.value is None + assert lookup.source == 'missing' + assert any('prediction_max_depth' in note for note in lookup.notes) + + +def test_linear_transfer_rejects_invalid_nested_source_configuration() -> None: + with pytest.raises(PolicyError, match='fit_max_depth'): + ar.LinearTransfer( + predictors=(ar.DatasetRef('covalent_radius', 'cordero2008'),), + fit_max_depth=-1, + ) + with pytest.raises(PolicyError, match='allowed values'): + ar.LinearTransfer( + predictors=(ar.DatasetRef('covalent_radius', 'cordero2008'),), + prediction_sources=('missing',), # type: ignore[arg-type] + ) + + +def test_lookup_value_detects_generic_policy_cycles() -> None: + empty_1 = _make_custom_set('covalent_radius', 'cycle_empty_1', {}) + empty_2 = _make_custom_set('covalent_radius', 'cycle_empty_2', {}) + policy_1 = ar.ValuePolicy(base=empty_1) + policy_2 = ar.ValuePolicy( + base=empty_2, + transfers=(ar.SubstitutionTransfer(source=policy_1),), + ) + object.__setattr__( + policy_1, + 'transfers', + (ar.SubstitutionTransfer(source=policy_2),), + ) + + with pytest.raises(PolicyError, match='cyclic policy resolution detected'): + ar.lookup_value('C', policy=policy_1) + + +def test_wrapper_policy_cycles_are_detected() -> None: + empty = _make_custom_set('covalent_radius', 'demo_empty_cov', {}) + 
wrapper_a = _DemoPolicyWrapper(base=empty) + wrapper_b = _DemoPolicyWrapper(base=empty, source=wrapper_a) + wrapper_a.source = wrapper_b + + policy = ar.ValuePolicy( + base=empty, + transfers=(ar.SubstitutionTransfer(source=wrapper_a),), + ) + with pytest.raises(PolicyError, match='cyclic policy resolution detected'): + ar.lookup_value('C', policy=policy) From 53ae9f7754fcbf9a689e24f3ce2633f544f3f8f5 Mon Sep 17 00:00:00 2001 From: Ivan Chernyshov Date: Sun, 15 Mar 2026 22:31:21 +0300 Subject: [PATCH 15/15] Cleanups docs --- CHANGELOG.md | 56 +++------------------------ DEV_PLAN.md | 21 ++++++---- README.md | 17 ++++---- docs/api/elements.md | 4 +- docs/api/radii.md | 2 +- docs/api/xh.md | 4 +- docs/datasets/atomic_radius.md | 2 +- docs/datasets/covalent_radius.md | 6 +-- docs/datasets/index.md | 2 +- docs/datasets/van_der_waals_radius.md | 6 +-- docs/datasets/xh_bond_length.md | 4 +- docs/dev/dev_plan.md | 21 ++++++---- docs/guide/custom_sets.md | 4 +- docs/guide/notebooks.md | 2 +- docs/guide/policies.md | 4 +- docs/index.md | 17 ++++---- docs/notebooks/01-quickstart.md | 2 +- notebooks/01-quickstart.ipynb | 6 +-- src/atomref/policy.py | 16 +++++--- src/atomref/registry.py | 2 +- src/atomref/transfer.py | 6 +-- 21 files changed, 84 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8650d50..18d2c3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,35 +4,13 @@ ### Added -- explicit nested-policy safeguards for `LinearTransfer`: +- `LookupResult.transfer_depth`, which records how many transfer steps were + involved in the returned numeric value. +- Explicit nested-policy safeguards for `LinearTransfer` via: - `fit_sources` - `fit_max_depth` - `prediction_sources` - `prediction_max_depth` -- `LookupResult.transfer_depth` to record how many transfer steps produced the - returned value. - -### Changed - -- linear-transfer fitting now distinguishes direct predictor values from nested - policy-derived predictor values. -- the default nested linear-transfer behavior is now conservative for fitting - and allows at most one additional completion step for the final predictor - value. -- cycle detection now uses context-local resolution tokens and correctly catches - recursion through wrapper policies such as `RadiiPolicy` and `XHPolicy`. -- docs were expanded to explain nested-policy predictors, transfer depth, and - cycle detection. - -## 0.1.4 - 2026-03-15 - -### Added - -- `LookupResult.transfer_depth`, which records how many transfer steps were - involved in the returned numeric value. -- Source/depth controls for nested linear-transfer workflows via - `LinearTransfer.fit_sources`, `LinearTransfer.fit_max_depth`, - `LinearTransfer.prediction_sources`, and `LinearTransfer.prediction_max_depth`. - Regression tests covering generic-policy cycles, wrapper-policy cycles, conservative nested-fit defaults, and explicit opt-in for deeper nested linear workflows. @@ -42,6 +20,8 @@ - Nested policy-backed linear transfers are now guarded in two phases: conservative defaults are used for fit training, while one additional nested completion step remains allowed at prediction time. +- Linear-transfer fitting now distinguishes direct predictor values from nested + policy-derived predictor values. - Cycle detection now tracks both generic policies and wrapper policies using a context-local activation stack, so recursion through freshly materialized wrapper policies is detected reliably and safely. 
@@ -56,32 +36,6 @@ - Added guidance on when chained correlations are scientifically reasonable and how to opt in deliberately when broader fit training is desired. -## 0.1.4 - 2026-03-15 - -### Added - -- `LookupResult.transfer_depth` is now used consistently across nested - substitution and linear-transfer workflows so callers can tell how many - transfer steps contributed to a returned value. -- New tests covering nested-policy fit controls, prediction-depth limits, and - cycle detection for both generic and wrapper policies. - -### Changed - -- `LinearTransfer` now distinguishes between values that may participate in - fitting (`fit_sources`, `fit_max_depth`) and values that may be used for the - final element-specific predictor lookup (`prediction_sources`, - `prediction_max_depth`). -- The default linear-transfer behavior is now conservative for fitting - (direct predictor values only) while still allowing one nested completion - step during final prediction. -- Policy-resolution cycle detection now tracks wrapper-policy identities as - well as generic `ValuePolicy` objects and is stored in a context-local stack - instead of a process-global mutable list. -- Quantity wrappers continue to use the generic policy core, but now route - through wrapper-aware lookup helpers so cycle checks remain effective for - `RadiiPolicy` and `XHPolicy`. - ## 0.1.3 - 2026-03-15 ### Added diff --git a/DEV_PLAN.md b/DEV_PLAN.md index 7252862..94cdaac 100644 --- a/DEV_PLAN.md +++ b/DEV_PLAN.md @@ -1,21 +1,26 @@ # Development plan -## v0.1 +## Current status (implemented in the `0.1.x` line) -- element metadata -- covalent and van der Waals radii sets -- explicit provenance -- radii policies +- stable element metadata +- curated covalent, van der Waals, and atomic-radius support datasets +- explicit provenance and coverage metadata +- generic value-policy core plus radii and X–H convenience wrappers - substitution and linear transfer - custom element-indexed scalar sets +- policy-backed transfer sources +- nested-policy safeguards, transfer-depth tracking, and cycle detection +- provisional X–H support via `csd_legacy_xh_cno`, `XHPolicy`, and + `DEFAULT_XH_POLICY` -## v0.2 +## Planned for `0.2.x` -- X-H bond-length datasets +- broader X–H datasets and policies - experimental plus computational support sets +- pairwise helper logic such as reference sums and normalization schemes - restoration of incomplete experimental data from broader-support predictors -## v0.3 +## Longer-term design ideas - radial atomic reference functions - simple proto-density support based on spherically averaged atomic data diff --git a/README.md b/README.md index 52a3e8c..869aace 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ It is not meant to be yet another periodic-table encyclopedia. The package is for code that needs stable atomic reference values with explicit provenance, clear fallback behavior, and honest handling of incomplete preferred datasets. -What you get in the current `0.1.x` line: +What you get in the current release line: - stable element metadata, - curated named radii sets, @@ -43,8 +43,8 @@ What you get in the current `0.1.x` line: The metadata layer already records `domain` explicitly because the package is built for later extension, but the current runtime intentionally keeps the -implementation narrow and stable: **v0.1 resolves only element-domain scalar -values**. +implementation narrow and stable: **the current runtime resolves only +element-domain scalar values**. 
## Why this exists @@ -54,7 +54,7 @@ Instead of hiding ad hoc defaults inside algorithm code, you choose a target set, describe how missing values may be restored, and keep provenance on what was actually returned. -The default `0.1.x` behavior is intentionally simple and practical: +The built-in default behavior is intentionally simple and practical: - **Cordero covalent radii** (`cordero2008`) are the preferred covalent target set, with missing values substituted from the **legacy CSD covalent radii** @@ -65,11 +65,10 @@ The default `0.1.x` behavior is intentionally simple and practical: - **CSD/ConQuest hydrogen-normalisation defaults** (`csd_legacy_xh_cno`) are a provisional sparse X–H target set for `C`, `N`, and `O`, with other parent elements inferred from **Cordero covalent radii** through a fitted linear - policy. + transfer. -Nested policy predictors are supported too. In `0.1.4`, `LinearTransfer` -separates **fit-time** use of nested predictor values from -**prediction-time** use. By default, the fit may use only direct nested +Nested policy predictors are supported too. `LinearTransfer` separates +**fit-time** use of nested predictor values from **prediction-time** use. By default, the fit may use only direct nested values, while the final requested element may still use one additional nested completion step. That is a useful compromise for workflows such as provisional X–H inference from a chosen covalent-radii policy. @@ -125,7 +124,7 @@ You can also load a packaged set directly: ## Notebook walkthroughs -The repository ships example notebooks for the main `0.1.x` workflows. In the +The repository ships example notebooks for the main workflows. In the documentation they are also available as rendered Markdown pages, so users can read them without opening Jupyter first. diff --git a/docs/api/elements.md b/docs/api/elements.md index c4275a0..2f066c7 100644 --- a/docs/api/elements.md +++ b/docs/api/elements.md @@ -1,7 +1,7 @@ # atomref.elements -Element identity is intentionally minimal in v0.1: atomic number, symbol, and -name. The module also contains the canonicalization helpers used throughout the +Element identity is intentionally minimal in the current implementation: +atomic number, symbol, and name. The module also contains the canonicalization helpers used throughout the package. ::: atomref.elements diff --git a/docs/api/radii.md b/docs/api/radii.md index 05617a4..ff5e214 100644 --- a/docs/api/radii.md +++ b/docs/api/radii.md @@ -1,6 +1,6 @@ # atomref.radii -This is the main user-facing module in v0.1. +This is the main user-facing module for radii workflows. It provides radii policies, packaged radii-set discovery, lookup helpers, and policy-assessment reports. diff --git a/docs/api/xh.md b/docs/api/xh.md index cbc1465..f96db27 100644 --- a/docs/api/xh.md +++ b/docs/api/xh.md @@ -1,7 +1,7 @@ # atomref.xh -This module provides the provisional X–H bond-length helpers introduced in the -`0.1.x` line. +This module provides the provisional X–H bond-length helpers available in the +current release line. It is intentionally narrow: diff --git a/docs/datasets/atomic_radius.md b/docs/datasets/atomic_radius.md index 1704980..2852b3e 100644 --- a/docs/datasets/atomic_radius.md +++ b/docs/datasets/atomic_radius.md @@ -1,6 +1,6 @@ # Atomic radius -The `atomic_radius` quantity exists in v0.1 to hold support datasets that are +The `atomic_radius` quantity exists to hold support datasets that are scientifically useful but should not be presented as direct condensed-phase vdW radii. 
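+
+In practice an atomic-radius set is consumed as a *support predictor* rather
+than returned directly. A minimal sketch, assuming the packaged defaults (an
+`alvarez2013` van der Waals target restored through a linear transfer fitted
+against an atomic-radius support set such as `rahm2016`):
+
+```python
+import atomref as ar
+
+# Pm is assumed to be missing from the packaged vdW target set, so the value
+# is inferred through the fitted linear transfer from the support data.
+result = ar.lookup_vdw_radius("Pm")
+result.source         # 'transfer_linear' when the target set had no value
+result.resolved_from  # points at the support set the fit was built from
+```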
diff --git a/docs/datasets/covalent_radius.md b/docs/datasets/covalent_radius.md index d2e2251..5e022fd 100644 --- a/docs/datasets/covalent_radius.md +++ b/docs/datasets/covalent_radius.md @@ -1,12 +1,12 @@ # Covalent radius -The covalent-radius quantity in v0.1 is aimed at bond-detection and related -geometry workflows. It currently ships one preferred target dataset and one +The covalent-radius quantity is aimed at bond-detection and related geometry +workflows. It currently ships one preferred target dataset and one legacy support dataset. ## Cordero covalent radii (`cordero2008`) -This is the main covalent-radius target set in `atomref` v0.1. +This is the main covalent-radius target set in the current release line. - **What it is:** a broad covalent-radius compilation based mainly on crystallographic bond distances. diff --git a/docs/datasets/index.md b/docs/datasets/index.md index d699ff0..d3b2951 100644 --- a/docs/datasets/index.md +++ b/docs/datasets/index.md @@ -30,7 +30,7 @@ or `list_xh_sets(...)`. If you want the packaged values themselves, use `get_builtin_set(...)`, `get_radii_set(...)`, or `get_xh_set(...)`. -## Built-in quantity families in `0.1.x` +## Built-in quantity families - [Covalent radius](covalent_radius.md) - [van der Waals radius](van_der_waals_radius.md) diff --git a/docs/datasets/van_der_waals_radius.md b/docs/datasets/van_der_waals_radius.md index c678639..3013d57 100644 --- a/docs/datasets/van_der_waals_radius.md +++ b/docs/datasets/van_der_waals_radius.md @@ -1,7 +1,7 @@ # van der Waals radius -The van der Waals quantity in v0.1 intentionally includes several target sets -with different scientific backgrounds. This lets users choose between a classic +The van der Waals quantity intentionally includes several target sets with +different scientific backgrounds. This lets users choose between a classic historical compilation, structural contact-derived sets, and compatibility-only legacy tables. @@ -27,7 +27,7 @@ contacts. ## Alvarez van der Waals radii (`alvarez2013`) -This is the main van der Waals target set in `atomref` v0.1. +This is the main van der Waals target set in the current release line. - **What it is:** a broad structural vdW set derived from statistical analysis of many interatomic distances in the Cambridge Structural Database. diff --git a/docs/datasets/xh_bond_length.md b/docs/datasets/xh_bond_length.md index 2bef656..28364c5 100644 --- a/docs/datasets/xh_bond_length.md +++ b/docs/datasets/xh_bond_length.md @@ -1,7 +1,7 @@ # X–H bond length -The `xh_bond_length` quantity is a small provisional addition in the `0.1.x` -line. +The `xh_bond_length` quantity is a small provisional addition in the current +release line. Its purpose is not to claim a complete literature survey of X–H bond lengths. 
Instead, it provides a stable, provenance-aware starting point for diff --git a/docs/dev/dev_plan.md b/docs/dev/dev_plan.md index 7252862..94cdaac 100644 --- a/docs/dev/dev_plan.md +++ b/docs/dev/dev_plan.md @@ -1,21 +1,26 @@ # Development plan -## v0.1 +## Current status (implemented in the `0.1.x` line) -- element metadata -- covalent and van der Waals radii sets -- explicit provenance -- radii policies +- stable element metadata +- curated covalent, van der Waals, and atomic-radius support datasets +- explicit provenance and coverage metadata +- generic value-policy core plus radii and X–H convenience wrappers - substitution and linear transfer - custom element-indexed scalar sets +- policy-backed transfer sources +- nested-policy safeguards, transfer-depth tracking, and cycle detection +- provisional X–H support via `csd_legacy_xh_cno`, `XHPolicy`, and + `DEFAULT_XH_POLICY` -## v0.2 +## Planned for `0.2.x` -- X-H bond-length datasets +- broader X–H datasets and policies - experimental plus computational support sets +- pairwise helper logic such as reference sums and normalization schemes - restoration of incomplete experimental data from broader-support predictors -## v0.3 +## Longer-term design ideas - radial atomic reference functions - simple proto-density support based on spherically averaged atomic data diff --git a/docs/guide/custom_sets.md b/docs/guide/custom_sets.md index ed4d664..71306bb 100644 --- a/docs/guide/custom_sets.md +++ b/docs/guide/custom_sets.md @@ -26,6 +26,6 @@ This is useful when you want to: - combine a user dataset with built-in support data through substitution or linear transfer. -In v0.1 custom sets are element-domain scalar datasets, which keeps the data -model small and stable. Later versions may add more specialized domains, but +In the current implementation custom sets are element-domain scalar datasets, +which keeps the data model small and stable. Later versions may add more specialized domains, but custom element-wise sets are already enough for many geometry workflows. diff --git a/docs/guide/notebooks.md b/docs/guide/notebooks.md index cdd1721..2ad0045 100644 --- a/docs/guide/notebooks.md +++ b/docs/guide/notebooks.md @@ -1,6 +1,6 @@ # Notebook gallery -`atomref` ships example Jupyter notebooks that cover the main v0.1 workflows. +`atomref` ships example Jupyter notebooks that cover the main workflows. Each notebook is available in two forms: - the original `.ipynb` file in the repository, diff --git a/docs/guide/policies.md b/docs/guide/policies.md index 912563b..b9e3b7a 100644 --- a/docs/guide/policies.md +++ b/docs/guide/policies.md @@ -22,7 +22,7 @@ selection logic that sits on top of them. ## Resolution order -In `0.1.x` every lookup follows the same ordered path: +In the current implementation every lookup follows the same ordered path: 1. **Blocked key** (optional) 2. **Override** @@ -58,7 +58,7 @@ default vdW policy starts from the **Alvarez van der Waals radii** A transfer model is used only when the base dataset has no value for the requested element. -Built-in transfer models in `0.1.x` are: +Built-in transfer models are: - `SubstitutionTransfer` — take a value directly from another dataset or policy, - `LinearTransfer` — infer a target-equivalent value from another dataset or diff --git a/docs/index.md b/docs/index.md index 71babb9..198fa6a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,7 +14,7 @@ It is not meant to be yet another periodic-table encyclopedia. 
The package is for code that needs stable atomic reference values with explicit provenance, clear fallback behavior, and honest handling of incomplete preferred datasets. -What you get in the current `0.1.x` line: +What you get in the current release line: - stable element metadata, - curated named radii sets, @@ -43,8 +43,8 @@ What you get in the current `0.1.x` line: The metadata layer already records `domain` explicitly because the package is built for later extension, but the current runtime intentionally keeps the -implementation narrow and stable: **v0.1 resolves only element-domain scalar -values**. +implementation narrow and stable: **the current runtime resolves only +element-domain scalar values**. ## Why this exists @@ -54,7 +54,7 @@ Instead of hiding ad hoc defaults inside algorithm code, you choose a target set, describe how missing values may be restored, and keep provenance on what was actually returned. -The default `0.1.x` behavior is intentionally simple and practical: +The built-in default behavior is intentionally simple and practical: - **Cordero covalent radii** (`cordero2008`) are the preferred covalent target set, with missing values substituted from the **legacy CSD covalent radii** @@ -65,11 +65,10 @@ The default `0.1.x` behavior is intentionally simple and practical: - **CSD/ConQuest hydrogen-normalisation defaults** (`csd_legacy_xh_cno`) are a provisional sparse X–H target set for `C`, `N`, and `O`, with other parent elements inferred from **Cordero covalent radii** through a fitted linear - policy. + transfer. -Nested policy predictors are supported too. In `0.1.4`, `LinearTransfer` -separates **fit-time** use of nested predictor values from -**prediction-time** use. By default, the fit may use only direct nested +Nested policy predictors are supported too. `LinearTransfer` separates +**fit-time** use of nested predictor values from **prediction-time** use. By default, the fit may use only direct nested values, while the final requested element may still use one additional nested completion step. That is a useful compromise for workflows such as provisional X–H inference from a chosen covalent-radii policy. @@ -125,7 +124,7 @@ You can also load a packaged set directly: ## Notebook walkthroughs -The repository ships example notebooks for the main `0.1.x` workflows. In the +The repository ships example notebooks for the main workflows. In the documentation they are also available as rendered Markdown pages, so users can read them without opening Jupyter first. diff --git a/docs/notebooks/01-quickstart.md b/docs/notebooks/01-quickstart.md index 475e218..12e8813 100644 --- a/docs/notebooks/01-quickstart.md +++ b/docs/notebooks/01-quickstart.md @@ -3,7 +3,7 @@ [Open the original notebook on GitHub](https://github.com/DeloneCommons/atomref/blob/main/notebooks/01-quickstart.ipynb) # atomref quickstart -This notebook covers the main public API in v0.1: element helpers, direct +This notebook covers the main public API: element helpers, direct `get_*` calls, provenance-carrying `lookup_*` calls, and packaged dataset discovery. 
```python diff --git a/notebooks/01-quickstart.ipynb b/notebooks/01-quickstart.ipynb index 6d6d16f..47b58d1 100644 --- a/notebooks/01-quickstart.ipynb +++ b/notebooks/01-quickstart.ipynb @@ -4,11 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# atomref quickstart\n", - "\n", - "This notebook covers the main public API in v0.1: element helpers, direct\n", - "`get_*` calls, provenance-carrying `lookup_*` calls, and packaged dataset\n", - "discovery.\n" + "# atomref quickstart\n\nThis notebook covers the main public API: element helpers, direct\n`get_*` calls, provenance-carrying `lookup_*` calls, and packaged dataset\ndiscovery.\n" ] }, { diff --git a/src/atomref/policy.py b/src/atomref/policy.py index a2f922f..79cc9f3 100644 --- a/src/atomref/policy.py +++ b/src/atomref/policy.py @@ -80,7 +80,7 @@ def __float__(self) -> float: class ValuePolicy(Generic[K]): """Ordered rule set for resolving element-domain scalar values. - The v0.1 runtime resolves only element-domain policies even though the + The current runtime resolves only element-domain policies even though the metadata layer already records a more general ``domain`` concept. During construction, element-domain override keys are normalized to canonical element symbols and validated as finite floats. @@ -508,7 +508,9 @@ def _fit_transfer_model(base: DatasetLike, transfer: TransferModel) -> LinearFit if not isinstance(transfer, LinearTransfer): return None if len(transfer.predictors) != 1: - raise PolicyError("v0.1 LinearTransfer supports exactly one predictor source") + raise PolicyError( + "LinearTransfer currently supports exactly one predictor source" + ) predictor = transfer.predictors[0] if isinstance(base, DatasetRef) and isinstance(predictor, DatasetRef): @@ -577,7 +579,9 @@ def _apply_linear_transfer( """Try to resolve ``symbol`` through linear transfer from predictor data.""" if len(transfer.predictors) != 1: - raise PolicyError("v0.1 LinearTransfer supports exactly one predictor source") + raise PolicyError( + "LinearTransfer currently supports exactly one predictor source" + ) predictor_value, note = _lookup_transfer_source_value( symbol, @@ -659,7 +663,9 @@ def _resolve_value( target = _resolve_target_ref(policy) base_set = resolve_dataset_like(policy.base) if base_set.info.domain != "element": - raise PolicyError("v0.1 resolver supports only element-domain datasets") + raise PolicyError( + "the resolver currently supports only element-domain datasets" + ) sym = _normalize_element_symbol(symbol) if sym is None: @@ -776,7 +782,7 @@ def lookup_value(symbol: str | None, *, policy: ValuePolicy[str]) -> LookupResul """Public entry point for generic element-domain scalar lookup. This is the same resolver used internally by the radii convenience layer. - In v0.1 the runtime supports only element-domain policies. + In the current implementation the runtime supports only element-domain policies. 
""" return _lookup_value_with_owner(symbol, policy=policy, owner=None) diff --git a/src/atomref/registry.py b/src/atomref/registry.py index 479ff97..b17b941 100644 --- a/src/atomref/registry.py +++ b/src/atomref/registry.py @@ -576,7 +576,7 @@ def get_builtin_set(ref: DatasetRef) -> ElementScalarSet: info = get_dataset_info(ref) if info.domain != "element": raise DatasetError( - f"only element-domain datasets are supported in v0.1: {info.ref!r}" + f"only element-domain datasets are currently supported: {info.ref!r}" ) if not isinstance(info.storage, Mapping): raise DatasetError(f"missing storage metadata for dataset: {info.ref!r}") diff --git a/src/atomref/transfer.py b/src/atomref/transfer.py index 54eb724..9adb0ce 100644 --- a/src/atomref/transfer.py +++ b/src/atomref/transfer.py @@ -81,9 +81,9 @@ class SubstitutionTransfer: class LinearTransfer: """Infer missing target values from one or more predictor datasets or policies. - In v0.1 the public API stores predictors as a tuple for forward - compatibility, but the runtime implementation intentionally accepts exactly - one predictor source. + In the current implementation the public API stores predictors as a tuple + for forward compatibility, but the runtime intentionally accepts exactly one + predictor source. For nested policy predictors, two safeguards apply: