From 4cbaf02610e454a3d8616a8d27b3c2287966f5c3 Mon Sep 17 00:00:00 2001 From: Max Date: Tue, 6 Jun 2023 00:59:36 +0200 Subject: [PATCH] packages/dvc: fix build --- packages/patched-derivations.nix | 31 +- ...name-redefinition-in-pathspec-0-10-x.patch | 36 - patches/base/dvc/md5-to-sha256.patch | 1893 ----------------- patches/base/dvc/no-analytics.patch | 166 +- patches/base/dvc/yaml-to-json.patch | 127 -- 5 files changed, 99 insertions(+), 2154 deletions(-) delete mode 100644 patches/base/dvc/GHPR_8663-ignore-solve-re-error-on-group-name-redefinition-in-pathspec-0-10-x.patch delete mode 100644 patches/base/dvc/md5-to-sha256.patch delete mode 100644 patches/base/dvc/yaml-to-json.patch diff --git a/packages/patched-derivations.nix b/packages/patched-derivations.nix index 03efc65..85a68e9 100644 --- a/packages/patched-derivations.nix +++ b/packages/patched-derivations.nix @@ -1,34 +1,55 @@ let tools = import ./lib/tools.nix; pins = import ./sources; + + dvcMd5ToSha256 = old: { + postPatch = (old.postPatch or "") + '' + grep -Rwl md5 | xargs sed -i s/md5/sha256/g + ''; + }; + + dvcYamlToJson = old: { + postPatch = (old.postPatch or "") + '' + grep -Rwl yaml | xargs sed -i s/yaml/json/g + grep -Rwl ruamel.json | xargs sed -i s/ruamel.json/ruamel.yaml/g + ''; + }; in with tools; super: rec { dvc = patch (super.dvc.overrideAttrs (old: let filteredBaseDeps = super.lib.subtractLists [ super.python3Packages.dvc-data + super.python3Packages.dvc-http ] old.propagatedBuildInputs; baseDeps = filteredBaseDeps ++ [ dvc-data + dvc-http ]; - in { + patched = dvcMd5ToSha256 old; + patched' = dvcYamlToJson patched; + in patched' // { propagatedBuildInputs = with super.python3Packages; baseDeps ++ [ aiobotocore boto3 (s3fs.overrideAttrs (_: { postPatch = '' substituteInPlace requirements.txt \ - --replace "fsspec==2022.02.0" "fsspec" \ + --replace "fsspec==2023.3.0" "fsspec" \ --replace "aiobotocore~=2.1.0" "aiobotocore" ''; })) ]; })) "patches/base/dvc"; - dvc-data = patch (super.python3Packages.dvc-data.override { + dvc-data = (super.python3Packages.dvc-data.override { inherit dvc-objects; - }) "patches/base/dvc-data"; + }).overrideAttrs dvcMd5ToSha256; - dvc-objects = patch super.python3Packages.dvc-objects "patches/base/dvc-objects"; + dvc-http = super.python3Packages.dvc-http.override { + inherit dvc-objects; + }; + + dvc-objects = super.python3Packages.dvc-objects.overrideAttrs dvcMd5ToSha256; sssd = (super.sssd.override { withSudo = true; }).overrideAttrs (old: { postFixup = (old.postFixup or "") + '' diff --git a/patches/base/dvc/GHPR_8663-ignore-solve-re-error-on-group-name-redefinition-in-pathspec-0-10-x.patch b/patches/base/dvc/GHPR_8663-ignore-solve-re-error-on-group-name-redefinition-in-pathspec-0-10-x.patch deleted file mode 100644 index bc62e7f..0000000 --- a/patches/base/dvc/GHPR_8663-ignore-solve-re-error-on-group-name-redefinition-in-pathspec-0-10-x.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 18eab2ef9a6e5fa1d9d7e93ea96710ad3949ccec Mon Sep 17 00:00:00 2001 -From: hiroto7 <32275337+hiroto7@users.noreply.github.com> -Date: Tue, 6 Dec 2022 10:43:20 +0000 -Subject: [PATCH] ignore: solve re.error on group name redefinition in pathspec - 0.10.x - -Remove regex concatenation that causes re.error -Fixes #8217 ---- - dvc/ignore.py | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/dvc/ignore.py b/dvc/ignore.py -index 2177768c29..2696e2678b 100644 ---- a/dvc/ignore.py -+++ b/dvc/ignore.py -@@ -40,7 +40,7 @@ def __init__(self, pattern_list, dirname, sep): - ] - - self.ignore_spec = [ -- (ignore, re.compile("|".join(item[0] for item in group))) -+ (ignore, [re.compile(item[0]) for item in group]) - for ignore, group in groupby( - self.regex_pattern_list, lambda x: x[1] - ) -@@ -107,8 +107,8 @@ def matches(pattern, path, is_dir) -> bool: - - result = False - -- for ignore, pattern in self.ignore_spec[::-1]: -- if matches(pattern, path, is_dir): -+ for ignore, patterns in self.ignore_spec[::-1]: -+ if any(matches(pattern, path, is_dir) for pattern in patterns): - result = ignore - break - return result diff --git a/patches/base/dvc/md5-to-sha256.patch b/patches/base/dvc/md5-to-sha256.patch deleted file mode 100644 index 503318d..0000000 --- a/patches/base/dvc/md5-to-sha256.patch +++ /dev/null @@ -1,1893 +0,0 @@ -commit 31347d4f51e60c708cb7baf8cb3360c7cdbda2e2 -Author: Max -Date: Sat Dec 17 13:39:54 2022 +0100 - - md5 to sha256 for 2.17.0 - -diff --git a/dvc/api/data.py b/dvc/api/data.py -index a063612f..71fd715b 100644 ---- a/dvc/api/data.py -+++ b/dvc/api/data.py -@@ -28,9 +28,9 @@ def get_url(path, repo=None, rev=None, remote=None): - raise OutputNotFoundError(path, repo) - - dvc_repo = info["repo"] -- md5 = dvc_info["md5"] -+ sha256 = dvc_info["sha256"] - -- return dvc_repo.cloud.get_url_for(remote, checksum=md5) -+ return dvc_repo.cloud.get_url_for(remote, checksum=sha256) - - - class _OpenContextManager(GCM): -diff --git a/dvc/fs/data.py b/dvc/fs/data.py -index c3612aed..dec0d386 100644 ---- a/dvc/fs/data.py -+++ b/dvc/fs/data.py -@@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) - class DataFileSystem(FileSystem): - protocol = "local" - -- PARAM_CHECKSUM = "md5" -+ PARAM_CHECKSUM = "sha256" - - def _prepare_credentials(self, **config): - return config -diff --git a/dvc/fs/dvc.py b/dvc/fs/dvc.py -index 87a9ad50..fc997e9f 100644 ---- a/dvc/fs/dvc.py -+++ b/dvc/fs/dvc.py -@@ -56,8 +56,8 @@ def _merge_info(repo, fs_info, dvc_info): - ret["dvc_info"] = dvc_info - ret["type"] = dvc_info["type"] - ret["size"] = dvc_info["size"] -- if not fs_info and "md5" in dvc_info: -- ret["md5"] = dvc_info["md5"] -+ if not fs_info and "sha256" in dvc_info: -+ ret["sha256"] = dvc_info["sha256"] - - if fs_info: - ret["type"] = fs_info["type"] -@@ -443,7 +443,7 @@ class _DvcFileSystem(AbstractFileSystem): # pylint:disable=abstract-method - - class DvcFileSystem(FileSystem): - protocol = "local" -- PARAM_CHECKSUM = "md5" -+ PARAM_CHECKSUM = "sha256" - - def _prepare_credentials(self, **config): - return config -diff --git a/dvc/lock.py b/dvc/lock.py -index 3360001c..706a1f10 100644 ---- a/dvc/lock.py -+++ b/dvc/lock.py -@@ -181,7 +181,7 @@ class HardlinkLock(flufl.lock.Lock, LockBase): - - if self._tmp_dir is not None: - # Under Windows file path length is limited so we hash it -- filename = hashlib.md5(self._claimfile.encode()).hexdigest() -+ filename = hashlib.sha256(self._claimfile.encode()).hexdigest() - self._claimfile = os.path.join(self._tmp_dir, filename + ".lock") - - -diff --git a/dvc/output.py b/dvc/output.py -index fb7c0be5..f763468b 100644 ---- a/dvc/output.py -+++ b/dvc/output.py -@@ -59,7 +59,7 @@ CASE_SENSITIVE_CHECKSUM_SCHEMA = Any( - - # NOTE: currently there are only 3 possible checksum names: - # --# 1) md5 (LOCAL, SSH); -+# 1) sha256 (LOCAL, SSH); - # 2) etag (S3, GS, OSS, AZURE, HTTP); - # 3) checksum (HDFS); - # -@@ -844,7 +844,7 @@ class Output: - odb, - from_info, - from_fs, -- "md5", -+ "sha256", - upload=upload, - no_progress_bar=no_progress_bar, - ) -diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py -index eb63e41b..b084cd74 100644 ---- a/dvc/repo/__init__.py -+++ b/dvc/repo/__init__.py -@@ -117,7 +117,7 @@ class Repo: - - def _get_database_dir(self, db_name): - # NOTE: by default, store SQLite-based remote indexes and state's -- # `links` and `md5s` caches in the repository itself to avoid any -+ # `links` and `sha256s` caches in the repository itself to avoid any - # possible state corruption in 'shared cache dir' scenario, but allow - # user to override this through config when, say, the repository is - # located on a mounted volume — see -@@ -513,7 +513,7 @@ class Repo: - try: - if remote: - remote_odb = self.cloud.get_remote_odb(name=remote) -- oid = fs.info(fs_path)["dvc_info"]["md5"] -+ oid = fs.info(fs_path)["dvc_info"]["sha256"] - fs = remote_odb.fs - fs_path = remote_odb.oid_to_path(oid) - -diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py -index 648a837e..f4264496 100644 ---- a/dvc/repo/diff.py -+++ b/dvc/repo/diff.py -@@ -143,7 +143,7 @@ def _output_paths(repo, targets): - repo.odb.repo, - output.fs_path, - repo.odb.repo.fs, -- "md5", -+ "sha256", - dry_run=True, - ignore=output.dvcignore, - ) -diff --git a/dvc/repo/imp_url.py b/dvc/repo/imp_url.py -index 35a684f6..ed6328f9 100644 ---- a/dvc/repo/imp_url.py -+++ b/dvc/repo/imp_url.py -@@ -78,7 +78,7 @@ def imp_url( - remote_odb = self.cloud.get_remote_odb(remote, "import-url") - stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs) - stage.save_deps() -- stage.md5 = stage.compute_md5() -+ stage.sha256 = stage.compute_sha256() - else: - stage.run(jobs=jobs) - -diff --git a/dvc/repo/index.py b/dvc/repo/index.py -index 9e3fa1a0..a6919abd 100644 ---- a/dvc/repo/index.py -+++ b/dvc/repo/index.py -@@ -13,7 +13,7 @@ from typing import ( - - from funcy import cached_property, nullcontext - --from dvc.utils import dict_md5 -+from dvc.utils import dict_sha256 - - if TYPE_CHECKING: - from networkx import DiGraph -@@ -308,7 +308,7 @@ class Index: - - Currently, it is unique to the platform (windows vs posix). - """ -- return dict_md5(self.dumpd()) -+ return dict_sha256(self.dumpd()) - - - if __name__ == "__main__": -diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py -index 5f4f02f7..f763d4eb 100644 ---- a/dvc/stage/__init__.py -+++ b/dvc/stage/__init__.py -@@ -28,7 +28,7 @@ from .utils import ( - check_missing_outputs, - check_no_externals, - check_stage_path, -- compute_md5, -+ compute_sha256, - fill_stage_dependencies, - fill_stage_outputs, - get_dump, -@@ -133,7 +133,7 @@ class Stage(params.StageParams): - wdir=os.curdir, - deps=None, - outs=None, -- md5=None, -+ sha256=None, - locked=False, # backward compatibility - frozen=False, - always_changed=False, -@@ -153,7 +153,7 @@ class Stage(params.StageParams): - self.wdir = wdir - self.outs = outs - self.deps = deps -- self.md5 = md5 -+ self.sha256 = sha256 - self.frozen = locked or frozen - self.always_changed = always_changed - self._stage_text = stage_text -@@ -347,7 +347,7 @@ class Stage(params.StageParams): - return False - - def changed_stage(self): -- changed = self.md5 != self.compute_md5() -+ changed = self.sha256 != self.compute_sha256() - if changed: - logger.debug(self._changed_stage_entry()) - return changed -@@ -355,7 +355,7 @@ class Stage(params.StageParams): - @rwlocked(read=["deps", "outs"]) - def changed(self): - is_changed = ( -- # Short-circuit order: stage md5 is fast, -+ # Short-circuit order: stage sha256 is fast, - # deps are expected to change - self.changed_stage() - or self.changed_deps() -@@ -446,19 +446,19 @@ class Stage(params.StageParams): - def dumpd(self): - return get_dump(self) - -- def compute_md5(self): -- # `dvc add`ed files don't need stage md5 -+ def compute_sha256(self): -+ # `dvc add`ed files don't need stage sha256 - if self.is_data_source and not (self.is_import or self.is_repo_import): - m = None - else: -- m = compute_md5(self) -- logger.debug("Computed %s md5: '%s'", self, m) -+ m = compute_sha256(self) -+ logger.debug("Computed %s sha256: '%s'", self, m) - return m - - def save(self, allow_missing=False): - self.save_deps(allow_missing=allow_missing) - self.save_outs(allow_missing=allow_missing) -- self.md5 = self.compute_md5() -+ self.sha256 = self.compute_sha256() - - self.repo.stage_cache.save(self) - -@@ -491,7 +491,7 @@ class Stage(params.StageParams): - return [str(entry) for entry in entries if entry.workspace_status()] - - def _changed_stage_entry(self): -- return f"'md5' of {self} changed." -+ return f"'sha256' of {self} changed." - - def changed_entries(self): - changed_deps = self._changed_entries(self.deps) -diff --git a/dvc/stage/params.py b/dvc/stage/params.py -index c43a75b1..961a8168 100644 ---- a/dvc/stage/params.py -+++ b/dvc/stage/params.py -@@ -1,5 +1,5 @@ - class StageParams: -- PARAM_MD5 = "md5" -+ PARAM_MD5 = "sha256" - PARAM_CMD = "cmd" - PARAM_WDIR = "wdir" - PARAM_DEPS = "deps" -diff --git a/dvc/stage/utils.py b/dvc/stage/utils.py -index abd63020..f140d808 100644 ---- a/dvc/stage/utils.py -+++ b/dvc/stage/utils.py -@@ -172,26 +172,26 @@ def check_missing_outputs(stage): - raise MissingDataSource(paths) - - --def compute_md5(stage): -+def compute_sha256(stage): - from dvc.output import Output - -- from ..utils import dict_md5 -+ from ..utils import dict_sha256 - - d = stage.dumpd() - -- # Remove md5 and meta, these should not affect stage md5 -+ # Remove sha256 and meta, these should not affect stage sha256 - d.pop(stage.PARAM_MD5, None) - d.pop(stage.PARAM_META, None) - d.pop(stage.PARAM_DESC, None) - - # Ignore the wdir default value. In this case DVC file w/o -- # wdir has the same md5 as a file with the default value specified. -+ # wdir has the same sha256 as a file with the default value specified. - # It's important for backward compatibility with pipelines that - # didn't have WDIR in their DVC files. - if d.get(stage.PARAM_WDIR) == ".": - del d[stage.PARAM_WDIR] - -- return dict_md5( -+ return dict_sha256( - d, - exclude=[ - stage.PARAM_LOCKED, # backward compatibility -@@ -226,7 +226,7 @@ def get_dump(stage): - key: value - for key, value in { - stage.PARAM_DESC: stage.desc, -- stage.PARAM_MD5: stage.md5, -+ stage.PARAM_MD5: stage.sha256, - stage.PARAM_CMD: stage.cmd, - stage.PARAM_WDIR: resolve_wdir(stage.wdir, stage.path), - stage.PARAM_FROZEN: stage.frozen, -diff --git a/dvc/testing/test_workspace.py b/dvc/testing/test_workspace.py -index 0b883a7b..088e5795 100644 ---- a/dvc/testing/test_workspace.py -+++ b/dvc/testing/test_workspace.py -@@ -12,14 +12,14 @@ class TestImport: - assert dvc.status() == {} - - @pytest.fixture -- def stage_md5(self): -+ def stage_sha256(self): - pytest.skip() - - @pytest.fixture -- def dir_md5(self): -+ def dir_sha256(self): - pytest.skip() - -- def test_import_dir(self, tmp_dir, dvc, workspace, stage_md5, dir_md5): -+ def test_import_dir(self, tmp_dir, dvc, workspace, stage_sha256, dir_sha256): - from dvc.odbmgr import ODBManager - - workspace.gen( -@@ -43,17 +43,17 @@ class TestImport: - - assert dvc.status() == {} - -- if stage_md5 is not None and dir_md5 is not None: -+ if stage_sha256 is not None and dir_sha256 is not None: - assert (tmp_dir / "dir.dvc").read_text() == ( -- f"md5: {stage_md5}\n" -+ f"sha256: {stage_sha256}\n" - "frozen: true\n" - "deps:\n" -- f"- md5: {dir_md5}\n" -+ f"- sha256: {dir_sha256}\n" - " size: 11\n" - " nfiles: 2\n" - " path: remote://workspace/dir\n" - "outs:\n" -- "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n" -+ "- sha256: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n" - " size: 11\n" - " nfiles: 2\n" - " path: dir\n" -diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py -index e9c1dfa1..b3ecf520 100644 ---- a/dvc/utils/__init__.py -+++ b/dvc/utils/__init__.py -@@ -43,8 +43,8 @@ def dict_hash(d, typ, exclude=()): - return bytes_hash(byts, typ) - - --def dict_md5(d, **kwargs): -- return dict_hash(d, "md5", **kwargs) -+def dict_sha256(d, **kwargs): -+ return dict_hash(d, "sha256", **kwargs) - - - def dict_sha256(d, **kwargs): -diff --git a/scripts/innosetup/dvc.ico.dvc b/scripts/innosetup/dvc.ico.dvc -index e8ca30f5..78b76603 100644 ---- a/scripts/innosetup/dvc.ico.dvc -+++ b/scripts/innosetup/dvc.ico.dvc -@@ -1,3 +1,3 @@ - outs: --- md5: 90104d9e83cfb825cf45507e90aadd27 -+- sha256: 90104d9e83cfb825cf45507e90aadd27 - path: dvc.ico -diff --git a/scripts/innosetup/dvc_left.bmp.dvc b/scripts/innosetup/dvc_left.bmp.dvc -index be60334b..c97e16f8 100644 ---- a/scripts/innosetup/dvc_left.bmp.dvc -+++ b/scripts/innosetup/dvc_left.bmp.dvc -@@ -1,3 +1,3 @@ - outs: --- md5: 9106cda08aa427e73492389a0f17c72d -+- sha256: 9106cda08aa427e73492389a0f17c72d - path: dvc_left.bmp -diff --git a/scripts/innosetup/dvc_up.bmp.dvc b/scripts/innosetup/dvc_up.bmp.dvc -index 7fb5ae55..59df4a87 100644 ---- a/scripts/innosetup/dvc_up.bmp.dvc -+++ b/scripts/innosetup/dvc_up.bmp.dvc -@@ -1,3 +1,3 @@ - outs: --- md5: 94614d6650e062655f9f77507dc9c1f2 -+- sha256: 94614d6650e062655f9f77507dc9c1f2 - path: dvc_up.bmp -diff --git a/tests/func/test_add.py b/tests/func/test_add.py -index b096bbf5..b2c3fa23 100644 ---- a/tests/func/test_add.py -+++ b/tests/func/test_add.py -@@ -37,7 +37,7 @@ from dvc.testing.test_workspace import TestAdd - from dvc.utils import LARGE_DIR_SIZE, relpath - from dvc.utils.fs import path_isin - from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml --from dvc_data.hashfile.hash import file_md5 -+from dvc_data.hashfile.hash import file_sha256 - from dvc_data.hashfile.hash_info import HashInfo - from tests.basic_env import TestDvc - from tests.utils import get_gitignore_content -@@ -45,7 +45,7 @@ from tests.utils import get_gitignore_content - - def test_add(tmp_dir, dvc): - (stage,) = tmp_dir.dvc_gen({"foo": "foo"}) -- md5 = file_md5("foo", dvc.fs) -+ sha256 = file_sha256("foo", dvc.fs) - - assert stage is not None - -@@ -54,13 +54,13 @@ def test_add(tmp_dir, dvc): - assert len(stage.outs) == 1 - assert len(stage.deps) == 0 - assert stage.cmd is None -- assert stage.outs[0].hash_info == HashInfo("md5", md5) -- assert stage.md5 is None -+ assert stage.outs[0].hash_info == HashInfo("sha256", sha256) -+ assert stage.sha256 is None - - assert (tmp_dir / "foo.dvc").parse() == { - "outs": [ - { -- "md5": "acbd18db4cc2f85cedef654fccc4a4d8", -+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8", - "path": "foo", - "size": 3, - } -@@ -78,7 +78,7 @@ def test_add_executable(tmp_dir, dvc): - assert (tmp_dir / "foo.dvc").parse() == { - "outs": [ - { -- "md5": "acbd18db4cc2f85cedef654fccc4a4d8", -+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8", - "path": "foo", - "size": 3, - "isexec": True, -@@ -296,7 +296,7 @@ def test_add_filtered_files_in_dir( - class TestAddExternal(TestAdd): - @pytest.fixture - def hash_name(self): -- return "md5" -+ return "sha256" - - @pytest.fixture - def hash_value(self): -@@ -317,7 +317,7 @@ def test_add_external_relpath(tmp_dir, dvc, local_cloud): - dvc.add(rel, external=True) - assert (tmp_dir / "file.dvc").read_text() == ( - "outs:\n" -- "- md5: 8c7dd922ad47494fc02c388e12c00eac\n" -+ "- sha256: 8c7dd922ad47494fc02c388e12c00eac\n" - " size: 4\n" - f" path: {rel}\n" - ) -@@ -379,7 +379,7 @@ class TestDoubleAddUnchanged(TestDvc): - - - def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir): -- file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5") -+ file_sha256_counter = mocker.spy(dvc_data.hashfile.hash, "file_sha256") - tmp_dir.gen("foo", "foo") - - ret = main(["config", "cache.type", "copy"]) -@@ -387,30 +387,30 @@ def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir): - - ret = main(["add", "foo"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 1 -+ assert file_sha256_counter.mock.call_count == 1 - - ret = main(["status"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 1 -+ assert file_sha256_counter.mock.call_count == 1 - - ret = main(["run", "--single-stage", "-d", "foo", "echo foo"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 1 -+ assert file_sha256_counter.mock.call_count == 1 - - os.rename("foo", "foo.back") - ret = main(["checkout"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 1 -+ assert file_sha256_counter.mock.call_count == 1 - - ret = main(["status"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 1 -+ assert file_sha256_counter.mock.call_count == 1 - - - def test_should_update_state_entry_for_directory_after_add( - mocker, dvc, tmp_dir - ): -- file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5") -+ file_sha256_counter = mocker.spy(dvc_data.hashfile.hash, "file_sha256") - - tmp_dir.gen({"data/data": "foo", "data/data_sub/sub_data": "foo"}) - -@@ -419,27 +419,27 @@ def test_should_update_state_entry_for_directory_after_add( - - ret = main(["add", "data"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 5 -+ assert file_sha256_counter.mock.call_count == 5 - - ret = main(["status"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 6 -+ assert file_sha256_counter.mock.call_count == 6 - - ls = "dir" if os.name == "nt" else "ls" - ret = main( - ["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")] - ) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 8 -+ assert file_sha256_counter.mock.call_count == 8 - - os.rename("data", "data" + ".back") - ret = main(["checkout"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 8 -+ assert file_sha256_counter.mock.call_count == 8 - - ret = main(["status"]) - assert ret == 0 -- assert file_md5_counter.mock.call_count == 10 -+ assert file_sha256_counter.mock.call_count == 10 - - - class TestAddCommit(TestDvc): -@@ -917,7 +917,7 @@ def test_add_preserve_fields(tmp_dir, dvc): - - path: foo # out comment - desc: out desc - remote: testremote -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - meta: some metadata - """ -diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py -index 44e636c1..69811c2e 100644 ---- a/tests/func/test_checkout.py -+++ b/tests/func/test_checkout.py -@@ -991,7 +991,7 @@ def test_checkout_dir_compat(tmp_dir, dvc): - textwrap.dedent( - f"""\ - outs: -- - md5: {stage.outs[0].hash_info.value} -+ - sha256: {stage.outs[0].hash_info.value} - path: data - """ - ), -diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py -index b1b40ce4..0a067d58 100644 ---- a/tests/func/test_commit.py -+++ b/tests/func/test_commit.py -@@ -62,7 +62,7 @@ def test_commit_preserve_fields(tmp_dir, dvc): - - path: foo # out comment - desc: out desc - remote: testremote -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - meta: some metadata - """ -@@ -88,19 +88,19 @@ def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw): - assert not stage.outs[0].changed_cache() - - --def test_commit_changed_md5(tmp_dir, dvc): -+def test_commit_changed_sha256(tmp_dir, dvc): - tmp_dir.gen({"file": "file content"}) - (stage,) = dvc.add("file", no_commit=True) - - stage_file_content = (tmp_dir / stage.path).parse() -- stage_file_content["md5"] = "1111111111" -+ stage_file_content["sha256"] = "1111111111" - (tmp_dir / stage.path).dump(stage_file_content) - - with pytest.raises(StageCommitError): - dvc.commit(stage.path) - - dvc.commit(stage.path, force=True) -- assert "md5" not in (tmp_dir / stage.path).parse() -+ assert "sha256" not in (tmp_dir / stage.path).parse() - - - def test_commit_no_exec(tmp_dir, dvc): -diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py -index 192e350f..012e6921 100644 ---- a/tests/func/test_data_cloud.py -+++ b/tests/func/test_data_cloud.py -@@ -131,7 +131,7 @@ def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog): - - stage_file_path = stage.relpath - content = (tmp_dir / stage_file_path).parse() -- del content["outs"][0]["md5"] -+ del content["outs"][0]["sha256"] - (tmp_dir / stage_file_path).dump(content) - - with caplog.at_level(logging.WARNING, logger="dvc"): -@@ -148,7 +148,7 @@ def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog): - - def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote): - tmp_dir.gen({"foo": "foo"}) -- test_file_md5 = mocker.spy(dvc_data.hashfile.hash, "file_md5") -+ test_file_sha256 = mocker.spy(dvc_data.hashfile.hash, "file_sha256") - ret = main(["config", "cache.type", "hardlink"]) - assert ret == 0 - ret = main(["add", "foo"]) -@@ -157,7 +157,7 @@ def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote): - assert ret == 0 - ret = main(["run", "--single-stage", "-d", "foo", "echo foo"]) - assert ret == 0 -- assert test_file_md5.mock.call_count == 1 -+ assert test_file_sha256.mock.call_count == 1 - - - def test_missing_cache(tmp_dir, dvc, local_remote, caplog): -@@ -170,8 +170,8 @@ def test_missing_cache(tmp_dir, dvc, local_remote, caplog): - "Some of the cache files do not exist " - "neither locally nor on remote. Missing cache files:\n" - ) -- foo = "name: bar, md5: 37b51d194a7513e45b56f6524f2d51f2\n" -- bar = "name: foo, md5: acbd18db4cc2f85cedef654fccc4a4d8\n" -+ foo = "name: bar, sha256: 37b51d194a7513e45b56f6524f2d51f2\n" -+ bar = "name: foo, sha256: acbd18db4cc2f85cedef654fccc4a4d8\n" - - caplog.clear() - dvc.push() -@@ -207,7 +207,7 @@ def test_verify_hashes( - remove("dir") - remove(dvc.odb.local.path) - -- hash_spy = mocker.spy(dvc_data.hashfile.hash, "file_md5") -+ hash_spy = mocker.spy(dvc_data.hashfile.hash, "file_sha256") - - dvc.pull() - assert hash_spy.call_count == 0 -diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py -index 5b93815f..7de8e775 100644 ---- a/tests/func/test_diff.py -+++ b/tests/func/test_diff.py -@@ -9,7 +9,7 @@ from dvc.utils.fs import remove - - - def digest(text): -- return hashlib.md5(bytes(text, "utf-8")).hexdigest() -+ return hashlib.sha256(bytes(text, "utf-8")).hexdigest() - - - def test_no_scm(tmp_dir, dvc): -diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py -index 0f034f88..c1a76b3f 100644 ---- a/tests/func/test_external_repo.py -+++ b/tests/func/test_external_repo.py -@@ -206,7 +206,7 @@ def test_subrepos_are_ignored(tmp_dir, erepo_dir): - repo.odb.local, - "dir", - repo.dvcfs, -- "md5", -+ "sha256", - ignore=repo.dvcignore, - ) - transfer( -diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py -index 27a2e9f6..40b0a357 100644 ---- a/tests/func/test_gc.py -+++ b/tests/func/test_gc.py -@@ -24,9 +24,9 @@ class TestGC(TestDvcGit): - raw_dir_hash = stages[0].outs[0].hash_info.as_raw().value - - self.good_cache = [ -- self.dvc.odb.local.oid_to_path(md5) -- for md5 in self.dvc.odb.local.all() -- if md5 != raw_dir_hash -+ self.dvc.odb.local.oid_to_path(sha256) -+ for sha256 in self.dvc.odb.local.all() -+ if sha256 != raw_dir_hash - ] - - self.bad_cache = [self.dvc.odb.local.oid_to_path(raw_dir_hash)] -diff --git a/tests/func/test_import_url.py b/tests/func/test_import_url.py -index 33e79e40..78550df5 100644 ---- a/tests/func/test_import_url.py -+++ b/tests/func/test_import_url.py -@@ -120,11 +120,11 @@ def test_import_url_with_no_exec(tmp_dir, dvc, erepo_dir): - - class TestImport(_TestImport): - @pytest.fixture -- def stage_md5(self): -+ def stage_sha256(self): - return "dc24e1271084ee317ac3c2656fb8812b" - - @pytest.fixture -- def dir_md5(self): -+ def dir_sha256(self): - return "b6dcab6ccd17ca0a8bf4a215a37d14cc.dir" - - @pytest.fixture -@@ -155,15 +155,15 @@ def test_import_url_preserve_meta(tmp_dir, dvc): - desc: top desc - deps: - - path: foo # dep comment -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - outs: - - path: bar # out comment - desc: out desc -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - meta: some metadata -- md5: be7ade0aa89cc8d56e320867a9de9740 -+ sha256: be7ade0aa89cc8d56e320867a9de9740 - frozen: true - """ - ) -@@ -229,7 +229,7 @@ def test_import_url_to_remote_directory(tmp_dir, dvc, workspace, local_remote): - - for file_part in file_parts: - with open( -- local_remote.oid_to_path(file_part["md5"]), encoding="utf-8" -+ local_remote.oid_to_path(file_part["sha256"]), encoding="utf-8" - ) as fobj: - assert fobj.read() == file_part["relpath"] - -@@ -258,7 +258,7 @@ def test_import_url_to_remote_status(tmp_dir, dvc, local_cloud, local_remote): - local_cloud.gen("foo", "foo") - - stage = dvc.imp_url(str(local_cloud / "foo"), to_remote=True) -- assert stage.md5 is not None -+ assert stage.sha256 is not None - - status = dvc.status() - assert status["foo.dvc"] == [{"changed outs": {"foo": "not in cache"}}] -diff --git a/tests/func/test_install.py b/tests/func/test_install.py -index a4a800c9..fb5ddbfe 100644 ---- a/tests/func/test_install.py -+++ b/tests/func/test_install.py -@@ -6,7 +6,7 @@ import pytest - from git import GitCommandError - - from dvc.exceptions import DvcException --from dvc_data.hashfile.hash import file_md5 -+from dvc_data.hashfile.hash import file_sha256 - from tests.func.parsing.test_errors import escape_ansi - - -@@ -76,7 +76,7 @@ class TestInstall: - conf["core"]["remote"] = "store" - tmp_dir.dvc_gen("file", "file_content", "commit message") - -- file_checksum = file_md5("file", dvc.fs) -+ file_checksum = file_sha256("file", dvc.fs) - expected_storage_path = ( - storage_path / file_checksum[:2] / file_checksum[2:] - ) -@@ -117,7 +117,7 @@ def test_merge_driver_no_ancestor(tmp_dir, scm, dvc): - assert (tmp_dir / "data").read_text() == {"bar": "bar"} - assert (tmp_dir / "data.dvc").read_text() == ( - "outs:\n" -- "- md5: 5ea40360f5b4ec688df672a4db9c17d1.dir\n" -+ "- sha256: 5ea40360f5b4ec688df672a4db9c17d1.dir\n" - " size: 6\n" - " nfiles: 2\n" - " path: data\n" -@@ -154,7 +154,7 @@ def test_merge_driver(tmp_dir, scm, dvc): - assert (tmp_dir / "data").read_text() == {"master": "master", "two": "two"} - assert (tmp_dir / "data.dvc").read_text() == ( - "outs:\n" -- "- md5: 839ef9371606817569c1ee0e5f4ed233.dir\n" -+ "- sha256: 839ef9371606817569c1ee0e5f4ed233.dir\n" - " size: 12\n" - " nfiles: 3\n" - " path: data\n" -diff --git a/tests/func/test_lockfile.py b/tests/func/test_lockfile.py -index 93974978..3fb2f1a1 100644 ---- a/tests/func/test_lockfile.py -+++ b/tests/func/test_lockfile.py -@@ -48,12 +48,12 @@ def test_deps_outs_are_sorted_by_path(tmp_dir, dvc, run_head): - # lock stage key order: - assert list(lock.keys()) == ["cmd", "deps", "outs"] - -- # `path` key appear first and then the `md5` -+ # `path` key appear first and then the `sha256` - assert all( -- list(dep.keys()) == ["path", "md5", "size"] for dep in lock["deps"] -+ list(dep.keys()) == ["path", "sha256", "size"] for dep in lock["deps"] - ) - assert all( -- list(out.keys()) == ["path", "md5", "size"] for out in lock["outs"] -+ list(out.keys()) == ["path", "sha256", "size"] for out in lock["outs"] - ) - - # deps are always sorted by the file path naming -@@ -167,7 +167,7 @@ def test_params_dump(tmp_dir, dvc, run_head): - def v1_repo_lock(tmp_dir, dvc): - """Generates a repo having v1 format lockfile""" - size = 5 if os.name == "nt" else 4 -- hi = HashInfo(name="md5", value="c157a79031e1c40f85931829bc5fc552") -+ hi = HashInfo(name="sha256", value="c157a79031e1c40f85931829bc5fc552") - v1_lockdata = { - "foo": {"cmd": "echo foo"}, - "bar": { -diff --git a/tests/func/test_merge_driver.py b/tests/func/test_merge_driver.py -index 113984f9..218e524a 100644 ---- a/tests/func/test_merge_driver.py -+++ b/tests/func/test_merge_driver.py -@@ -118,11 +118,11 @@ def test_merge_different_output_options(tmp_dir, dvc, caplog): - (tmp_dir / "ancestor").touch() - - (tmp_dir / "our").write_text( -- "outs:\n- md5: f123456789.dir\n path: path\n" -+ "outs:\n- sha256: f123456789.dir\n path: path\n" - ) - - (tmp_dir / "their").write_text( -- "outs:\n- md5: f987654321.dir\n path: path\n cache: false\n" -+ "outs:\n- sha256: f987654321.dir\n path: path\n cache: false\n" - ) - - assert ( -@@ -149,10 +149,10 @@ def test_merge_file(tmp_dir, dvc, caplog): - (tmp_dir / "ancestor").touch() - - (tmp_dir / "our").write_text( -- "outs:\n- md5: f123456789.dir\n path: path\n" -+ "outs:\n- sha256: f123456789.dir\n path: path\n" - ) - -- (tmp_dir / "their").write_text("outs:\n- md5: f987654321\n path: path\n") -+ (tmp_dir / "their").write_text("outs:\n- sha256: f987654321\n path: path\n") - - assert ( - main( -@@ -179,13 +179,13 @@ def test_merge_non_dvc_add(tmp_dir, dvc, caplog): - - (tmp_dir / "our").write_text( - "outs:\n" -- "- md5: f123456789.dir\n" -+ "- sha256: f123456789.dir\n" - " path: path\n" -- "- md5: ff123456789.dir\n" -+ "- sha256: ff123456789.dir\n" - " path: another\n" - ) - -- (tmp_dir / "their").write_text("outs:\n- md5: f987654321\n path: path\n") -+ (tmp_dir / "their").write_text("outs:\n- sha256: f987654321\n path: path\n") - - assert ( - main( -diff --git a/tests/func/test_move.py b/tests/func/test_move.py -index aad2003a..3e28e628 100644 ---- a/tests/func/test_move.py -+++ b/tests/func/test_move.py -@@ -261,7 +261,7 @@ def test_move_meta(tmp_dir, dvc): - assert res == textwrap.dedent( - """\ - outs: -- - md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ - sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - path: bar - meta: -diff --git a/tests/func/test_odb.py b/tests/func/test_odb.py -index c2ded255..501814de 100644 ---- a/tests/func/test_odb.py -+++ b/tests/func/test_odb.py -@@ -12,17 +12,17 @@ from dvc_objects.errors import ObjectFormatError - - - def test_cache(tmp_dir, dvc): -- cache1_md5 = "123" -- cache2_md5 = "234" -+ cache1_sha256 = "123" -+ cache2_sha256 = "234" - cache1 = os.path.join( - dvc.odb.local.path, -- cache1_md5[0:2], -- cache1_md5[2:], -+ cache1_sha256[0:2], -+ cache1_sha256[2:], - ) - cache2 = os.path.join( - dvc.odb.local.path, -- cache2_md5[0:2], -- cache2_md5[2:], -+ cache2_sha256[0:2], -+ cache2_sha256[2:], - ) - tmp_dir.gen({cache1: "1", cache2: "2"}) - -@@ -31,13 +31,13 @@ def test_cache(tmp_dir, dvc): - - odb = ODBManager(dvc) - -- md5_list = list(odb.local.all()) -- assert len(md5_list) == 2 -- assert cache1_md5 in md5_list -- assert cache2_md5 in md5_list -+ sha256_list = list(odb.local.all()) -+ assert len(sha256_list) == 2 -+ assert cache1_sha256 in sha256_list -+ assert cache2_sha256 in sha256_list - -- odb_cache1 = odb.local.oid_to_path(cache1_md5) -- odb_cache2 = odb.local.oid_to_path(cache2_md5) -+ odb_cache1 = odb.local.oid_to_path(cache1_sha256) -+ odb_cache2 = odb.local.oid_to_path(cache2_sha256) - assert os.fspath(odb_cache1) == cache1 - assert os.fspath(odb_cache2) == cache2 - -@@ -49,13 +49,13 @@ def test_cache_load_bad_dir_cache(tmp_dir, dvc): - fname = os.fspath(dvc.odb.local.oid_to_path(dir_hash)) - tmp_dir.gen({fname: "not,json"}) - with pytest.raises(ObjectFormatError): -- load(dvc.odb.local, HashInfo("md5", dir_hash)) -+ load(dvc.odb.local, HashInfo("sha256", dir_hash)) - - dir_hash = "234.dir" - fname = os.fspath(dvc.odb.local.oid_to_path(dir_hash)) - tmp_dir.gen({fname: '{"a": "b"}'}) - with pytest.raises(ObjectFormatError): -- load(dvc.odb.local, HashInfo("md5", dir_hash)) -+ load(dvc.odb.local, HashInfo("sha256", dir_hash)) - - - def test_external_cache_dir(tmp_dir, dvc, make_tmp_dir): -diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py -index aac08ce7..6164cfa8 100644 ---- a/tests/func/test_remote.py -+++ b/tests/func/test_remote.py -@@ -147,22 +147,22 @@ def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc): - path = (tmp_dir / "data").fs_path - - tree = Tree.from_list( -- [{"relpath": "1", "md5": "1"}, {"relpath": "2", "md5": "2"}] -+ [{"relpath": "1", "sha256": "1"}, {"relpath": "2", "sha256": "2"}] - ) - tree.digest() - with patch("dvc_data.build._build_tree", return_value=(None, tree)): -- _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "md5") -+ _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "sha256") - hash1 = obj.hash_info - - # remove the raw dir obj to force building the tree on the next build call - dvc.odb.local.fs.remove(dvc.odb.local.oid_to_path(hash1.as_raw().value)) - - tree = Tree.from_list( -- [{"md5": "1", "relpath": "1"}, {"md5": "2", "relpath": "2"}] -+ [{"sha256": "1", "relpath": "1"}, {"sha256": "2", "relpath": "2"}] - ) - tree.digest() - with patch("dvc_data.build._build_tree", return_value=(None, tree)): -- _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "md5") -+ _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "sha256") - hash2 = obj.hash_info - - assert hash1 == hash2 -@@ -248,7 +248,7 @@ def test_remote_modify_local_on_repo_config(tmp_dir, dvc): - - def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory): - # https://github.com/iterative/dvc/issues/2647, is some situations -- # (external dir dependency) cache is required to calculate dir md5 -+ # (external dir dependency) cache is required to calculate dir sha256 - external_dir = tmp_path_factory.mktemp("external_dir") - file = external_dir / "file" - -diff --git a/tests/func/test_repo_index.py b/tests/func/test_repo_index.py -index c419d88f..dfaee778 100644 ---- a/tests/func/test_repo_index.py -+++ b/tests/func/test_repo_index.py -@@ -269,17 +269,17 @@ def test_used_objs(tmp_dir, scm, dvc, run_copy, rev): - - expected_objs = [ - HashInfo( -- name="md5", -+ name="sha256", - value="acbd18db4cc2f85cedef654fccc4a4d8", - obj_name="bar", - ), - HashInfo( -- name="md5", -+ name="sha256", - value="8c7dd922ad47494fc02c388e12c00eac", - obj_name="dir/subdir/file", - ), - HashInfo( -- name="md5", -+ name="sha256", - value="d28c9e28591aeb7e303dc6772ffa6f6b.dir", - obj_name="dir", - ), -diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py -index ed405866..8060585c 100644 ---- a/tests/func/test_repro.py -+++ b/tests/func/test_repro.py -@@ -21,7 +21,7 @@ from dvc.stage.exceptions import StageFileDoesNotExistError - from dvc.utils import relpath - from dvc.utils.fs import remove - from dvc.utils.serialize import dump_yaml, load_yaml --from dvc_data.hashfile.hash import file_md5 -+from dvc_data.hashfile.hash import file_sha256 - from tests.basic_env import TestDvc - - -@@ -654,7 +654,7 @@ class TestReproDataSource(TestReproChangedData): - - self.assertTrue(filecmp.cmp(self.FOO, self.BAR, shallow=False)) - self.assertEqual( -- stages[0].outs[0].hash_info.value, file_md5(self.BAR, self.dvc.fs) -+ stages[0].outs[0].hash_info.value, file_sha256(self.BAR, self.dvc.fs) - ) - - -@@ -1127,21 +1127,21 @@ def test_dvc_formatting_retained(tmp_dir, dvc, run_copy): - (tmp_dir / "foo").write_text("new foo") - dvc.reproduce("foo_copy.dvc", force=True) - -- def _hide_md5(text): -- return re.sub(r"\b[a-f0-9]{32}\b", "", text) -+ def _hide_sha256(text): -+ return re.sub(r"\b[a-f0-9]{32}\b", "", text) - - def _hide_size(text): - return re.sub(r"size: [0-9]*\b", "size: ", text) - - def _mask(text): -- return _hide_size(_hide_md5(text)) -+ return _hide_size(_hide_sha256(text)) - - assert _mask(stage_text) == _mask(stage_path.read_text()) - - - def _format_dvc_line(line): -- # Add line comment for all cache and md5 keys -- if "cache:" in line or "md5:" in line: -+ # Add line comment for all cache and sha256 keys -+ if "cache:" in line or "sha256:" in line: - return line + " # line comment" - # Format command as one word per line - if line.startswith("cmd: "): -diff --git a/tests/func/test_run_multistage.py b/tests/func/test_run_multistage.py -index f83b7e18..569a86de 100644 ---- a/tests/func/test_run_multistage.py -+++ b/tests/func/test_run_multistage.py -@@ -355,7 +355,7 @@ def test_run_external_outputs( - dvc, - local_workspace, - ): -- hash_name = "md5" -+ hash_name = "sha256" - foo_hash = "acbd18db4cc2f85cedef654fccc4a4d8" - bar_hash = "37b51d194a7513e45b56f6524f2d51f2" - -diff --git a/tests/func/test_run_single_stage.py b/tests/func/test_run_single_stage.py -index db775d05..0b193b42 100644 ---- a/tests/func/test_run_single_stage.py -+++ b/tests/func/test_run_single_stage.py -@@ -31,7 +31,7 @@ from dvc.stage.exceptions import ( - StagePathOutsideError, - ) - from dvc.utils.serialize import load_yaml --from dvc_data.hashfile.hash import file_md5 -+from dvc_data.hashfile.hash import file_sha256 - from tests.basic_env import TestDvc, TestDvcGit - - -@@ -60,7 +60,7 @@ class TestRun(TestDvc): - self.assertEqual(len(stage.outs), len(outs + outs_no_cache)) - self.assertEqual(stage.outs[0].fspath, outs[0]) - self.assertEqual( -- stage.outs[0].hash_info.value, file_md5(self.FOO, self.dvc.fs) -+ stage.outs[0].hash_info.value, file_sha256(self.FOO, self.dvc.fs) - ) - self.assertTrue(stage.path, fname) - -@@ -987,20 +987,20 @@ def test_run_force_preserves_comments_and_meta(tmp_dir, dvc, run_copy): - cmd: python copy.py foo bar - deps: - - path: copy.py -- md5: 90c27dd80b698fe766f0c3ee0b6b9729 -+ sha256: 90c27dd80b698fe766f0c3ee0b6b9729 - size: {code_size} - - path: foo -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - outs: - # comment preserved - - path: bar - desc: out desc -- md5: acbd18db4cc2f85cedef654fccc4a4d8 -+ sha256: acbd18db4cc2f85cedef654fccc4a4d8 - size: 3 - meta: - name: copy-foo-bar -- md5: be659ce4a33cebb85d4e8e1335d394ad -+ sha256: be659ce4a33cebb85d4e8e1335d394ad - """ - ) - -@@ -1011,18 +1011,18 @@ def test_run_force_preserves_comments_and_meta(tmp_dir, dvc, run_copy): - cmd: python copy.py foo1 bar1 - deps: - - path: foo1 -- md5: 299a0be4a5a79e6a59fdd251b19d78bb -+ sha256: 299a0be4a5a79e6a59fdd251b19d78bb - size: 4 - - path: copy.py -- md5: 90c27dd80b698fe766f0c3ee0b6b9729 -+ sha256: 90c27dd80b698fe766f0c3ee0b6b9729 - size: {code_size} - outs: - # comment preserved - - path: bar1 -- md5: 299a0be4a5a79e6a59fdd251b19d78bb -+ sha256: 299a0be4a5a79e6a59fdd251b19d78bb - size: 4 - meta: - name: copy-foo-bar -- md5: 9e725b11cb393e6a7468369fa50328b7 -+ sha256: 9e725b11cb393e6a7468369fa50328b7 - """ - ) -diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py -index 77a35488..46ff6a02 100644 ---- a/tests/func/test_stage.py -+++ b/tests/func/test_stage.py -@@ -78,8 +78,8 @@ class TestReload(TestDvc): - d = load_yaml(stage.relpath) - - # NOTE: checking that reloaded stage didn't change its checksum -- md5 = "11111111111111111111111111111111" -- d[stage.PARAM_MD5] = md5 -+ sha256 = "11111111111111111111111111111111" -+ d[stage.PARAM_MD5] = sha256 - dump_yaml(stage.relpath, d) - - dvcfile = SingleStageFile(self.dvc, stage.relpath) -@@ -89,7 +89,7 @@ class TestReload(TestDvc): - dvcfile.dump(stage) - - d = load_yaml(stage.relpath) -- self.assertEqual(d[stage.PARAM_MD5], md5) -+ self.assertEqual(d[stage.PARAM_MD5], sha256) - - - class TestDefaultWorkingDirectory(TestDvc): -@@ -154,7 +154,7 @@ class TestExternalRemoteResolution(TestDvc): - assert os.path.exists("movie.txt") - - --def test_md5_ignores_comments(tmp_dir, dvc): -+def test_sha256_ignores_comments(tmp_dir, dvc): - (stage,) = tmp_dir.dvc_gen("foo", "foo content") - - with open(stage.path, "a", encoding="utf-8") as f: -diff --git a/tests/func/test_state.py b/tests/func/test_state.py -index 1e4f6ae6..4fa328d6 100644 ---- a/tests/func/test_state.py -+++ b/tests/func/test_state.py -@@ -2,7 +2,7 @@ import os - import re - - from dvc.repo import Repo --from dvc_data.hashfile.hash import file_md5 -+from dvc_data.hashfile.hash import file_sha256 - from dvc_data.hashfile.hash_info import HashInfo - from dvc_data.hashfile.state import State - -@@ -10,7 +10,7 @@ from dvc_data.hashfile.state import State - def test_state(tmp_dir, dvc): - tmp_dir.gen("foo", "foo content") - path = tmp_dir / "foo" -- hash_info = HashInfo("md5", file_md5(path, dvc.fs)) -+ hash_info = HashInfo("sha256", file_sha256(path, dvc.fs)) - - state = State(dvc.root_dir, dvc.tmp_dir, dvc.dvcignore) - -@@ -22,7 +22,7 @@ def test_state(tmp_dir, dvc): - - assert state.get(path, dvc.fs) == (None, None) - -- hash_info = HashInfo("md5", file_md5(path, dvc.fs)) -+ hash_info = HashInfo("sha256", file_sha256(path, dvc.fs)) - state.save(path, dvc.fs, hash_info) - - assert state.get(path, dvc.fs)[1] == hash_info -diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py -index 0a1834ef..f9bf8d3a 100644 ---- a/tests/func/test_utils.py -+++ b/tests/func/test_utils.py -@@ -6,7 +6,7 @@ from dvc import utils - from dvc.exceptions import DvcException - - --def test_dict_md5(): -+def test_dict_sha256(): - d = { - "cmd": "python code.py foo file1", - "locked": "true", -@@ -15,18 +15,18 @@ def test_dict_md5(): - "path": "file1", - "metric": {"type": "raw"}, - "cache": False, -- "md5": "acbd18db4cc2f85cedef654fccc4a4d8", -+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8", - } - ], - "deps": [ -- {"path": "foo", "md5": "acbd18db4cc2f85cedef654fccc4a4d8"}, -- {"path": "code.py", "md5": "d05447644b89960913c7eee5fd776adb"}, -+ {"path": "foo", "sha256": "acbd18db4cc2f85cedef654fccc4a4d8"}, -+ {"path": "code.py", "sha256": "d05447644b89960913c7eee5fd776adb"}, - ], - } - -- md5 = "8b263fa05ede6c3145c164829be694b4" -+ sha256 = "8b263fa05ede6c3145c164829be694b4" - -- assert md5 == utils.dict_md5(d, exclude=["metric", "locked"]) -+ assert sha256 == utils.dict_sha256(d, exclude=["metric", "locked"]) - - - def test_boxify(): -diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py -index 73cf3bf7..a83056d6 100644 ---- a/tests/unit/cli/test_main.py -+++ b/tests/unit/cli/test_main.py -@@ -14,7 +14,7 @@ def test_state_pickle_errors_are_correctly_raised(tmp_dir, caplog, mocker): - mocker.patch( - "dvc.cli.parse_args", - return_value=Namespace( -- func=raiser(DiskError(path, "md5s")), -+ func=raiser(DiskError(path, "sha256s")), - quiet=False, - verbose=True, - ), -@@ -22,7 +22,7 @@ def test_state_pickle_errors_are_correctly_raised(tmp_dir, caplog, mocker): - - assert main() == 255 - assert ( -- f"Could not open pickled 'md5s' cache.\n" -+ f"Could not open pickled 'sha256s' cache.\n" - f"Remove the '{path.relative_to(tmp_dir)}' directory " - "and then retry this command.\n" - "See for more information." -diff --git a/tests/unit/data/db/test_local.py b/tests/unit/data/db/test_local.py -index b755cf64..0175f179 100644 ---- a/tests/unit/data/db/test_local.py -+++ b/tests/unit/data/db/test_local.py -@@ -17,8 +17,8 @@ def test_status_download_optimization(mocker, dvc): - - odb = LocalHashFileDB(LocalFileSystem(), os.getcwd()) - obj_ids = { -- HashInfo("md5", "acbd18db4cc2f85cedef654fccc4a4d8"), -- HashInfo("md5", "37b51d194a7513e45b56f6524f2d51f2"), -+ HashInfo("sha256", "acbd18db4cc2f85cedef654fccc4a4d8"), -+ HashInfo("sha256", "37b51d194a7513e45b56f6524f2d51f2"), - } - - local_exists = [hash_info.value for hash_info in obj_ids] -@@ -94,7 +94,7 @@ def test_staging_file(tmp_dir, dvc): - - local_odb = dvc.odb.local - staging_odb, _, obj = build( -- local_odb, (tmp_dir / "foo").fs_path, fs, "md5" -+ local_odb, (tmp_dir / "foo").fs_path, fs, "sha256" - ) - - assert not local_odb.exists(obj.hash_info.value) -@@ -122,7 +122,7 @@ def test_staging_dir(tmp_dir, dvc): - local_odb = dvc.odb.local - - staging_odb, _, obj = build( -- local_odb, (tmp_dir / "dir").fs_path, fs, "md5" -+ local_odb, (tmp_dir / "dir").fs_path, fs, "sha256" - ) - - assert not local_odb.exists(obj.hash_info.value) -diff --git a/tests/unit/fs/test_data.py b/tests/unit/fs/test_data.py -index 0b838d1e..ad17ebd0 100644 ---- a/tests/unit/fs/test_data.py -+++ b/tests/unit/fs/test_data.py -@@ -222,7 +222,7 @@ def test_isdvc(tmp_dir, dvc): - def test_get_hash_file(tmp_dir, dvc): - tmp_dir.dvc_gen({"foo": "foo"}) - fs = DataFileSystem(index=dvc.index.data["repo"]) -- assert fs.info("foo")["md5"] == "acbd18db4cc2f85cedef654fccc4a4d8" -+ assert fs.info("foo")["sha256"] == "acbd18db4cc2f85cedef654fccc4a4d8" - - - def test_get_hash_dir(tmp_dir, dvc, mocker): -@@ -231,7 +231,7 @@ def test_get_hash_dir(tmp_dir, dvc, mocker): - ) - fs = DataFileSystem(index=dvc.index.data["repo"]) - hash_file_spy = mocker.spy(dvc_data.hashfile.hash, "hash_file") -- assert fs.info("dir")["md5"] == "8761c4e9acad696bee718615e23e22db.dir" -+ assert fs.info("dir")["sha256"] == "8761c4e9acad696bee718615e23e22db.dir" - assert not hash_file_spy.called - - -@@ -241,15 +241,15 @@ def test_get_hash_granular(tmp_dir, dvc): - ) - fs = DataFileSystem(index=dvc.index.data["repo"]) - subdir = "dir/subdir" -- assert fs.info(subdir).get("md5") is None -- _, _, obj = build(dvc.odb.local, subdir, fs, "md5", dry_run=True) -+ assert fs.info(subdir).get("sha256") is None -+ _, _, obj = build(dvc.odb.local, subdir, fs, "sha256", dry_run=True) - assert obj.hash_info == HashInfo( -- "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir" -+ "sha256", "af314506f1622d107e0ed3f14ec1a3b5.dir" - ) - data = posixpath.join(subdir, "data") -- assert fs.info(data)["md5"] == "8d777f385d3dfec8815d20f7496026dc" -- _, _, obj = build(dvc.odb.local, data, fs, "md5", dry_run=True) -- assert obj.hash_info == HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc") -+ assert fs.info(data)["sha256"] == "8d777f385d3dfec8815d20f7496026dc" -+ _, _, obj = build(dvc.odb.local, data, fs, "sha256", dry_run=True) -+ assert obj.hash_info == HashInfo("sha256", "8d777f385d3dfec8815d20f7496026dc") - - - def test_get_hash_dirty_file(tmp_dir, dvc): -@@ -258,9 +258,9 @@ def test_get_hash_dirty_file(tmp_dir, dvc): - - fs = DataFileSystem(index=dvc.index.data["repo"]) - expected = "8c7dd922ad47494fc02c388e12c00eac" -- assert fs.info("file").get("md5") == expected -- _, _, obj = build(dvc.odb.local, "file", fs, "md5", dry_run=True) -- assert obj.hash_info == HashInfo("md5", expected) -+ assert fs.info("file").get("sha256") == expected -+ _, _, obj = build(dvc.odb.local, "file", fs, "sha256", dry_run=True) -+ assert obj.hash_info == HashInfo("sha256", expected) - - - def test_get_hash_dirty_dir(tmp_dir, dvc): -@@ -269,6 +269,6 @@ def test_get_hash_dirty_dir(tmp_dir, dvc): - - fs = DataFileSystem(index=dvc.index.data["repo"]) - expected = "5ea40360f5b4ec688df672a4db9c17d1.dir" -- assert fs.info("dir").get("md5") == expected -- _, _, obj = build(dvc.odb.local, "dir", fs, "md5", dry_run=True) -- assert obj.hash_info == HashInfo("md5", expected) -+ assert fs.info("dir").get("sha256") == expected -+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256", dry_run=True) -+ assert obj.hash_info == HashInfo("sha256", expected) -diff --git a/tests/unit/fs/test_dvc.py b/tests/unit/fs/test_dvc.py -index 17c8c9d5..d2087a5f 100644 ---- a/tests/unit/fs/test_dvc.py -+++ b/tests/unit/fs/test_dvc.py -@@ -498,11 +498,11 @@ def test_get_hash_cached_file(tmp_dir, dvc, mocker): - tmp_dir.dvc_gen({"foo": "foo"}) - fs = DvcFileSystem(repo=dvc) - expected = "acbd18db4cc2f85cedef654fccc4a4d8" -- assert fs.info("foo").get("md5") is None -- _, _, obj = build(dvc.odb.local, "foo", fs, "md5") -- assert obj.hash_info == HashInfo("md5", expected) -+ assert fs.info("foo").get("sha256") is None -+ _, _, obj = build(dvc.odb.local, "foo", fs, "sha256") -+ assert obj.hash_info == HashInfo("sha256", expected) - (tmp_dir / "foo").unlink() -- assert fs.info("foo")["md5"] == expected -+ assert fs.info("foo")["sha256"] == expected - - - def test_get_hash_cached_dir(tmp_dir, dvc, mocker): -@@ -511,17 +511,17 @@ def test_get_hash_cached_dir(tmp_dir, dvc, mocker): - ) - fs = DvcFileSystem(repo=dvc) - expected = "8761c4e9acad696bee718615e23e22db.dir" -- assert fs.info("dir").get("md5") is None -- _, _, obj = build(dvc.odb.local, "dir", fs, "md5") -+ assert fs.info("dir").get("sha256") is None -+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256") - assert obj.hash_info == HashInfo( -- "md5", "8761c4e9acad696bee718615e23e22db.dir" -+ "sha256", "8761c4e9acad696bee718615e23e22db.dir" - ) - - shutil.rmtree(tmp_dir / "dir") -- assert fs.info("dir")["md5"] == expected -- _, _, obj = build(dvc.odb.local, "dir", fs, "md5") -+ assert fs.info("dir")["sha256"] == expected -+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256") - assert obj.hash_info == HashInfo( -- "md5", "8761c4e9acad696bee718615e23e22db.dir" -+ "sha256", "8761c4e9acad696bee718615e23e22db.dir" - ) - - -@@ -531,17 +531,17 @@ def test_get_hash_cached_granular(tmp_dir, dvc, mocker): - ) - fs = DvcFileSystem(repo=dvc) - subdir = "dir/subdir" -- assert fs.info(subdir).get("md5") is None -- _, _, obj = build(dvc.odb.local, subdir, fs, "md5") -+ assert fs.info(subdir).get("sha256") is None -+ _, _, obj = build(dvc.odb.local, subdir, fs, "sha256") - assert obj.hash_info == HashInfo( -- "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir" -+ "sha256", "af314506f1622d107e0ed3f14ec1a3b5.dir" - ) -- assert fs.info(posixpath.join(subdir, "data")).get("md5") is None -- _, _, obj = build(dvc.odb.local, posixpath.join(subdir, "data"), fs, "md5") -- assert obj.hash_info == HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc") -+ assert fs.info(posixpath.join(subdir, "data")).get("sha256") is None -+ _, _, obj = build(dvc.odb.local, posixpath.join(subdir, "data"), fs, "sha256") -+ assert obj.hash_info == HashInfo("sha256", "8d777f385d3dfec8815d20f7496026dc") - (tmp_dir / "dir" / "subdir" / "data").unlink() - assert ( -- fs.info(posixpath.join(subdir, "data"))["md5"] -+ fs.info(posixpath.join(subdir, "data"))["sha256"] - == "8d777f385d3dfec8815d20f7496026dc" - ) - -@@ -559,9 +559,9 @@ def test_get_hash_mixed_dir(tmp_dir, scm, dvc): - tmp_dir.scm.commit("add dir") - - fs = DvcFileSystem(repo=dvc) -- _, _, obj = build(dvc.odb.local, "dir", fs, "md5") -+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256") - assert obj.hash_info == HashInfo( -- "md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir" -+ "sha256", "e1d9e8eae5374860ae025ec84cfd85c7.dir" - ) - - -@@ -570,28 +570,28 @@ def test_get_hash_dirty_file(tmp_dir, dvc): - from dvc_data.hashfile.hash import hash_file - - tmp_dir.dvc_gen("file", "file") -- file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac") -+ file_hash_info = HashInfo("sha256", "8c7dd922ad47494fc02c388e12c00eac") - - (tmp_dir / "file").write_text("something") -- something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f") -+ something_hash_info = HashInfo("sha256", "437b930db84b8079c2dd804a71936b5f") - - # file is modified in workspace - # hash_file(file) should return workspace hash, not DVC cached hash - fs = DvcFileSystem(repo=dvc) -- assert fs.info("file").get("md5") is None -- staging, _, obj = build(dvc.odb.local, "file", fs, "md5") -+ assert fs.info("file").get("sha256") is None -+ staging, _, obj = build(dvc.odb.local, "file", fs, "sha256") - assert obj.hash_info == something_hash_info - check(staging, obj) - - # hash_file(file) should return DVC cached hash - (tmp_dir / "file").unlink() -- assert fs.info("file")["md5"] == file_hash_info.value -- _, hash_info = hash_file("file", fs, "md5", state=dvc.state) -+ assert fs.info("file")["sha256"] == file_hash_info.value -+ _, hash_info = hash_file("file", fs, "sha256", state=dvc.state) - assert hash_info == file_hash_info - - # tmp_dir/file can be built even though it is missing in workspace since - # repofs will use the DVC cached hash (and refer to the local cache object) -- _, _, obj = build(dvc.odb.local, "file", fs, "md5") -+ _, _, obj = build(dvc.odb.local, "file", fs, "sha256") - assert obj.hash_info == file_hash_info - - -@@ -600,9 +600,9 @@ def test_get_hash_dirty_dir(tmp_dir, dvc): - (tmp_dir / "dir" / "baz").write_text("baz") - - fs = DvcFileSystem(repo=dvc) -- _, meta, obj = build(dvc.odb.local, "dir", fs, "md5") -+ _, meta, obj = build(dvc.odb.local, "dir", fs, "sha256") - assert obj.hash_info == HashInfo( -- "md5", "ba75a2162ca9c29acecb7957105a0bc2.dir" -+ "sha256", "ba75a2162ca9c29acecb7957105a0bc2.dir" - ) - assert meta.nfiles == 3 - -diff --git a/tests/unit/output/test_local.py b/tests/unit/output/test_local.py -index 5d1ca10c..33ba3b46 100644 ---- a/tests/unit/output/test_local.py -+++ b/tests/unit/output/test_local.py -@@ -64,12 +64,12 @@ class TestGetFilesNumber(TestDvc): - - def test_return_multiple_for_dir(self): - o = self._get_output() -- o.hash_info = HashInfo("md5", "12345678.dir") -+ o.hash_info = HashInfo("sha256", "12345678.dir") - o.meta = Meta(nfiles=2) - self.assertEqual(2, o.get_files_number()) - - @patch.object(Output, "is_dir_checksum", False) - def test_return_1_on_single_file_cache(self): - o = self._get_output() -- o.hash_info = HashInfo("md5", "12345678") -+ o.hash_info = HashInfo("sha256", "12345678") - self.assertEqual(1, o.get_files_number()) -diff --git a/tests/unit/output/test_output.py b/tests/unit/output/test_output.py -index e7c0dcef..4e466d63 100644 ---- a/tests/unit/output/test_output.py -+++ b/tests/unit/output/test_output.py -@@ -31,7 +31,7 @@ def test_save_missing(dvc, mocker): - ( - "3cc286c534a71504476da009ed174423", - "3cc286c534a71504476da009ed174423", -- ), # md5 -+ ), # sha256 - ( - "d41d8cd98f00b204e9800998ecf8427e-38", - "d41d8cd98f00b204e9800998ecf8427e-38", -diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py -index efb1ead4..7d1b7406 100644 ---- a/tests/unit/repo/test_repo.py -+++ b/tests/unit/repo/test_repo.py -@@ -48,8 +48,8 @@ def test_used_objs(tmp_dir, dvc, path): - tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}}) - - expected = { -- HashInfo("md5", "70922d6bf66eb073053a82f77d58c536.dir"), -- HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac"), -+ HashInfo("sha256", "70922d6bf66eb073053a82f77d58c536.dir"), -+ HashInfo("sha256", "8c7dd922ad47494fc02c388e12c00eac"), - } - - used = set() -diff --git a/tests/unit/stage/test_loader_pipeline_file.py b/tests/unit/stage/test_loader_pipeline_file.py -index 5ef37201..83a00b49 100644 ---- a/tests/unit/stage/test_loader_pipeline_file.py -+++ b/tests/unit/stage/test_loader_pipeline_file.py -@@ -20,8 +20,8 @@ def stage_data(): - def lock_data(): - return { - "cmd": "command", -- "deps": [{"path": "foo", "md5": "foo_checksum"}], -- "outs": [{"path": "bar", "md5": "bar_checksum"}], -+ "deps": [{"path": "foo", "sha256": "foo_checksum"}], -+ "outs": [{"path": "bar", "sha256": "bar_checksum"}], - } - - -@@ -35,8 +35,8 @@ def test_fill_from_lock_deps_outs(dvc, lock_data): - - StageLoader.fill_from_lock(stage, lock_data) - -- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum") -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - - def test_fill_from_lock_outs_isexec(dvc): -@@ -48,12 +48,12 @@ def test_fill_from_lock_outs_isexec(dvc): - stage, - { - "cmd": "command", -- "outs": [{"path": "foo", "md5": "foo_checksum", "isexec": True}], -+ "outs": [{"path": "foo", "sha256": "foo_checksum", "isexec": True}], - }, - ) - - assert stage.outs[0].def_path == "foo" -- assert stage.outs[0].hash_info == HashInfo("md5", "foo_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "foo_checksum") - assert stage.outs[0].meta.isexec - - -@@ -118,8 +118,8 @@ def test_fill_from_lock_missing_checksums(dvc, lock_data): - - StageLoader.fill_from_lock(stage, lock_data) - -- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum") -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - assert not stage.deps[1].hash_info and not stage.outs[1].hash_info - - -@@ -134,7 +134,7 @@ def test_fill_from_lock_use_appropriate_checksum(dvc, lock_data): - lock_data["deps"] = [{"path": "s3://dvc-temp/foo", "etag": "e-tag"}] - StageLoader.fill_from_lock(stage, lock_data) - assert stage.deps[0].hash_info == HashInfo("etag", "e-tag") -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - - def test_fill_from_lock_with_missing_sections(dvc, lock_data): -@@ -145,12 +145,12 @@ def test_fill_from_lock_with_missing_sections(dvc, lock_data): - del lock["deps"] - StageLoader.fill_from_lock(stage, lock) - assert not stage.deps[0].hash_info -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - lock = deepcopy(lock_data) - del lock["outs"] - StageLoader.fill_from_lock(stage, lock) -- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum") -+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum") - assert not stage.outs[0].hash_info - - -@@ -173,9 +173,9 @@ def test_load_stage(dvc, stage_data, lock_data): - assert stage.cmd == "command" - assert stage.path == os.path.abspath(PIPELINE_FILE) - assert stage.deps[0].def_path == "foo" -- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum") -+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum") - assert stage.outs[0].def_path == "bar" -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - - def test_load_stage_cmd_with_list(dvc, stage_data, lock_data): -@@ -210,8 +210,8 @@ def test_load_stage_with_params(dvc, stage_data, lock_data): - assert deps[0].def_path == "foo" and stage.outs[0].def_path == "bar" - assert params[0].def_path == "params.yaml" - assert params[0].hash_info == HashInfo("params", {"lorem": "ipsum"}) -- assert deps[0].hash_info == HashInfo("md5", "foo_checksum") -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert deps[0].hash_info == HashInfo("sha256", "foo_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - - @pytest.mark.parametrize("typ", ["metrics", "plots"]) -@@ -221,7 +221,7 @@ def test_load_stage_with_metrics_and_plots(dvc, stage_data, lock_data, typ): - stage = StageLoader.load_stage(dvcfile, "stage-1", stage_data, lock_data) - - assert stage.outs[0].def_path == "bar" -- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum") -+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum") - - - def test_load_changed_command(dvc, stage_data, lock_data): -diff --git a/tests/unit/stage/test_serialize_pipeline_lock.py b/tests/unit/stage/test_serialize_pipeline_lock.py -index c20fc19f..36846511 100644 ---- a/tests/unit/stage/test_serialize_pipeline_lock.py -+++ b/tests/unit/stage/test_serialize_pipeline_lock.py -@@ -31,11 +31,11 @@ def test_lock(dvc): - - def test_lock_deps(dvc): - stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs) -- stage.deps[0].hash_info = HashInfo("md5", "md-five") -+ stage.deps[0].hash_info = HashInfo("sha256", "md-five") - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), -- ("deps", [OrderedDict([("path", "input"), ("md5", "md-five")])]), -+ ("deps", [OrderedDict([("path", "input"), ("sha256", "md-five")])]), - ] - ) - -@@ -44,16 +44,16 @@ def test_lock_deps_order(dvc): - stage = create_stage( - PipelineStage, dvc, deps=["input1", "input0"], **kwargs - ) -- stage.deps[0].hash_info = HashInfo("md5", "md-one1") -- stage.deps[1].hash_info = HashInfo("md5", "md-zer0") -+ stage.deps[0].hash_info = HashInfo("sha256", "md-one1") -+ stage.deps[1].hash_info = HashInfo("sha256", "md-zer0") - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), - ( - "deps", - [ -- OrderedDict([("path", "input0"), ("md5", "md-zer0")]), -- OrderedDict([("path", "input1"), ("md5", "md-one1")]), -+ OrderedDict([("path", "input0"), ("sha256", "md-zer0")]), -+ OrderedDict([("path", "input1"), ("sha256", "md-one1")]), - ], - ), - ] -@@ -142,11 +142,11 @@ def test_lock_params_without_targets(dvc, info, expected): - @pytest.mark.parametrize("typ", ["plots", "metrics", "outs"]) - def test_lock_outs(dvc, typ): - stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs) -- stage.outs[0].hash_info = HashInfo("md5", "md-five") -+ stage.outs[0].hash_info = HashInfo("sha256", "md-five") - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), -- ("outs", [OrderedDict([("path", "input"), ("md5", "md-five")])]), -+ ("outs", [OrderedDict([("path", "input"), ("sha256", "md-five")])]), - ] - ) - -@@ -154,7 +154,7 @@ def test_lock_outs(dvc, typ): - @pytest.mark.parametrize("typ", ["plots", "metrics", "outs"]) - def test_lock_outs_isexec(dvc, typ): - stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs) -- stage.outs[0].hash_info = HashInfo("md5", "md-five") -+ stage.outs[0].hash_info = HashInfo("sha256", "md-five") - stage.outs[0].meta.isexec = True - assert to_single_stage_lockfile(stage) == OrderedDict( - [ -@@ -165,7 +165,7 @@ def test_lock_outs_isexec(dvc, typ): - OrderedDict( - [ - ("path", "input"), -- ("md5", "md-five"), -+ ("sha256", "md-five"), - ("isexec", True), - ] - ) -@@ -180,16 +180,16 @@ def test_lock_outs_order(dvc, typ): - stage = create_stage( - PipelineStage, dvc, **{typ: ["input1", "input0"]}, **kwargs - ) -- stage.outs[0].hash_info = HashInfo("md5", "md-one1") -- stage.outs[1].hash_info = HashInfo("md5", "md-zer0") -+ stage.outs[0].hash_info = HashInfo("sha256", "md-one1") -+ stage.outs[1].hash_info = HashInfo("sha256", "md-zer0") - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), - ( - "outs", - [ -- OrderedDict([("path", "input0"), ("md5", "md-zer0")]), -- OrderedDict([("path", "input1"), ("md5", "md-one1")]), -+ OrderedDict([("path", "input0"), ("sha256", "md-zer0")]), -+ OrderedDict([("path", "input1"), ("sha256", "md-one1")]), - ], - ), - ] -@@ -200,7 +200,7 @@ def test_dump_nondefault_hash(dvc): - stage = create_stage( - PipelineStage, dvc, deps=["s3://dvc-temp/file"], **kwargs - ) -- stage.deps[0].hash_info = HashInfo("md5", "value") -+ stage.deps[0].hash_info = HashInfo("sha256", "value") - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), -@@ -208,7 +208,7 @@ def test_dump_nondefault_hash(dvc): - "deps", - [ - OrderedDict( -- [("path", "s3://dvc-temp/file"), ("md5", "value")] -+ [("path", "s3://dvc-temp/file"), ("sha256", "value")] - ) - ], - ), -@@ -227,23 +227,23 @@ def test_order(dvc): - ) - params, deps = split_params_deps(stage) - -- deps[0].hash_info = HashInfo("md5", "md-five") -+ deps[0].hash_info = HashInfo("sha256", "md-five") - params[0].hash_info = HashInfo("params", {"foo-param": "value"}) -- stage.outs[0].hash_info = HashInfo("md5", "md5-output") -+ stage.outs[0].hash_info = HashInfo("sha256", "sha256-output") - - assert to_single_stage_lockfile(stage) == OrderedDict( - [ - ("cmd", "command"), -- ("deps", [{"path": "input", "md5": "md-five"}]), -+ ("deps", [{"path": "input", "sha256": "md-five"}]), - ("params", {"params.yaml": {"foo-param": "value"}}), -- ("outs", [{"path": "output", "md5": "md5-output"}]), -+ ("outs", [{"path": "output", "sha256": "sha256-output"}]), - ] - ) - - - def test_to_lockfile(dvc): - stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs) -- stage.deps[0].hash_info = HashInfo("md5", "md-five") -+ stage.deps[0].hash_info = HashInfo("sha256", "md-five") - entry = to_lockfile(stage) - assert len(entry) == 1 - _Schema(LOCKFILE_STAGES_SCHEMA)(entry) -@@ -251,7 +251,7 @@ def test_to_lockfile(dvc): - "something": OrderedDict( - [ - ("cmd", "command"), -- ("deps", [{"path": "input", "md5": "md-five"}]), -+ ("deps", [{"path": "input", "sha256": "md-five"}]), - ] - ) - } -diff --git a/tests/unit/stage/test_stage.py b/tests/unit/stage/test_stage.py -index f564448a..fb6ac3d2 100644 ---- a/tests/unit/stage/test_stage.py -+++ b/tests/unit/stage/test_stage.py -@@ -10,10 +10,10 @@ from dvc.stage import Stage - from dvc.stage.exceptions import StageUpdateError - - TEST_STAGE_DICT = { -- "md5": "123456", -+ "sha256": "123456", - "cmd": "mycmd", -- "outs": [{"path": "a", "md5": "123456789"}], -- "deps": [{"path": "b", "md5": "987654321"}], -+ "outs": [{"path": "a", "sha256": "123456789"}], -+ "deps": [{"path": "b", "sha256": "987654321"}], - } - - -@@ -21,7 +21,7 @@ def test_stage_checksum(mocker): - stage = Stage(None, "path", cmd="mycmd") - - mocker.patch.object(stage, "dumpd", return_value=TEST_STAGE_DICT) -- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b" -+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b" - - - def test_wdir_default_ignored(mocker): -@@ -29,7 +29,7 @@ def test_wdir_default_ignored(mocker): - d = dict(TEST_STAGE_DICT, wdir=".") - - mocker.patch.object(stage, "dumpd", return_value=d) -- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b" -+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b" - - - def test_wdir_non_default_is_not_ignored(mocker): -@@ -37,7 +37,7 @@ def test_wdir_non_default_is_not_ignored(mocker): - d = dict(TEST_STAGE_DICT, wdir="..") - - mocker.patch.object(stage, "dumpd", return_value=d) -- assert stage.compute_md5() == "2ceba15e87f6848aa756502c1e6d24e9" -+ assert stage.compute_sha256() == "2ceba15e87f6848aa756502c1e6d24e9" - - - def test_meta_ignored(mocker): -@@ -45,7 +45,7 @@ def test_meta_ignored(mocker): - d = dict(TEST_STAGE_DICT, meta={"author": "Suor"}) - - mocker.patch.object(stage, "dumpd", return_value=d) -- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b" -+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b" - - - def test_path_conversion(dvc): -diff --git a/tests/unit/test_hashinfo.py b/tests/unit/test_hashinfo.py -index c7da09f3..776cb371 100644 ---- a/tests/unit/test_hashinfo.py -+++ b/tests/unit/test_hashinfo.py -@@ -3,15 +3,15 @@ from dvc_data.hashfile.hash_info import HashInfo - - def test_as_raw(): - hash_info = HashInfo( -- "md5", "a1d0c6e83f027327d8461063f4ac58a6.dir", "objname" -+ "sha256", "a1d0c6e83f027327d8461063f4ac58a6.dir", "objname" - ) - - raw = hash_info.as_raw() - -- assert hash_info.name == "md5" -+ assert hash_info.name == "sha256" - assert hash_info.value == "a1d0c6e83f027327d8461063f4ac58a6.dir" - assert hash_info.obj_name == "objname" - -- assert raw.name == "md5" -+ assert raw.name == "sha256" - assert raw.value == "a1d0c6e83f027327d8461063f4ac58a6" - assert raw.obj_name == "objname" -diff --git a/tests/unit/test_lockfile.py b/tests/unit/test_lockfile.py -index ff42a775..831f9c45 100644 ---- a/tests/unit/test_lockfile.py -+++ b/tests/unit/test_lockfile.py -@@ -31,8 +31,8 @@ def test_stage_dump_with_deps_and_outs(tmp_dir, dvc): - data = { - "s1": { - "cmd": "command", -- "deps": [{"md5": "1.txt", "path": "checksum"}], -- "outs": [{"md5": "2.txt", "path": "checksum"}], -+ "deps": [{"sha256": "1.txt", "path": "checksum"}], -+ "outs": [{"sha256": "2.txt", "path": "checksum"}], - } - } - (tmp_dir / "path.lock").dump(data) -@@ -70,11 +70,11 @@ def test_load_when_lockfile_does_not_exist(tmp_dir, dvc): - "s1": { - "cmd": "command", - "outs": [ -- {"md5": "checksum", "path": "path", "random": "value"} -+ {"sha256": "checksum", "path": "path", "random": "value"} - ], - } - }, -- {"s1": {"cmd": "command", "deps": [{"md5": "checksum"}]}}, -+ {"s1": {"cmd": "command", "deps": [{"sha256": "checksum"}]}}, - ], - ) - def test_load_when_lockfile_is_corrupted(tmp_dir, dvc, corrupt_data): diff --git a/patches/base/dvc/no-analytics.patch b/patches/base/dvc/no-analytics.patch index 532e0db..817fab9 100644 --- a/patches/base/dvc/no-analytics.patch +++ b/patches/base/dvc/no-analytics.patch @@ -1,14 +1,9 @@ -commit de4f3a29629628c24ca9b69533c83b571c92c73f -Author: Max -Date: Sat Dec 17 13:47:49 2022 +0100 - - no analytics for 2.17.0 - diff --git a/dvc/analytics.py b/dvc/analytics.py -index af4823ea..7effc0b8 100644 +deleted file mode 100644 +index 6e3dc91..0000000 --- a/dvc/analytics.py -+++ b/dvc/analytics.py -@@ -1,12 +1,3 @@ ++++ /dev/null +@@ -1,156 +0,0 @@ -import json -import logging -import os @@ -18,13 +13,18 @@ index af4823ea..7effc0b8 100644 -logger = logging.getLogger(__name__) - - - def collect_and_send_report(args=None, return_code=None): - """ - Collect information from the runtime/environment and the command -@@ -19,40 +10,11 @@ def collect_and_send_report(args=None, return_code=None): - report as a JSON, where the _collector_ generates it and the _sender_ - removes it after sending it. - """ +-def collect_and_send_report(args=None, return_code=None): +- """ +- Collect information from the runtime/environment and the command +- being executed into a report and send it over the network. +- +- To prevent analytics from blocking the execution of the main thread, +- sending the report is done in a separate process. +- +- The inter-process communication happens through a file containing the +- report as a JSON, where the _collector_ generates it and the _sender_ +- removes it after sending it. +- """ - import tempfile - - from dvc.daemon import daemon @@ -41,10 +41,9 @@ index af4823ea..7effc0b8 100644 - with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj: - json.dump(report, fobj) - daemon(["analytics", fobj.name]) -+ raise NotImplementedError - - - def is_enabled(): +- +- +-def is_enabled(): - from dvc.config import Config, to_bool - from dvc.utils import env2bool - @@ -54,20 +53,22 @@ index af4823ea..7effc0b8 100644 - enabled = not os.getenv(DVC_NO_ANALYTICS) - if enabled: - enabled = to_bool( -- Config(validate=False).get("core", {}).get("analytics", "true") +- Config.from_cwd(validate=False).get("core", {}).get("analytics", "true") - ) - - logger.debug("Analytics is %sabled.", "en" if enabled else "dis") - - return enabled -+ return False - - - def send(path): -@@ -63,88 +25,22 @@ def send(path): - `collect_and_send_report`. Sending happens on another process, - thus, the need of removing such file afterwards. - """ +- +- +-def send(path): +- """ +- Side effect: Removes the report after sending it. +- +- The report is generated and stored in a temporary file, see: +- `collect_and_send_report`. Sending happens on another process, +- thus, the need of removing such file afterwards. +- """ - import requests - - url = "https://analytics.dvc.org" @@ -84,14 +85,12 @@ index af4823ea..7effc0b8 100644 - logger.debug("failed to send analytics report", exc_info=True) - - os.remove(path) -+ raise NotImplementedError - - - def _scm_in_use(): -- from scmrepo.noscm import NoSCM - +- +-def _scm_in_use(): - from dvc.exceptions import NotDvcRepoError - from dvc.repo import Repo +- from dvc.scm import NoSCM - - from .scm import SCM, SCMError - @@ -102,27 +101,34 @@ index af4823ea..7effc0b8 100644 - return NoSCM.__name__ - except NotDvcRepoError: - pass -+ raise NotImplementedError - - - def _runtime_info(): - """ - Gather information from the environment where DVC runs to fill a report. - """ +- +- +-def _runtime_info(): +- """ +- Gather information from the environment where DVC runs to fill a report. +- """ +- from iterative_telemetry import _generate_ci_id, find_or_create_user_id +- - from dvc import __version__ - from dvc.utils import is_binary - +- ci_id = _generate_ci_id() +- if ci_id: +- group_id, user_id = ci_id +- else: +- group_id, user_id = None, find_or_create_user_id() +- - return { - "dvc_version": __version__, - "is_binary": is_binary(), - "scm_class": _scm_in_use(), - "system_info": _system_info(), -- "user_id": _find_or_create_user_id(), +- "user_id": user_id, +- "group_id": group_id, - } -+ raise NotImplementedError - - - def _system_info(): +- +- +-def _system_info(): - import platform - import sys - @@ -131,7 +137,7 @@ index af4823ea..7effc0b8 100644 - system = platform.system() - - if system == "Windows": -- version = sys.getwindowsversion() +- version = sys.getwindowsversion() # type: ignore[attr-defined] - - return { - "os": "windows", @@ -153,50 +159,12 @@ index af4823ea..7effc0b8 100644 - } - - # We don't collect data for any other system. -+ # We don't collect data :) - raise NotImplementedError - - -@@ -158,33 +54,4 @@ def _find_or_create_user_id(): - - IDs are generated randomly with UUID. - """ -- import uuid -- -- from dvc.config import Config -- from dvc.lock import Lock, LockError -- from dvc.utils.fs import makedirs -- -- config_dir = Config.get_dir("global") -- fname = os.path.join(config_dir, "user_id") -- lockfile = os.path.join(config_dir, "user_id.lock") -- -- # Since the `fname` and `lockfile` are under the global config, -- # we need to make sure such directory exist already. -- makedirs(config_dir, exist_ok=True) -- -- try: -- with Lock(lockfile): -- try: -- with open(fname, encoding="utf-8") as fobj: -- user_id = json.load(fobj)["user_id"] -- -- except (FileNotFoundError, ValueError, KeyError): -- user_id = str(uuid.uuid4()) -- -- with open(fname, "w", encoding="utf-8") as fobj: -- json.dump({"user_id": user_id}, fobj) -- -- return user_id -- -- except LockError: -- logger.debug("Failed to acquire '%s'", lockfile) -+ raise NotImplementedError +- raise NotImplementedError diff --git a/dvc/cli/__init__.py b/dvc/cli/__init__.py -index e206befd..590b0790 100644 +index 274b564..b601d84 100644 --- a/dvc/cli/__init__.py +++ b/dvc/cli/__init__.py -@@ -211,11 +211,6 @@ def main(argv=None): # noqa: C901 +@@ -236,11 +236,6 @@ def main(argv=None): # noqa: C901, PLR0912, PLR0915 ret = _log_exceptions(exc) or 255 try: @@ -207,9 +175,9 @@ index e206befd..590b0790 100644 - return ret finally: - logger.setLevel(outerLogLevel) + logger.setLevel(outer_log_level) diff --git a/dvc/commands/daemon.py b/dvc/commands/daemon.py -index 2a22de3d..d64a6404 100644 +index 35d6e90..d5a7b6e 100644 --- a/dvc/commands/daemon.py +++ b/dvc/commands/daemon.py @@ -26,15 +26,6 @@ class CmdDaemonUpdater(CmdDaemonBase): @@ -245,7 +213,7 @@ index 2a22de3d..d64a6404 100644 - ).complete = completion.FILE - daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics) diff --git a/dvc/commands/init.py b/dvc/commands/init.py -index 3d0c774b..19b86b50 100644 +index ca44919..05730aa 100644 --- a/dvc/commands/init.py +++ b/dvc/commands/init.py @@ -3,7 +3,6 @@ import logging @@ -274,10 +242,10 @@ index 3d0c774b..19b86b50 100644 "{yellow}What's next?{nc}\n" "{yellow}------------{nc}\n" diff --git a/dvc/config_schema.py b/dvc/config_schema.py -index bd514c61..e358b949 100644 +index 2e36e90..3d9e402 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py -@@ -114,7 +114,6 @@ SCHEMA = { +@@ -144,7 +144,6 @@ SCHEMA = { "remote": Lower, "checksum_jobs": All(Coerce(int), Range(1)), Optional("interactive", default=False): Bool, @@ -285,3 +253,15 @@ index bd514c61..e358b949 100644 Optional("hardlink_lock", default=False): Bool, Optional("no_scm", default=False): Bool, Optional("autostage", default=False): Bool, +diff --git a/dvc/env.py b/dvc/env.py +index 081ec9d..06c1332 100644 +--- a/dvc/env.py ++++ b/dvc/env.py +@@ -7,7 +7,6 @@ DVC_EXP_GIT_REMOTE = "DVC_EXP_GIT_REMOTE" + DVC_EXP_NAME = "DVC_EXP_NAME" + DVC_GLOBAL_CONFIG_DIR = "DVC_GLOBAL_CONFIG_DIR" + DVC_IGNORE_ISATTY = "DVC_IGNORE_ISATTY" +-DVC_NO_ANALYTICS = "DVC_NO_ANALYTICS" + DVC_PAGER = "DVC_PAGER" + DVC_ROOT = "DVC_ROOT" + DVC_SHOW_TRACEBACK = "DVC_SHOW_TRACEBACK" diff --git a/patches/base/dvc/yaml-to-json.patch b/patches/base/dvc/yaml-to-json.patch deleted file mode 100644 index 640a0d5..0000000 --- a/patches/base/dvc/yaml-to-json.patch +++ /dev/null @@ -1,127 +0,0 @@ -commit eceb8d19ba9da3c7d07fc5a12636027d499a3a06 -Author: Max -Date: Sat Dec 17 13:57:49 2022 +0100 - - yaml to json for 2.17.0 - -diff --git a/dvc/dvcfile.py b/dvc/dvcfile.py -index 04db6d5f..4eb40e90 100644 ---- a/dvc/dvcfile.py -+++ b/dvc/dvcfile.py -@@ -14,7 +14,7 @@ from dvc.stage.exceptions import ( - from dvc.types import AnyPath - from dvc.utils import relpath - from dvc.utils.collections import apply_diff --from dvc.utils.serialize import dump_yaml, modify_yaml -+from dvc.utils.serialize import dump_json, modify_json - - if TYPE_CHECKING: - from dvc.repo import Repo -@@ -24,7 +24,7 @@ _T = TypeVar("_T") - - DVC_FILE = "Dvcfile" - DVC_FILE_SUFFIX = ".dvc" --PIPELINE_FILE = "dvc.yaml" -+PIPELINE_FILE = "dvc.json" - PIPELINE_LOCK = "dvc.lock" - - -@@ -147,7 +147,7 @@ class FileMixin: - raise StageFileIsNotDvcFileError(self.path) - - self._check_gitignored() -- return self._load_yaml(**kwargs) -+ return self._load_json(**kwargs) - - @classmethod - def validate(cls, d: _T, fname: str = None) -> _T: -@@ -155,7 +155,7 @@ class FileMixin: - - return validate(d, cls.SCHEMA, path=fname) # type: ignore[arg-type] - -- def _load_yaml(self, **kwargs: Any) -> Tuple[Any, str]: -+ def _load_json(self, **kwargs: Any) -> Tuple[Any, str]: - from dvc.utils import strictyaml - - return strictyaml.load( -@@ -198,7 +198,7 @@ class SingleStageFile(FileMixin): - if self.verify: - check_dvcfile_path(self.repo, self.path) - logger.debug("Saving information to '%s'.", relpath(self.path)) -- dump_yaml(self.path, serialize.to_single_stage_file(stage)) -+ dump_json(self.path, serialize.to_single_stage_file(stage)) - self.repo.scm_context.track_file(self.relpath) - - def remove_stage(self, stage): # pylint: disable=unused-argument -@@ -214,7 +214,7 @@ class SingleStageFile(FileMixin): - - - class PipelineFile(FileMixin): -- """Abstraction for pipelines file, .yaml + .lock combined.""" -+ """Abstraction for pipelines file, .json + .lock combined.""" - - from dvc.schema import COMPILED_MULTI_STAGE_SCHEMA as SCHEMA - from dvc.stage.loader import StageLoader as LOADER -@@ -251,7 +251,7 @@ class PipelineFile(FileMixin): - self._check_if_parametrized(stage) - stage_data = serialize.to_pipeline_file(stage) - -- with modify_yaml(self.path, fs=self.repo.fs) as data: -+ with modify_json(self.path, fs=self.repo.fs) as data: - if not data: - logger.info("Creating '%s'", self.relpath) - -@@ -295,7 +295,7 @@ class PipelineFile(FileMixin): - if not self.exists(): - return - -- d, _ = self._load_yaml(round_trip=True) -+ d, _ = self._load_json(round_trip=True) - if stage.name not in d.get("stages", {}): - return - -@@ -303,7 +303,7 @@ class PipelineFile(FileMixin): - del d["stages"][stage.name] - - if d["stages"]: -- dump_yaml(self.path, d) -+ dump_json(self.path, d) - else: - super().remove() - -@@ -365,7 +365,7 @@ class Lockfile(FileMixin): - def dump(self, stage, **kwargs): - stage_data = serialize.to_lockfile(stage) - -- with modify_yaml(self.path, fs=self.repo.fs) as data: -+ with modify_json(self.path, fs=self.repo.fs) as data: - version = LOCKFILE_VERSION.from_dict(data) - if version == LOCKFILE_VERSION.V1: - logger.info( -@@ -394,7 +394,7 @@ class Lockfile(FileMixin): - if not self.exists(): - return - -- d, _ = self._load_yaml(round_trip=True) -+ d, _ = self._load_json(round_trip=True) - version = LOCKFILE_VERSION.from_dict(d) - data = d if version == LOCKFILE_VERSION.V1 else d.get("stages", {}) - if stage.name not in data: -@@ -404,7 +404,7 @@ class Lockfile(FileMixin): - del data[stage.name] - - if data: -- dump_yaml(self.path, d) -+ dump_json(self.path, d) - else: - self.remove() - -@@ -425,7 +425,7 @@ DVCFile = Union["PipelineFile", "SingleStageFile"] - - def make_dvcfile(repo: "Repo", path: AnyPath, **kwargs: Any) -> DVCFile: - _, ext = os.path.splitext(str(path)) -- if ext in [".yaml", ".yml"]: -+ if ext in [".json", ".yml"]: - return PipelineFile(repo, path, **kwargs) - # fallback to single stage file for better error messages - return SingleStageFile(repo, path, **kwargs)