depot/patches/base/dvc/md5-to-sha256.patch

1894 lines
70 KiB
Diff
Raw Normal View History

2022-12-17 14:52:18 +02:00
commit 31347d4f51e60c708cb7baf8cb3360c7cdbda2e2
Author: Max <max@privatevoid.net>
Date: Sat Dec 17 13:39:54 2022 +0100
md5 to sha256 for 2.17.0
diff --git a/dvc/api/data.py b/dvc/api/data.py
index a063612f..71fd715b 100644
--- a/dvc/api/data.py
+++ b/dvc/api/data.py
@@ -28,9 +28,9 @@ def get_url(path, repo=None, rev=None, remote=None):
raise OutputNotFoundError(path, repo)
dvc_repo = info["repo"]
- md5 = dvc_info["md5"]
+ sha256 = dvc_info["sha256"]
- return dvc_repo.cloud.get_url_for(remote, checksum=md5)
+ return dvc_repo.cloud.get_url_for(remote, checksum=sha256)
class _OpenContextManager(GCM):
diff --git a/dvc/fs/data.py b/dvc/fs/data.py
index c3612aed..dec0d386 100644
--- a/dvc/fs/data.py
+++ b/dvc/fs/data.py
@@ -14,7 +14,7 @@ logger = logging.getLogger(__name__)
class DataFileSystem(FileSystem):
protocol = "local"
2022-08-05 15:27:14 +03:00
- PARAM_CHECKSUM = "md5"
+ PARAM_CHECKSUM = "sha256"
2022-12-17 14:52:18 +02:00
def _prepare_credentials(self, **config):
return config
diff --git a/dvc/fs/dvc.py b/dvc/fs/dvc.py
index 87a9ad50..fc997e9f 100644
--- a/dvc/fs/dvc.py
+++ b/dvc/fs/dvc.py
@@ -56,8 +56,8 @@ def _merge_info(repo, fs_info, dvc_info):
ret["dvc_info"] = dvc_info
ret["type"] = dvc_info["type"]
ret["size"] = dvc_info["size"]
- if not fs_info and "md5" in dvc_info:
- ret["md5"] = dvc_info["md5"]
+ if not fs_info and "sha256" in dvc_info:
+ ret["sha256"] = dvc_info["sha256"]
if fs_info:
ret["type"] = fs_info["type"]
@@ -443,7 +443,7 @@ class _DvcFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
class DvcFileSystem(FileSystem):
protocol = "local"
2022-08-05 15:27:14 +03:00
- PARAM_CHECKSUM = "md5"
+ PARAM_CHECKSUM = "sha256"
2022-12-17 14:52:18 +02:00
def _prepare_credentials(self, **config):
return config
2022-08-05 15:27:14 +03:00
diff --git a/dvc/lock.py b/dvc/lock.py
index 3360001c..706a1f10 100644
--- a/dvc/lock.py
+++ b/dvc/lock.py
@@ -181,7 +181,7 @@ class HardlinkLock(flufl.lock.Lock, LockBase):
if self._tmp_dir is not None:
# Under Windows file path length is limited so we hash it
- filename = hashlib.md5(self._claimfile.encode()).hexdigest()
+ filename = hashlib.sha256(self._claimfile.encode()).hexdigest()
self._claimfile = os.path.join(self._tmp_dir, filename + ".lock")
diff --git a/dvc/output.py b/dvc/output.py
2022-12-17 14:52:18 +02:00
index fb7c0be5..f763468b 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/output.py
+++ b/dvc/output.py
2022-12-17 14:52:18 +02:00
@@ -59,7 +59,7 @@ CASE_SENSITIVE_CHECKSUM_SCHEMA = Any(
2022-08-05 15:27:14 +03:00
# NOTE: currently there are only 3 possible checksum names:
#
-# 1) md5 (LOCAL, SSH);
+# 1) sha256 (LOCAL, SSH);
# 2) etag (S3, GS, OSS, AZURE, HTTP);
# 3) checksum (HDFS);
#
2022-12-17 14:52:18 +02:00
@@ -844,7 +844,7 @@ class Output:
2022-08-05 15:27:14 +03:00
odb,
from_info,
from_fs,
- "md5",
+ "sha256",
upload=upload,
no_progress_bar=no_progress_bar,
2022-12-17 14:52:18 +02:00
)
2022-08-05 15:27:14 +03:00
diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py
2022-12-17 14:52:18 +02:00
index eb63e41b..b084cd74 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/repo/__init__.py
+++ b/dvc/repo/__init__.py
2022-12-17 14:52:18 +02:00
@@ -117,7 +117,7 @@ class Repo:
2022-08-05 15:27:14 +03:00
def _get_database_dir(self, db_name):
# NOTE: by default, store SQLite-based remote indexes and state's
- # `links` and `md5s` caches in the repository itself to avoid any
+ # `links` and `sha256s` caches in the repository itself to avoid any
# possible state corruption in 'shared cache dir' scenario, but allow
# user to override this through config when, say, the repository is
# located on a mounted volume — see
2022-12-17 14:52:18 +02:00
@@ -513,7 +513,7 @@ class Repo:
try:
if remote:
remote_odb = self.cloud.get_remote_odb(name=remote)
- oid = fs.info(fs_path)["dvc_info"]["md5"]
+ oid = fs.info(fs_path)["dvc_info"]["sha256"]
fs = remote_odb.fs
fs_path = remote_odb.oid_to_path(oid)
2022-08-05 15:27:14 +03:00
diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py
2022-12-17 14:52:18 +02:00
index 648a837e..f4264496 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/repo/diff.py
+++ b/dvc/repo/diff.py
2022-12-17 14:52:18 +02:00
@@ -143,7 +143,7 @@ def _output_paths(repo, targets):
repo.odb.repo,
2022-08-05 15:27:14 +03:00
output.fs_path,
2022-12-17 14:52:18 +02:00
repo.odb.repo.fs,
2022-08-05 15:27:14 +03:00
- "md5",
+ "sha256",
dry_run=True,
2022-12-17 14:52:18 +02:00
ignore=output.dvcignore,
2022-08-05 15:27:14 +03:00
)
diff --git a/dvc/repo/imp_url.py b/dvc/repo/imp_url.py
2022-12-17 14:52:18 +02:00
index 35a684f6..ed6328f9 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/repo/imp_url.py
+++ b/dvc/repo/imp_url.py
@@ -78,7 +78,7 @@ def imp_url(
remote_odb = self.cloud.get_remote_odb(remote, "import-url")
stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
stage.save_deps()
- stage.md5 = stage.compute_md5()
+ stage.sha256 = stage.compute_sha256()
else:
stage.run(jobs=jobs)
diff --git a/dvc/repo/index.py b/dvc/repo/index.py
2022-12-17 14:52:18 +02:00
index 9e3fa1a0..a6919abd 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/repo/index.py
+++ b/dvc/repo/index.py
@@ -13,7 +13,7 @@ from typing import (
from funcy import cached_property, nullcontext
-from dvc.utils import dict_md5
+from dvc.utils import dict_sha256
if TYPE_CHECKING:
from networkx import DiGraph
2022-12-17 14:52:18 +02:00
@@ -308,7 +308,7 @@ class Index:
2022-08-05 15:27:14 +03:00
Currently, it is unique to the platform (windows vs posix).
"""
- return dict_md5(self.dumpd())
+ return dict_sha256(self.dumpd())
if __name__ == "__main__":
diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py
2022-12-17 14:52:18 +02:00
index 5f4f02f7..f763d4eb 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/stage/__init__.py
+++ b/dvc/stage/__init__.py
@@ -28,7 +28,7 @@ from .utils import (
check_missing_outputs,
check_no_externals,
check_stage_path,
- compute_md5,
+ compute_sha256,
fill_stage_dependencies,
fill_stage_outputs,
get_dump,
2022-12-17 14:52:18 +02:00
@@ -133,7 +133,7 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
wdir=os.curdir,
deps=None,
outs=None,
- md5=None,
+ sha256=None,
locked=False, # backward compatibility
frozen=False,
always_changed=False,
2022-12-17 14:52:18 +02:00
@@ -153,7 +153,7 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
self.wdir = wdir
self.outs = outs
self.deps = deps
- self.md5 = md5
+ self.sha256 = sha256
self.frozen = locked or frozen
self.always_changed = always_changed
self._stage_text = stage_text
2022-12-17 14:52:18 +02:00
@@ -347,7 +347,7 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
return False
def changed_stage(self):
- changed = self.md5 != self.compute_md5()
+ changed = self.sha256 != self.compute_sha256()
if changed:
logger.debug(self._changed_stage_entry())
return changed
2022-12-17 14:52:18 +02:00
@@ -355,7 +355,7 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
@rwlocked(read=["deps", "outs"])
def changed(self):
is_changed = (
- # Short-circuit order: stage md5 is fast,
+ # Short-circuit order: stage sha256 is fast,
# deps are expected to change
self.changed_stage()
or self.changed_deps()
2022-12-17 14:52:18 +02:00
@@ -446,19 +446,19 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
def dumpd(self):
return get_dump(self)
- def compute_md5(self):
- # `dvc add`ed files don't need stage md5
+ def compute_sha256(self):
+ # `dvc add`ed files don't need stage sha256
if self.is_data_source and not (self.is_import or self.is_repo_import):
m = None
else:
- m = compute_md5(self)
2022-12-17 14:52:18 +02:00
- logger.debug("Computed %s md5: '%s'", self, m)
2022-08-05 15:27:14 +03:00
+ m = compute_sha256(self)
2022-12-17 14:52:18 +02:00
+ logger.debug("Computed %s sha256: '%s'", self, m)
2022-08-05 15:27:14 +03:00
return m
def save(self, allow_missing=False):
self.save_deps(allow_missing=allow_missing)
self.save_outs(allow_missing=allow_missing)
- self.md5 = self.compute_md5()
+ self.sha256 = self.compute_sha256()
self.repo.stage_cache.save(self)
2022-12-17 14:52:18 +02:00
@@ -491,7 +491,7 @@ class Stage(params.StageParams):
2022-08-05 15:27:14 +03:00
return [str(entry) for entry in entries if entry.workspace_status()]
def _changed_stage_entry(self):
- return f"'md5' of {self} changed."
+ return f"'sha256' of {self} changed."
def changed_entries(self):
changed_deps = self._changed_entries(self.deps)
diff --git a/dvc/stage/params.py b/dvc/stage/params.py
index c43a75b1..961a8168 100644
--- a/dvc/stage/params.py
+++ b/dvc/stage/params.py
@@ -1,5 +1,5 @@
class StageParams:
- PARAM_MD5 = "md5"
+ PARAM_MD5 = "sha256"
PARAM_CMD = "cmd"
PARAM_WDIR = "wdir"
PARAM_DEPS = "deps"
diff --git a/dvc/stage/utils.py b/dvc/stage/utils.py
2022-12-17 14:52:18 +02:00
index abd63020..f140d808 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/stage/utils.py
+++ b/dvc/stage/utils.py
2022-12-17 14:52:18 +02:00
@@ -172,26 +172,26 @@ def check_missing_outputs(stage):
2022-08-05 15:27:14 +03:00
raise MissingDataSource(paths)
-def compute_md5(stage):
+def compute_sha256(stage):
from dvc.output import Output
- from ..utils import dict_md5
+ from ..utils import dict_sha256
d = stage.dumpd()
- # Remove md5 and meta, these should not affect stage md5
+ # Remove sha256 and meta, these should not affect stage sha256
d.pop(stage.PARAM_MD5, None)
d.pop(stage.PARAM_META, None)
d.pop(stage.PARAM_DESC, None)
# Ignore the wdir default value. In this case DVC file w/o
- # wdir has the same md5 as a file with the default value specified.
+ # wdir has the same sha256 as a file with the default value specified.
# It's important for backward compatibility with pipelines that
# didn't have WDIR in their DVC files.
if d.get(stage.PARAM_WDIR) == ".":
del d[stage.PARAM_WDIR]
- return dict_md5(
+ return dict_sha256(
d,
exclude=[
stage.PARAM_LOCKED, # backward compatibility
2022-12-17 14:52:18 +02:00
@@ -226,7 +226,7 @@ def get_dump(stage):
2022-08-05 15:27:14 +03:00
key: value
for key, value in {
stage.PARAM_DESC: stage.desc,
- stage.PARAM_MD5: stage.md5,
+ stage.PARAM_MD5: stage.sha256,
stage.PARAM_CMD: stage.cmd,
stage.PARAM_WDIR: resolve_wdir(stage.wdir, stage.path),
stage.PARAM_FROZEN: stage.frozen,
diff --git a/dvc/testing/test_workspace.py b/dvc/testing/test_workspace.py
2022-12-17 14:52:18 +02:00
index 0b883a7b..088e5795 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/testing/test_workspace.py
+++ b/dvc/testing/test_workspace.py
@@ -12,14 +12,14 @@ class TestImport:
assert dvc.status() == {}
@pytest.fixture
- def stage_md5(self):
+ def stage_sha256(self):
pytest.skip()
@pytest.fixture
- def dir_md5(self):
+ def dir_sha256(self):
pytest.skip()
- def test_import_dir(self, tmp_dir, dvc, workspace, stage_md5, dir_md5):
+ def test_import_dir(self, tmp_dir, dvc, workspace, stage_sha256, dir_sha256):
2022-12-17 14:52:18 +02:00
from dvc.odbmgr import ODBManager
2022-08-05 15:27:14 +03:00
workspace.gen(
@@ -43,17 +43,17 @@ class TestImport:
assert dvc.status() == {}
- if stage_md5 is not None and dir_md5 is not None:
+ if stage_sha256 is not None and dir_sha256 is not None:
assert (tmp_dir / "dir.dvc").read_text() == (
- f"md5: {stage_md5}\n"
+ f"sha256: {stage_sha256}\n"
"frozen: true\n"
"deps:\n"
- f"- md5: {dir_md5}\n"
+ f"- sha256: {dir_sha256}\n"
" size: 11\n"
" nfiles: 2\n"
" path: remote://workspace/dir\n"
"outs:\n"
- "- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n"
+ "- sha256: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\n"
" size: 11\n"
" nfiles: 2\n"
" path: dir\n"
diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py
2022-12-17 14:52:18 +02:00
index e9c1dfa1..b3ecf520 100644
2022-08-05 15:27:14 +03:00
--- a/dvc/utils/__init__.py
+++ b/dvc/utils/__init__.py
2022-12-17 14:52:18 +02:00
@@ -43,8 +43,8 @@ def dict_hash(d, typ, exclude=()):
2022-08-05 15:27:14 +03:00
return bytes_hash(byts, typ)
-def dict_md5(d, **kwargs):
- return dict_hash(d, "md5", **kwargs)
+def dict_sha256(d, **kwargs):
+ return dict_hash(d, "sha256", **kwargs)
def dict_sha256(d, **kwargs):
diff --git a/scripts/innosetup/dvc.ico.dvc b/scripts/innosetup/dvc.ico.dvc
index e8ca30f5..78b76603 100644
--- a/scripts/innosetup/dvc.ico.dvc
+++ b/scripts/innosetup/dvc.ico.dvc
@@ -1,3 +1,3 @@
outs:
-- md5: 90104d9e83cfb825cf45507e90aadd27
+- sha256: 90104d9e83cfb825cf45507e90aadd27
path: dvc.ico
diff --git a/scripts/innosetup/dvc_left.bmp.dvc b/scripts/innosetup/dvc_left.bmp.dvc
index be60334b..c97e16f8 100644
--- a/scripts/innosetup/dvc_left.bmp.dvc
+++ b/scripts/innosetup/dvc_left.bmp.dvc
@@ -1,3 +1,3 @@
outs:
-- md5: 9106cda08aa427e73492389a0f17c72d
+- sha256: 9106cda08aa427e73492389a0f17c72d
path: dvc_left.bmp
diff --git a/scripts/innosetup/dvc_up.bmp.dvc b/scripts/innosetup/dvc_up.bmp.dvc
index 7fb5ae55..59df4a87 100644
--- a/scripts/innosetup/dvc_up.bmp.dvc
+++ b/scripts/innosetup/dvc_up.bmp.dvc
@@ -1,3 +1,3 @@
outs:
-- md5: 94614d6650e062655f9f77507dc9c1f2
+- sha256: 94614d6650e062655f9f77507dc9c1f2
path: dvc_up.bmp
diff --git a/tests/func/test_add.py b/tests/func/test_add.py
2022-12-17 14:52:18 +02:00
index b096bbf5..b2c3fa23 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_add.py
+++ b/tests/func/test_add.py
2022-12-17 14:52:18 +02:00
@@ -37,7 +37,7 @@ from dvc.testing.test_workspace import TestAdd
from dvc.utils import LARGE_DIR_SIZE, relpath
2022-08-05 15:27:14 +03:00
from dvc.utils.fs import path_isin
from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml
2022-12-17 14:52:18 +02:00
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
from dvc_data.hashfile.hash_info import HashInfo
2022-08-05 15:27:14 +03:00
from tests.basic_env import TestDvc
2022-12-17 14:52:18 +02:00
from tests.utils import get_gitignore_content
@@ -45,7 +45,7 @@ from tests.utils import get_gitignore_content
2022-08-05 15:27:14 +03:00
def test_add(tmp_dir, dvc):
(stage,) = tmp_dir.dvc_gen({"foo": "foo"})
- md5 = file_md5("foo", dvc.fs)
+ sha256 = file_sha256("foo", dvc.fs)
assert stage is not None
2022-12-17 14:52:18 +02:00
@@ -54,13 +54,13 @@ def test_add(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
assert len(stage.outs) == 1
assert len(stage.deps) == 0
assert stage.cmd is None
- assert stage.outs[0].hash_info == HashInfo("md5", md5)
- assert stage.md5 is None
+ assert stage.outs[0].hash_info == HashInfo("sha256", sha256)
+ assert stage.sha256 is None
assert (tmp_dir / "foo.dvc").parse() == {
"outs": [
{
- "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8",
"path": "foo",
"size": 3,
}
2022-12-17 14:52:18 +02:00
@@ -78,7 +78,7 @@ def test_add_executable(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
assert (tmp_dir / "foo.dvc").parse() == {
"outs": [
{
- "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8",
"path": "foo",
"size": 3,
"isexec": True,
2022-12-17 14:52:18 +02:00
@@ -296,7 +296,7 @@ def test_add_filtered_files_in_dir(
2022-08-05 15:27:14 +03:00
class TestAddExternal(TestAdd):
@pytest.fixture
def hash_name(self):
- return "md5"
+ return "sha256"
@pytest.fixture
def hash_value(self):
2022-12-17 14:52:18 +02:00
@@ -317,7 +317,7 @@ def test_add_external_relpath(tmp_dir, dvc, local_cloud):
2022-08-05 15:27:14 +03:00
dvc.add(rel, external=True)
assert (tmp_dir / "file.dvc").read_text() == (
"outs:\n"
- "- md5: 8c7dd922ad47494fc02c388e12c00eac\n"
+ "- sha256: 8c7dd922ad47494fc02c388e12c00eac\n"
" size: 4\n"
f" path: {rel}\n"
)
2022-12-17 14:52:18 +02:00
@@ -379,7 +379,7 @@ class TestDoubleAddUnchanged(TestDvc):
2022-08-05 15:27:14 +03:00
def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
2022-12-17 14:52:18 +02:00
- file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5")
+ file_sha256_counter = mocker.spy(dvc_data.hashfile.hash, "file_sha256")
2022-08-05 15:27:14 +03:00
tmp_dir.gen("foo", "foo")
ret = main(["config", "cache.type", "copy"])
2022-12-17 14:52:18 +02:00
@@ -387,30 +387,30 @@ def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
2022-08-05 15:27:14 +03:00
ret = main(["add", "foo"])
assert ret == 0
- assert file_md5_counter.mock.call_count == 1
+ assert file_sha256_counter.mock.call_count == 1
ret = main(["status"])
assert ret == 0
- assert file_md5_counter.mock.call_count == 1
+ assert file_sha256_counter.mock.call_count == 1
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
- assert file_md5_counter.mock.call_count == 1
+ assert file_sha256_counter.mock.call_count == 1
os.rename("foo", "foo.back")
ret = main(["checkout"])
assert ret == 0
- assert file_md5_counter.mock.call_count == 1
+ assert file_sha256_counter.mock.call_count == 1
ret = main(["status"])
assert ret == 0
- assert file_md5_counter.mock.call_count == 1
+ assert file_sha256_counter.mock.call_count == 1
def test_should_update_state_entry_for_directory_after_add(
mocker, dvc, tmp_dir
):
2022-12-17 14:52:18 +02:00
- file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5")
+ file_sha256_counter = mocker.spy(dvc_data.hashfile.hash, "file_sha256")
2022-08-05 15:27:14 +03:00
tmp_dir.gen({"data/data": "foo", "data/data_sub/sub_data": "foo"})
2022-12-17 14:52:18 +02:00
@@ -419,27 +419,27 @@ def test_should_update_state_entry_for_directory_after_add(
2022-08-05 15:27:14 +03:00
ret = main(["add", "data"])
assert ret == 0
2022-12-17 14:52:18 +02:00
- assert file_md5_counter.mock.call_count == 5
+ assert file_sha256_counter.mock.call_count == 5
2022-08-05 15:27:14 +03:00
ret = main(["status"])
assert ret == 0
2022-12-17 14:52:18 +02:00
- assert file_md5_counter.mock.call_count == 6
+ assert file_sha256_counter.mock.call_count == 6
2022-08-05 15:27:14 +03:00
ls = "dir" if os.name == "nt" else "ls"
ret = main(
["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")]
)
assert ret == 0
2022-12-17 14:52:18 +02:00
- assert file_md5_counter.mock.call_count == 8
+ assert file_sha256_counter.mock.call_count == 8
2022-08-05 15:27:14 +03:00
os.rename("data", "data" + ".back")
ret = main(["checkout"])
assert ret == 0
2022-12-17 14:52:18 +02:00
- assert file_md5_counter.mock.call_count == 8
+ assert file_sha256_counter.mock.call_count == 8
2022-08-05 15:27:14 +03:00
ret = main(["status"])
assert ret == 0
2022-12-17 14:52:18 +02:00
- assert file_md5_counter.mock.call_count == 10
+ assert file_sha256_counter.mock.call_count == 10
2022-08-05 15:27:14 +03:00
class TestAddCommit(TestDvc):
2022-12-17 14:52:18 +02:00
@@ -917,7 +917,7 @@ def test_add_preserve_fields(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
- path: foo # out comment
desc: out desc
2022-12-17 14:52:18 +02:00
remote: testremote
2022-08-05 15:27:14 +03:00
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta: some metadata
"""
diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py
2022-12-17 14:52:18 +02:00
index 44e636c1..69811c2e 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_checkout.py
+++ b/tests/func/test_checkout.py
2022-12-17 14:52:18 +02:00
@@ -991,7 +991,7 @@ def test_checkout_dir_compat(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
textwrap.dedent(
f"""\
outs:
- - md5: {stage.outs[0].hash_info.value}
+ - sha256: {stage.outs[0].hash_info.value}
path: data
"""
),
diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py
2022-12-17 14:52:18 +02:00
index b1b40ce4..0a067d58 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_commit.py
+++ b/tests/func/test_commit.py
2022-12-17 14:52:18 +02:00
@@ -62,7 +62,7 @@ def test_commit_preserve_fields(tmp_dir, dvc):
- path: foo # out comment
desc: out desc
remote: testremote
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta: some metadata
"""
@@ -88,19 +88,19 @@ def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw):
2022-08-05 15:27:14 +03:00
assert not stage.outs[0].changed_cache()
-def test_commit_changed_md5(tmp_dir, dvc):
+def test_commit_changed_sha256(tmp_dir, dvc):
tmp_dir.gen({"file": "file content"})
(stage,) = dvc.add("file", no_commit=True)
stage_file_content = (tmp_dir / stage.path).parse()
- stage_file_content["md5"] = "1111111111"
+ stage_file_content["sha256"] = "1111111111"
(tmp_dir / stage.path).dump(stage_file_content)
2022-12-17 14:52:18 +02:00
with pytest.raises(StageCommitError):
2022-08-05 15:27:14 +03:00
dvc.commit(stage.path)
dvc.commit(stage.path, force=True)
- assert "md5" not in (tmp_dir / stage.path).parse()
+ assert "sha256" not in (tmp_dir / stage.path).parse()
def test_commit_no_exec(tmp_dir, dvc):
diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py
2022-12-17 14:52:18 +02:00
index 192e350f..012e6921 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_data_cloud.py
+++ b/tests/func/test_data_cloud.py
2022-12-17 14:52:18 +02:00
@@ -131,7 +131,7 @@ def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog):
2022-08-05 15:27:14 +03:00
stage_file_path = stage.relpath
content = (tmp_dir / stage_file_path).parse()
- del content["outs"][0]["md5"]
+ del content["outs"][0]["sha256"]
(tmp_dir / stage_file_path).dump(content)
with caplog.at_level(logging.WARNING, logger="dvc"):
2022-12-17 14:52:18 +02:00
@@ -148,7 +148,7 @@ def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog):
2022-08-05 15:27:14 +03:00
def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote):
tmp_dir.gen({"foo": "foo"})
2022-12-17 14:52:18 +02:00
- test_file_md5 = mocker.spy(dvc_data.hashfile.hash, "file_md5")
+ test_file_sha256 = mocker.spy(dvc_data.hashfile.hash, "file_sha256")
2022-08-05 15:27:14 +03:00
ret = main(["config", "cache.type", "hardlink"])
assert ret == 0
ret = main(["add", "foo"])
2022-12-17 14:52:18 +02:00
@@ -157,7 +157,7 @@ def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote):
2022-08-05 15:27:14 +03:00
assert ret == 0
ret = main(["run", "--single-stage", "-d", "foo", "echo foo"])
assert ret == 0
- assert test_file_md5.mock.call_count == 1
+ assert test_file_sha256.mock.call_count == 1
def test_missing_cache(tmp_dir, dvc, local_remote, caplog):
2022-12-17 14:52:18 +02:00
@@ -170,8 +170,8 @@ def test_missing_cache(tmp_dir, dvc, local_remote, caplog):
2022-08-05 15:27:14 +03:00
"Some of the cache files do not exist "
"neither locally nor on remote. Missing cache files:\n"
)
- foo = "name: bar, md5: 37b51d194a7513e45b56f6524f2d51f2\n"
- bar = "name: foo, md5: acbd18db4cc2f85cedef654fccc4a4d8\n"
+ foo = "name: bar, sha256: 37b51d194a7513e45b56f6524f2d51f2\n"
+ bar = "name: foo, sha256: acbd18db4cc2f85cedef654fccc4a4d8\n"
caplog.clear()
dvc.push()
2022-12-17 14:52:18 +02:00
@@ -207,7 +207,7 @@ def test_verify_hashes(
2022-08-05 15:27:14 +03:00
remove("dir")
2022-12-17 14:52:18 +02:00
remove(dvc.odb.local.path)
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
- hash_spy = mocker.spy(dvc_data.hashfile.hash, "file_md5")
+ hash_spy = mocker.spy(dvc_data.hashfile.hash, "file_sha256")
2022-08-05 15:27:14 +03:00
dvc.pull()
assert hash_spy.call_count == 0
diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py
2022-12-17 14:52:18 +02:00
index 5b93815f..7de8e775 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_diff.py
+++ b/tests/func/test_diff.py
@@ -9,7 +9,7 @@ from dvc.utils.fs import remove
def digest(text):
- return hashlib.md5(bytes(text, "utf-8")).hexdigest()
+ return hashlib.sha256(bytes(text, "utf-8")).hexdigest()
def test_no_scm(tmp_dir, dvc):
diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py
2022-12-17 14:52:18 +02:00
index 0f034f88..c1a76b3f 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_external_repo.py
+++ b/tests/func/test_external_repo.py
2022-12-17 14:52:18 +02:00
@@ -206,7 +206,7 @@ def test_subrepos_are_ignored(tmp_dir, erepo_dir):
2022-08-05 15:27:14 +03:00
repo.odb.local,
2022-12-17 14:52:18 +02:00
"dir",
repo.dvcfs,
2022-08-05 15:27:14 +03:00
- "md5",
+ "sha256",
2022-12-17 14:52:18 +02:00
ignore=repo.dvcignore,
2022-08-05 15:27:14 +03:00
)
transfer(
diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py
2022-12-17 14:52:18 +02:00
index 27a2e9f6..40b0a357 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_gc.py
+++ b/tests/func/test_gc.py
2022-12-17 14:52:18 +02:00
@@ -24,9 +24,9 @@ class TestGC(TestDvcGit):
raw_dir_hash = stages[0].outs[0].hash_info.as_raw().value
2022-08-05 15:27:14 +03:00
self.good_cache = [
2022-12-17 14:52:18 +02:00
- self.dvc.odb.local.oid_to_path(md5)
2022-08-05 15:27:14 +03:00
- for md5 in self.dvc.odb.local.all()
2022-12-17 14:52:18 +02:00
- if md5 != raw_dir_hash
+ self.dvc.odb.local.oid_to_path(sha256)
2022-08-05 15:27:14 +03:00
+ for sha256 in self.dvc.odb.local.all()
2022-12-17 14:52:18 +02:00
+ if sha256 != raw_dir_hash
2022-08-05 15:27:14 +03:00
]
2022-12-17 14:52:18 +02:00
self.bad_cache = [self.dvc.odb.local.oid_to_path(raw_dir_hash)]
2022-08-05 15:27:14 +03:00
diff --git a/tests/func/test_import_url.py b/tests/func/test_import_url.py
2022-12-17 14:52:18 +02:00
index 33e79e40..78550df5 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_import_url.py
+++ b/tests/func/test_import_url.py
@@ -120,11 +120,11 @@ def test_import_url_with_no_exec(tmp_dir, dvc, erepo_dir):
class TestImport(_TestImport):
@pytest.fixture
- def stage_md5(self):
+ def stage_sha256(self):
return "dc24e1271084ee317ac3c2656fb8812b"
@pytest.fixture
- def dir_md5(self):
+ def dir_sha256(self):
return "b6dcab6ccd17ca0a8bf4a215a37d14cc.dir"
@pytest.fixture
@@ -155,15 +155,15 @@ def test_import_url_preserve_meta(tmp_dir, dvc):
desc: top desc
deps:
- path: foo # dep comment
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
outs:
- path: bar # out comment
desc: out desc
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta: some metadata
- md5: be7ade0aa89cc8d56e320867a9de9740
+ sha256: be7ade0aa89cc8d56e320867a9de9740
frozen: true
"""
)
@@ -229,7 +229,7 @@ def test_import_url_to_remote_directory(tmp_dir, dvc, workspace, local_remote):
for file_part in file_parts:
with open(
2022-12-17 14:52:18 +02:00
- local_remote.oid_to_path(file_part["md5"]), encoding="utf-8"
+ local_remote.oid_to_path(file_part["sha256"]), encoding="utf-8"
2022-08-05 15:27:14 +03:00
) as fobj:
assert fobj.read() == file_part["relpath"]
2022-12-17 14:52:18 +02:00
@@ -258,7 +258,7 @@ def test_import_url_to_remote_status(tmp_dir, dvc, local_cloud, local_remote):
2022-08-05 15:27:14 +03:00
local_cloud.gen("foo", "foo")
stage = dvc.imp_url(str(local_cloud / "foo"), to_remote=True)
- assert stage.md5 is not None
+ assert stage.sha256 is not None
status = dvc.status()
assert status["foo.dvc"] == [{"changed outs": {"foo": "not in cache"}}]
diff --git a/tests/func/test_install.py b/tests/func/test_install.py
2022-12-17 14:52:18 +02:00
index a4a800c9..fb5ddbfe 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_install.py
+++ b/tests/func/test_install.py
@@ -6,7 +6,7 @@ import pytest
from git import GitCommandError
from dvc.exceptions import DvcException
2022-12-17 14:52:18 +02:00
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
2022-08-05 15:27:14 +03:00
from tests.func.parsing.test_errors import escape_ansi
@@ -76,7 +76,7 @@ class TestInstall:
conf["core"]["remote"] = "store"
tmp_dir.dvc_gen("file", "file_content", "commit message")
- file_checksum = file_md5("file", dvc.fs)
+ file_checksum = file_sha256("file", dvc.fs)
expected_storage_path = (
storage_path / file_checksum[:2] / file_checksum[2:]
)
@@ -117,7 +117,7 @@ def test_merge_driver_no_ancestor(tmp_dir, scm, dvc):
assert (tmp_dir / "data").read_text() == {"bar": "bar"}
assert (tmp_dir / "data.dvc").read_text() == (
"outs:\n"
- "- md5: 5ea40360f5b4ec688df672a4db9c17d1.dir\n"
+ "- sha256: 5ea40360f5b4ec688df672a4db9c17d1.dir\n"
" size: 6\n"
" nfiles: 2\n"
" path: data\n"
@@ -154,7 +154,7 @@ def test_merge_driver(tmp_dir, scm, dvc):
assert (tmp_dir / "data").read_text() == {"master": "master", "two": "two"}
assert (tmp_dir / "data.dvc").read_text() == (
"outs:\n"
- "- md5: 839ef9371606817569c1ee0e5f4ed233.dir\n"
+ "- sha256: 839ef9371606817569c1ee0e5f4ed233.dir\n"
" size: 12\n"
" nfiles: 3\n"
" path: data\n"
diff --git a/tests/func/test_lockfile.py b/tests/func/test_lockfile.py
2022-12-17 14:52:18 +02:00
index 93974978..3fb2f1a1 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_lockfile.py
+++ b/tests/func/test_lockfile.py
@@ -48,12 +48,12 @@ def test_deps_outs_are_sorted_by_path(tmp_dir, dvc, run_head):
# lock stage key order:
assert list(lock.keys()) == ["cmd", "deps", "outs"]
- # `path` key appear first and then the `md5`
+ # `path` key appear first and then the `sha256`
assert all(
- list(dep.keys()) == ["path", "md5", "size"] for dep in lock["deps"]
+ list(dep.keys()) == ["path", "sha256", "size"] for dep in lock["deps"]
)
assert all(
- list(out.keys()) == ["path", "md5", "size"] for out in lock["outs"]
+ list(out.keys()) == ["path", "sha256", "size"] for out in lock["outs"]
)
# deps are always sorted by the file path naming
@@ -167,7 +167,7 @@ def test_params_dump(tmp_dir, dvc, run_head):
def v1_repo_lock(tmp_dir, dvc):
"""Generates a repo having v1 format lockfile"""
size = 5 if os.name == "nt" else 4
- hi = HashInfo(name="md5", value="c157a79031e1c40f85931829bc5fc552")
+ hi = HashInfo(name="sha256", value="c157a79031e1c40f85931829bc5fc552")
v1_lockdata = {
"foo": {"cmd": "echo foo"},
"bar": {
diff --git a/tests/func/test_merge_driver.py b/tests/func/test_merge_driver.py
index 113984f9..218e524a 100644
--- a/tests/func/test_merge_driver.py
+++ b/tests/func/test_merge_driver.py
@@ -118,11 +118,11 @@ def test_merge_different_output_options(tmp_dir, dvc, caplog):
(tmp_dir / "ancestor").touch()
(tmp_dir / "our").write_text(
- "outs:\n- md5: f123456789.dir\n path: path\n"
+ "outs:\n- sha256: f123456789.dir\n path: path\n"
)
(tmp_dir / "their").write_text(
- "outs:\n- md5: f987654321.dir\n path: path\n cache: false\n"
+ "outs:\n- sha256: f987654321.dir\n path: path\n cache: false\n"
)
assert (
@@ -149,10 +149,10 @@ def test_merge_file(tmp_dir, dvc, caplog):
(tmp_dir / "ancestor").touch()
(tmp_dir / "our").write_text(
- "outs:\n- md5: f123456789.dir\n path: path\n"
+ "outs:\n- sha256: f123456789.dir\n path: path\n"
)
- (tmp_dir / "their").write_text("outs:\n- md5: f987654321\n path: path\n")
+ (tmp_dir / "their").write_text("outs:\n- sha256: f987654321\n path: path\n")
assert (
main(
@@ -179,13 +179,13 @@ def test_merge_non_dvc_add(tmp_dir, dvc, caplog):
(tmp_dir / "our").write_text(
"outs:\n"
- "- md5: f123456789.dir\n"
+ "- sha256: f123456789.dir\n"
" path: path\n"
- "- md5: ff123456789.dir\n"
+ "- sha256: ff123456789.dir\n"
" path: another\n"
)
- (tmp_dir / "their").write_text("outs:\n- md5: f987654321\n path: path\n")
+ (tmp_dir / "their").write_text("outs:\n- sha256: f987654321\n path: path\n")
assert (
main(
diff --git a/tests/func/test_move.py b/tests/func/test_move.py
index aad2003a..3e28e628 100644
--- a/tests/func/test_move.py
+++ b/tests/func/test_move.py
@@ -261,7 +261,7 @@ def test_move_meta(tmp_dir, dvc):
assert res == textwrap.dedent(
"""\
outs:
- - md5: acbd18db4cc2f85cedef654fccc4a4d8
+ - sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
path: bar
meta:
diff --git a/tests/func/test_odb.py b/tests/func/test_odb.py
2022-12-17 14:52:18 +02:00
index c2ded255..501814de 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_odb.py
+++ b/tests/func/test_odb.py
2022-12-17 14:52:18 +02:00
@@ -12,17 +12,17 @@ from dvc_objects.errors import ObjectFormatError
2022-08-05 15:27:14 +03:00
def test_cache(tmp_dir, dvc):
- cache1_md5 = "123"
- cache2_md5 = "234"
+ cache1_sha256 = "123"
+ cache2_sha256 = "234"
cache1 = os.path.join(
2022-12-17 14:52:18 +02:00
dvc.odb.local.path,
2022-08-05 15:27:14 +03:00
- cache1_md5[0:2],
- cache1_md5[2:],
+ cache1_sha256[0:2],
+ cache1_sha256[2:],
)
cache2 = os.path.join(
2022-12-17 14:52:18 +02:00
dvc.odb.local.path,
2022-08-05 15:27:14 +03:00
- cache2_md5[0:2],
- cache2_md5[2:],
+ cache2_sha256[0:2],
+ cache2_sha256[2:],
)
tmp_dir.gen({cache1: "1", cache2: "2"})
@@ -31,13 +31,13 @@ def test_cache(tmp_dir, dvc):
odb = ODBManager(dvc)
- md5_list = list(odb.local.all())
- assert len(md5_list) == 2
- assert cache1_md5 in md5_list
- assert cache2_md5 in md5_list
+ sha256_list = list(odb.local.all())
+ assert len(sha256_list) == 2
+ assert cache1_sha256 in sha256_list
+ assert cache2_sha256 in sha256_list
2022-12-17 14:52:18 +02:00
- odb_cache1 = odb.local.oid_to_path(cache1_md5)
- odb_cache2 = odb.local.oid_to_path(cache2_md5)
+ odb_cache1 = odb.local.oid_to_path(cache1_sha256)
+ odb_cache2 = odb.local.oid_to_path(cache2_sha256)
2022-08-05 15:27:14 +03:00
assert os.fspath(odb_cache1) == cache1
assert os.fspath(odb_cache2) == cache2
@@ -49,13 +49,13 @@ def test_cache_load_bad_dir_cache(tmp_dir, dvc):
2022-12-17 14:52:18 +02:00
fname = os.fspath(dvc.odb.local.oid_to_path(dir_hash))
2022-08-05 15:27:14 +03:00
tmp_dir.gen({fname: "<clearly>not,json"})
with pytest.raises(ObjectFormatError):
- load(dvc.odb.local, HashInfo("md5", dir_hash))
+ load(dvc.odb.local, HashInfo("sha256", dir_hash))
dir_hash = "234.dir"
2022-12-17 14:52:18 +02:00
fname = os.fspath(dvc.odb.local.oid_to_path(dir_hash))
2022-08-05 15:27:14 +03:00
tmp_dir.gen({fname: '{"a": "b"}'})
with pytest.raises(ObjectFormatError):
- load(dvc.odb.local, HashInfo("md5", dir_hash))
+ load(dvc.odb.local, HashInfo("sha256", dir_hash))
def test_external_cache_dir(tmp_dir, dvc, make_tmp_dir):
diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py
2022-12-17 14:52:18 +02:00
index aac08ce7..6164cfa8 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_remote.py
+++ b/tests/func/test_remote.py
2022-12-17 14:52:18 +02:00
@@ -147,22 +147,22 @@ def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
path = (tmp_dir / "data").fs_path
tree = Tree.from_list(
- [{"relpath": "1", "md5": "1"}, {"relpath": "2", "md5": "2"}]
+ [{"relpath": "1", "sha256": "1"}, {"relpath": "2", "sha256": "2"}]
)
tree.digest()
2022-12-17 14:52:18 +02:00
with patch("dvc_data.build._build_tree", return_value=(None, tree)):
- _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "md5")
+ _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "sha256")
2022-08-05 15:27:14 +03:00
hash1 = obj.hash_info
2022-12-17 14:52:18 +02:00
# remove the raw dir obj to force building the tree on the next build call
dvc.odb.local.fs.remove(dvc.odb.local.oid_to_path(hash1.as_raw().value))
2022-08-05 15:27:14 +03:00
tree = Tree.from_list(
- [{"md5": "1", "relpath": "1"}, {"md5": "2", "relpath": "2"}]
+ [{"sha256": "1", "relpath": "1"}, {"sha256": "2", "relpath": "2"}]
)
tree.digest()
2022-12-17 14:52:18 +02:00
with patch("dvc_data.build._build_tree", return_value=(None, tree)):
- _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "md5")
+ _, _, obj = build(dvc.odb.local, path, dvc.odb.local.fs, "sha256")
2022-08-05 15:27:14 +03:00
hash2 = obj.hash_info
assert hash1 == hash2
2022-12-17 14:52:18 +02:00
@@ -248,7 +248,7 @@ def test_remote_modify_local_on_repo_config(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
def test_external_dir_resource_on_no_cache(tmp_dir, dvc, tmp_path_factory):
# https://github.com/iterative/dvc/issues/2647, is some situations
- # (external dir dependency) cache is required to calculate dir md5
+ # (external dir dependency) cache is required to calculate dir sha256
external_dir = tmp_path_factory.mktemp("external_dir")
file = external_dir / "file"
diff --git a/tests/func/test_repo_index.py b/tests/func/test_repo_index.py
2022-12-17 14:52:18 +02:00
index c419d88f..dfaee778 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_repo_index.py
+++ b/tests/func/test_repo_index.py
@@ -269,17 +269,17 @@ def test_used_objs(tmp_dir, scm, dvc, run_copy, rev):
expected_objs = [
HashInfo(
- name="md5",
+ name="sha256",
value="acbd18db4cc2f85cedef654fccc4a4d8",
obj_name="bar",
),
HashInfo(
- name="md5",
+ name="sha256",
value="8c7dd922ad47494fc02c388e12c00eac",
obj_name="dir/subdir/file",
),
HashInfo(
- name="md5",
+ name="sha256",
value="d28c9e28591aeb7e303dc6772ffa6f6b.dir",
obj_name="dir",
),
diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py
2022-12-17 14:52:18 +02:00
index ed405866..8060585c 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_repro.py
+++ b/tests/func/test_repro.py
2022-12-17 14:52:18 +02:00
@@ -21,7 +21,7 @@ from dvc.stage.exceptions import StageFileDoesNotExistError
from dvc.utils import relpath
2022-08-05 15:27:14 +03:00
from dvc.utils.fs import remove
from dvc.utils.serialize import dump_yaml, load_yaml
2022-12-17 14:52:18 +02:00
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
2022-08-05 15:27:14 +03:00
from tests.basic_env import TestDvc
2022-12-17 14:52:18 +02:00
2022-08-05 15:27:14 +03:00
@@ -654,7 +654,7 @@ class TestReproDataSource(TestReproChangedData):
self.assertTrue(filecmp.cmp(self.FOO, self.BAR, shallow=False))
self.assertEqual(
- stages[0].outs[0].hash_info.value, file_md5(self.BAR, self.dvc.fs)
+ stages[0].outs[0].hash_info.value, file_sha256(self.BAR, self.dvc.fs)
)
@@ -1127,21 +1127,21 @@ def test_dvc_formatting_retained(tmp_dir, dvc, run_copy):
(tmp_dir / "foo").write_text("new foo")
dvc.reproduce("foo_copy.dvc", force=True)
- def _hide_md5(text):
- return re.sub(r"\b[a-f0-9]{32}\b", "<md5>", text)
+ def _hide_sha256(text):
+ return re.sub(r"\b[a-f0-9]{32}\b", "<sha256>", text)
def _hide_size(text):
return re.sub(r"size: [0-9]*\b", "size: <size>", text)
def _mask(text):
- return _hide_size(_hide_md5(text))
+ return _hide_size(_hide_sha256(text))
assert _mask(stage_text) == _mask(stage_path.read_text())
def _format_dvc_line(line):
- # Add line comment for all cache and md5 keys
- if "cache:" in line or "md5:" in line:
+ # Add line comment for all cache and sha256 keys
+ if "cache:" in line or "sha256:" in line:
return line + " # line comment"
# Format command as one word per line
if line.startswith("cmd: "):
diff --git a/tests/func/test_run_multistage.py b/tests/func/test_run_multistage.py
index f83b7e18..569a86de 100644
--- a/tests/func/test_run_multistage.py
+++ b/tests/func/test_run_multistage.py
@@ -355,7 +355,7 @@ def test_run_external_outputs(
dvc,
local_workspace,
):
- hash_name = "md5"
+ hash_name = "sha256"
foo_hash = "acbd18db4cc2f85cedef654fccc4a4d8"
bar_hash = "37b51d194a7513e45b56f6524f2d51f2"
diff --git a/tests/func/test_run_single_stage.py b/tests/func/test_run_single_stage.py
2022-12-17 14:52:18 +02:00
index db775d05..0b193b42 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_run_single_stage.py
+++ b/tests/func/test_run_single_stage.py
2022-12-17 14:52:18 +02:00
@@ -31,7 +31,7 @@ from dvc.stage.exceptions import (
2022-08-05 15:27:14 +03:00
StagePathOutsideError,
)
from dvc.utils.serialize import load_yaml
2022-12-17 14:52:18 +02:00
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
2022-08-05 15:27:14 +03:00
from tests.basic_env import TestDvc, TestDvcGit
2022-12-17 14:52:18 +02:00
2022-08-05 15:27:14 +03:00
@@ -60,7 +60,7 @@ class TestRun(TestDvc):
self.assertEqual(len(stage.outs), len(outs + outs_no_cache))
self.assertEqual(stage.outs[0].fspath, outs[0])
self.assertEqual(
- stage.outs[0].hash_info.value, file_md5(self.FOO, self.dvc.fs)
+ stage.outs[0].hash_info.value, file_sha256(self.FOO, self.dvc.fs)
)
self.assertTrue(stage.path, fname)
2022-12-17 14:52:18 +02:00
@@ -987,20 +987,20 @@ def test_run_force_preserves_comments_and_meta(tmp_dir, dvc, run_copy):
2022-08-05 15:27:14 +03:00
cmd: python copy.py foo bar
deps:
- path: copy.py
- md5: 90c27dd80b698fe766f0c3ee0b6b9729
+ sha256: 90c27dd80b698fe766f0c3ee0b6b9729
size: {code_size}
- path: foo
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
outs:
# comment preserved
- path: bar
desc: out desc
- md5: acbd18db4cc2f85cedef654fccc4a4d8
+ sha256: acbd18db4cc2f85cedef654fccc4a4d8
size: 3
meta:
name: copy-foo-bar
- md5: be659ce4a33cebb85d4e8e1335d394ad
+ sha256: be659ce4a33cebb85d4e8e1335d394ad
"""
)
2022-12-17 14:52:18 +02:00
@@ -1011,18 +1011,18 @@ def test_run_force_preserves_comments_and_meta(tmp_dir, dvc, run_copy):
2022-08-05 15:27:14 +03:00
cmd: python copy.py foo1 bar1
deps:
- path: foo1
- md5: 299a0be4a5a79e6a59fdd251b19d78bb
+ sha256: 299a0be4a5a79e6a59fdd251b19d78bb
size: 4
- path: copy.py
- md5: 90c27dd80b698fe766f0c3ee0b6b9729
+ sha256: 90c27dd80b698fe766f0c3ee0b6b9729
size: {code_size}
outs:
# comment preserved
- path: bar1
- md5: 299a0be4a5a79e6a59fdd251b19d78bb
+ sha256: 299a0be4a5a79e6a59fdd251b19d78bb
size: 4
meta:
name: copy-foo-bar
- md5: 9e725b11cb393e6a7468369fa50328b7
+ sha256: 9e725b11cb393e6a7468369fa50328b7
"""
)
diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py
2022-12-17 14:52:18 +02:00
index 77a35488..46ff6a02 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_stage.py
+++ b/tests/func/test_stage.py
@@ -78,8 +78,8 @@ class TestReload(TestDvc):
d = load_yaml(stage.relpath)
# NOTE: checking that reloaded stage didn't change its checksum
- md5 = "11111111111111111111111111111111"
- d[stage.PARAM_MD5] = md5
+ sha256 = "11111111111111111111111111111111"
+ d[stage.PARAM_MD5] = sha256
dump_yaml(stage.relpath, d)
dvcfile = SingleStageFile(self.dvc, stage.relpath)
@@ -89,7 +89,7 @@ class TestReload(TestDvc):
dvcfile.dump(stage)
d = load_yaml(stage.relpath)
- self.assertEqual(d[stage.PARAM_MD5], md5)
+ self.assertEqual(d[stage.PARAM_MD5], sha256)
class TestDefaultWorkingDirectory(TestDvc):
@@ -154,7 +154,7 @@ class TestExternalRemoteResolution(TestDvc):
assert os.path.exists("movie.txt")
-def test_md5_ignores_comments(tmp_dir, dvc):
+def test_sha256_ignores_comments(tmp_dir, dvc):
(stage,) = tmp_dir.dvc_gen("foo", "foo content")
with open(stage.path, "a", encoding="utf-8") as f:
diff --git a/tests/func/test_state.py b/tests/func/test_state.py
2022-12-17 14:52:18 +02:00
index 1e4f6ae6..4fa328d6 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_state.py
+++ b/tests/func/test_state.py
2022-12-17 14:52:18 +02:00
@@ -2,7 +2,7 @@ import os
import re
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
from dvc.repo import Repo
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.state import State
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
@@ -10,7 +10,7 @@ from dvc_data.hashfile.state import State
2022-08-05 15:27:14 +03:00
def test_state(tmp_dir, dvc):
tmp_dir.gen("foo", "foo content")
path = tmp_dir / "foo"
- hash_info = HashInfo("md5", file_md5(path, dvc.fs))
+ hash_info = HashInfo("sha256", file_sha256(path, dvc.fs))
state = State(dvc.root_dir, dvc.tmp_dir, dvc.dvcignore)
@@ -22,7 +22,7 @@ def test_state(tmp_dir, dvc):
assert state.get(path, dvc.fs) == (None, None)
- hash_info = HashInfo("md5", file_md5(path, dvc.fs))
+ hash_info = HashInfo("sha256", file_sha256(path, dvc.fs))
state.save(path, dvc.fs, hash_info)
assert state.get(path, dvc.fs)[1] == hash_info
diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py
2022-12-17 14:52:18 +02:00
index 0a1834ef..f9bf8d3a 100644
2022-08-05 15:27:14 +03:00
--- a/tests/func/test_utils.py
+++ b/tests/func/test_utils.py
2022-12-17 14:52:18 +02:00
@@ -6,7 +6,7 @@ from dvc import utils
from dvc.exceptions import DvcException
2022-08-05 15:27:14 +03:00
-def test_dict_md5():
+def test_dict_sha256():
d = {
"cmd": "python code.py foo file1",
"locked": "true",
2022-12-17 14:52:18 +02:00
@@ -15,18 +15,18 @@ def test_dict_md5():
2022-08-05 15:27:14 +03:00
"path": "file1",
"metric": {"type": "raw"},
"cache": False,
- "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+ "sha256": "acbd18db4cc2f85cedef654fccc4a4d8",
}
],
"deps": [
- {"path": "foo", "md5": "acbd18db4cc2f85cedef654fccc4a4d8"},
- {"path": "code.py", "md5": "d05447644b89960913c7eee5fd776adb"},
+ {"path": "foo", "sha256": "acbd18db4cc2f85cedef654fccc4a4d8"},
+ {"path": "code.py", "sha256": "d05447644b89960913c7eee5fd776adb"},
],
}
- md5 = "8b263fa05ede6c3145c164829be694b4"
+ sha256 = "8b263fa05ede6c3145c164829be694b4"
- assert md5 == utils.dict_md5(d, exclude=["metric", "locked"])
+ assert sha256 == utils.dict_sha256(d, exclude=["metric", "locked"])
def test_boxify():
2022-12-17 14:52:18 +02:00
diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py
index 73cf3bf7..a83056d6 100644
--- a/tests/unit/cli/test_main.py
+++ b/tests/unit/cli/test_main.py
@@ -14,7 +14,7 @@ def test_state_pickle_errors_are_correctly_raised(tmp_dir, caplog, mocker):
mocker.patch(
"dvc.cli.parse_args",
return_value=Namespace(
- func=raiser(DiskError(path, "md5s")),
+ func=raiser(DiskError(path, "sha256s")),
quiet=False,
verbose=True,
),
@@ -22,7 +22,7 @@ def test_state_pickle_errors_are_correctly_raised(tmp_dir, caplog, mocker):
assert main() == 255
assert (
- f"Could not open pickled 'md5s' cache.\n"
+ f"Could not open pickled 'sha256s' cache.\n"
f"Remove the '{path.relative_to(tmp_dir)}' directory "
"and then retry this command.\n"
"See <https://error.dvc.org/pickle> for more information."
diff --git a/tests/unit/data/db/test_local.py b/tests/unit/data/db/test_local.py
index b755cf64..0175f179 100644
--- a/tests/unit/data/db/test_local.py
+++ b/tests/unit/data/db/test_local.py
@@ -17,8 +17,8 @@ def test_status_download_optimization(mocker, dvc):
odb = LocalHashFileDB(LocalFileSystem(), os.getcwd())
obj_ids = {
- HashInfo("md5", "acbd18db4cc2f85cedef654fccc4a4d8"),
- HashInfo("md5", "37b51d194a7513e45b56f6524f2d51f2"),
+ HashInfo("sha256", "acbd18db4cc2f85cedef654fccc4a4d8"),
+ HashInfo("sha256", "37b51d194a7513e45b56f6524f2d51f2"),
}
local_exists = [hash_info.value for hash_info in obj_ids]
@@ -94,7 +94,7 @@ def test_staging_file(tmp_dir, dvc):
local_odb = dvc.odb.local
staging_odb, _, obj = build(
- local_odb, (tmp_dir / "foo").fs_path, fs, "md5"
+ local_odb, (tmp_dir / "foo").fs_path, fs, "sha256"
)
assert not local_odb.exists(obj.hash_info.value)
@@ -122,7 +122,7 @@ def test_staging_dir(tmp_dir, dvc):
local_odb = dvc.odb.local
staging_odb, _, obj = build(
- local_odb, (tmp_dir / "dir").fs_path, fs, "md5"
+ local_odb, (tmp_dir / "dir").fs_path, fs, "sha256"
)
assert not local_odb.exists(obj.hash_info.value)
diff --git a/tests/unit/fs/test_data.py b/tests/unit/fs/test_data.py
index 0b838d1e..ad17ebd0 100644
--- a/tests/unit/fs/test_data.py
+++ b/tests/unit/fs/test_data.py
@@ -222,7 +222,7 @@ def test_isdvc(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
def test_get_hash_file(tmp_dir, dvc):
tmp_dir.dvc_gen({"foo": "foo"})
2022-12-17 14:52:18 +02:00
fs = DataFileSystem(index=dvc.index.data["repo"])
2022-08-05 15:27:14 +03:00
- assert fs.info("foo")["md5"] == "acbd18db4cc2f85cedef654fccc4a4d8"
+ assert fs.info("foo")["sha256"] == "acbd18db4cc2f85cedef654fccc4a4d8"
def test_get_hash_dir(tmp_dir, dvc, mocker):
2022-12-17 14:52:18 +02:00
@@ -231,7 +231,7 @@ def test_get_hash_dir(tmp_dir, dvc, mocker):
2022-08-05 15:27:14 +03:00
)
2022-12-17 14:52:18 +02:00
fs = DataFileSystem(index=dvc.index.data["repo"])
hash_file_spy = mocker.spy(dvc_data.hashfile.hash, "hash_file")
2022-08-05 15:27:14 +03:00
- assert fs.info("dir")["md5"] == "8761c4e9acad696bee718615e23e22db.dir"
+ assert fs.info("dir")["sha256"] == "8761c4e9acad696bee718615e23e22db.dir"
2022-12-17 14:52:18 +02:00
assert not hash_file_spy.called
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
@@ -241,15 +241,15 @@ def test_get_hash_granular(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
)
2022-12-17 14:52:18 +02:00
fs = DataFileSystem(index=dvc.index.data["repo"])
subdir = "dir/subdir"
2022-08-05 15:27:14 +03:00
- assert fs.info(subdir).get("md5") is None
2022-12-17 14:52:18 +02:00
- _, _, obj = build(dvc.odb.local, subdir, fs, "md5", dry_run=True)
2022-08-05 15:27:14 +03:00
+ assert fs.info(subdir).get("sha256") is None
2022-12-17 14:52:18 +02:00
+ _, _, obj = build(dvc.odb.local, subdir, fs, "sha256", dry_run=True)
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir"
+ "sha256", "af314506f1622d107e0ed3f14ec1a3b5.dir"
)
2022-12-17 14:52:18 +02:00
data = posixpath.join(subdir, "data")
2022-08-05 15:27:14 +03:00
- assert fs.info(data)["md5"] == "8d777f385d3dfec8815d20f7496026dc"
2022-12-17 14:52:18 +02:00
- _, _, obj = build(dvc.odb.local, data, fs, "md5", dry_run=True)
2022-08-05 15:27:14 +03:00
- assert obj.hash_info == HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc")
+ assert fs.info(data)["sha256"] == "8d777f385d3dfec8815d20f7496026dc"
2022-12-17 14:52:18 +02:00
+ _, _, obj = build(dvc.odb.local, data, fs, "sha256", dry_run=True)
2022-08-05 15:27:14 +03:00
+ assert obj.hash_info == HashInfo("sha256", "8d777f385d3dfec8815d20f7496026dc")
def test_get_hash_dirty_file(tmp_dir, dvc):
2022-12-17 14:52:18 +02:00
@@ -258,9 +258,9 @@ def test_get_hash_dirty_file(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
fs = DataFileSystem(index=dvc.index.data["repo"])
2022-08-05 15:27:14 +03:00
expected = "8c7dd922ad47494fc02c388e12c00eac"
- assert fs.info("file").get("md5") == expected
2022-12-17 14:52:18 +02:00
- _, _, obj = build(dvc.odb.local, "file", fs, "md5", dry_run=True)
2022-08-05 15:27:14 +03:00
- assert obj.hash_info == HashInfo("md5", expected)
+ assert fs.info("file").get("sha256") == expected
2022-12-17 14:52:18 +02:00
+ _, _, obj = build(dvc.odb.local, "file", fs, "sha256", dry_run=True)
2022-08-05 15:27:14 +03:00
+ assert obj.hash_info == HashInfo("sha256", expected)
def test_get_hash_dirty_dir(tmp_dir, dvc):
2022-12-17 14:52:18 +02:00
@@ -269,6 +269,6 @@ def test_get_hash_dirty_dir(tmp_dir, dvc):
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
fs = DataFileSystem(index=dvc.index.data["repo"])
2022-08-05 15:27:14 +03:00
expected = "5ea40360f5b4ec688df672a4db9c17d1.dir"
- assert fs.info("dir").get("md5") == expected
2022-12-17 14:52:18 +02:00
- _, _, obj = build(dvc.odb.local, "dir", fs, "md5", dry_run=True)
2022-08-05 15:27:14 +03:00
- assert obj.hash_info == HashInfo("md5", expected)
+ assert fs.info("dir").get("sha256") == expected
2022-12-17 14:52:18 +02:00
+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256", dry_run=True)
2022-08-05 15:27:14 +03:00
+ assert obj.hash_info == HashInfo("sha256", expected)
2022-12-17 14:52:18 +02:00
diff --git a/tests/unit/fs/test_dvc.py b/tests/unit/fs/test_dvc.py
index 17c8c9d5..d2087a5f 100644
--- a/tests/unit/fs/test_dvc.py
+++ b/tests/unit/fs/test_dvc.py
@@ -498,11 +498,11 @@ def test_get_hash_cached_file(tmp_dir, dvc, mocker):
2022-08-05 15:27:14 +03:00
tmp_dir.dvc_gen({"foo": "foo"})
2022-12-17 14:52:18 +02:00
fs = DvcFileSystem(repo=dvc)
2022-08-05 15:27:14 +03:00
expected = "acbd18db4cc2f85cedef654fccc4a4d8"
2022-12-17 14:52:18 +02:00
- assert fs.info("foo").get("md5") is None
- _, _, obj = build(dvc.odb.local, "foo", fs, "md5")
2022-08-05 15:27:14 +03:00
- assert obj.hash_info == HashInfo("md5", expected)
2022-12-17 14:52:18 +02:00
+ assert fs.info("foo").get("sha256") is None
+ _, _, obj = build(dvc.odb.local, "foo", fs, "sha256")
2022-08-05 15:27:14 +03:00
+ assert obj.hash_info == HashInfo("sha256", expected)
(tmp_dir / "foo").unlink()
2022-12-17 14:52:18 +02:00
- assert fs.info("foo")["md5"] == expected
+ assert fs.info("foo")["sha256"] == expected
2022-08-05 15:27:14 +03:00
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
2022-12-17 14:52:18 +02:00
@@ -511,17 +511,17 @@ def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
2022-08-05 15:27:14 +03:00
)
2022-12-17 14:52:18 +02:00
fs = DvcFileSystem(repo=dvc)
2022-08-05 15:27:14 +03:00
expected = "8761c4e9acad696bee718615e23e22db.dir"
2022-12-17 14:52:18 +02:00
- assert fs.info("dir").get("md5") is None
- _, _, obj = build(dvc.odb.local, "dir", fs, "md5")
+ assert fs.info("dir").get("sha256") is None
+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "8761c4e9acad696bee718615e23e22db.dir"
+ "sha256", "8761c4e9acad696bee718615e23e22db.dir"
)
shutil.rmtree(tmp_dir / "dir")
2022-12-17 14:52:18 +02:00
- assert fs.info("dir")["md5"] == expected
- _, _, obj = build(dvc.odb.local, "dir", fs, "md5")
+ assert fs.info("dir")["sha256"] == expected
+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "8761c4e9acad696bee718615e23e22db.dir"
+ "sha256", "8761c4e9acad696bee718615e23e22db.dir"
)
2022-12-17 14:52:18 +02:00
@@ -531,17 +531,17 @@ def test_get_hash_cached_granular(tmp_dir, dvc, mocker):
2022-08-05 15:27:14 +03:00
)
2022-12-17 14:52:18 +02:00
fs = DvcFileSystem(repo=dvc)
subdir = "dir/subdir"
- assert fs.info(subdir).get("md5") is None
- _, _, obj = build(dvc.odb.local, subdir, fs, "md5")
+ assert fs.info(subdir).get("sha256") is None
+ _, _, obj = build(dvc.odb.local, subdir, fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir"
+ "sha256", "af314506f1622d107e0ed3f14ec1a3b5.dir"
)
2022-12-17 14:52:18 +02:00
- assert fs.info(posixpath.join(subdir, "data")).get("md5") is None
- _, _, obj = build(dvc.odb.local, posixpath.join(subdir, "data"), fs, "md5")
2022-08-05 15:27:14 +03:00
- assert obj.hash_info == HashInfo("md5", "8d777f385d3dfec8815d20f7496026dc")
2022-12-17 14:52:18 +02:00
+ assert fs.info(posixpath.join(subdir, "data")).get("sha256") is None
+ _, _, obj = build(dvc.odb.local, posixpath.join(subdir, "data"), fs, "sha256")
2022-08-05 15:27:14 +03:00
+ assert obj.hash_info == HashInfo("sha256", "8d777f385d3dfec8815d20f7496026dc")
(tmp_dir / "dir" / "subdir" / "data").unlink()
assert (
2022-12-17 14:52:18 +02:00
- fs.info(posixpath.join(subdir, "data"))["md5"]
+ fs.info(posixpath.join(subdir, "data"))["sha256"]
2022-08-05 15:27:14 +03:00
== "8d777f385d3dfec8815d20f7496026dc"
)
2022-12-17 14:52:18 +02:00
@@ -559,9 +559,9 @@ def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
tmp_dir.scm.commit("add dir")
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
fs = DvcFileSystem(repo=dvc)
- _, _, obj = build(dvc.odb.local, "dir", fs, "md5")
+ _, _, obj = build(dvc.odb.local, "dir", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir"
+ "sha256", "e1d9e8eae5374860ae025ec84cfd85c7.dir"
)
2022-12-17 14:52:18 +02:00
@@ -570,28 +570,28 @@ def test_get_hash_dirty_file(tmp_dir, dvc):
from dvc_data.hashfile.hash import hash_file
2022-08-05 15:27:14 +03:00
tmp_dir.dvc_gen("file", "file")
- file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")
+ file_hash_info = HashInfo("sha256", "8c7dd922ad47494fc02c388e12c00eac")
(tmp_dir / "file").write_text("something")
- something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")
+ something_hash_info = HashInfo("sha256", "437b930db84b8079c2dd804a71936b5f")
# file is modified in workspace
2022-12-17 14:52:18 +02:00
# hash_file(file) should return workspace hash, not DVC cached hash
fs = DvcFileSystem(repo=dvc)
- assert fs.info("file").get("md5") is None
- staging, _, obj = build(dvc.odb.local, "file", fs, "md5")
+ assert fs.info("file").get("sha256") is None
+ staging, _, obj = build(dvc.odb.local, "file", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == something_hash_info
check(staging, obj)
2022-12-17 14:52:18 +02:00
# hash_file(file) should return DVC cached hash
(tmp_dir / "file").unlink()
- assert fs.info("file")["md5"] == file_hash_info.value
- _, hash_info = hash_file("file", fs, "md5", state=dvc.state)
+ assert fs.info("file")["sha256"] == file_hash_info.value
+ _, hash_info = hash_file("file", fs, "sha256", state=dvc.state)
2022-08-05 15:27:14 +03:00
assert hash_info == file_hash_info
2022-12-17 14:52:18 +02:00
# tmp_dir/file can be built even though it is missing in workspace since
2022-08-05 15:27:14 +03:00
# repofs will use the DVC cached hash (and refer to the local cache object)
2022-12-17 14:52:18 +02:00
- _, _, obj = build(dvc.odb.local, "file", fs, "md5")
+ _, _, obj = build(dvc.odb.local, "file", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == file_hash_info
2022-12-17 14:52:18 +02:00
@@ -600,9 +600,9 @@ def test_get_hash_dirty_dir(tmp_dir, dvc):
(tmp_dir / "dir" / "baz").write_text("baz")
2022-08-05 15:27:14 +03:00
2022-12-17 14:52:18 +02:00
fs = DvcFileSystem(repo=dvc)
- _, meta, obj = build(dvc.odb.local, "dir", fs, "md5")
+ _, meta, obj = build(dvc.odb.local, "dir", fs, "sha256")
2022-08-05 15:27:14 +03:00
assert obj.hash_info == HashInfo(
- "md5", "ba75a2162ca9c29acecb7957105a0bc2.dir"
+ "sha256", "ba75a2162ca9c29acecb7957105a0bc2.dir"
)
assert meta.nfiles == 3
diff --git a/tests/unit/output/test_local.py b/tests/unit/output/test_local.py
2022-12-17 14:52:18 +02:00
index 5d1ca10c..33ba3b46 100644
2022-08-05 15:27:14 +03:00
--- a/tests/unit/output/test_local.py
+++ b/tests/unit/output/test_local.py
@@ -64,12 +64,12 @@ class TestGetFilesNumber(TestDvc):
def test_return_multiple_for_dir(self):
o = self._get_output()
- o.hash_info = HashInfo("md5", "12345678.dir")
+ o.hash_info = HashInfo("sha256", "12345678.dir")
o.meta = Meta(nfiles=2)
self.assertEqual(2, o.get_files_number())
@patch.object(Output, "is_dir_checksum", False)
def test_return_1_on_single_file_cache(self):
o = self._get_output()
- o.hash_info = HashInfo("md5", "12345678")
+ o.hash_info = HashInfo("sha256", "12345678")
self.assertEqual(1, o.get_files_number())
diff --git a/tests/unit/output/test_output.py b/tests/unit/output/test_output.py
2022-12-17 14:52:18 +02:00
index e7c0dcef..4e466d63 100644
2022-08-05 15:27:14 +03:00
--- a/tests/unit/output/test_output.py
+++ b/tests/unit/output/test_output.py
2022-12-17 14:52:18 +02:00
@@ -31,7 +31,7 @@ def test_save_missing(dvc, mocker):
2022-08-05 15:27:14 +03:00
(
"3cc286c534a71504476da009ed174423",
"3cc286c534a71504476da009ed174423",
- ), # md5
+ ), # sha256
(
"d41d8cd98f00b204e9800998ecf8427e-38",
"d41d8cd98f00b204e9800998ecf8427e-38",
diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py
2022-12-17 14:52:18 +02:00
index efb1ead4..7d1b7406 100644
2022-08-05 15:27:14 +03:00
--- a/tests/unit/repo/test_repo.py
+++ b/tests/unit/repo/test_repo.py
@@ -48,8 +48,8 @@ def test_used_objs(tmp_dir, dvc, path):
tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}})
expected = {
- HashInfo("md5", "70922d6bf66eb073053a82f77d58c536.dir"),
- HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac"),
+ HashInfo("sha256", "70922d6bf66eb073053a82f77d58c536.dir"),
+ HashInfo("sha256", "8c7dd922ad47494fc02c388e12c00eac"),
}
used = set()
diff --git a/tests/unit/stage/test_loader_pipeline_file.py b/tests/unit/stage/test_loader_pipeline_file.py
2022-12-17 14:52:18 +02:00
index 5ef37201..83a00b49 100644
2022-08-05 15:27:14 +03:00
--- a/tests/unit/stage/test_loader_pipeline_file.py
+++ b/tests/unit/stage/test_loader_pipeline_file.py
@@ -20,8 +20,8 @@ def stage_data():
def lock_data():
return {
"cmd": "command",
- "deps": [{"path": "foo", "md5": "foo_checksum"}],
- "outs": [{"path": "bar", "md5": "bar_checksum"}],
+ "deps": [{"path": "foo", "sha256": "foo_checksum"}],
+ "outs": [{"path": "bar", "sha256": "bar_checksum"}],
}
@@ -35,8 +35,8 @@ def test_fill_from_lock_deps_outs(dvc, lock_data):
StageLoader.fill_from_lock(stage, lock_data)
- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum")
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
def test_fill_from_lock_outs_isexec(dvc):
@@ -48,12 +48,12 @@ def test_fill_from_lock_outs_isexec(dvc):
stage,
{
"cmd": "command",
- "outs": [{"path": "foo", "md5": "foo_checksum", "isexec": True}],
+ "outs": [{"path": "foo", "sha256": "foo_checksum", "isexec": True}],
},
)
assert stage.outs[0].def_path == "foo"
- assert stage.outs[0].hash_info == HashInfo("md5", "foo_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "foo_checksum")
assert stage.outs[0].meta.isexec
@@ -118,8 +118,8 @@ def test_fill_from_lock_missing_checksums(dvc, lock_data):
StageLoader.fill_from_lock(stage, lock_data)
- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum")
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
assert not stage.deps[1].hash_info and not stage.outs[1].hash_info
@@ -134,7 +134,7 @@ def test_fill_from_lock_use_appropriate_checksum(dvc, lock_data):
lock_data["deps"] = [{"path": "s3://dvc-temp/foo", "etag": "e-tag"}]
StageLoader.fill_from_lock(stage, lock_data)
assert stage.deps[0].hash_info == HashInfo("etag", "e-tag")
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
def test_fill_from_lock_with_missing_sections(dvc, lock_data):
@@ -145,12 +145,12 @@ def test_fill_from_lock_with_missing_sections(dvc, lock_data):
del lock["deps"]
StageLoader.fill_from_lock(stage, lock)
assert not stage.deps[0].hash_info
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
lock = deepcopy(lock_data)
del lock["outs"]
StageLoader.fill_from_lock(stage, lock)
- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum")
+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum")
assert not stage.outs[0].hash_info
@@ -173,9 +173,9 @@ def test_load_stage(dvc, stage_data, lock_data):
assert stage.cmd == "command"
assert stage.path == os.path.abspath(PIPELINE_FILE)
assert stage.deps[0].def_path == "foo"
- assert stage.deps[0].hash_info == HashInfo("md5", "foo_checksum")
+ assert stage.deps[0].hash_info == HashInfo("sha256", "foo_checksum")
assert stage.outs[0].def_path == "bar"
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
def test_load_stage_cmd_with_list(dvc, stage_data, lock_data):
@@ -210,8 +210,8 @@ def test_load_stage_with_params(dvc, stage_data, lock_data):
assert deps[0].def_path == "foo" and stage.outs[0].def_path == "bar"
assert params[0].def_path == "params.yaml"
assert params[0].hash_info == HashInfo("params", {"lorem": "ipsum"})
- assert deps[0].hash_info == HashInfo("md5", "foo_checksum")
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert deps[0].hash_info == HashInfo("sha256", "foo_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
@pytest.mark.parametrize("typ", ["metrics", "plots"])
@@ -221,7 +221,7 @@ def test_load_stage_with_metrics_and_plots(dvc, stage_data, lock_data, typ):
stage = StageLoader.load_stage(dvcfile, "stage-1", stage_data, lock_data)
assert stage.outs[0].def_path == "bar"
- assert stage.outs[0].hash_info == HashInfo("md5", "bar_checksum")
+ assert stage.outs[0].hash_info == HashInfo("sha256", "bar_checksum")
def test_load_changed_command(dvc, stage_data, lock_data):
diff --git a/tests/unit/stage/test_serialize_pipeline_lock.py b/tests/unit/stage/test_serialize_pipeline_lock.py
2022-12-17 14:52:18 +02:00
index c20fc19f..36846511 100644
2022-08-05 15:27:14 +03:00
--- a/tests/unit/stage/test_serialize_pipeline_lock.py
+++ b/tests/unit/stage/test_serialize_pipeline_lock.py
@@ -31,11 +31,11 @@ def test_lock(dvc):
def test_lock_deps(dvc):
stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
- stage.deps[0].hash_info = HashInfo("md5", "md-five")
+ stage.deps[0].hash_info = HashInfo("sha256", "md-five")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
- ("deps", [OrderedDict([("path", "input"), ("md5", "md-five")])]),
+ ("deps", [OrderedDict([("path", "input"), ("sha256", "md-five")])]),
]
)
@@ -44,16 +44,16 @@ def test_lock_deps_order(dvc):
stage = create_stage(
PipelineStage, dvc, deps=["input1", "input0"], **kwargs
)
- stage.deps[0].hash_info = HashInfo("md5", "md-one1")
- stage.deps[1].hash_info = HashInfo("md5", "md-zer0")
+ stage.deps[0].hash_info = HashInfo("sha256", "md-one1")
+ stage.deps[1].hash_info = HashInfo("sha256", "md-zer0")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
(
"deps",
[
- OrderedDict([("path", "input0"), ("md5", "md-zer0")]),
- OrderedDict([("path", "input1"), ("md5", "md-one1")]),
+ OrderedDict([("path", "input0"), ("sha256", "md-zer0")]),
+ OrderedDict([("path", "input1"), ("sha256", "md-one1")]),
],
),
]
2022-12-17 14:52:18 +02:00
@@ -142,11 +142,11 @@ def test_lock_params_without_targets(dvc, info, expected):
2022-08-05 15:27:14 +03:00
@pytest.mark.parametrize("typ", ["plots", "metrics", "outs"])
def test_lock_outs(dvc, typ):
stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs)
- stage.outs[0].hash_info = HashInfo("md5", "md-five")
+ stage.outs[0].hash_info = HashInfo("sha256", "md-five")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
- ("outs", [OrderedDict([("path", "input"), ("md5", "md-five")])]),
+ ("outs", [OrderedDict([("path", "input"), ("sha256", "md-five")])]),
]
)
2022-12-17 14:52:18 +02:00
@@ -154,7 +154,7 @@ def test_lock_outs(dvc, typ):
2022-08-05 15:27:14 +03:00
@pytest.mark.parametrize("typ", ["plots", "metrics", "outs"])
def test_lock_outs_isexec(dvc, typ):
stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs)
- stage.outs[0].hash_info = HashInfo("md5", "md-five")
+ stage.outs[0].hash_info = HashInfo("sha256", "md-five")
stage.outs[0].meta.isexec = True
assert to_single_stage_lockfile(stage) == OrderedDict(
[
2022-12-17 14:52:18 +02:00
@@ -165,7 +165,7 @@ def test_lock_outs_isexec(dvc, typ):
2022-08-05 15:27:14 +03:00
OrderedDict(
[
("path", "input"),
- ("md5", "md-five"),
+ ("sha256", "md-five"),
("isexec", True),
]
)
2022-12-17 14:52:18 +02:00
@@ -180,16 +180,16 @@ def test_lock_outs_order(dvc, typ):
2022-08-05 15:27:14 +03:00
stage = create_stage(
PipelineStage, dvc, **{typ: ["input1", "input0"]}, **kwargs
)
- stage.outs[0].hash_info = HashInfo("md5", "md-one1")
- stage.outs[1].hash_info = HashInfo("md5", "md-zer0")
+ stage.outs[0].hash_info = HashInfo("sha256", "md-one1")
+ stage.outs[1].hash_info = HashInfo("sha256", "md-zer0")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
(
"outs",
[
- OrderedDict([("path", "input0"), ("md5", "md-zer0")]),
- OrderedDict([("path", "input1"), ("md5", "md-one1")]),
+ OrderedDict([("path", "input0"), ("sha256", "md-zer0")]),
+ OrderedDict([("path", "input1"), ("sha256", "md-one1")]),
],
),
]
2022-12-17 14:52:18 +02:00
@@ -200,7 +200,7 @@ def test_dump_nondefault_hash(dvc):
2022-08-05 15:27:14 +03:00
stage = create_stage(
PipelineStage, dvc, deps=["s3://dvc-temp/file"], **kwargs
)
- stage.deps[0].hash_info = HashInfo("md5", "value")
+ stage.deps[0].hash_info = HashInfo("sha256", "value")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
2022-12-17 14:52:18 +02:00
@@ -208,7 +208,7 @@ def test_dump_nondefault_hash(dvc):
2022-08-05 15:27:14 +03:00
"deps",
[
OrderedDict(
- [("path", "s3://dvc-temp/file"), ("md5", "value")]
+ [("path", "s3://dvc-temp/file"), ("sha256", "value")]
)
],
),
2022-12-17 14:52:18 +02:00
@@ -227,23 +227,23 @@ def test_order(dvc):
2022-08-05 15:27:14 +03:00
)
params, deps = split_params_deps(stage)
- deps[0].hash_info = HashInfo("md5", "md-five")
+ deps[0].hash_info = HashInfo("sha256", "md-five")
params[0].hash_info = HashInfo("params", {"foo-param": "value"})
- stage.outs[0].hash_info = HashInfo("md5", "md5-output")
+ stage.outs[0].hash_info = HashInfo("sha256", "sha256-output")
assert to_single_stage_lockfile(stage) == OrderedDict(
[
("cmd", "command"),
- ("deps", [{"path": "input", "md5": "md-five"}]),
+ ("deps", [{"path": "input", "sha256": "md-five"}]),
("params", {"params.yaml": {"foo-param": "value"}}),
- ("outs", [{"path": "output", "md5": "md5-output"}]),
+ ("outs", [{"path": "output", "sha256": "sha256-output"}]),
]
)
def test_to_lockfile(dvc):
stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
- stage.deps[0].hash_info = HashInfo("md5", "md-five")
+ stage.deps[0].hash_info = HashInfo("sha256", "md-five")
entry = to_lockfile(stage)
assert len(entry) == 1
_Schema(LOCKFILE_STAGES_SCHEMA)(entry)
2022-12-17 14:52:18 +02:00
@@ -251,7 +251,7 @@ def test_to_lockfile(dvc):
2022-08-05 15:27:14 +03:00
"something": OrderedDict(
[
("cmd", "command"),
- ("deps", [{"path": "input", "md5": "md-five"}]),
+ ("deps", [{"path": "input", "sha256": "md-five"}]),
]
)
}
diff --git a/tests/unit/stage/test_stage.py b/tests/unit/stage/test_stage.py
index f564448a..fb6ac3d2 100644
--- a/tests/unit/stage/test_stage.py
+++ b/tests/unit/stage/test_stage.py
@@ -10,10 +10,10 @@ from dvc.stage import Stage
from dvc.stage.exceptions import StageUpdateError
TEST_STAGE_DICT = {
- "md5": "123456",
+ "sha256": "123456",
"cmd": "mycmd",
- "outs": [{"path": "a", "md5": "123456789"}],
- "deps": [{"path": "b", "md5": "987654321"}],
+ "outs": [{"path": "a", "sha256": "123456789"}],
+ "deps": [{"path": "b", "sha256": "987654321"}],
}
@@ -21,7 +21,7 @@ def test_stage_checksum(mocker):
stage = Stage(None, "path", cmd="mycmd")
mocker.patch.object(stage, "dumpd", return_value=TEST_STAGE_DICT)
- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b"
+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b"
def test_wdir_default_ignored(mocker):
@@ -29,7 +29,7 @@ def test_wdir_default_ignored(mocker):
d = dict(TEST_STAGE_DICT, wdir=".")
mocker.patch.object(stage, "dumpd", return_value=d)
- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b"
+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b"
def test_wdir_non_default_is_not_ignored(mocker):
@@ -37,7 +37,7 @@ def test_wdir_non_default_is_not_ignored(mocker):
d = dict(TEST_STAGE_DICT, wdir="..")
mocker.patch.object(stage, "dumpd", return_value=d)
- assert stage.compute_md5() == "2ceba15e87f6848aa756502c1e6d24e9"
+ assert stage.compute_sha256() == "2ceba15e87f6848aa756502c1e6d24e9"
def test_meta_ignored(mocker):
@@ -45,7 +45,7 @@ def test_meta_ignored(mocker):
d = dict(TEST_STAGE_DICT, meta={"author": "Suor"})
mocker.patch.object(stage, "dumpd", return_value=d)
- assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b"
+ assert stage.compute_sha256() == "e9521a22111493406ea64a88cda63e0b"
def test_path_conversion(dvc):
2022-12-17 14:52:18 +02:00
diff --git a/tests/unit/test_hashinfo.py b/tests/unit/test_hashinfo.py
index c7da09f3..776cb371 100644
--- a/tests/unit/test_hashinfo.py
+++ b/tests/unit/test_hashinfo.py
@@ -3,15 +3,15 @@ from dvc_data.hashfile.hash_info import HashInfo
def test_as_raw():
hash_info = HashInfo(
- "md5", "a1d0c6e83f027327d8461063f4ac58a6.dir", "objname"
+ "sha256", "a1d0c6e83f027327d8461063f4ac58a6.dir", "objname"
)
raw = hash_info.as_raw()
- assert hash_info.name == "md5"
+ assert hash_info.name == "sha256"
assert hash_info.value == "a1d0c6e83f027327d8461063f4ac58a6.dir"
assert hash_info.obj_name == "objname"
- assert raw.name == "md5"
+ assert raw.name == "sha256"
assert raw.value == "a1d0c6e83f027327d8461063f4ac58a6"
assert raw.obj_name == "objname"
2022-08-05 15:27:14 +03:00
diff --git a/tests/unit/test_lockfile.py b/tests/unit/test_lockfile.py
index ff42a775..831f9c45 100644
--- a/tests/unit/test_lockfile.py
+++ b/tests/unit/test_lockfile.py
@@ -31,8 +31,8 @@ def test_stage_dump_with_deps_and_outs(tmp_dir, dvc):
data = {
"s1": {
"cmd": "command",
- "deps": [{"md5": "1.txt", "path": "checksum"}],
- "outs": [{"md5": "2.txt", "path": "checksum"}],
+ "deps": [{"sha256": "1.txt", "path": "checksum"}],
+ "outs": [{"sha256": "2.txt", "path": "checksum"}],
}
}
(tmp_dir / "path.lock").dump(data)
@@ -70,11 +70,11 @@ def test_load_when_lockfile_does_not_exist(tmp_dir, dvc):
"s1": {
"cmd": "command",
"outs": [
- {"md5": "checksum", "path": "path", "random": "value"}
+ {"sha256": "checksum", "path": "path", "random": "value"}
],
}
},
- {"s1": {"cmd": "command", "deps": [{"md5": "checksum"}]}},
+ {"s1": {"cmd": "command", "deps": [{"sha256": "checksum"}]}},
],
)
def test_load_when_lockfile_is_corrupted(tmp_dir, dvc, corrupt_data):