613 lines
24 KiB
Diff
613 lines
24 KiB
Diff
|
commit d7d093fcb91b0d21faf36dbf62924f23b45abb9b
|
||
|
Author: Max <max@privatevoid.net>
|
||
|
Date: Sat Dec 17 14:23:59 2022 +0100
|
||
|
|
||
|
md5 to sha256 for 2.17.0
|
||
|
|
||
|
diff --git a/src/dvc_data/build.py b/src/dvc_data/build.py
|
||
|
index 3656ca5..3837763 100644
|
||
|
--- a/src/dvc_data/build.py
|
||
|
+++ b/src/dvc_data/build.py
|
||
|
@@ -63,7 +63,7 @@ def _build_file(path, fs, name, odb=None, upload_odb=None, dry_run=False):
|
||
|
state = odb.state if odb else None
|
||
|
meta, hash_info = hash_file(path, fs, name, state=state)
|
||
|
if upload_odb and not dry_run:
|
||
|
- assert odb and name == "md5"
|
||
|
+ assert odb and name == "sha256"
|
||
|
return _upload_file(path, fs, odb, upload_odb)
|
||
|
|
||
|
oid = hash_info.value
|
||
|
@@ -195,9 +195,9 @@ def _get_staging(odb: "HashFileDB") -> "ReferenceHashFileDB":
|
||
|
def _build_external_tree_info(odb, tree, name):
|
||
|
# NOTE: used only for external outputs. Initial reasoning was to be
|
||
|
# able to validate .dir files right in the workspace (e.g. check s3
|
||
|
- # etag), but could be dropped for manual validation with regular md5,
|
||
|
+ # etag), but could be dropped for manual validation with regular sha256,
|
||
|
# that would be universal for all clouds.
|
||
|
- assert odb and name != "md5"
|
||
|
+ assert odb and name != "sha256"
|
||
|
|
||
|
oid = tree.hash_info.value
|
||
|
odb.add(tree.path, tree.fs, oid)
|
||
|
@@ -253,7 +253,7 @@ def build(
|
||
|
**kwargs,
|
||
|
)
|
||
|
logger.debug("built tree '%s'", obj)
|
||
|
- if name != "md5":
|
||
|
+ if name != "sha256":
|
||
|
obj = _build_external_tree_info(odb, obj, name)
|
||
|
else:
|
||
|
meta, obj = _build_file(
|
||
|
diff --git a/src/dvc_data/cli.py b/src/dvc_data/cli.py
|
||
|
index 2348875..ece639a 100644
|
||
|
--- a/src/dvc_data/cli.py
|
||
|
+++ b/src/dvc_data/cli.py
|
||
|
@@ -29,8 +29,8 @@ from dvc_data.diff import ROOT
|
||
|
from dvc_data.diff import diff as _diff
|
||
|
from dvc_data.hashfile.db import HashFileDB
|
||
|
from dvc_data.hashfile.hash import algorithms_available
|
||
|
-from dvc_data.hashfile.hash import file_md5 as _file_md5
|
||
|
-from dvc_data.hashfile.hash import fobj_md5 as _fobj_md5
|
||
|
+from dvc_data.hashfile.hash import file_sha256 as _file_sha256
|
||
|
+from dvc_data.hashfile.hash import fobj_sha256 as _fobj_sha256
|
||
|
from dvc_data.hashfile.hash_info import HashInfo
|
||
|
from dvc_data.hashfile.obj import HashFile
|
||
|
from dvc_data.hashfile.state import State
|
||
|
@@ -93,7 +93,7 @@ app = Application(
|
||
|
@app.command(name="hash", help="Compute checksum of the file")
|
||
|
def hash_file(
|
||
|
file: Path = file_type,
|
||
|
- name: HashEnum = typer.Option("md5", "-n", "--name"),
|
||
|
+ name: HashEnum = typer.Option("sha256", "-n", "--name"),
|
||
|
progress: bool = typer.Option(False, "--progress", "-p"),
|
||
|
text: Optional[bool] = typer.Option(None, "--text/--binary", "-t/-b"),
|
||
|
):
|
||
|
@@ -108,9 +108,9 @@ def hash_file(
|
||
|
with callback:
|
||
|
if path == "-":
|
||
|
fobj = callback.wrap_attr(sys.stdin.buffer)
|
||
|
- hash_value = _fobj_md5(fobj, text=text, name=hash_name)
|
||
|
+ hash_value = _fobj_sha256(fobj, text=text, name=hash_name)
|
||
|
else:
|
||
|
- hash_value = _file_md5(
|
||
|
+ hash_value = _file_sha256(
|
||
|
path, name=hash_name, callback=callback, text=text
|
||
|
)
|
||
|
print(hash_name, hash_value, sep=": ")
|
||
|
@@ -262,7 +262,7 @@ def build(
|
||
|
fs = MemoryFileSystem()
|
||
|
fs.put_file(sys.stdin.buffer, fs_path)
|
||
|
|
||
|
- object_store, _, obj = _build(odb, fs_path, fs, name="md5")
|
||
|
+ object_store, _, obj = _build(odb, fs_path, fs, name="sha256")
|
||
|
if write:
|
||
|
_transfer(
|
||
|
object_store,
|
||
|
@@ -285,7 +285,7 @@ def ls(oid: str = typer.Argument(..., allow_dash=True)):
|
||
|
odb = get_odb()
|
||
|
oid = from_shortoid(odb, oid)
|
||
|
try:
|
||
|
- tree = Tree.load(odb, HashInfo("md5", oid))
|
||
|
+ tree = Tree.load(odb, HashInfo("sha256", oid))
|
||
|
except ObjectFormatError as exc:
|
||
|
typer.echo(exc, err=True)
|
||
|
raise typer.Exit(1) from exc
|
||
|
@@ -454,7 +454,7 @@ def apply_op(odb, obj, application):
|
||
|
)
|
||
|
|
||
|
fs = LocalFileSystem()
|
||
|
- _, meta, new_obj = _build(odb, path, fs, "md5")
|
||
|
+ _, meta, new_obj = _build(odb, path, fs, "sha256")
|
||
|
odb.add(path, fs, new_obj.hash_info.value, hardlink=False)
|
||
|
return obj.add(new, meta, new_obj.hash_info)
|
||
|
|
||
|
diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py
|
||
|
index c972981..ac45ad3 100644
|
||
|
--- a/src/dvc_data/fs.py
|
||
|
+++ b/src/dvc_data/fs.py
|
||
|
@@ -47,7 +47,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
|
||
|
if info["type"] == "directory":
|
||
|
raise IsADirectoryError
|
||
|
|
||
|
- value = info.get("md5")
|
||
|
+ value = info.get("sha256")
|
||
|
if not value:
|
||
|
raise FileNotFoundError
|
||
|
|
||
|
@@ -142,7 +142,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
|
||
|
|
||
|
def checksum(self, path):
|
||
|
info = self.info(path)
|
||
|
- md5 = info.get("md5")
|
||
|
- if md5:
|
||
|
- return md5
|
||
|
+ sha256 = info.get("sha256")
|
||
|
+ if sha256:
|
||
|
+ return sha256
|
||
|
raise NotImplementedError
|
||
|
diff --git a/src/dvc_data/hashfile/hash.py b/src/dvc_data/hashfile/hash.py
|
||
|
index 9bef01d..03f731c 100644
|
||
|
--- a/src/dvc_data/hashfile/hash.py
|
||
|
+++ b/src/dvc_data/hashfile/hash.py
|
||
|
@@ -42,7 +42,7 @@ class HashStreamFile(io.IOBase):
|
||
|
def __init__(
|
||
|
self,
|
||
|
fobj: BinaryIO,
|
||
|
- hash_name: str = "md5",
|
||
|
+ hash_name: str = "sha256",
|
||
|
text: Optional[bool] = None,
|
||
|
) -> None:
|
||
|
self.fobj = fobj
|
||
|
@@ -77,11 +77,11 @@ class HashStreamFile(io.IOBase):
|
||
|
return self.hasher.name
|
||
|
|
||
|
|
||
|
-def fobj_md5(
|
||
|
+def fobj_sha256(
|
||
|
fobj: BinaryIO,
|
||
|
chunk_size: int = 2**20,
|
||
|
text: Optional[bool] = None,
|
||
|
- name="md5",
|
||
|
+ name="sha256",
|
||
|
) -> str:
|
||
|
# ideally, we want the heuristics to be applied in a similar way,
|
||
|
# regardless of the size of the first chunk,
|
||
|
@@ -95,17 +95,17 @@ def fobj_md5(
|
||
|
return stream.hash_value
|
||
|
|
||
|
|
||
|
-def file_md5(
|
||
|
+def file_sha256(
|
||
|
fname: "AnyFSPath",
|
||
|
fs: "FileSystem" = localfs,
|
||
|
callback: "Callback" = DEFAULT_CALLBACK,
|
||
|
text: Optional[bool] = None,
|
||
|
- name: str = "md5",
|
||
|
+ name: str = "sha256",
|
||
|
) -> str:
|
||
|
size = fs.size(fname) or 0
|
||
|
callback.set_size(size)
|
||
|
with fs.open(fname, "rb") as fobj:
|
||
|
- return fobj_md5(callback.wrap_attr(fobj), text=text, name=name)
|
||
|
+ return fobj_sha256(callback.wrap_attr(fobj), text=text, name=name)
|
||
|
|
||
|
|
||
|
def _adapt_info(info: Dict[str, Any], scheme: str) -> Dict[str, Any]:
|
||
|
@@ -139,8 +139,8 @@ def _hash_file(
|
||
|
func = getattr(fs, name)
|
||
|
return str(func(path)), info
|
||
|
|
||
|
- if name == "md5":
|
||
|
- return file_md5(path, fs, callback=callback), info
|
||
|
+ if name == "sha256":
|
||
|
+ return file_sha256(path, fs, callback=callback), info
|
||
|
raise NotImplementedError
|
||
|
|
||
|
|
||
|
@@ -162,7 +162,7 @@ class LargeFileHashingCallback(TqdmCallback):
|
||
|
if self.size and self.size > self.LARGE_FILE_SIZE:
|
||
|
if not self._logged:
|
||
|
logger.info(
|
||
|
- f"Computing md5 for a large file '{self.fname}'. "
|
||
|
+ f"Computing sha256 for a large file '{self.fname}'. "
|
||
|
"This is only done once."
|
||
|
)
|
||
|
self._logged = True
|
||
|
diff --git a/src/dvc_data/hashfile/utils.py b/src/dvc_data/hashfile/utils.py
|
||
|
index ea2da9c..b1e7726 100644
|
||
|
--- a/src/dvc_data/hashfile/utils.py
|
||
|
+++ b/src/dvc_data/hashfile/utils.py
|
||
|
@@ -38,7 +38,7 @@ def get_mtime_and_size(
|
||
|
|
||
|
# We track file changes and moves, which cannot be detected with simply
|
||
|
# max(mtime(f) for f in non_ignored_files)
|
||
|
- hasher = hashlib.md5()
|
||
|
+ hasher = hashlib.sha256()
|
||
|
hasher.update(json.dumps(files_mtimes, sort_keys=True).encode("utf-8"))
|
||
|
mtime = hasher.hexdigest()
|
||
|
return mtime, size
|
||
|
diff --git a/src/dvc_data/objects/tree.py b/src/dvc_data/objects/tree.py
|
||
|
index 4f11fa4..7c8b417 100644
|
||
|
--- a/src/dvc_data/objects/tree.py
|
||
|
+++ b/src/dvc_data/objects/tree.py
|
||
|
@@ -81,7 +81,7 @@ class Tree(HashFile):
|
||
|
memfs.pipe_file(path, self.as_bytes())
|
||
|
self.fs = memfs
|
||
|
self.path = path
|
||
|
- _, self.hash_info = hash_file(path, memfs, "md5")
|
||
|
+ _, self.hash_info = hash_file(path, memfs, "sha256")
|
||
|
assert self.hash_info.value
|
||
|
self.hash_info.value += ".dir"
|
||
|
self.oid = self.hash_info.value
|
||
|
diff --git a/tests/hashfile/test_hash.py b/tests/hashfile/test_hash.py
|
||
|
index ca920d8..59bf765 100644
|
||
|
--- a/tests/hashfile/test_hash.py
|
||
|
+++ b/tests/hashfile/test_hash.py
|
||
|
@@ -2,21 +2,21 @@ from os import fspath
|
||
|
|
||
|
from dvc_objects.fs import LocalFileSystem
|
||
|
|
||
|
-from dvc_data.hashfile.hash import file_md5
|
||
|
+from dvc_data.hashfile.hash import file_sha256
|
||
|
|
||
|
|
||
|
-def test_file_md5(tmp_path):
|
||
|
+def test_file_sha256(tmp_path):
|
||
|
foo = tmp_path / "foo"
|
||
|
foo.write_text("foo content", encoding="utf8")
|
||
|
|
||
|
fs = LocalFileSystem()
|
||
|
- assert file_md5(fspath(foo), fs) == file_md5(fspath(foo), fs)
|
||
|
+ assert file_sha256(fspath(foo), fs) == file_sha256(fspath(foo), fs)
|
||
|
|
||
|
|
||
|
-def test_file_md5_crlf(tmp_path):
|
||
|
+def test_file_sha256_crlf(tmp_path):
|
||
|
fs = LocalFileSystem()
|
||
|
cr = tmp_path / "cr"
|
||
|
crlf = tmp_path / "crlf"
|
||
|
cr.write_bytes(b"a\nb\nc")
|
||
|
crlf.write_bytes(b"a\r\nb\r\nc")
|
||
|
- assert file_md5(fspath(cr), fs) == file_md5(fspath(crlf), fs)
|
||
|
+ assert file_sha256(fspath(cr), fs) == file_sha256(fspath(crlf), fs)
|
||
|
diff --git a/tests/hashfile/test_hash_stream.py b/tests/hashfile/test_hash_stream.py
|
||
|
index a003a29..e67b7c1 100644
|
||
|
--- a/tests/hashfile/test_hash_stream.py
|
||
|
+++ b/tests/hashfile/test_hash_stream.py
|
||
|
@@ -3,7 +3,7 @@ from os import fspath
|
||
|
import pytest
|
||
|
from dvc_objects.fs import LocalFileSystem
|
||
|
|
||
|
-from dvc_data.hashfile.hash import HashStreamFile, file_md5
|
||
|
+from dvc_data.hashfile.hash import HashStreamFile, file_sha256
|
||
|
from dvc_data.hashfile.istextfile import DEFAULT_CHUNK_SIZE, istextfile
|
||
|
|
||
|
|
||
|
@@ -23,7 +23,7 @@ def test_hashed_stream_reader(tmp_path):
|
||
|
assert stream_reader.read(1) == b"o"
|
||
|
assert stream_reader.tell() == 3
|
||
|
|
||
|
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
|
||
|
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
|
||
|
assert stream_reader.is_text
|
||
|
assert hex_digest == stream_reader.hash_value
|
||
|
|
||
|
@@ -46,7 +46,7 @@ def test_hashed_stream_reader_as_chunks(tmp_path):
|
||
|
|
||
|
assert stream_reader.tell() == actual_size == total_read
|
||
|
|
||
|
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
|
||
|
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
|
||
|
assert not stream_reader.is_text
|
||
|
assert hex_digest == stream_reader.hash_value
|
||
|
|
||
|
@@ -68,7 +68,7 @@ def test_hashed_stream_reader_compatibility(tmp_path, contents):
|
||
|
stream_reader.read(chunk_size)
|
||
|
|
||
|
local_fs = LocalFileSystem()
|
||
|
- hex_digest = file_md5(fspath(data), local_fs)
|
||
|
+ hex_digest = file_sha256(fspath(data), local_fs)
|
||
|
|
||
|
assert stream_reader.is_text is istextfile(fspath(data), local_fs)
|
||
|
assert stream_reader.hash_value == hex_digest
|
||
|
diff --git a/tests/hashfile/test_obj.py b/tests/hashfile/test_obj.py
|
||
|
index 01e9fc2..6c47b3c 100644
|
||
|
--- a/tests/hashfile/test_obj.py
|
||
|
+++ b/tests/hashfile/test_obj.py
|
||
|
@@ -3,7 +3,7 @@ from dvc_data.hashfile.obj import HashFile
|
||
|
|
||
|
|
||
|
def test_obj(tmp_upath):
|
||
|
- hash_info = HashInfo("md5", "123456")
|
||
|
+ hash_info = HashInfo("sha256", "123456")
|
||
|
obj = HashFile(tmp_upath, tmp_upath.fs, hash_info)
|
||
|
assert obj.path == tmp_upath
|
||
|
assert obj.fs == tmp_upath.fs
|
||
|
diff --git a/tests/objects/test_tree.py b/tests/objects/test_tree.py
|
||
|
index 6c514ba..611a72f 100644
|
||
|
--- a/tests/objects/test_tree.py
|
||
|
+++ b/tests/objects/test_tree.py
|
||
|
@@ -13,57 +13,57 @@ from dvc_data.objects.tree import Tree, _merge
|
||
|
([], {}),
|
||
|
(
|
||
|
[
|
||
|
- {"md5": "def", "relpath": "zzz"},
|
||
|
- {"md5": "123", "relpath": "foo"},
|
||
|
- {"md5": "abc", "relpath": "aaa"},
|
||
|
- {"md5": "456", "relpath": "bar"},
|
||
|
+ {"sha256": "def", "relpath": "zzz"},
|
||
|
+ {"sha256": "123", "relpath": "foo"},
|
||
|
+ {"sha256": "abc", "relpath": "aaa"},
|
||
|
+ {"sha256": "456", "relpath": "bar"},
|
||
|
],
|
||
|
{
|
||
|
- ("zzz",): (None, HashInfo("md5", "def")),
|
||
|
- ("foo",): (None, HashInfo("md5", "123")),
|
||
|
- ("bar",): (None, HashInfo("md5", "456")),
|
||
|
- ("aaa",): (None, HashInfo("md5", "abc")),
|
||
|
+ ("zzz",): (None, HashInfo("sha256", "def")),
|
||
|
+ ("foo",): (None, HashInfo("sha256", "123")),
|
||
|
+ ("bar",): (None, HashInfo("sha256", "456")),
|
||
|
+ ("aaa",): (None, HashInfo("sha256", "abc")),
|
||
|
},
|
||
|
),
|
||
|
(
|
||
|
[
|
||
|
- {"md5": "123", "relpath": "dir/b"},
|
||
|
- {"md5": "456", "relpath": "dir/z"},
|
||
|
- {"md5": "789", "relpath": "dir/a"},
|
||
|
- {"md5": "abc", "relpath": "b"},
|
||
|
- {"md5": "def", "relpath": "a"},
|
||
|
- {"md5": "ghi", "relpath": "z"},
|
||
|
- {"md5": "jkl", "relpath": "dir/subdir/b"},
|
||
|
- {"md5": "mno", "relpath": "dir/subdir/z"},
|
||
|
- {"md5": "pqr", "relpath": "dir/subdir/a"},
|
||
|
+ {"sha256": "123", "relpath": "dir/b"},
|
||
|
+ {"sha256": "456", "relpath": "dir/z"},
|
||
|
+ {"sha256": "789", "relpath": "dir/a"},
|
||
|
+ {"sha256": "abc", "relpath": "b"},
|
||
|
+ {"sha256": "def", "relpath": "a"},
|
||
|
+ {"sha256": "ghi", "relpath": "z"},
|
||
|
+ {"sha256": "jkl", "relpath": "dir/subdir/b"},
|
||
|
+ {"sha256": "mno", "relpath": "dir/subdir/z"},
|
||
|
+ {"sha256": "pqr", "relpath": "dir/subdir/a"},
|
||
|
],
|
||
|
{
|
||
|
("dir", "b"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "123"),
|
||
|
+ HashInfo("sha256", "123"),
|
||
|
),
|
||
|
("dir", "z"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "456"),
|
||
|
+ HashInfo("sha256", "456"),
|
||
|
),
|
||
|
("dir", "a"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "789"),
|
||
|
+ HashInfo("sha256", "789"),
|
||
|
),
|
||
|
- ("b",): (None, HashInfo("md5", "abc")),
|
||
|
- ("a",): (None, HashInfo("md5", "def")),
|
||
|
- ("z",): (None, HashInfo("md5", "ghi")),
|
||
|
+ ("b",): (None, HashInfo("sha256", "abc")),
|
||
|
+ ("a",): (None, HashInfo("sha256", "def")),
|
||
|
+ ("z",): (None, HashInfo("sha256", "ghi")),
|
||
|
("dir", "subdir", "b"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "jkl"),
|
||
|
+ HashInfo("sha256", "jkl"),
|
||
|
),
|
||
|
("dir", "subdir", "z"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "mno"),
|
||
|
+ HashInfo("sha256", "mno"),
|
||
|
),
|
||
|
("dir", "subdir", "a"): (
|
||
|
None,
|
||
|
- HashInfo("md5", "pqr"),
|
||
|
+ HashInfo("sha256", "pqr"),
|
||
|
),
|
||
|
},
|
||
|
),
|
||
|
@@ -81,19 +81,19 @@ def test_list(lst, trie_dict):
|
||
|
({}, 0),
|
||
|
(
|
||
|
{
|
||
|
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
|
||
|
- ("b",): (Meta(size=2), HashInfo("md5", "def")),
|
||
|
- ("c",): (Meta(size=3), HashInfo("md5", "ghi")),
|
||
|
- ("dir", "foo"): (Meta(size=4), HashInfo("md5", "jkl")),
|
||
|
- ("dir", "bar"): (Meta(size=5), HashInfo("md5", "mno")),
|
||
|
- ("dir", "baz"): (Meta(size=6), HashInfo("md5", "pqr")),
|
||
|
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
|
||
|
+ ("b",): (Meta(size=2), HashInfo("sha256", "def")),
|
||
|
+ ("c",): (Meta(size=3), HashInfo("sha256", "ghi")),
|
||
|
+ ("dir", "foo"): (Meta(size=4), HashInfo("sha256", "jkl")),
|
||
|
+ ("dir", "bar"): (Meta(size=5), HashInfo("sha256", "mno")),
|
||
|
+ ("dir", "baz"): (Meta(size=6), HashInfo("sha256", "pqr")),
|
||
|
},
|
||
|
6,
|
||
|
),
|
||
|
(
|
||
|
{
|
||
|
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
|
||
|
- ("b",): (Meta(), HashInfo("md5", "def")),
|
||
|
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
|
||
|
+ ("b",): (Meta(), HashInfo("sha256", "def")),
|
||
|
},
|
||
|
2,
|
||
|
),
|
||
|
@@ -110,15 +110,15 @@ def test_nfiles(trie_dict, nfiles):
|
||
|
[
|
||
|
{},
|
||
|
{
|
||
|
- ("a",): (None, HashInfo("md5", "abc")),
|
||
|
- ("b",): (None, HashInfo("md5", "def")),
|
||
|
- ("c",): (None, HashInfo("md5", "ghi")),
|
||
|
- ("dir", "foo"): (None, HashInfo("md5", "jkl")),
|
||
|
- ("dir", "bar"): (None, HashInfo("md5", "mno")),
|
||
|
- ("dir", "baz"): (None, HashInfo("md5", "pqr")),
|
||
|
- ("dir", "subdir", "1"): (None, HashInfo("md5", "stu")),
|
||
|
- ("dir", "subdir", "2"): (None, HashInfo("md5", "vwx")),
|
||
|
- ("dir", "subdir", "3"): (None, HashInfo("md5", "yz")),
|
||
|
+ ("a",): (None, HashInfo("sha256", "abc")),
|
||
|
+ ("b",): (None, HashInfo("sha256", "def")),
|
||
|
+ ("c",): (None, HashInfo("sha256", "ghi")),
|
||
|
+ ("dir", "foo"): (None, HashInfo("sha256", "jkl")),
|
||
|
+ ("dir", "bar"): (None, HashInfo("sha256", "mno")),
|
||
|
+ ("dir", "baz"): (None, HashInfo("sha256", "pqr")),
|
||
|
+ ("dir", "subdir", "1"): (None, HashInfo("sha256", "stu")),
|
||
|
+ ("dir", "subdir", "2"): (None, HashInfo("sha256", "vwx")),
|
||
|
+ ("dir", "subdir", "3"): (None, HashInfo("sha256", "yz")),
|
||
|
},
|
||
|
],
|
||
|
)
|
||
|
@@ -135,63 +135,63 @@ def test_items(trie_dict):
|
||
|
[
|
||
|
({}, {}, {}, {}),
|
||
|
(
|
||
|
- {("foo",): HashInfo("md5", "123")},
|
||
|
+ {("foo",): HashInfo("sha256", "123")},
|
||
|
{
|
||
|
- ("foo",): HashInfo("md5", "123"),
|
||
|
- ("bar",): HashInfo("md5", "345"),
|
||
|
+ ("foo",): HashInfo("sha256", "123"),
|
||
|
+ ("bar",): HashInfo("sha256", "345"),
|
||
|
},
|
||
|
{
|
||
|
- ("foo",): HashInfo("md5", "123"),
|
||
|
- ("baz",): HashInfo("md5", "678"),
|
||
|
+ ("foo",): HashInfo("sha256", "123"),
|
||
|
+ ("baz",): HashInfo("sha256", "678"),
|
||
|
},
|
||
|
{
|
||
|
- ("foo",): HashInfo("md5", "123"),
|
||
|
- ("bar",): HashInfo("md5", "345"),
|
||
|
- ("baz",): HashInfo("md5", "678"),
|
||
|
+ ("foo",): HashInfo("sha256", "123"),
|
||
|
+ ("bar",): HashInfo("sha256", "345"),
|
||
|
+ ("baz",): HashInfo("sha256", "678"),
|
||
|
},
|
||
|
),
|
||
|
(
|
||
|
{
|
||
|
- ("common",): HashInfo("md5", "123"),
|
||
|
- ("subdir", "foo"): HashInfo("md5", "345"),
|
||
|
+ ("common",): HashInfo("sha256", "123"),
|
||
|
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
||
|
},
|
||
|
{
|
||
|
- ("common",): HashInfo("md5", "123"),
|
||
|
- ("subdir", "foo"): HashInfo("md5", "345"),
|
||
|
- ("subdir", "bar"): HashInfo("md5", "678"),
|
||
|
+ ("common",): HashInfo("sha256", "123"),
|
||
|
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
||
|
+ ("subdir", "bar"): HashInfo("sha256", "678"),
|
||
|
},
|
||
|
{
|
||
|
- ("common",): HashInfo("md5", "123"),
|
||
|
- ("subdir", "foo"): HashInfo("md5", "345"),
|
||
|
- ("subdir", "baz"): HashInfo("md5", "91011"),
|
||
|
+ ("common",): HashInfo("sha256", "123"),
|
||
|
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
||
|
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
|
||
|
},
|
||
|
{
|
||
|
- ("common",): HashInfo("md5", "123"),
|
||
|
- ("subdir", "foo"): HashInfo("md5", "345"),
|
||
|
- ("subdir", "bar"): HashInfo("md5", "678"),
|
||
|
- ("subdir", "baz"): HashInfo("md5", "91011"),
|
||
|
+ ("common",): HashInfo("sha256", "123"),
|
||
|
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
||
|
+ ("subdir", "bar"): HashInfo("sha256", "678"),
|
||
|
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
|
||
|
},
|
||
|
),
|
||
|
(
|
||
|
{},
|
||
|
- {("foo",): HashInfo("md5", "123")},
|
||
|
- {("bar",): HashInfo("md5", "456")},
|
||
|
+ {("foo",): HashInfo("sha256", "123")},
|
||
|
+ {("bar",): HashInfo("sha256", "456")},
|
||
|
{
|
||
|
- ("foo",): HashInfo("md5", "123"),
|
||
|
- ("bar",): HashInfo("md5", "456"),
|
||
|
+ ("foo",): HashInfo("sha256", "123"),
|
||
|
+ ("bar",): HashInfo("sha256", "456"),
|
||
|
},
|
||
|
),
|
||
|
(
|
||
|
{},
|
||
|
{},
|
||
|
- {("bar",): HashInfo("md5", "123")},
|
||
|
- {("bar",): HashInfo("md5", "123")},
|
||
|
+ {("bar",): HashInfo("sha256", "123")},
|
||
|
+ {("bar",): HashInfo("sha256", "123")},
|
||
|
),
|
||
|
(
|
||
|
{},
|
||
|
- {("bar",): HashInfo("md5", "123")},
|
||
|
+ {("bar",): HashInfo("sha256", "123")},
|
||
|
{},
|
||
|
- {("bar",): HashInfo("md5", "123")},
|
||
|
+ {("bar",): HashInfo("sha256", "123")},
|
||
|
),
|
||
|
],
|
||
|
)
|
||
|
diff --git a/tests/test_index.py b/tests/test_index.py
|
||
|
index c6404fa..635bf66 100644
|
||
|
--- a/tests/test_index.py
|
||
|
+++ b/tests/test_index.py
|
||
|
@@ -17,8 +17,8 @@ def odb(tmp_upath_factory, as_filesystem):
|
||
|
|
||
|
data = tmp_upath_factory.mktemp() / "data.dir"
|
||
|
data.write_bytes(
|
||
|
- b'[{"md5": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
|
||
|
- b'{"md5": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
|
||
|
+ b'[{"sha256": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
|
||
|
+ b'{"sha256": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
|
||
|
)
|
||
|
|
||
|
bar = tmp_upath_factory.mktemp() / "bar"
|
||
|
@@ -46,13 +46,13 @@ def test_fs(tmp_upath, odb, as_filesystem):
|
||
|
("foo",): DataIndexEntry(
|
||
|
odb=odb,
|
||
|
hash_info=HashInfo(
|
||
|
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
|
||
|
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
|
||
|
),
|
||
|
),
|
||
|
("data",): DataIndexEntry(
|
||
|
odb=odb,
|
||
|
hash_info=HashInfo(
|
||
|
- name="md5",
|
||
|
+ name="sha256",
|
||
|
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
|
||
|
),
|
||
|
),
|
||
|
@@ -80,22 +80,22 @@ def test_build(tmp_upath, odb, as_filesystem):
|
||
|
},
|
||
|
)
|
||
|
build(index, tmp_upath, as_filesystem(tmp_upath.fs))
|
||
|
- assert index[("foo",)].hash_info.name == "md5"
|
||
|
+ assert index[("foo",)].hash_info.name == "sha256"
|
||
|
assert (
|
||
|
index[("foo",)].hash_info.value == "d3b07384d113edec49eaa6238ad5ff00"
|
||
|
)
|
||
|
assert index[("foo",)].odb == odb
|
||
|
- assert index[("data",)].hash_info.name == "md5"
|
||
|
+ assert index[("data",)].hash_info.name == "sha256"
|
||
|
assert (
|
||
|
index[("data",)].hash_info.value
|
||
|
== "1f69c66028c35037e8bf67e5bc4ceb6a.dir"
|
||
|
)
|
||
|
- assert index[("data", "bar")].hash_info.name == "md5"
|
||
|
+ assert index[("data", "bar")].hash_info.name == "sha256"
|
||
|
assert (
|
||
|
index[("data", "bar")].hash_info.value
|
||
|
== "c157a79031e1c40f85931829bc5fc552"
|
||
|
)
|
||
|
- assert index[("data", "baz")].hash_info.name == "md5"
|
||
|
+ assert index[("data", "baz")].hash_info.name == "sha256"
|
||
|
assert (
|
||
|
index[("data", "baz")].hash_info.value
|
||
|
== "258622b1688250cb619f3c9ccaefb7eb"
|
||
|
@@ -108,13 +108,13 @@ def test_checkout(tmp_upath, odb, as_filesystem):
|
||
|
("foo",): DataIndexEntry(
|
||
|
odb=odb,
|
||
|
hash_info=HashInfo(
|
||
|
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
|
||
|
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
|
||
|
),
|
||
|
),
|
||
|
("data",): DataIndexEntry(
|
||
|
odb=odb,
|
||
|
hash_info=HashInfo(
|
||
|
- name="md5",
|
||
|
+ name="sha256",
|
||
|
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
|
||
|
),
|
||
|
),
|