Platform 23.11 #96

Merged
max merged 47 commits from platform-23.11 into master 2023-12-05 01:59:28 +02:00
7 changed files with 0 additions and 1002 deletions
Showing only changes of commit 6f54fa16ca - Show all commits

View file

@ -2,57 +2,10 @@ let
tools = import ./lib/tools.nix;
pins = import ./sources;
# Override function for `overrideAttrs`: extends the package's postPatch so
# that every source file containing the whole word "md5" has all occurrences
# of "md5" rewritten to "sha256".
#
# old: the attribute set of the derivation being overridden; only its
#      optional `postPatch` is read.
# Returns an attribute set containing just the new `postPatch`.
dvcMd5ToSha256 = old: {
  # The explicit "\n" keeps the appended commands on their own line even when
  # the previous postPatch does not end with a newline. Without it the last
  # upstream command and `grep` would be fused into one broken shell line.
  postPatch = (old.postPatch or "") + "\n" + ''
    grep -RwlZ md5 | xargs -0 sed -i s/md5/sha256/g
  '';
  # -Z / -0 pass the file list NUL-delimited so paths containing whitespace
  # are handed to sed intact.
};
# Override function for `overrideAttrs`: extends the package's postPatch so
# that every source file containing the whole word "yaml" has all occurrences
# of "yaml" rewritten to "json", then restores the `ruamel.yaml` name that the
# first substitution turned into `ruamel.json`.
#
# old: the attribute set of the derivation being overridden; only its
#      optional `postPatch` is read.
# Returns an attribute set containing just the new `postPatch`.
dvcYamlToJson = old: {
  # The explicit "\n" keeps the appended commands on their own line even when
  # the previous postPatch does not end with a newline.
  postPatch = (old.postPatch or "") + "\n" + ''
    grep -RwlZ yaml | xargs -0 sed -i s/yaml/json/g
    grep -RwlZ ruamel.json | xargs -0 sed -i 's/ruamel\.json/ruamel.yaml/g'
  '';
  # -Z / -0 pass the file list NUL-delimited so paths containing whitespace
  # are handed to sed intact.
  # The second grep is only a prefilter; the sed expression is quoted and its
  # dot escaped so that only the literal text "ruamel.json" is restored, not
  # look-alikes such as "ruamel_json".
};
in with tools;
super: rec {
cachix = patch super.cachix "patches/base/cachix";
dvc = patch (super.dvc.overrideAttrs (old: let
filteredBaseDeps = super.lib.subtractLists [
super.python3Packages.dvc-data
super.python3Packages.dvc-http
] old.propagatedBuildInputs;
baseDeps = filteredBaseDeps ++ [
dvc-data
dvc-http
];
patched = dvcMd5ToSha256 old;
patched' = dvcYamlToJson patched;
in patched' // {
propagatedBuildInputs = with super.python3Packages; baseDeps ++ [
aiobotocore
boto3
(s3fs.overrideAttrs (_: { postPatch = ''
substituteInPlace requirements.txt \
--replace "fsspec==2023.3.0" "fsspec" \
--replace "aiobotocore~=2.1.0" "aiobotocore"
'';
}))
];
})) "patches/base/dvc";
dvc-data = (super.python3Packages.dvc-data.override {
inherit dvc-objects;
}).overrideAttrs dvcMd5ToSha256;
dvc-http = super.python3Packages.dvc-http.override {
inherit dvc-objects;
};
dvc-objects = super.python3Packages.dvc-objects.overrideAttrs dvcMd5ToSha256;
forgejo = patch super.forgejo "patches/base/forgejo";
garage = patch super.garage_0_8 "patches/base/garage";

View file

@ -56,7 +56,6 @@
in {
tools = with flakePkgs; [
agenix
dvc
graf
hci
npins

View file

@ -1,7 +1,6 @@
{
packages = {
cinny = [ "x86_64-linux" ];
dvc = [ "x86_64-linux" ];
hci = [ "x86_64-linux" ];
hydra = [ "x86_64-linux" ];
jellyfin = [ "x86_64-linux" ];

View file

@ -24,9 +24,6 @@
help = pkgs.hugo.meta.description;
command = "exec ${pkgs.hugo}/bin/hugo ${hugoArgsStr} \"$@\"";
};
tools = with self'.packages; [
dvc
];
};
packages.landing = with pkgs; let

View file

@ -1,612 +0,0 @@
commit d7d093fcb91b0d21faf36dbf62924f23b45abb9b
Author: Max <max@privatevoid.net>
Date: Sat Dec 17 14:23:59 2022 +0100
md5 to sha256 for 2.17.0
diff --git a/src/dvc_data/build.py b/src/dvc_data/build.py
index 3656ca5..3837763 100644
--- a/src/dvc_data/build.py
+++ b/src/dvc_data/build.py
@@ -63,7 +63,7 @@ def _build_file(path, fs, name, odb=None, upload_odb=None, dry_run=False):
state = odb.state if odb else None
meta, hash_info = hash_file(path, fs, name, state=state)
if upload_odb and not dry_run:
- assert odb and name == "md5"
+ assert odb and name == "sha256"
return _upload_file(path, fs, odb, upload_odb)
oid = hash_info.value
@@ -195,9 +195,9 @@ def _get_staging(odb: "HashFileDB") -> "ReferenceHashFileDB":
def _build_external_tree_info(odb, tree, name):
# NOTE: used only for external outputs. Initial reasoning was to be
# able to validate .dir files right in the workspace (e.g. check s3
- # etag), but could be dropped for manual validation with regular md5,
+ # etag), but could be dropped for manual validation with regular sha256,
# that would be universal for all clouds.
- assert odb and name != "md5"
+ assert odb and name != "sha256"
oid = tree.hash_info.value
odb.add(tree.path, tree.fs, oid)
@@ -253,7 +253,7 @@ def build(
**kwargs,
)
logger.debug("built tree '%s'", obj)
- if name != "md5":
+ if name != "sha256":
obj = _build_external_tree_info(odb, obj, name)
else:
meta, obj = _build_file(
diff --git a/src/dvc_data/cli.py b/src/dvc_data/cli.py
index 2348875..ece639a 100644
--- a/src/dvc_data/cli.py
+++ b/src/dvc_data/cli.py
@@ -29,8 +29,8 @@ from dvc_data.diff import ROOT
from dvc_data.diff import diff as _diff
from dvc_data.hashfile.db import HashFileDB
from dvc_data.hashfile.hash import algorithms_available
-from dvc_data.hashfile.hash import file_md5 as _file_md5
-from dvc_data.hashfile.hash import fobj_md5 as _fobj_md5
+from dvc_data.hashfile.hash import file_sha256 as _file_sha256
+from dvc_data.hashfile.hash import fobj_sha256 as _fobj_sha256
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.obj import HashFile
from dvc_data.hashfile.state import State
@@ -93,7 +93,7 @@ app = Application(
@app.command(name="hash", help="Compute checksum of the file")
def hash_file(
file: Path = file_type,
- name: HashEnum = typer.Option("md5", "-n", "--name"),
+ name: HashEnum = typer.Option("sha256", "-n", "--name"),
progress: bool = typer.Option(False, "--progress", "-p"),
text: Optional[bool] = typer.Option(None, "--text/--binary", "-t/-b"),
):
@@ -108,9 +108,9 @@ def hash_file(
with callback:
if path == "-":
fobj = callback.wrap_attr(sys.stdin.buffer)
- hash_value = _fobj_md5(fobj, text=text, name=hash_name)
+ hash_value = _fobj_sha256(fobj, text=text, name=hash_name)
else:
- hash_value = _file_md5(
+ hash_value = _file_sha256(
path, name=hash_name, callback=callback, text=text
)
print(hash_name, hash_value, sep=": ")
@@ -262,7 +262,7 @@ def build(
fs = MemoryFileSystem()
fs.put_file(sys.stdin.buffer, fs_path)
- object_store, _, obj = _build(odb, fs_path, fs, name="md5")
+ object_store, _, obj = _build(odb, fs_path, fs, name="sha256")
if write:
_transfer(
object_store,
@@ -285,7 +285,7 @@ def ls(oid: str = typer.Argument(..., allow_dash=True)):
odb = get_odb()
oid = from_shortoid(odb, oid)
try:
- tree = Tree.load(odb, HashInfo("md5", oid))
+ tree = Tree.load(odb, HashInfo("sha256", oid))
except ObjectFormatError as exc:
typer.echo(exc, err=True)
raise typer.Exit(1) from exc
@@ -454,7 +454,7 @@ def apply_op(odb, obj, application):
)
fs = LocalFileSystem()
- _, meta, new_obj = _build(odb, path, fs, "md5")
+ _, meta, new_obj = _build(odb, path, fs, "sha256")
odb.add(path, fs, new_obj.hash_info.value, hardlink=False)
return obj.add(new, meta, new_obj.hash_info)
diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py
index c972981..ac45ad3 100644
--- a/src/dvc_data/fs.py
+++ b/src/dvc_data/fs.py
@@ -47,7 +47,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
if info["type"] == "directory":
raise IsADirectoryError
- value = info.get("md5")
+ value = info.get("sha256")
if not value:
raise FileNotFoundError
@@ -142,7 +142,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
def checksum(self, path):
info = self.info(path)
- md5 = info.get("md5")
- if md5:
- return md5
+ sha256 = info.get("sha256")
+ if sha256:
+ return sha256
raise NotImplementedError
diff --git a/src/dvc_data/hashfile/hash.py b/src/dvc_data/hashfile/hash.py
index 9bef01d..03f731c 100644
--- a/src/dvc_data/hashfile/hash.py
+++ b/src/dvc_data/hashfile/hash.py
@@ -42,7 +42,7 @@ class HashStreamFile(io.IOBase):
def __init__(
self,
fobj: BinaryIO,
- hash_name: str = "md5",
+ hash_name: str = "sha256",
text: Optional[bool] = None,
) -> None:
self.fobj = fobj
@@ -77,11 +77,11 @@ class HashStreamFile(io.IOBase):
return self.hasher.name
-def fobj_md5(
+def fobj_sha256(
fobj: BinaryIO,
chunk_size: int = 2**20,
text: Optional[bool] = None,
- name="md5",
+ name="sha256",
) -> str:
# ideally, we want the heuristics to be applied in a similar way,
# regardless of the size of the first chunk,
@@ -95,17 +95,17 @@ def fobj_md5(
return stream.hash_value
-def file_md5(
+def file_sha256(
fname: "AnyFSPath",
fs: "FileSystem" = localfs,
callback: "Callback" = DEFAULT_CALLBACK,
text: Optional[bool] = None,
- name: str = "md5",
+ name: str = "sha256",
) -> str:
size = fs.size(fname) or 0
callback.set_size(size)
with fs.open(fname, "rb") as fobj:
- return fobj_md5(callback.wrap_attr(fobj), text=text, name=name)
+ return fobj_sha256(callback.wrap_attr(fobj), text=text, name=name)
def _adapt_info(info: Dict[str, Any], scheme: str) -> Dict[str, Any]:
@@ -139,8 +139,8 @@ def _hash_file(
func = getattr(fs, name)
return str(func(path)), info
- if name == "md5":
- return file_md5(path, fs, callback=callback), info
+ if name == "sha256":
+ return file_sha256(path, fs, callback=callback), info
raise NotImplementedError
@@ -162,7 +162,7 @@ class LargeFileHashingCallback(TqdmCallback):
if self.size and self.size > self.LARGE_FILE_SIZE:
if not self._logged:
logger.info(
- f"Computing md5 for a large file '{self.fname}'. "
+ f"Computing sha256 for a large file '{self.fname}'. "
"This is only done once."
)
self._logged = True
diff --git a/src/dvc_data/hashfile/utils.py b/src/dvc_data/hashfile/utils.py
index ea2da9c..b1e7726 100644
--- a/src/dvc_data/hashfile/utils.py
+++ b/src/dvc_data/hashfile/utils.py
@@ -38,7 +38,7 @@ def get_mtime_and_size(
# We track file changes and moves, which cannot be detected with simply
# max(mtime(f) for f in non_ignored_files)
- hasher = hashlib.md5()
+ hasher = hashlib.sha256()
hasher.update(json.dumps(files_mtimes, sort_keys=True).encode("utf-8"))
mtime = hasher.hexdigest()
return mtime, size
diff --git a/src/dvc_data/objects/tree.py b/src/dvc_data/objects/tree.py
index 4f11fa4..7c8b417 100644
--- a/src/dvc_data/objects/tree.py
+++ b/src/dvc_data/objects/tree.py
@@ -81,7 +81,7 @@ class Tree(HashFile):
memfs.pipe_file(path, self.as_bytes())
self.fs = memfs
self.path = path
- _, self.hash_info = hash_file(path, memfs, "md5")
+ _, self.hash_info = hash_file(path, memfs, "sha256")
assert self.hash_info.value
self.hash_info.value += ".dir"
self.oid = self.hash_info.value
diff --git a/tests/hashfile/test_hash.py b/tests/hashfile/test_hash.py
index ca920d8..59bf765 100644
--- a/tests/hashfile/test_hash.py
+++ b/tests/hashfile/test_hash.py
@@ -2,21 +2,21 @@ from os import fspath
from dvc_objects.fs import LocalFileSystem
-from dvc_data.hashfile.hash import file_md5
+from dvc_data.hashfile.hash import file_sha256
-def test_file_md5(tmp_path):
+def test_file_sha256(tmp_path):
foo = tmp_path / "foo"
foo.write_text("foo content", encoding="utf8")
fs = LocalFileSystem()
- assert file_md5(fspath(foo), fs) == file_md5(fspath(foo), fs)
+ assert file_sha256(fspath(foo), fs) == file_sha256(fspath(foo), fs)
-def test_file_md5_crlf(tmp_path):
+def test_file_sha256_crlf(tmp_path):
fs = LocalFileSystem()
cr = tmp_path / "cr"
crlf = tmp_path / "crlf"
cr.write_bytes(b"a\nb\nc")
crlf.write_bytes(b"a\r\nb\r\nc")
- assert file_md5(fspath(cr), fs) == file_md5(fspath(crlf), fs)
+ assert file_sha256(fspath(cr), fs) == file_sha256(fspath(crlf), fs)
diff --git a/tests/hashfile/test_hash_stream.py b/tests/hashfile/test_hash_stream.py
index a003a29..e67b7c1 100644
--- a/tests/hashfile/test_hash_stream.py
+++ b/tests/hashfile/test_hash_stream.py
@@ -3,7 +3,7 @@ from os import fspath
import pytest
from dvc_objects.fs import LocalFileSystem
-from dvc_data.hashfile.hash import HashStreamFile, file_md5
+from dvc_data.hashfile.hash import HashStreamFile, file_sha256
from dvc_data.hashfile.istextfile import DEFAULT_CHUNK_SIZE, istextfile
@@ -23,7 +23,7 @@ def test_hashed_stream_reader(tmp_path):
assert stream_reader.read(1) == b"o"
assert stream_reader.tell() == 3
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
assert stream_reader.is_text
assert hex_digest == stream_reader.hash_value
@@ -46,7 +46,7 @@ def test_hashed_stream_reader_as_chunks(tmp_path):
assert stream_reader.tell() == actual_size == total_read
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
assert not stream_reader.is_text
assert hex_digest == stream_reader.hash_value
@@ -68,7 +68,7 @@ def test_hashed_stream_reader_compatibility(tmp_path, contents):
stream_reader.read(chunk_size)
local_fs = LocalFileSystem()
- hex_digest = file_md5(fspath(data), local_fs)
+ hex_digest = file_sha256(fspath(data), local_fs)
assert stream_reader.is_text is istextfile(fspath(data), local_fs)
assert stream_reader.hash_value == hex_digest
diff --git a/tests/hashfile/test_obj.py b/tests/hashfile/test_obj.py
index 01e9fc2..6c47b3c 100644
--- a/tests/hashfile/test_obj.py
+++ b/tests/hashfile/test_obj.py
@@ -3,7 +3,7 @@ from dvc_data.hashfile.obj import HashFile
def test_obj(tmp_upath):
- hash_info = HashInfo("md5", "123456")
+ hash_info = HashInfo("sha256", "123456")
obj = HashFile(tmp_upath, tmp_upath.fs, hash_info)
assert obj.path == tmp_upath
assert obj.fs == tmp_upath.fs
diff --git a/tests/objects/test_tree.py b/tests/objects/test_tree.py
index 6c514ba..611a72f 100644
--- a/tests/objects/test_tree.py
+++ b/tests/objects/test_tree.py
@@ -13,57 +13,57 @@ from dvc_data.objects.tree import Tree, _merge
([], {}),
(
[
- {"md5": "def", "relpath": "zzz"},
- {"md5": "123", "relpath": "foo"},
- {"md5": "abc", "relpath": "aaa"},
- {"md5": "456", "relpath": "bar"},
+ {"sha256": "def", "relpath": "zzz"},
+ {"sha256": "123", "relpath": "foo"},
+ {"sha256": "abc", "relpath": "aaa"},
+ {"sha256": "456", "relpath": "bar"},
],
{
- ("zzz",): (None, HashInfo("md5", "def")),
- ("foo",): (None, HashInfo("md5", "123")),
- ("bar",): (None, HashInfo("md5", "456")),
- ("aaa",): (None, HashInfo("md5", "abc")),
+ ("zzz",): (None, HashInfo("sha256", "def")),
+ ("foo",): (None, HashInfo("sha256", "123")),
+ ("bar",): (None, HashInfo("sha256", "456")),
+ ("aaa",): (None, HashInfo("sha256", "abc")),
},
),
(
[
- {"md5": "123", "relpath": "dir/b"},
- {"md5": "456", "relpath": "dir/z"},
- {"md5": "789", "relpath": "dir/a"},
- {"md5": "abc", "relpath": "b"},
- {"md5": "def", "relpath": "a"},
- {"md5": "ghi", "relpath": "z"},
- {"md5": "jkl", "relpath": "dir/subdir/b"},
- {"md5": "mno", "relpath": "dir/subdir/z"},
- {"md5": "pqr", "relpath": "dir/subdir/a"},
+ {"sha256": "123", "relpath": "dir/b"},
+ {"sha256": "456", "relpath": "dir/z"},
+ {"sha256": "789", "relpath": "dir/a"},
+ {"sha256": "abc", "relpath": "b"},
+ {"sha256": "def", "relpath": "a"},
+ {"sha256": "ghi", "relpath": "z"},
+ {"sha256": "jkl", "relpath": "dir/subdir/b"},
+ {"sha256": "mno", "relpath": "dir/subdir/z"},
+ {"sha256": "pqr", "relpath": "dir/subdir/a"},
],
{
("dir", "b"): (
None,
- HashInfo("md5", "123"),
+ HashInfo("sha256", "123"),
),
("dir", "z"): (
None,
- HashInfo("md5", "456"),
+ HashInfo("sha256", "456"),
),
("dir", "a"): (
None,
- HashInfo("md5", "789"),
+ HashInfo("sha256", "789"),
),
- ("b",): (None, HashInfo("md5", "abc")),
- ("a",): (None, HashInfo("md5", "def")),
- ("z",): (None, HashInfo("md5", "ghi")),
+ ("b",): (None, HashInfo("sha256", "abc")),
+ ("a",): (None, HashInfo("sha256", "def")),
+ ("z",): (None, HashInfo("sha256", "ghi")),
("dir", "subdir", "b"): (
None,
- HashInfo("md5", "jkl"),
+ HashInfo("sha256", "jkl"),
),
("dir", "subdir", "z"): (
None,
- HashInfo("md5", "mno"),
+ HashInfo("sha256", "mno"),
),
("dir", "subdir", "a"): (
None,
- HashInfo("md5", "pqr"),
+ HashInfo("sha256", "pqr"),
),
},
),
@@ -81,19 +81,19 @@ def test_list(lst, trie_dict):
({}, 0),
(
{
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
- ("b",): (Meta(size=2), HashInfo("md5", "def")),
- ("c",): (Meta(size=3), HashInfo("md5", "ghi")),
- ("dir", "foo"): (Meta(size=4), HashInfo("md5", "jkl")),
- ("dir", "bar"): (Meta(size=5), HashInfo("md5", "mno")),
- ("dir", "baz"): (Meta(size=6), HashInfo("md5", "pqr")),
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
+ ("b",): (Meta(size=2), HashInfo("sha256", "def")),
+ ("c",): (Meta(size=3), HashInfo("sha256", "ghi")),
+ ("dir", "foo"): (Meta(size=4), HashInfo("sha256", "jkl")),
+ ("dir", "bar"): (Meta(size=5), HashInfo("sha256", "mno")),
+ ("dir", "baz"): (Meta(size=6), HashInfo("sha256", "pqr")),
},
6,
),
(
{
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
- ("b",): (Meta(), HashInfo("md5", "def")),
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
+ ("b",): (Meta(), HashInfo("sha256", "def")),
},
2,
),
@@ -110,15 +110,15 @@ def test_nfiles(trie_dict, nfiles):
[
{},
{
- ("a",): (None, HashInfo("md5", "abc")),
- ("b",): (None, HashInfo("md5", "def")),
- ("c",): (None, HashInfo("md5", "ghi")),
- ("dir", "foo"): (None, HashInfo("md5", "jkl")),
- ("dir", "bar"): (None, HashInfo("md5", "mno")),
- ("dir", "baz"): (None, HashInfo("md5", "pqr")),
- ("dir", "subdir", "1"): (None, HashInfo("md5", "stu")),
- ("dir", "subdir", "2"): (None, HashInfo("md5", "vwx")),
- ("dir", "subdir", "3"): (None, HashInfo("md5", "yz")),
+ ("a",): (None, HashInfo("sha256", "abc")),
+ ("b",): (None, HashInfo("sha256", "def")),
+ ("c",): (None, HashInfo("sha256", "ghi")),
+ ("dir", "foo"): (None, HashInfo("sha256", "jkl")),
+ ("dir", "bar"): (None, HashInfo("sha256", "mno")),
+ ("dir", "baz"): (None, HashInfo("sha256", "pqr")),
+ ("dir", "subdir", "1"): (None, HashInfo("sha256", "stu")),
+ ("dir", "subdir", "2"): (None, HashInfo("sha256", "vwx")),
+ ("dir", "subdir", "3"): (None, HashInfo("sha256", "yz")),
},
],
)
@@ -135,63 +135,63 @@ def test_items(trie_dict):
[
({}, {}, {}, {}),
(
- {("foo",): HashInfo("md5", "123")},
+ {("foo",): HashInfo("sha256", "123")},
{
- ("foo",): HashInfo("md5", "123"),
- ("bar",): HashInfo("md5", "345"),
+ ("foo",): HashInfo("sha256", "123"),
+ ("bar",): HashInfo("sha256", "345"),
},
{
- ("foo",): HashInfo("md5", "123"),
- ("baz",): HashInfo("md5", "678"),
+ ("foo",): HashInfo("sha256", "123"),
+ ("baz",): HashInfo("sha256", "678"),
},
{
- ("foo",): HashInfo("md5", "123"),
- ("bar",): HashInfo("md5", "345"),
- ("baz",): HashInfo("md5", "678"),
+ ("foo",): HashInfo("sha256", "123"),
+ ("bar",): HashInfo("sha256", "345"),
+ ("baz",): HashInfo("sha256", "678"),
},
),
(
{
- ("common",): HashInfo("md5", "123"),
- ("subdir", "foo"): HashInfo("md5", "345"),
+ ("common",): HashInfo("sha256", "123"),
+ ("subdir", "foo"): HashInfo("sha256", "345"),
},
{
- ("common",): HashInfo("md5", "123"),
- ("subdir", "foo"): HashInfo("md5", "345"),
- ("subdir", "bar"): HashInfo("md5", "678"),
+ ("common",): HashInfo("sha256", "123"),
+ ("subdir", "foo"): HashInfo("sha256", "345"),
+ ("subdir", "bar"): HashInfo("sha256", "678"),
},
{
- ("common",): HashInfo("md5", "123"),
- ("subdir", "foo"): HashInfo("md5", "345"),
- ("subdir", "baz"): HashInfo("md5", "91011"),
+ ("common",): HashInfo("sha256", "123"),
+ ("subdir", "foo"): HashInfo("sha256", "345"),
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
},
{
- ("common",): HashInfo("md5", "123"),
- ("subdir", "foo"): HashInfo("md5", "345"),
- ("subdir", "bar"): HashInfo("md5", "678"),
- ("subdir", "baz"): HashInfo("md5", "91011"),
+ ("common",): HashInfo("sha256", "123"),
+ ("subdir", "foo"): HashInfo("sha256", "345"),
+ ("subdir", "bar"): HashInfo("sha256", "678"),
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
},
),
(
{},
- {("foo",): HashInfo("md5", "123")},
- {("bar",): HashInfo("md5", "456")},
+ {("foo",): HashInfo("sha256", "123")},
+ {("bar",): HashInfo("sha256", "456")},
{
- ("foo",): HashInfo("md5", "123"),
- ("bar",): HashInfo("md5", "456"),
+ ("foo",): HashInfo("sha256", "123"),
+ ("bar",): HashInfo("sha256", "456"),
},
),
(
{},
{},
- {("bar",): HashInfo("md5", "123")},
- {("bar",): HashInfo("md5", "123")},
+ {("bar",): HashInfo("sha256", "123")},
+ {("bar",): HashInfo("sha256", "123")},
),
(
{},
- {("bar",): HashInfo("md5", "123")},
+ {("bar",): HashInfo("sha256", "123")},
{},
- {("bar",): HashInfo("md5", "123")},
+ {("bar",): HashInfo("sha256", "123")},
),
],
)
diff --git a/tests/test_index.py b/tests/test_index.py
index c6404fa..635bf66 100644
--- a/tests/test_index.py
+++ b/tests/test_index.py
@@ -17,8 +17,8 @@ def odb(tmp_upath_factory, as_filesystem):
data = tmp_upath_factory.mktemp() / "data.dir"
data.write_bytes(
- b'[{"md5": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
- b'{"md5": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
+ b'[{"sha256": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
+ b'{"sha256": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
)
bar = tmp_upath_factory.mktemp() / "bar"
@@ -46,13 +46,13 @@ def test_fs(tmp_upath, odb, as_filesystem):
("foo",): DataIndexEntry(
odb=odb,
hash_info=HashInfo(
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
),
),
("data",): DataIndexEntry(
odb=odb,
hash_info=HashInfo(
- name="md5",
+ name="sha256",
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
),
),
@@ -80,22 +80,22 @@ def test_build(tmp_upath, odb, as_filesystem):
},
)
build(index, tmp_upath, as_filesystem(tmp_upath.fs))
- assert index[("foo",)].hash_info.name == "md5"
+ assert index[("foo",)].hash_info.name == "sha256"
assert (
index[("foo",)].hash_info.value == "d3b07384d113edec49eaa6238ad5ff00"
)
assert index[("foo",)].odb == odb
- assert index[("data",)].hash_info.name == "md5"
+ assert index[("data",)].hash_info.name == "sha256"
assert (
index[("data",)].hash_info.value
== "1f69c66028c35037e8bf67e5bc4ceb6a.dir"
)
- assert index[("data", "bar")].hash_info.name == "md5"
+ assert index[("data", "bar")].hash_info.name == "sha256"
assert (
index[("data", "bar")].hash_info.value
== "c157a79031e1c40f85931829bc5fc552"
)
- assert index[("data", "baz")].hash_info.name == "md5"
+ assert index[("data", "baz")].hash_info.name == "sha256"
assert (
index[("data", "baz")].hash_info.value
== "258622b1688250cb619f3c9ccaefb7eb"
@@ -108,13 +108,13 @@ def test_checkout(tmp_upath, odb, as_filesystem):
("foo",): DataIndexEntry(
odb=odb,
hash_info=HashInfo(
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
),
),
("data",): DataIndexEntry(
odb=odb,
hash_info=HashInfo(
- name="md5",
+ name="sha256",
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
),
),

View file

@ -1,71 +0,0 @@
commit 2065fc148ce77be68c95a81a05391e1bb35da79d
Author: Max <max@privatevoid.net>
Date: Sat Dec 17 14:35:20 2022 +0100
md5 to sha256 for 2.17.0
diff --git a/src/dvc_objects/db.py b/src/dvc_objects/db.py
index 0f0ab16..3b87fdb 100644
--- a/src/dvc_objects/db.py
+++ b/src/dvc_objects/db.py
@@ -229,7 +229,7 @@ class ObjectDB:
returned.
NOTE: For large remotes the list of oids will be very
- big(e.g. 100M entries, md5 for each is 32 bytes, so ~3200Mb list)
+ big(e.g. 100M entries, sha256 for each is 64 bytes, so ~6400Mb list)
and we don't really need all of it at the same time, so it makes
sense to use a generator to gradually iterate over it, without
keeping all of it in memory.
diff --git a/src/dvc_objects/fs/__init__.py b/src/dvc_objects/fs/__init__.py
index d236fdc..74db3fe 100644
--- a/src/dvc_objects/fs/__init__.py
+++ b/src/dvc_objects/fs/__init__.py
@@ -62,7 +62,7 @@ def get_fs_cls(remote_conf, cls=None, scheme=None):
def as_filesystem(
fs: "AbstractFileSystem",
- checksum: str = "md5",
+ checksum: str = "sha256",
object_based: bool = False,
**fs_args,
) -> "FileSystem":
diff --git a/src/dvc_objects/fs/implementations/local.py b/src/dvc_objects/fs/implementations/local.py
index 7f888ec..3e1a61a 100644
--- a/src/dvc_objects/fs/implementations/local.py
+++ b/src/dvc_objects/fs/implementations/local.py
@@ -167,7 +167,7 @@ class LocalFileSystem(FileSystem):
sep = os.sep
protocol = "local"
- PARAM_CHECKSUM = "md5"
+ PARAM_CHECKSUM = "sha256"
PARAM_PATH = "path"
TRAVERSE_PREFIX_LEN = 2
diff --git a/src/dvc_objects/fs/implementations/memory.py b/src/dvc_objects/fs/implementations/memory.py
index 97702cb..c5b5ad7 100644
--- a/src/dvc_objects/fs/implementations/memory.py
+++ b/src/dvc_objects/fs/implementations/memory.py
@@ -3,7 +3,7 @@ from ..base import FileSystem
class MemoryFileSystem(FileSystem): # pylint:disable=abstract-method
protocol = "memory"
- PARAM_CHECKSUM = "md5"
+ PARAM_CHECKSUM = "sha256"
def __init__(self, global_store=True, trie_based=False, fs=None, **kwargs):
super().__init__(fs=fs, **kwargs)
diff --git a/src/dvc_objects/fs/implementations/ssh.py b/src/dvc_objects/fs/implementations/ssh.py
index 8b93faf..8aed5e4 100644
--- a/src/dvc_objects/fs/implementations/ssh.py
+++ b/src/dvc_objects/fs/implementations/ssh.py
@@ -24,7 +24,7 @@ def ask_password(host, user, port):
class SSHFileSystem(FileSystem):
protocol = "ssh"
REQUIRES = {"sshfs": "sshfs"}
- PARAM_CHECKSUM = "md5"
+ PARAM_CHECKSUM = "sha256"
@classmethod
def _strip_protocol(cls, path: str) -> str:

View file

@ -1,267 +0,0 @@
diff --git a/dvc/analytics.py b/dvc/analytics.py
deleted file mode 100644
index 6e3dc91..0000000
--- a/dvc/analytics.py
+++ /dev/null
@@ -1,156 +0,0 @@
-import json
-import logging
-import os
-
-from .env import DVC_NO_ANALYTICS
-
-logger = logging.getLogger(__name__)
-
-
-def collect_and_send_report(args=None, return_code=None):
- """
- Collect information from the runtime/environment and the command
- being executed into a report and send it over the network.
-
- To prevent analytics from blocking the execution of the main thread,
- sending the report is done in a separate process.
-
- The inter-process communication happens through a file containing the
- report as a JSON, where the _collector_ generates it and the _sender_
- removes it after sending it.
- """
- import tempfile
-
- from dvc.daemon import daemon
-
- report = {}
-
- # Include command execution information on the report only when available.
- if args and hasattr(args, "func"):
- report.update({"cmd_class": args.func.__name__})
-
- if return_code is not None:
- report.update({"cmd_return_code": return_code})
-
- with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj:
- json.dump(report, fobj)
- daemon(["analytics", fobj.name])
-
-
-def is_enabled():
- from dvc.config import Config, to_bool
- from dvc.utils import env2bool
-
- if env2bool("DVC_TEST"):
- return False
-
- enabled = not os.getenv(DVC_NO_ANALYTICS)
- if enabled:
- enabled = to_bool(
- Config.from_cwd(validate=False).get("core", {}).get("analytics", "true")
- )
-
- logger.debug("Analytics is %sabled.", "en" if enabled else "dis")
-
- return enabled
-
-
-def send(path):
- """
- Side effect: Removes the report after sending it.
-
- The report is generated and stored in a temporary file, see:
- `collect_and_send_report`. Sending happens on another process,
- thus, the need of removing such file afterwards.
- """
- import requests
-
- url = "https://analytics.dvc.org"
- headers = {"content-type": "application/json"}
-
- with open(path, encoding="utf-8") as fobj:
- report = json.load(fobj)
-
- report.update(_runtime_info())
-
- try:
- requests.post(url, json=report, headers=headers, timeout=5)
- except requests.exceptions.RequestException:
- logger.debug("failed to send analytics report", exc_info=True)
-
- os.remove(path)
-
-
-def _scm_in_use():
- from dvc.exceptions import NotDvcRepoError
- from dvc.repo import Repo
- from dvc.scm import NoSCM
-
- from .scm import SCM, SCMError
-
- try:
- scm = SCM(root_dir=Repo.find_root())
- return type(scm).__name__
- except SCMError:
- return NoSCM.__name__
- except NotDvcRepoError:
- pass
-
-
-def _runtime_info():
- """
- Gather information from the environment where DVC runs to fill a report.
- """
- from iterative_telemetry import _generate_ci_id, find_or_create_user_id
-
- from dvc import __version__
- from dvc.utils import is_binary
-
- ci_id = _generate_ci_id()
- if ci_id:
- group_id, user_id = ci_id
- else:
- group_id, user_id = None, find_or_create_user_id()
-
- return {
- "dvc_version": __version__,
- "is_binary": is_binary(),
- "scm_class": _scm_in_use(),
- "system_info": _system_info(),
- "user_id": user_id,
- "group_id": group_id,
- }
-
-
-def _system_info():
- import platform
- import sys
-
- import distro
-
- system = platform.system()
-
- if system == "Windows":
- version = sys.getwindowsversion() # type: ignore[attr-defined]
-
- return {
- "os": "windows",
- "windows_version_build": version.build,
- "windows_version_major": version.major,
- "windows_version_minor": version.minor,
- "windows_version_service_pack": version.service_pack,
- }
-
- if system == "Darwin":
- return {"os": "mac", "mac_version": platform.mac_ver()[0]}
-
- if system == "Linux":
- return {
- "os": "linux",
- "linux_distro": distro.id(),
- "linux_distro_like": distro.like(),
- "linux_distro_version": distro.version(),
- }
-
- # We don't collect data for any other system.
- raise NotImplementedError
diff --git a/dvc/cli/__init__.py b/dvc/cli/__init__.py
index 274b564..b601d84 100644
--- a/dvc/cli/__init__.py
+++ b/dvc/cli/__init__.py
@@ -236,11 +236,6 @@ def main(argv=None): # noqa: C901, PLR0912, PLR0915
ret = _log_exceptions(exc) or 255
try:
- from dvc import analytics
-
- if analytics.is_enabled():
- analytics.collect_and_send_report(args, ret)
-
return ret
finally:
logger.setLevel(outer_log_level)
diff --git a/dvc/commands/daemon.py b/dvc/commands/daemon.py
index 35d6e90..d5a7b6e 100644
--- a/dvc/commands/daemon.py
+++ b/dvc/commands/daemon.py
@@ -26,15 +26,6 @@ class CmdDaemonUpdater(CmdDaemonBase):
return 0
-class CmdDaemonAnalytics(CmdDaemonBase):
- def run(self):
- from dvc import analytics
-
- analytics.send(self.args.target)
-
- return 0
-
-
def add_parser(subparsers, parent_parser):
DAEMON_HELP = "Service daemon."
daemon_parser = subparsers.add_parser(
@@ -59,15 +50,3 @@ def add_parser(subparsers, parent_parser):
help=DAEMON_UPDATER_HELP,
)
daemon_updater_parser.set_defaults(func=CmdDaemonUpdater)
-
- DAEMON_ANALYTICS_HELP = "Send dvc usage analytics."
- daemon_analytics_parser = daemon_subparsers.add_parser(
- "analytics",
- parents=[parent_parser],
- description=DAEMON_ANALYTICS_HELP,
- help=DAEMON_ANALYTICS_HELP,
- )
- daemon_analytics_parser.add_argument(
- "target", help="Analytics file."
- ).complete = completion.FILE
- daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics)
diff --git a/dvc/commands/init.py b/dvc/commands/init.py
index ca44919..05730aa 100644
--- a/dvc/commands/init.py
+++ b/dvc/commands/init.py
@@ -3,7 +3,6 @@ import logging
import colorama
-from dvc import analytics
from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import append_doc_link
from dvc.utils import boxify
@@ -15,16 +14,6 @@ logger = logging.getLogger(__name__)
def _welcome_message():
from dvc.ui import ui
- if analytics.is_enabled():
- ui.write(
- boxify(
- "DVC has enabled anonymous aggregate usage analytics.\n"
- "Read the analytics documentation (and how to opt-out) here:\n"
- + fmt_link("https://dvc.org/doc/user-guide/analytics"),
- border_color="red",
- )
- )
-
msg = (
"{yellow}What's next?{nc}\n"
"{yellow}------------{nc}\n"
diff --git a/dvc/config_schema.py b/dvc/config_schema.py
index 2e36e90..3d9e402 100644
--- a/dvc/config_schema.py
+++ b/dvc/config_schema.py
@@ -144,7 +144,6 @@ SCHEMA = {
"remote": Lower,
"checksum_jobs": All(Coerce(int), Range(1)),
Optional("interactive", default=False): Bool,
- Optional("analytics", default=True): Bool,
Optional("hardlink_lock", default=False): Bool,
Optional("no_scm", default=False): Bool,
Optional("autostage", default=False): Bool,
diff --git a/dvc/env.py b/dvc/env.py
index 081ec9d..06c1332 100644
--- a/dvc/env.py
+++ b/dvc/env.py
@@ -7,7 +7,6 @@ DVC_EXP_GIT_REMOTE = "DVC_EXP_GIT_REMOTE"
DVC_EXP_NAME = "DVC_EXP_NAME"
DVC_GLOBAL_CONFIG_DIR = "DVC_GLOBAL_CONFIG_DIR"
DVC_IGNORE_ISATTY = "DVC_IGNORE_ISATTY"
-DVC_NO_ANALYTICS = "DVC_NO_ANALYTICS"
DVC_PAGER = "DVC_PAGER"
DVC_ROOT = "DVC_ROOT"
DVC_SHOW_TRACEBACK = "DVC_SHOW_TRACEBACK"