Platform 23.11 #96
7 changed files with 0 additions and 1002 deletions
|
@ -2,57 +2,10 @@ let
|
||||||
tools = import ./lib/tools.nix;
|
tools = import ./lib/tools.nix;
|
||||||
pins = import ./sources;
|
pins = import ./sources;
|
||||||
|
|
||||||
dvcMd5ToSha256 = old: {
|
|
||||||
postPatch = (old.postPatch or "") + ''
|
|
||||||
grep -Rwl md5 | xargs sed -i s/md5/sha256/g
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
dvcYamlToJson = old: {
|
|
||||||
postPatch = (old.postPatch or "") + ''
|
|
||||||
grep -Rwl yaml | xargs sed -i s/yaml/json/g
|
|
||||||
grep -Rwl ruamel.json | xargs sed -i s/ruamel.json/ruamel.yaml/g
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
in with tools;
|
in with tools;
|
||||||
super: rec {
|
super: rec {
|
||||||
cachix = patch super.cachix "patches/base/cachix";
|
cachix = patch super.cachix "patches/base/cachix";
|
||||||
|
|
||||||
dvc = patch (super.dvc.overrideAttrs (old: let
|
|
||||||
filteredBaseDeps = super.lib.subtractLists [
|
|
||||||
super.python3Packages.dvc-data
|
|
||||||
super.python3Packages.dvc-http
|
|
||||||
] old.propagatedBuildInputs;
|
|
||||||
|
|
||||||
baseDeps = filteredBaseDeps ++ [
|
|
||||||
dvc-data
|
|
||||||
dvc-http
|
|
||||||
];
|
|
||||||
patched = dvcMd5ToSha256 old;
|
|
||||||
patched' = dvcYamlToJson patched;
|
|
||||||
in patched' // {
|
|
||||||
propagatedBuildInputs = with super.python3Packages; baseDeps ++ [
|
|
||||||
aiobotocore
|
|
||||||
boto3
|
|
||||||
(s3fs.overrideAttrs (_: { postPatch = ''
|
|
||||||
substituteInPlace requirements.txt \
|
|
||||||
--replace "fsspec==2023.3.0" "fsspec" \
|
|
||||||
--replace "aiobotocore~=2.1.0" "aiobotocore"
|
|
||||||
'';
|
|
||||||
}))
|
|
||||||
];
|
|
||||||
})) "patches/base/dvc";
|
|
||||||
|
|
||||||
dvc-data = (super.python3Packages.dvc-data.override {
|
|
||||||
inherit dvc-objects;
|
|
||||||
}).overrideAttrs dvcMd5ToSha256;
|
|
||||||
|
|
||||||
dvc-http = super.python3Packages.dvc-http.override {
|
|
||||||
inherit dvc-objects;
|
|
||||||
};
|
|
||||||
|
|
||||||
dvc-objects = super.python3Packages.dvc-objects.overrideAttrs dvcMd5ToSha256;
|
|
||||||
|
|
||||||
forgejo = patch super.forgejo "patches/base/forgejo";
|
forgejo = patch super.forgejo "patches/base/forgejo";
|
||||||
|
|
||||||
garage = patch super.garage_0_8 "patches/base/garage";
|
garage = patch super.garage_0_8 "patches/base/garage";
|
||||||
|
|
|
@ -56,7 +56,6 @@
|
||||||
in {
|
in {
|
||||||
tools = with flakePkgs; [
|
tools = with flakePkgs; [
|
||||||
agenix
|
agenix
|
||||||
dvc
|
|
||||||
graf
|
graf
|
||||||
hci
|
hci
|
||||||
npins
|
npins
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
{
|
{
|
||||||
packages = {
|
packages = {
|
||||||
cinny = [ "x86_64-linux" ];
|
cinny = [ "x86_64-linux" ];
|
||||||
dvc = [ "x86_64-linux" ];
|
|
||||||
hci = [ "x86_64-linux" ];
|
hci = [ "x86_64-linux" ];
|
||||||
hydra = [ "x86_64-linux" ];
|
hydra = [ "x86_64-linux" ];
|
||||||
jellyfin = [ "x86_64-linux" ];
|
jellyfin = [ "x86_64-linux" ];
|
||||||
|
|
|
@ -24,9 +24,6 @@
|
||||||
help = pkgs.hugo.meta.description;
|
help = pkgs.hugo.meta.description;
|
||||||
command = "exec ${pkgs.hugo}/bin/hugo ${hugoArgsStr} \"$@\"";
|
command = "exec ${pkgs.hugo}/bin/hugo ${hugoArgsStr} \"$@\"";
|
||||||
};
|
};
|
||||||
tools = with self'.packages; [
|
|
||||||
dvc
|
|
||||||
];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
packages.landing = with pkgs; let
|
packages.landing = with pkgs; let
|
||||||
|
|
|
@ -1,612 +0,0 @@
|
||||||
commit d7d093fcb91b0d21faf36dbf62924f23b45abb9b
|
|
||||||
Author: Max <max@privatevoid.net>
|
|
||||||
Date: Sat Dec 17 14:23:59 2022 +0100
|
|
||||||
|
|
||||||
md5 to sha256 for 2.17.0
|
|
||||||
|
|
||||||
diff --git a/src/dvc_data/build.py b/src/dvc_data/build.py
|
|
||||||
index 3656ca5..3837763 100644
|
|
||||||
--- a/src/dvc_data/build.py
|
|
||||||
+++ b/src/dvc_data/build.py
|
|
||||||
@@ -63,7 +63,7 @@ def _build_file(path, fs, name, odb=None, upload_odb=None, dry_run=False):
|
|
||||||
state = odb.state if odb else None
|
|
||||||
meta, hash_info = hash_file(path, fs, name, state=state)
|
|
||||||
if upload_odb and not dry_run:
|
|
||||||
- assert odb and name == "md5"
|
|
||||||
+ assert odb and name == "sha256"
|
|
||||||
return _upload_file(path, fs, odb, upload_odb)
|
|
||||||
|
|
||||||
oid = hash_info.value
|
|
||||||
@@ -195,9 +195,9 @@ def _get_staging(odb: "HashFileDB") -> "ReferenceHashFileDB":
|
|
||||||
def _build_external_tree_info(odb, tree, name):
|
|
||||||
# NOTE: used only for external outputs. Initial reasoning was to be
|
|
||||||
# able to validate .dir files right in the workspace (e.g. check s3
|
|
||||||
- # etag), but could be dropped for manual validation with regular md5,
|
|
||||||
+ # etag), but could be dropped for manual validation with regular sha256,
|
|
||||||
# that would be universal for all clouds.
|
|
||||||
- assert odb and name != "md5"
|
|
||||||
+ assert odb and name != "sha256"
|
|
||||||
|
|
||||||
oid = tree.hash_info.value
|
|
||||||
odb.add(tree.path, tree.fs, oid)
|
|
||||||
@@ -253,7 +253,7 @@ def build(
|
|
||||||
**kwargs,
|
|
||||||
)
|
|
||||||
logger.debug("built tree '%s'", obj)
|
|
||||||
- if name != "md5":
|
|
||||||
+ if name != "sha256":
|
|
||||||
obj = _build_external_tree_info(odb, obj, name)
|
|
||||||
else:
|
|
||||||
meta, obj = _build_file(
|
|
||||||
diff --git a/src/dvc_data/cli.py b/src/dvc_data/cli.py
|
|
||||||
index 2348875..ece639a 100644
|
|
||||||
--- a/src/dvc_data/cli.py
|
|
||||||
+++ b/src/dvc_data/cli.py
|
|
||||||
@@ -29,8 +29,8 @@ from dvc_data.diff import ROOT
|
|
||||||
from dvc_data.diff import diff as _diff
|
|
||||||
from dvc_data.hashfile.db import HashFileDB
|
|
||||||
from dvc_data.hashfile.hash import algorithms_available
|
|
||||||
-from dvc_data.hashfile.hash import file_md5 as _file_md5
|
|
||||||
-from dvc_data.hashfile.hash import fobj_md5 as _fobj_md5
|
|
||||||
+from dvc_data.hashfile.hash import file_sha256 as _file_sha256
|
|
||||||
+from dvc_data.hashfile.hash import fobj_sha256 as _fobj_sha256
|
|
||||||
from dvc_data.hashfile.hash_info import HashInfo
|
|
||||||
from dvc_data.hashfile.obj import HashFile
|
|
||||||
from dvc_data.hashfile.state import State
|
|
||||||
@@ -93,7 +93,7 @@ app = Application(
|
|
||||||
@app.command(name="hash", help="Compute checksum of the file")
|
|
||||||
def hash_file(
|
|
||||||
file: Path = file_type,
|
|
||||||
- name: HashEnum = typer.Option("md5", "-n", "--name"),
|
|
||||||
+ name: HashEnum = typer.Option("sha256", "-n", "--name"),
|
|
||||||
progress: bool = typer.Option(False, "--progress", "-p"),
|
|
||||||
text: Optional[bool] = typer.Option(None, "--text/--binary", "-t/-b"),
|
|
||||||
):
|
|
||||||
@@ -108,9 +108,9 @@ def hash_file(
|
|
||||||
with callback:
|
|
||||||
if path == "-":
|
|
||||||
fobj = callback.wrap_attr(sys.stdin.buffer)
|
|
||||||
- hash_value = _fobj_md5(fobj, text=text, name=hash_name)
|
|
||||||
+ hash_value = _fobj_sha256(fobj, text=text, name=hash_name)
|
|
||||||
else:
|
|
||||||
- hash_value = _file_md5(
|
|
||||||
+ hash_value = _file_sha256(
|
|
||||||
path, name=hash_name, callback=callback, text=text
|
|
||||||
)
|
|
||||||
print(hash_name, hash_value, sep=": ")
|
|
||||||
@@ -262,7 +262,7 @@ def build(
|
|
||||||
fs = MemoryFileSystem()
|
|
||||||
fs.put_file(sys.stdin.buffer, fs_path)
|
|
||||||
|
|
||||||
- object_store, _, obj = _build(odb, fs_path, fs, name="md5")
|
|
||||||
+ object_store, _, obj = _build(odb, fs_path, fs, name="sha256")
|
|
||||||
if write:
|
|
||||||
_transfer(
|
|
||||||
object_store,
|
|
||||||
@@ -285,7 +285,7 @@ def ls(oid: str = typer.Argument(..., allow_dash=True)):
|
|
||||||
odb = get_odb()
|
|
||||||
oid = from_shortoid(odb, oid)
|
|
||||||
try:
|
|
||||||
- tree = Tree.load(odb, HashInfo("md5", oid))
|
|
||||||
+ tree = Tree.load(odb, HashInfo("sha256", oid))
|
|
||||||
except ObjectFormatError as exc:
|
|
||||||
typer.echo(exc, err=True)
|
|
||||||
raise typer.Exit(1) from exc
|
|
||||||
@@ -454,7 +454,7 @@ def apply_op(odb, obj, application):
|
|
||||||
)
|
|
||||||
|
|
||||||
fs = LocalFileSystem()
|
|
||||||
- _, meta, new_obj = _build(odb, path, fs, "md5")
|
|
||||||
+ _, meta, new_obj = _build(odb, path, fs, "sha256")
|
|
||||||
odb.add(path, fs, new_obj.hash_info.value, hardlink=False)
|
|
||||||
return obj.add(new, meta, new_obj.hash_info)
|
|
||||||
|
|
||||||
diff --git a/src/dvc_data/fs.py b/src/dvc_data/fs.py
|
|
||||||
index c972981..ac45ad3 100644
|
|
||||||
--- a/src/dvc_data/fs.py
|
|
||||||
+++ b/src/dvc_data/fs.py
|
|
||||||
@@ -47,7 +47,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
|
|
||||||
if info["type"] == "directory":
|
|
||||||
raise IsADirectoryError
|
|
||||||
|
|
||||||
- value = info.get("md5")
|
|
||||||
+ value = info.get("sha256")
|
|
||||||
if not value:
|
|
||||||
raise FileNotFoundError
|
|
||||||
|
|
||||||
@@ -142,7 +142,7 @@ class DataFileSystem(AbstractFileSystem): # pylint:disable=abstract-method
|
|
||||||
|
|
||||||
def checksum(self, path):
|
|
||||||
info = self.info(path)
|
|
||||||
- md5 = info.get("md5")
|
|
||||||
- if md5:
|
|
||||||
- return md5
|
|
||||||
+ sha256 = info.get("sha256")
|
|
||||||
+ if sha256:
|
|
||||||
+ return sha256
|
|
||||||
raise NotImplementedError
|
|
||||||
diff --git a/src/dvc_data/hashfile/hash.py b/src/dvc_data/hashfile/hash.py
|
|
||||||
index 9bef01d..03f731c 100644
|
|
||||||
--- a/src/dvc_data/hashfile/hash.py
|
|
||||||
+++ b/src/dvc_data/hashfile/hash.py
|
|
||||||
@@ -42,7 +42,7 @@ class HashStreamFile(io.IOBase):
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
fobj: BinaryIO,
|
|
||||||
- hash_name: str = "md5",
|
|
||||||
+ hash_name: str = "sha256",
|
|
||||||
text: Optional[bool] = None,
|
|
||||||
) -> None:
|
|
||||||
self.fobj = fobj
|
|
||||||
@@ -77,11 +77,11 @@ class HashStreamFile(io.IOBase):
|
|
||||||
return self.hasher.name
|
|
||||||
|
|
||||||
|
|
||||||
-def fobj_md5(
|
|
||||||
+def fobj_sha256(
|
|
||||||
fobj: BinaryIO,
|
|
||||||
chunk_size: int = 2**20,
|
|
||||||
text: Optional[bool] = None,
|
|
||||||
- name="md5",
|
|
||||||
+ name="sha256",
|
|
||||||
) -> str:
|
|
||||||
# ideally, we want the heuristics to be applied in a similar way,
|
|
||||||
# regardless of the size of the first chunk,
|
|
||||||
@@ -95,17 +95,17 @@ def fobj_md5(
|
|
||||||
return stream.hash_value
|
|
||||||
|
|
||||||
|
|
||||||
-def file_md5(
|
|
||||||
+def file_sha256(
|
|
||||||
fname: "AnyFSPath",
|
|
||||||
fs: "FileSystem" = localfs,
|
|
||||||
callback: "Callback" = DEFAULT_CALLBACK,
|
|
||||||
text: Optional[bool] = None,
|
|
||||||
- name: str = "md5",
|
|
||||||
+ name: str = "sha256",
|
|
||||||
) -> str:
|
|
||||||
size = fs.size(fname) or 0
|
|
||||||
callback.set_size(size)
|
|
||||||
with fs.open(fname, "rb") as fobj:
|
|
||||||
- return fobj_md5(callback.wrap_attr(fobj), text=text, name=name)
|
|
||||||
+ return fobj_sha256(callback.wrap_attr(fobj), text=text, name=name)
|
|
||||||
|
|
||||||
|
|
||||||
def _adapt_info(info: Dict[str, Any], scheme: str) -> Dict[str, Any]:
|
|
||||||
@@ -139,8 +139,8 @@ def _hash_file(
|
|
||||||
func = getattr(fs, name)
|
|
||||||
return str(func(path)), info
|
|
||||||
|
|
||||||
- if name == "md5":
|
|
||||||
- return file_md5(path, fs, callback=callback), info
|
|
||||||
+ if name == "sha256":
|
|
||||||
+ return file_sha256(path, fs, callback=callback), info
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
@@ -162,7 +162,7 @@ class LargeFileHashingCallback(TqdmCallback):
|
|
||||||
if self.size and self.size > self.LARGE_FILE_SIZE:
|
|
||||||
if not self._logged:
|
|
||||||
logger.info(
|
|
||||||
- f"Computing md5 for a large file '{self.fname}'. "
|
|
||||||
+ f"Computing sha256 for a large file '{self.fname}'. "
|
|
||||||
"This is only done once."
|
|
||||||
)
|
|
||||||
self._logged = True
|
|
||||||
diff --git a/src/dvc_data/hashfile/utils.py b/src/dvc_data/hashfile/utils.py
|
|
||||||
index ea2da9c..b1e7726 100644
|
|
||||||
--- a/src/dvc_data/hashfile/utils.py
|
|
||||||
+++ b/src/dvc_data/hashfile/utils.py
|
|
||||||
@@ -38,7 +38,7 @@ def get_mtime_and_size(
|
|
||||||
|
|
||||||
# We track file changes and moves, which cannot be detected with simply
|
|
||||||
# max(mtime(f) for f in non_ignored_files)
|
|
||||||
- hasher = hashlib.md5()
|
|
||||||
+ hasher = hashlib.sha256()
|
|
||||||
hasher.update(json.dumps(files_mtimes, sort_keys=True).encode("utf-8"))
|
|
||||||
mtime = hasher.hexdigest()
|
|
||||||
return mtime, size
|
|
||||||
diff --git a/src/dvc_data/objects/tree.py b/src/dvc_data/objects/tree.py
|
|
||||||
index 4f11fa4..7c8b417 100644
|
|
||||||
--- a/src/dvc_data/objects/tree.py
|
|
||||||
+++ b/src/dvc_data/objects/tree.py
|
|
||||||
@@ -81,7 +81,7 @@ class Tree(HashFile):
|
|
||||||
memfs.pipe_file(path, self.as_bytes())
|
|
||||||
self.fs = memfs
|
|
||||||
self.path = path
|
|
||||||
- _, self.hash_info = hash_file(path, memfs, "md5")
|
|
||||||
+ _, self.hash_info = hash_file(path, memfs, "sha256")
|
|
||||||
assert self.hash_info.value
|
|
||||||
self.hash_info.value += ".dir"
|
|
||||||
self.oid = self.hash_info.value
|
|
||||||
diff --git a/tests/hashfile/test_hash.py b/tests/hashfile/test_hash.py
|
|
||||||
index ca920d8..59bf765 100644
|
|
||||||
--- a/tests/hashfile/test_hash.py
|
|
||||||
+++ b/tests/hashfile/test_hash.py
|
|
||||||
@@ -2,21 +2,21 @@ from os import fspath
|
|
||||||
|
|
||||||
from dvc_objects.fs import LocalFileSystem
|
|
||||||
|
|
||||||
-from dvc_data.hashfile.hash import file_md5
|
|
||||||
+from dvc_data.hashfile.hash import file_sha256
|
|
||||||
|
|
||||||
|
|
||||||
-def test_file_md5(tmp_path):
|
|
||||||
+def test_file_sha256(tmp_path):
|
|
||||||
foo = tmp_path / "foo"
|
|
||||||
foo.write_text("foo content", encoding="utf8")
|
|
||||||
|
|
||||||
fs = LocalFileSystem()
|
|
||||||
- assert file_md5(fspath(foo), fs) == file_md5(fspath(foo), fs)
|
|
||||||
+ assert file_sha256(fspath(foo), fs) == file_sha256(fspath(foo), fs)
|
|
||||||
|
|
||||||
|
|
||||||
-def test_file_md5_crlf(tmp_path):
|
|
||||||
+def test_file_sha256_crlf(tmp_path):
|
|
||||||
fs = LocalFileSystem()
|
|
||||||
cr = tmp_path / "cr"
|
|
||||||
crlf = tmp_path / "crlf"
|
|
||||||
cr.write_bytes(b"a\nb\nc")
|
|
||||||
crlf.write_bytes(b"a\r\nb\r\nc")
|
|
||||||
- assert file_md5(fspath(cr), fs) == file_md5(fspath(crlf), fs)
|
|
||||||
+ assert file_sha256(fspath(cr), fs) == file_sha256(fspath(crlf), fs)
|
|
||||||
diff --git a/tests/hashfile/test_hash_stream.py b/tests/hashfile/test_hash_stream.py
|
|
||||||
index a003a29..e67b7c1 100644
|
|
||||||
--- a/tests/hashfile/test_hash_stream.py
|
|
||||||
+++ b/tests/hashfile/test_hash_stream.py
|
|
||||||
@@ -3,7 +3,7 @@ from os import fspath
|
|
||||||
import pytest
|
|
||||||
from dvc_objects.fs import LocalFileSystem
|
|
||||||
|
|
||||||
-from dvc_data.hashfile.hash import HashStreamFile, file_md5
|
|
||||||
+from dvc_data.hashfile.hash import HashStreamFile, file_sha256
|
|
||||||
from dvc_data.hashfile.istextfile import DEFAULT_CHUNK_SIZE, istextfile
|
|
||||||
|
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ def test_hashed_stream_reader(tmp_path):
|
|
||||||
assert stream_reader.read(1) == b"o"
|
|
||||||
assert stream_reader.tell() == 3
|
|
||||||
|
|
||||||
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
|
|
||||||
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
|
|
||||||
assert stream_reader.is_text
|
|
||||||
assert hex_digest == stream_reader.hash_value
|
|
||||||
|
|
||||||
@@ -46,7 +46,7 @@ def test_hashed_stream_reader_as_chunks(tmp_path):
|
|
||||||
|
|
||||||
assert stream_reader.tell() == actual_size == total_read
|
|
||||||
|
|
||||||
- hex_digest = file_md5(fspath(foo), LocalFileSystem())
|
|
||||||
+ hex_digest = file_sha256(fspath(foo), LocalFileSystem())
|
|
||||||
assert not stream_reader.is_text
|
|
||||||
assert hex_digest == stream_reader.hash_value
|
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ def test_hashed_stream_reader_compatibility(tmp_path, contents):
|
|
||||||
stream_reader.read(chunk_size)
|
|
||||||
|
|
||||||
local_fs = LocalFileSystem()
|
|
||||||
- hex_digest = file_md5(fspath(data), local_fs)
|
|
||||||
+ hex_digest = file_sha256(fspath(data), local_fs)
|
|
||||||
|
|
||||||
assert stream_reader.is_text is istextfile(fspath(data), local_fs)
|
|
||||||
assert stream_reader.hash_value == hex_digest
|
|
||||||
diff --git a/tests/hashfile/test_obj.py b/tests/hashfile/test_obj.py
|
|
||||||
index 01e9fc2..6c47b3c 100644
|
|
||||||
--- a/tests/hashfile/test_obj.py
|
|
||||||
+++ b/tests/hashfile/test_obj.py
|
|
||||||
@@ -3,7 +3,7 @@ from dvc_data.hashfile.obj import HashFile
|
|
||||||
|
|
||||||
|
|
||||||
def test_obj(tmp_upath):
|
|
||||||
- hash_info = HashInfo("md5", "123456")
|
|
||||||
+ hash_info = HashInfo("sha256", "123456")
|
|
||||||
obj = HashFile(tmp_upath, tmp_upath.fs, hash_info)
|
|
||||||
assert obj.path == tmp_upath
|
|
||||||
assert obj.fs == tmp_upath.fs
|
|
||||||
diff --git a/tests/objects/test_tree.py b/tests/objects/test_tree.py
|
|
||||||
index 6c514ba..611a72f 100644
|
|
||||||
--- a/tests/objects/test_tree.py
|
|
||||||
+++ b/tests/objects/test_tree.py
|
|
||||||
@@ -13,57 +13,57 @@ from dvc_data.objects.tree import Tree, _merge
|
|
||||||
([], {}),
|
|
||||||
(
|
|
||||||
[
|
|
||||||
- {"md5": "def", "relpath": "zzz"},
|
|
||||||
- {"md5": "123", "relpath": "foo"},
|
|
||||||
- {"md5": "abc", "relpath": "aaa"},
|
|
||||||
- {"md5": "456", "relpath": "bar"},
|
|
||||||
+ {"sha256": "def", "relpath": "zzz"},
|
|
||||||
+ {"sha256": "123", "relpath": "foo"},
|
|
||||||
+ {"sha256": "abc", "relpath": "aaa"},
|
|
||||||
+ {"sha256": "456", "relpath": "bar"},
|
|
||||||
],
|
|
||||||
{
|
|
||||||
- ("zzz",): (None, HashInfo("md5", "def")),
|
|
||||||
- ("foo",): (None, HashInfo("md5", "123")),
|
|
||||||
- ("bar",): (None, HashInfo("md5", "456")),
|
|
||||||
- ("aaa",): (None, HashInfo("md5", "abc")),
|
|
||||||
+ ("zzz",): (None, HashInfo("sha256", "def")),
|
|
||||||
+ ("foo",): (None, HashInfo("sha256", "123")),
|
|
||||||
+ ("bar",): (None, HashInfo("sha256", "456")),
|
|
||||||
+ ("aaa",): (None, HashInfo("sha256", "abc")),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[
|
|
||||||
- {"md5": "123", "relpath": "dir/b"},
|
|
||||||
- {"md5": "456", "relpath": "dir/z"},
|
|
||||||
- {"md5": "789", "relpath": "dir/a"},
|
|
||||||
- {"md5": "abc", "relpath": "b"},
|
|
||||||
- {"md5": "def", "relpath": "a"},
|
|
||||||
- {"md5": "ghi", "relpath": "z"},
|
|
||||||
- {"md5": "jkl", "relpath": "dir/subdir/b"},
|
|
||||||
- {"md5": "mno", "relpath": "dir/subdir/z"},
|
|
||||||
- {"md5": "pqr", "relpath": "dir/subdir/a"},
|
|
||||||
+ {"sha256": "123", "relpath": "dir/b"},
|
|
||||||
+ {"sha256": "456", "relpath": "dir/z"},
|
|
||||||
+ {"sha256": "789", "relpath": "dir/a"},
|
|
||||||
+ {"sha256": "abc", "relpath": "b"},
|
|
||||||
+ {"sha256": "def", "relpath": "a"},
|
|
||||||
+ {"sha256": "ghi", "relpath": "z"},
|
|
||||||
+ {"sha256": "jkl", "relpath": "dir/subdir/b"},
|
|
||||||
+ {"sha256": "mno", "relpath": "dir/subdir/z"},
|
|
||||||
+ {"sha256": "pqr", "relpath": "dir/subdir/a"},
|
|
||||||
],
|
|
||||||
{
|
|
||||||
("dir", "b"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "123"),
|
|
||||||
+ HashInfo("sha256", "123"),
|
|
||||||
),
|
|
||||||
("dir", "z"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "456"),
|
|
||||||
+ HashInfo("sha256", "456"),
|
|
||||||
),
|
|
||||||
("dir", "a"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "789"),
|
|
||||||
+ HashInfo("sha256", "789"),
|
|
||||||
),
|
|
||||||
- ("b",): (None, HashInfo("md5", "abc")),
|
|
||||||
- ("a",): (None, HashInfo("md5", "def")),
|
|
||||||
- ("z",): (None, HashInfo("md5", "ghi")),
|
|
||||||
+ ("b",): (None, HashInfo("sha256", "abc")),
|
|
||||||
+ ("a",): (None, HashInfo("sha256", "def")),
|
|
||||||
+ ("z",): (None, HashInfo("sha256", "ghi")),
|
|
||||||
("dir", "subdir", "b"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "jkl"),
|
|
||||||
+ HashInfo("sha256", "jkl"),
|
|
||||||
),
|
|
||||||
("dir", "subdir", "z"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "mno"),
|
|
||||||
+ HashInfo("sha256", "mno"),
|
|
||||||
),
|
|
||||||
("dir", "subdir", "a"): (
|
|
||||||
None,
|
|
||||||
- HashInfo("md5", "pqr"),
|
|
||||||
+ HashInfo("sha256", "pqr"),
|
|
||||||
),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
@@ -81,19 +81,19 @@ def test_list(lst, trie_dict):
|
|
||||||
({}, 0),
|
|
||||||
(
|
|
||||||
{
|
|
||||||
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
|
|
||||||
- ("b",): (Meta(size=2), HashInfo("md5", "def")),
|
|
||||||
- ("c",): (Meta(size=3), HashInfo("md5", "ghi")),
|
|
||||||
- ("dir", "foo"): (Meta(size=4), HashInfo("md5", "jkl")),
|
|
||||||
- ("dir", "bar"): (Meta(size=5), HashInfo("md5", "mno")),
|
|
||||||
- ("dir", "baz"): (Meta(size=6), HashInfo("md5", "pqr")),
|
|
||||||
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
|
|
||||||
+ ("b",): (Meta(size=2), HashInfo("sha256", "def")),
|
|
||||||
+ ("c",): (Meta(size=3), HashInfo("sha256", "ghi")),
|
|
||||||
+ ("dir", "foo"): (Meta(size=4), HashInfo("sha256", "jkl")),
|
|
||||||
+ ("dir", "bar"): (Meta(size=5), HashInfo("sha256", "mno")),
|
|
||||||
+ ("dir", "baz"): (Meta(size=6), HashInfo("sha256", "pqr")),
|
|
||||||
},
|
|
||||||
6,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{
|
|
||||||
- ("a",): (Meta(size=1), HashInfo("md5", "abc")),
|
|
||||||
- ("b",): (Meta(), HashInfo("md5", "def")),
|
|
||||||
+ ("a",): (Meta(size=1), HashInfo("sha256", "abc")),
|
|
||||||
+ ("b",): (Meta(), HashInfo("sha256", "def")),
|
|
||||||
},
|
|
||||||
2,
|
|
||||||
),
|
|
||||||
@@ -110,15 +110,15 @@ def test_nfiles(trie_dict, nfiles):
|
|
||||||
[
|
|
||||||
{},
|
|
||||||
{
|
|
||||||
- ("a",): (None, HashInfo("md5", "abc")),
|
|
||||||
- ("b",): (None, HashInfo("md5", "def")),
|
|
||||||
- ("c",): (None, HashInfo("md5", "ghi")),
|
|
||||||
- ("dir", "foo"): (None, HashInfo("md5", "jkl")),
|
|
||||||
- ("dir", "bar"): (None, HashInfo("md5", "mno")),
|
|
||||||
- ("dir", "baz"): (None, HashInfo("md5", "pqr")),
|
|
||||||
- ("dir", "subdir", "1"): (None, HashInfo("md5", "stu")),
|
|
||||||
- ("dir", "subdir", "2"): (None, HashInfo("md5", "vwx")),
|
|
||||||
- ("dir", "subdir", "3"): (None, HashInfo("md5", "yz")),
|
|
||||||
+ ("a",): (None, HashInfo("sha256", "abc")),
|
|
||||||
+ ("b",): (None, HashInfo("sha256", "def")),
|
|
||||||
+ ("c",): (None, HashInfo("sha256", "ghi")),
|
|
||||||
+ ("dir", "foo"): (None, HashInfo("sha256", "jkl")),
|
|
||||||
+ ("dir", "bar"): (None, HashInfo("sha256", "mno")),
|
|
||||||
+ ("dir", "baz"): (None, HashInfo("sha256", "pqr")),
|
|
||||||
+ ("dir", "subdir", "1"): (None, HashInfo("sha256", "stu")),
|
|
||||||
+ ("dir", "subdir", "2"): (None, HashInfo("sha256", "vwx")),
|
|
||||||
+ ("dir", "subdir", "3"): (None, HashInfo("sha256", "yz")),
|
|
||||||
},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
@@ -135,63 +135,63 @@ def test_items(trie_dict):
|
|
||||||
[
|
|
||||||
({}, {}, {}, {}),
|
|
||||||
(
|
|
||||||
- {("foo",): HashInfo("md5", "123")},
|
|
||||||
+ {("foo",): HashInfo("sha256", "123")},
|
|
||||||
{
|
|
||||||
- ("foo",): HashInfo("md5", "123"),
|
|
||||||
- ("bar",): HashInfo("md5", "345"),
|
|
||||||
+ ("foo",): HashInfo("sha256", "123"),
|
|
||||||
+ ("bar",): HashInfo("sha256", "345"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
- ("foo",): HashInfo("md5", "123"),
|
|
||||||
- ("baz",): HashInfo("md5", "678"),
|
|
||||||
+ ("foo",): HashInfo("sha256", "123"),
|
|
||||||
+ ("baz",): HashInfo("sha256", "678"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
- ("foo",): HashInfo("md5", "123"),
|
|
||||||
- ("bar",): HashInfo("md5", "345"),
|
|
||||||
- ("baz",): HashInfo("md5", "678"),
|
|
||||||
+ ("foo",): HashInfo("sha256", "123"),
|
|
||||||
+ ("bar",): HashInfo("sha256", "345"),
|
|
||||||
+ ("baz",): HashInfo("sha256", "678"),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{
|
|
||||||
- ("common",): HashInfo("md5", "123"),
|
|
||||||
- ("subdir", "foo"): HashInfo("md5", "345"),
|
|
||||||
+ ("common",): HashInfo("sha256", "123"),
|
|
||||||
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
- ("common",): HashInfo("md5", "123"),
|
|
||||||
- ("subdir", "foo"): HashInfo("md5", "345"),
|
|
||||||
- ("subdir", "bar"): HashInfo("md5", "678"),
|
|
||||||
+ ("common",): HashInfo("sha256", "123"),
|
|
||||||
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
|
||||||
+ ("subdir", "bar"): HashInfo("sha256", "678"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
- ("common",): HashInfo("md5", "123"),
|
|
||||||
- ("subdir", "foo"): HashInfo("md5", "345"),
|
|
||||||
- ("subdir", "baz"): HashInfo("md5", "91011"),
|
|
||||||
+ ("common",): HashInfo("sha256", "123"),
|
|
||||||
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
|
||||||
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
- ("common",): HashInfo("md5", "123"),
|
|
||||||
- ("subdir", "foo"): HashInfo("md5", "345"),
|
|
||||||
- ("subdir", "bar"): HashInfo("md5", "678"),
|
|
||||||
- ("subdir", "baz"): HashInfo("md5", "91011"),
|
|
||||||
+ ("common",): HashInfo("sha256", "123"),
|
|
||||||
+ ("subdir", "foo"): HashInfo("sha256", "345"),
|
|
||||||
+ ("subdir", "bar"): HashInfo("sha256", "678"),
|
|
||||||
+ ("subdir", "baz"): HashInfo("sha256", "91011"),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{},
|
|
||||||
- {("foo",): HashInfo("md5", "123")},
|
|
||||||
- {("bar",): HashInfo("md5", "456")},
|
|
||||||
+ {("foo",): HashInfo("sha256", "123")},
|
|
||||||
+ {("bar",): HashInfo("sha256", "456")},
|
|
||||||
{
|
|
||||||
- ("foo",): HashInfo("md5", "123"),
|
|
||||||
- ("bar",): HashInfo("md5", "456"),
|
|
||||||
+ ("foo",): HashInfo("sha256", "123"),
|
|
||||||
+ ("bar",): HashInfo("sha256", "456"),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{},
|
|
||||||
{},
|
|
||||||
- {("bar",): HashInfo("md5", "123")},
|
|
||||||
- {("bar",): HashInfo("md5", "123")},
|
|
||||||
+ {("bar",): HashInfo("sha256", "123")},
|
|
||||||
+ {("bar",): HashInfo("sha256", "123")},
|
|
||||||
),
|
|
||||||
(
|
|
||||||
{},
|
|
||||||
- {("bar",): HashInfo("md5", "123")},
|
|
||||||
+ {("bar",): HashInfo("sha256", "123")},
|
|
||||||
{},
|
|
||||||
- {("bar",): HashInfo("md5", "123")},
|
|
||||||
+ {("bar",): HashInfo("sha256", "123")},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
diff --git a/tests/test_index.py b/tests/test_index.py
|
|
||||||
index c6404fa..635bf66 100644
|
|
||||||
--- a/tests/test_index.py
|
|
||||||
+++ b/tests/test_index.py
|
|
||||||
@@ -17,8 +17,8 @@ def odb(tmp_upath_factory, as_filesystem):
|
|
||||||
|
|
||||||
data = tmp_upath_factory.mktemp() / "data.dir"
|
|
||||||
data.write_bytes(
|
|
||||||
- b'[{"md5": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
|
|
||||||
- b'{"md5": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
|
|
||||||
+ b'[{"sha256": "c157a79031e1c40f85931829bc5fc552", "relpath": "bar"}, '
|
|
||||||
+ b'{"sha256": "258622b1688250cb619f3c9ccaefb7eb", "relpath": "baz"}]'
|
|
||||||
)
|
|
||||||
|
|
||||||
bar = tmp_upath_factory.mktemp() / "bar"
|
|
||||||
@@ -46,13 +46,13 @@ def test_fs(tmp_upath, odb, as_filesystem):
|
|
||||||
("foo",): DataIndexEntry(
|
|
||||||
odb=odb,
|
|
||||||
hash_info=HashInfo(
|
|
||||||
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
|
|
||||||
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
|
|
||||||
),
|
|
||||||
),
|
|
||||||
("data",): DataIndexEntry(
|
|
||||||
odb=odb,
|
|
||||||
hash_info=HashInfo(
|
|
||||||
- name="md5",
|
|
||||||
+ name="sha256",
|
|
||||||
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
@@ -80,22 +80,22 @@ def test_build(tmp_upath, odb, as_filesystem):
|
|
||||||
},
|
|
||||||
)
|
|
||||||
build(index, tmp_upath, as_filesystem(tmp_upath.fs))
|
|
||||||
- assert index[("foo",)].hash_info.name == "md5"
|
|
||||||
+ assert index[("foo",)].hash_info.name == "sha256"
|
|
||||||
assert (
|
|
||||||
index[("foo",)].hash_info.value == "d3b07384d113edec49eaa6238ad5ff00"
|
|
||||||
)
|
|
||||||
assert index[("foo",)].odb == odb
|
|
||||||
- assert index[("data",)].hash_info.name == "md5"
|
|
||||||
+ assert index[("data",)].hash_info.name == "sha256"
|
|
||||||
assert (
|
|
||||||
index[("data",)].hash_info.value
|
|
||||||
== "1f69c66028c35037e8bf67e5bc4ceb6a.dir"
|
|
||||||
)
|
|
||||||
- assert index[("data", "bar")].hash_info.name == "md5"
|
|
||||||
+ assert index[("data", "bar")].hash_info.name == "sha256"
|
|
||||||
assert (
|
|
||||||
index[("data", "bar")].hash_info.value
|
|
||||||
== "c157a79031e1c40f85931829bc5fc552"
|
|
||||||
)
|
|
||||||
- assert index[("data", "baz")].hash_info.name == "md5"
|
|
||||||
+ assert index[("data", "baz")].hash_info.name == "sha256"
|
|
||||||
assert (
|
|
||||||
index[("data", "baz")].hash_info.value
|
|
||||||
== "258622b1688250cb619f3c9ccaefb7eb"
|
|
||||||
@@ -108,13 +108,13 @@ def test_checkout(tmp_upath, odb, as_filesystem):
|
|
||||||
("foo",): DataIndexEntry(
|
|
||||||
odb=odb,
|
|
||||||
hash_info=HashInfo(
|
|
||||||
- name="md5", value="d3b07384d113edec49eaa6238ad5ff00"
|
|
||||||
+ name="sha256", value="d3b07384d113edec49eaa6238ad5ff00"
|
|
||||||
),
|
|
||||||
),
|
|
||||||
("data",): DataIndexEntry(
|
|
||||||
odb=odb,
|
|
||||||
hash_info=HashInfo(
|
|
||||||
- name="md5",
|
|
||||||
+ name="sha256",
|
|
||||||
value="1f69c66028c35037e8bf67e5bc4ceb6a.dir",
|
|
||||||
),
|
|
||||||
),
|
|
|
@ -1,71 +0,0 @@
|
||||||
commit 2065fc148ce77be68c95a81a05391e1bb35da79d
|
|
||||||
Author: Max <max@privatevoid.net>
|
|
||||||
Date: Sat Dec 17 14:35:20 2022 +0100
|
|
||||||
|
|
||||||
md5 to sha256 for 2.17.0
|
|
||||||
|
|
||||||
diff --git a/src/dvc_objects/db.py b/src/dvc_objects/db.py
|
|
||||||
index 0f0ab16..3b87fdb 100644
|
|
||||||
--- a/src/dvc_objects/db.py
|
|
||||||
+++ b/src/dvc_objects/db.py
|
|
||||||
@@ -229,7 +229,7 @@ class ObjectDB:
|
|
||||||
returned.
|
|
||||||
|
|
||||||
NOTE: For large remotes the list of oids will be very
|
|
||||||
- big(e.g. 100M entries, md5 for each is 32 bytes, so ~3200Mb list)
|
|
||||||
+ big(e.g. 100M entries, sha256 for each is 32 bytes, so ~3200Mb list)
|
|
||||||
and we don't really need all of it at the same time, so it makes
|
|
||||||
sense to use a generator to gradually iterate over it, without
|
|
||||||
keeping all of it in memory.
|
|
||||||
diff --git a/src/dvc_objects/fs/__init__.py b/src/dvc_objects/fs/__init__.py
|
|
||||||
index d236fdc..74db3fe 100644
|
|
||||||
--- a/src/dvc_objects/fs/__init__.py
|
|
||||||
+++ b/src/dvc_objects/fs/__init__.py
|
|
||||||
@@ -62,7 +62,7 @@ def get_fs_cls(remote_conf, cls=None, scheme=None):
|
|
||||||
|
|
||||||
def as_filesystem(
|
|
||||||
fs: "AbstractFileSystem",
|
|
||||||
- checksum: str = "md5",
|
|
||||||
+ checksum: str = "sha256",
|
|
||||||
object_based: bool = False,
|
|
||||||
**fs_args,
|
|
||||||
) -> "FileSystem":
|
|
||||||
diff --git a/src/dvc_objects/fs/implementations/local.py b/src/dvc_objects/fs/implementations/local.py
|
|
||||||
index 7f888ec..3e1a61a 100644
|
|
||||||
--- a/src/dvc_objects/fs/implementations/local.py
|
|
||||||
+++ b/src/dvc_objects/fs/implementations/local.py
|
|
||||||
@@ -167,7 +167,7 @@ class LocalFileSystem(FileSystem):
|
|
||||||
sep = os.sep
|
|
||||||
|
|
||||||
protocol = "local"
|
|
||||||
- PARAM_CHECKSUM = "md5"
|
|
||||||
+ PARAM_CHECKSUM = "sha256"
|
|
||||||
PARAM_PATH = "path"
|
|
||||||
TRAVERSE_PREFIX_LEN = 2
|
|
||||||
|
|
||||||
diff --git a/src/dvc_objects/fs/implementations/memory.py b/src/dvc_objects/fs/implementations/memory.py
|
|
||||||
index 97702cb..c5b5ad7 100644
|
|
||||||
--- a/src/dvc_objects/fs/implementations/memory.py
|
|
||||||
+++ b/src/dvc_objects/fs/implementations/memory.py
|
|
||||||
@@ -3,7 +3,7 @@ from ..base import FileSystem
|
|
||||||
|
|
||||||
class MemoryFileSystem(FileSystem): # pylint:disable=abstract-method
|
|
||||||
protocol = "memory"
|
|
||||||
- PARAM_CHECKSUM = "md5"
|
|
||||||
+ PARAM_CHECKSUM = "sha256"
|
|
||||||
|
|
||||||
def __init__(self, global_store=True, trie_based=False, fs=None, **kwargs):
|
|
||||||
super().__init__(fs=fs, **kwargs)
|
|
||||||
diff --git a/src/dvc_objects/fs/implementations/ssh.py b/src/dvc_objects/fs/implementations/ssh.py
|
|
||||||
index 8b93faf..8aed5e4 100644
|
|
||||||
--- a/src/dvc_objects/fs/implementations/ssh.py
|
|
||||||
+++ b/src/dvc_objects/fs/implementations/ssh.py
|
|
||||||
@@ -24,7 +24,7 @@ def ask_password(host, user, port):
|
|
||||||
class SSHFileSystem(FileSystem):
|
|
||||||
protocol = "ssh"
|
|
||||||
REQUIRES = {"sshfs": "sshfs"}
|
|
||||||
- PARAM_CHECKSUM = "md5"
|
|
||||||
+ PARAM_CHECKSUM = "sha256"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _strip_protocol(cls, path: str) -> str:
|
|
|
@ -1,267 +0,0 @@
|
||||||
diff --git a/dvc/analytics.py b/dvc/analytics.py
|
|
||||||
deleted file mode 100644
|
|
||||||
index 6e3dc91..0000000
|
|
||||||
--- a/dvc/analytics.py
|
|
||||||
+++ /dev/null
|
|
||||||
@@ -1,156 +0,0 @@
|
|
||||||
-import json
|
|
||||||
-import logging
|
|
||||||
-import os
|
|
||||||
-
|
|
||||||
-from .env import DVC_NO_ANALYTICS
|
|
||||||
-
|
|
||||||
-logger = logging.getLogger(__name__)
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def collect_and_send_report(args=None, return_code=None):
|
|
||||||
- """
|
|
||||||
- Collect information from the runtime/environment and the command
|
|
||||||
- being executed into a report and send it over the network.
|
|
||||||
-
|
|
||||||
- To prevent analytics from blocking the execution of the main thread,
|
|
||||||
- sending the report is done in a separate process.
|
|
||||||
-
|
|
||||||
- The inter-process communication happens through a file containing the
|
|
||||||
- report as a JSON, where the _collector_ generates it and the _sender_
|
|
||||||
- removes it after sending it.
|
|
||||||
- """
|
|
||||||
- import tempfile
|
|
||||||
-
|
|
||||||
- from dvc.daemon import daemon
|
|
||||||
-
|
|
||||||
- report = {}
|
|
||||||
-
|
|
||||||
- # Include command execution information on the report only when available.
|
|
||||||
- if args and hasattr(args, "func"):
|
|
||||||
- report.update({"cmd_class": args.func.__name__})
|
|
||||||
-
|
|
||||||
- if return_code is not None:
|
|
||||||
- report.update({"cmd_return_code": return_code})
|
|
||||||
-
|
|
||||||
- with tempfile.NamedTemporaryFile(delete=False, mode="w") as fobj:
|
|
||||||
- json.dump(report, fobj)
|
|
||||||
- daemon(["analytics", fobj.name])
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def is_enabled():
|
|
||||||
- from dvc.config import Config, to_bool
|
|
||||||
- from dvc.utils import env2bool
|
|
||||||
-
|
|
||||||
- if env2bool("DVC_TEST"):
|
|
||||||
- return False
|
|
||||||
-
|
|
||||||
- enabled = not os.getenv(DVC_NO_ANALYTICS)
|
|
||||||
- if enabled:
|
|
||||||
- enabled = to_bool(
|
|
||||||
- Config.from_cwd(validate=False).get("core", {}).get("analytics", "true")
|
|
||||||
- )
|
|
||||||
-
|
|
||||||
- logger.debug("Analytics is %sabled.", "en" if enabled else "dis")
|
|
||||||
-
|
|
||||||
- return enabled
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def send(path):
|
|
||||||
- """
|
|
||||||
- Side effect: Removes the report after sending it.
|
|
||||||
-
|
|
||||||
- The report is generated and stored in a temporary file, see:
|
|
||||||
- `collect_and_send_report`. Sending happens on another process,
|
|
||||||
- thus, the need of removing such file afterwards.
|
|
||||||
- """
|
|
||||||
- import requests
|
|
||||||
-
|
|
||||||
- url = "https://analytics.dvc.org"
|
|
||||||
- headers = {"content-type": "application/json"}
|
|
||||||
-
|
|
||||||
- with open(path, encoding="utf-8") as fobj:
|
|
||||||
- report = json.load(fobj)
|
|
||||||
-
|
|
||||||
- report.update(_runtime_info())
|
|
||||||
-
|
|
||||||
- try:
|
|
||||||
- requests.post(url, json=report, headers=headers, timeout=5)
|
|
||||||
- except requests.exceptions.RequestException:
|
|
||||||
- logger.debug("failed to send analytics report", exc_info=True)
|
|
||||||
-
|
|
||||||
- os.remove(path)
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def _scm_in_use():
|
|
||||||
- from dvc.exceptions import NotDvcRepoError
|
|
||||||
- from dvc.repo import Repo
|
|
||||||
- from dvc.scm import NoSCM
|
|
||||||
-
|
|
||||||
- from .scm import SCM, SCMError
|
|
||||||
-
|
|
||||||
- try:
|
|
||||||
- scm = SCM(root_dir=Repo.find_root())
|
|
||||||
- return type(scm).__name__
|
|
||||||
- except SCMError:
|
|
||||||
- return NoSCM.__name__
|
|
||||||
- except NotDvcRepoError:
|
|
||||||
- pass
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def _runtime_info():
|
|
||||||
- """
|
|
||||||
- Gather information from the environment where DVC runs to fill a report.
|
|
||||||
- """
|
|
||||||
- from iterative_telemetry import _generate_ci_id, find_or_create_user_id
|
|
||||||
-
|
|
||||||
- from dvc import __version__
|
|
||||||
- from dvc.utils import is_binary
|
|
||||||
-
|
|
||||||
- ci_id = _generate_ci_id()
|
|
||||||
- if ci_id:
|
|
||||||
- group_id, user_id = ci_id
|
|
||||||
- else:
|
|
||||||
- group_id, user_id = None, find_or_create_user_id()
|
|
||||||
-
|
|
||||||
- return {
|
|
||||||
- "dvc_version": __version__,
|
|
||||||
- "is_binary": is_binary(),
|
|
||||||
- "scm_class": _scm_in_use(),
|
|
||||||
- "system_info": _system_info(),
|
|
||||||
- "user_id": user_id,
|
|
||||||
- "group_id": group_id,
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-def _system_info():
|
|
||||||
- import platform
|
|
||||||
- import sys
|
|
||||||
-
|
|
||||||
- import distro
|
|
||||||
-
|
|
||||||
- system = platform.system()
|
|
||||||
-
|
|
||||||
- if system == "Windows":
|
|
||||||
- version = sys.getwindowsversion() # type: ignore[attr-defined]
|
|
||||||
-
|
|
||||||
- return {
|
|
||||||
- "os": "windows",
|
|
||||||
- "windows_version_build": version.build,
|
|
||||||
- "windows_version_major": version.major,
|
|
||||||
- "windows_version_minor": version.minor,
|
|
||||||
- "windows_version_service_pack": version.service_pack,
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if system == "Darwin":
|
|
||||||
- return {"os": "mac", "mac_version": platform.mac_ver()[0]}
|
|
||||||
-
|
|
||||||
- if system == "Linux":
|
|
||||||
- return {
|
|
||||||
- "os": "linux",
|
|
||||||
- "linux_distro": distro.id(),
|
|
||||||
- "linux_distro_like": distro.like(),
|
|
||||||
- "linux_distro_version": distro.version(),
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- # We don't collect data for any other system.
|
|
||||||
- raise NotImplementedError
|
|
||||||
diff --git a/dvc/cli/__init__.py b/dvc/cli/__init__.py
|
|
||||||
index 274b564..b601d84 100644
|
|
||||||
--- a/dvc/cli/__init__.py
|
|
||||||
+++ b/dvc/cli/__init__.py
|
|
||||||
@@ -236,11 +236,6 @@ def main(argv=None): # noqa: C901, PLR0912, PLR0915
|
|
||||||
ret = _log_exceptions(exc) or 255
|
|
||||||
|
|
||||||
try:
|
|
||||||
- from dvc import analytics
|
|
||||||
-
|
|
||||||
- if analytics.is_enabled():
|
|
||||||
- analytics.collect_and_send_report(args, ret)
|
|
||||||
-
|
|
||||||
return ret
|
|
||||||
finally:
|
|
||||||
logger.setLevel(outer_log_level)
|
|
||||||
diff --git a/dvc/commands/daemon.py b/dvc/commands/daemon.py
|
|
||||||
index 35d6e90..d5a7b6e 100644
|
|
||||||
--- a/dvc/commands/daemon.py
|
|
||||||
+++ b/dvc/commands/daemon.py
|
|
||||||
@@ -26,15 +26,6 @@ class CmdDaemonUpdater(CmdDaemonBase):
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
-class CmdDaemonAnalytics(CmdDaemonBase):
|
|
||||||
- def run(self):
|
|
||||||
- from dvc import analytics
|
|
||||||
-
|
|
||||||
- analytics.send(self.args.target)
|
|
||||||
-
|
|
||||||
- return 0
|
|
||||||
-
|
|
||||||
-
|
|
||||||
def add_parser(subparsers, parent_parser):
|
|
||||||
DAEMON_HELP = "Service daemon."
|
|
||||||
daemon_parser = subparsers.add_parser(
|
|
||||||
@@ -59,15 +50,3 @@ def add_parser(subparsers, parent_parser):
|
|
||||||
help=DAEMON_UPDATER_HELP,
|
|
||||||
)
|
|
||||||
daemon_updater_parser.set_defaults(func=CmdDaemonUpdater)
|
|
||||||
-
|
|
||||||
- DAEMON_ANALYTICS_HELP = "Send dvc usage analytics."
|
|
||||||
- daemon_analytics_parser = daemon_subparsers.add_parser(
|
|
||||||
- "analytics",
|
|
||||||
- parents=[parent_parser],
|
|
||||||
- description=DAEMON_ANALYTICS_HELP,
|
|
||||||
- help=DAEMON_ANALYTICS_HELP,
|
|
||||||
- )
|
|
||||||
- daemon_analytics_parser.add_argument(
|
|
||||||
- "target", help="Analytics file."
|
|
||||||
- ).complete = completion.FILE
|
|
||||||
- daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics)
|
|
||||||
diff --git a/dvc/commands/init.py b/dvc/commands/init.py
|
|
||||||
index ca44919..05730aa 100644
|
|
||||||
--- a/dvc/commands/init.py
|
|
||||||
+++ b/dvc/commands/init.py
|
|
||||||
@@ -3,7 +3,6 @@ import logging
|
|
||||||
|
|
||||||
import colorama
|
|
||||||
|
|
||||||
-from dvc import analytics
|
|
||||||
from dvc.cli.command import CmdBaseNoRepo
|
|
||||||
from dvc.cli.utils import append_doc_link
|
|
||||||
from dvc.utils import boxify
|
|
||||||
@@ -15,16 +14,6 @@ logger = logging.getLogger(__name__)
|
|
||||||
def _welcome_message():
|
|
||||||
from dvc.ui import ui
|
|
||||||
|
|
||||||
- if analytics.is_enabled():
|
|
||||||
- ui.write(
|
|
||||||
- boxify(
|
|
||||||
- "DVC has enabled anonymous aggregate usage analytics.\n"
|
|
||||||
- "Read the analytics documentation (and how to opt-out) here:\n"
|
|
||||||
- + fmt_link("https://dvc.org/doc/user-guide/analytics"),
|
|
||||||
- border_color="red",
|
|
||||||
- )
|
|
||||||
- )
|
|
||||||
-
|
|
||||||
msg = (
|
|
||||||
"{yellow}What's next?{nc}\n"
|
|
||||||
"{yellow}------------{nc}\n"
|
|
||||||
diff --git a/dvc/config_schema.py b/dvc/config_schema.py
|
|
||||||
index 2e36e90..3d9e402 100644
|
|
||||||
--- a/dvc/config_schema.py
|
|
||||||
+++ b/dvc/config_schema.py
|
|
||||||
@@ -144,7 +144,6 @@ SCHEMA = {
|
|
||||||
"remote": Lower,
|
|
||||||
"checksum_jobs": All(Coerce(int), Range(1)),
|
|
||||||
Optional("interactive", default=False): Bool,
|
|
||||||
- Optional("analytics", default=True): Bool,
|
|
||||||
Optional("hardlink_lock", default=False): Bool,
|
|
||||||
Optional("no_scm", default=False): Bool,
|
|
||||||
Optional("autostage", default=False): Bool,
|
|
||||||
diff --git a/dvc/env.py b/dvc/env.py
|
|
||||||
index 081ec9d..06c1332 100644
|
|
||||||
--- a/dvc/env.py
|
|
||||||
+++ b/dvc/env.py
|
|
||||||
@@ -7,7 +7,6 @@ DVC_EXP_GIT_REMOTE = "DVC_EXP_GIT_REMOTE"
|
|
||||||
DVC_EXP_NAME = "DVC_EXP_NAME"
|
|
||||||
DVC_GLOBAL_CONFIG_DIR = "DVC_GLOBAL_CONFIG_DIR"
|
|
||||||
DVC_IGNORE_ISATTY = "DVC_IGNORE_ISATTY"
|
|
||||||
-DVC_NO_ANALYTICS = "DVC_NO_ANALYTICS"
|
|
||||||
DVC_PAGER = "DVC_PAGER"
|
|
||||||
DVC_ROOT = "DVC_ROOT"
|
|
||||||
DVC_SHOW_TRACEBACK = "DVC_SHOW_TRACEBACK"
|
|
Loading…
Reference in a new issue