Skip to content

Commit 1d176f9

Browse files
List and Restores versions (#153)
* List non-current and live version(s) and Restore specific version from non-current to live * Added unit tests for list_versions and restore() * Bumped new version 2.0.14 -> 2.0.15
1 parent d93b008 commit 1d176f9

File tree

3 files changed

+191
-8
lines changed

3 files changed

+191
-8
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dapla-toolbelt"
3-
version = "2.0.14"
3+
version = "2.0.15"
44
description = "Dapla Toolbelt"
55
authors = ["Dapla Developers <dapla-platform-developers@ssb.no>"]
66
license = "MIT"

src/dapla/files.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import pandas as pd
66
from fsspec.spec import AbstractBufferedFile
7+
from google.cloud import storage
78

89
from .auth import AuthClient
910
from .gcs import GCSFileSystem
@@ -63,6 +64,57 @@ def ls(gcs_path: str, detail: bool = False, **kwargs: Any) -> Any:
6364
"""
6465
return FileClient.get_gcs_file_system().ls(gcs_path, detail=detail, **kwargs)
6566

67+
@staticmethod
def get_versions(bucket_name: str, file_name: str) -> Any:
    """Get all versions of a file in a bucket.

    The lookup is a prefix match on ``file_name`` with versioning enabled,
    so both live and non-current generations are returned. Any other object
    whose name merely starts with ``file_name`` is included as well.

    Args:
        bucket_name: Bucket name where the file is located.
        file_name: Name of the file.

    Returns:
        List of blobs, one per matching object version.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    # list_blobs hands back a lazy iterator; materialise it so the documented
    # "list" contract holds and callers can use len() and indexing.
    return list(bucket.list_blobs(prefix=file_name, versions=True))
82+
83+
@staticmethod
def restore_version(
    bucket_name: str,
    file_name: str,
    destination_file: str,
    generation_id: str,
    destination_generation_id: str,
) -> Any:
    """Restore a deleted/non-current version of a file to the live version.

    The chosen generation of ``file_name`` is copied to ``destination_file``
    inside the same bucket.

    Args:
        bucket_name: Name of the bucket that holds the file.
        file_name: Name of the non-current file to restore from.
        destination_file: Name the restored file is written to.
        generation_id: Generation id of the non-current version to copy.
        destination_generation_id: In case a live version already exists,
            the generation id of that live version. It is forwarded as the
            ``if_generation_match`` precondition of the copy.

    Returns:
        The new blob produced by the copy, carrying a new generation id.
    """
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    noncurrent_blob = bucket.blob(file_name)

    # A restore writes back into the bucket it reads from, so the target
    # handle is built from the same bucket name.
    target_bucket = client.bucket(bucket_name)

    restored_blob = bucket.copy_blob(
        noncurrent_blob,
        target_bucket,
        destination_file,
        source_generation=generation_id,
        if_generation_match=destination_generation_id,
    )
    return restored_blob
117+
66118
@staticmethod
67119
def cat(gcs_path: str) -> str:
68120
"""Get string content of a file from GCS.

tests/test_files.py

Lines changed: 138 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,148 @@
11
# Test for FileClient class
2-
2+
import unittest
3+
from unittest.mock import Mock
4+
from unittest.mock import patch
35

46
from dapla import FileClient
57

68
PATH_WITH_PREFIX = "gs://bucket/path"
79
PATH_WITHOUT_PREFIX = "bucket/path"
810

911

10-
def test_ensure_gcs_uri_prefix() -> None:
11-
assert FileClient._ensure_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITH_PREFIX
12-
assert FileClient._ensure_gcs_uri_prefix(PATH_WITHOUT_PREFIX) == PATH_WITH_PREFIX
12+
class TestFiles(unittest.TestCase):
    """Unit tests for FileClient path helpers and object-version operations."""

    @staticmethod
    def _make_blob(name: str, generation: int, updated: str) -> Mock:
        """Build a blob stand-in whose ``name`` attribute is a real string.

        ``Mock(name=...)`` only names the mock itself (used in its repr); it
        does not create a ``name`` attribute. The attribute is therefore
        assigned after construction so assertions on it are meaningful.
        """
        blob = Mock(generation=generation, updated=updated, time_deleted=None)
        blob.name = name
        return blob

    def test_ensure_gcs_uri_prefix(self) -> None:
        assert FileClient._ensure_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITH_PREFIX
        assert (
            FileClient._ensure_gcs_uri_prefix(PATH_WITHOUT_PREFIX) == PATH_WITH_PREFIX
        )

    def test_remove_gcs_uri_prefix(self) -> None:
        assert (
            FileClient._remove_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITHOUT_PREFIX
        )
        assert (
            FileClient._remove_gcs_uri_prefix(PATH_WITHOUT_PREFIX)
            == PATH_WITHOUT_PREFIX
        )

    @patch("google.cloud.storage.Client")
    def test_get_versions_valid(self, mock_client: Mock) -> None:
        # Arrange
        bucket_name = "test-bucket"
        file_name = "test-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_blob1 = self._make_blob(file_name, 1, "2023-04-01T00:00:00Z")
        mock_blob2 = self._make_blob(file_name, 2, "2023-04-02T00:00:00Z")
        mock_bucket.list_blobs.return_value = [mock_blob1, mock_blob2]

        # Act: list() keeps the test valid whether a list or an iterator
        # of blobs is returned.
        files = list(FileClient.get_versions(bucket_name, file_name))

        # Assert
        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 2

        assert files[0].name == file_name
        assert files[0].generation == 1
        assert files[0].updated == "2023-04-01T00:00:00Z"
        assert files[0].time_deleted is None

        assert files[1].name == file_name
        assert files[1].generation == 2
        assert files[1].updated == "2023-04-02T00:00:00Z"
        assert files[1].time_deleted is None

    @patch("google.cloud.storage.Client")
    def test_get_versions_nonexistent_file(self, mock_client: Mock) -> None:
        bucket_name = "test-bucket"
        file_name = "nonexistent-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.list_blobs.return_value = []

        files = list(FileClient.get_versions(bucket_name, file_name))

        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 0
        assert files == []

    @patch("google.cloud.storage.Client")
    def test_get_versions_empty_bucket(self, mock_client: Mock) -> None:
        bucket_name = "test-bucket"
        file_name = "test-file.txt"
        mock_bucket = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.list_blobs.return_value = []

        files = list(FileClient.get_versions(bucket_name, file_name))

        mock_client.return_value.bucket.assert_called_with(bucket_name)
        mock_bucket.list_blobs.assert_called_with(prefix=file_name, versions=True)

        assert len(files) == 0
        assert files == []

    @patch("google.cloud.storage.Client")
    def test_restore_version_success(self, mock_client: Mock) -> None:
        mock_bucket = Mock()
        mock_source_blob = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.blob.return_value = mock_source_blob

        blob = FileClient.restore_version(
            bucket_name="test-bucket",
            file_name="test-file.txt",
            destination_file="restored-file.txt",
            generation_id="1234567890",
            destination_generation_id="0",
        )

        mock_client.return_value.bucket.assert_called_with("test-bucket")
        mock_bucket.blob.assert_called_with("test-file.txt")
        mock_bucket.copy_blob.assert_called_with(
            mock_source_blob,
            mock_bucket,
            "restored-file.txt",
            source_generation="1234567890",
            if_generation_match="0",
        )
        assert blob == mock_bucket.copy_blob.return_value

    @patch("google.cloud.storage.Client")
    def test_restore_version_existing_live_version(self, mock_client: Mock) -> None:
        mock_bucket = Mock()
        mock_source_blob = Mock()
        mock_client.return_value.bucket.return_value = mock_bucket
        mock_bucket.blob.return_value = mock_source_blob

        blob = FileClient.restore_version(
            bucket_name="test-bucket",
            file_name="test-file.txt",
            destination_file="restored-file.txt",
            generation_id="1234567890",
            destination_generation_id="0987654321",
        )

        mock_client.return_value.bucket.assert_called_with("test-bucket")
        mock_bucket.blob.assert_called_with("test-file.txt")
        mock_bucket.copy_blob.assert_called_with(
            mock_source_blob,
            mock_bucket,
            "restored-file.txt",
            source_generation="1234567890",
            if_generation_match="0987654321",
        )
        assert blob == mock_bucket.copy_blob.return_value
13145

14146

15-
def test_remove_gcs_uri_prefix() -> None:
16-
assert FileClient._remove_gcs_uri_prefix(PATH_WITH_PREFIX) == PATH_WITHOUT_PREFIX
17-
assert FileClient._remove_gcs_uri_prefix(PATH_WITHOUT_PREFIX) == PATH_WITHOUT_PREFIX
147+
if __name__ == "__main__":
148+
unittest.main()

0 commit comments

Comments
 (0)