azure_utils.utils.blob_storage_utils

import hashlib
import json
import os
import pathlib
from typing import Optional
  2
  3
  4def validate_path(path: str, is_remote: bool, is_folder: bool) -> None:
  5    """
  6    Function:
  7
  8    Validates a path string
  9
 10    Parameters:
 11
 12    - `path` (str): The path to validate
 13    - `is_remote` (bool): Whether the path is remote or local
 14    - `is_folder` (bool): Whether the path is a folder or file
 15
 16    Returns:
 17
 18    - `None`
 19
 20    Raises:
 21
 22    - `Exception`: If the path is invalid
 23
 24    Example:
 25
 26    ```python
 27    from azure_utils.blob_storage import validate_path
 28    validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
 29    # This will return `None`
 30    # This will raise an exception if the path is invalid
 31    ```
 32    """
 33    posix_path = str(pathlib.PurePosixPath(path))
 34    if path.endswith("/") or path.endswith("\\"):
 35        if posix_path != "/":
 36            posix_path = posix_path + "/"
 37    if is_remote:
 38        if posix_path != path:
 39            raise Exception(
 40                f"Path Error: Remote storage uses posix paths. Received: `{path}`"
 41            )
 42    else:
 43        if posix_path == "/":
 44            raise Exception(
 45                f"Path Error: Cannot use the root folder for local storage. Received: `{path}`"
 46            )
 47    if not posix_path.startswith("/"):
 48        raise Exception(
 49            f"Path Error: Path must be absolute. Received: `{path}`"
 50        )
 51    if is_folder and not posix_path.endswith("/"):
 52        raise Exception(
 53            f"Path Error: A folder is required for this path. You may need to include a trailing slash. Received: `{path}`"
 54        )
 55    if not is_folder and posix_path.endswith("/"):
 56        raise Exception(
 57            f"Path Error: A file is required for this path. Received: `{path}`"
 58        )
 59
 60
 61class MetaFile:
 62    def __init__(self, filepath: str):
 63        """
 64        Function:
 65
 66        - Creates a MetaFile object
 67        - Can be used to associate arbitrary meta data to any file
 68        - Includes a built in method to calculate the md5 hash of the file
 69
 70        Parameters:
 71
 72        - `filepath` (str): The path to the file
 73
 74        Returns:
 75
 76        - The MetaFile object
 77
 78        Example:
 79
 80        ```python
 81        from azure_utils.blob_storage import MetaFile
 82        meta_file = MetaFile(filepath='/path/to/file.txt')
 83        meta_file.data['my_meta_key'] = 'my_meta_value'
 84        meta_file.update()
 85        # This will create a file at `/path/to/.meta.file.txt`
 86        # The file will contain the following json:
 87        # {
 88        #     "my_meta_key": "my_meta_value",
 89        # }
 90        ```
 91        """
 92        validate_path(path=filepath, is_remote=False, is_folder=False)
 93        self.filepath = filepath
 94        self.meta_filepath = self.__get_meta_filepath__(filepath)
 95        self.data = (
 96            self.__get_data__() if os.path.isfile(self.meta_filepath) else {}
 97        )
 98
 99    def __get_meta_filepath__(self, filepath: str) -> str:
100        """
101        Function:
102
103        - Gets the path to the meta file
104
105        Parameters:
106
107        - `filepath` (str): The path to the file
108
109        Returns:
110
111        - The path to the meta file
112
113        Example:
114
115        ```python
116        from azure_utils.blob_storage import MetaFile
117        meta_file = MetaFile(filepath='/path/to/file.txt')
118        meta_file.__get_meta_filepath__(filepath='/path/to/file.txt')
119        # This will return `/path/to/.meta.file.txt`
120        ```
121        """
122        return os.path.join(
123            os.path.dirname(filepath), ".meta." + os.path.basename(filepath)
124        )
125
126    def __get_data__(self) -> dict:
127        """
128        Function:
129
130        - Gets the data from the meta file
131
132        Parameters:
133
134        - `None`
135
136        Returns:
137
138        - The data from the meta file
139
140        Example:
141
142        ```python
143        from azure_utils.blob_storage import MetaFile
144        meta_file = MetaFile(filepath='/path/to/file.txt')
145        meta_file.__get_data__()
146        # This will return the data from the meta file
147        ```
148        """
149        with open(self.meta_filepath, "r") as meta_file:
150            return json.load(meta_file)
151
152    def __calc_md5__(self) -> str:
153        """
154        Function:
155
156        - Calculates the md5 hash of the file
157
158        Parameters:
159
160        - `None`
161
162        Returns:
163
164        - The md5 hash of the file
165
166        Example:
167
168        ```python
169        from azure_utils.blob_storage import MetaFile
170        meta_file = MetaFile(filepath='/path/to/file.txt')
171        meta_file.__calc_md5__()
172        # This will return the md5 hash of `/path/to/file.txt`
173        ```
174        """
175        if not os.path.isfile(self.filepath):
176            return None
177        with open(self.filepath, "rb") as blob_file:
178            return hashlib.md5(blob_file.read()).hexdigest()
179
180    def validate_md5(self) -> bool:
181        """
182
183        Function:
184
185        - Validates the md5 hash of the file
186
187        Parameters:
188
189        - `None`
190
191        Returns:
192
193        - Whether the md5 hash of the file is valid
194
195
196        """
197        return self.data["md5"] == self.__calc_md5__()
198
199    def update(self, include_md5: bool = True) -> None:
200        """
201        Function:
202
203        - Updates the meta file
204
205        Parameters:
206
207        - `include_md5` (bool): Whether to include the md5 hash of the file
208
209        Returns:
210
211        - `None`
212
213        Example:
214
215        ```python
216        from azure_utils.blob_storage import MetaFile
217        meta_file = MetaFile(filepath='/path/to/file.txt')
218        meta_file.data['my_meta_key'] = 'my_meta_value'
219        meta_file.update()
220        # This will create a file at `/path/to/.meta.file.txt`
221        # The file will contain the following json:
222        # {
223        #     "my_meta_key": "my_meta_value",
224        # }
225        ```
226        """
227        if include_md5:
228            self.data["md5"] = self.__calc_md5__()
229        with open(self.meta_filepath, "w") as meta_file:
230            json.dump(self.data, meta_file)
231
232
class AZBlob:
    """
    Thin wrapper around a blob client that can download or delete its blob.
    """

    def __init__(self, blob_client):
        """
        Function:

        - Creates an AZBlob object

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client

        Returns:

        - The AZBlob object

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import AZBlob
        myblob = AZBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            )
        )
        myblob.download(filepath='/path/to/file.txt')
        ```
        """
        self.blob_client = blob_client

    def download(self, filepath) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file
        - Creates the parent folder of `filepath` if it does not exist
        - The blob is streamed into `<filepath>.part` and only moved over
          `filepath` once the download has finished, so a failed download
          neither truncates an existing file nor leaves a partial one behind

        Parameters:

        - `filepath` (str): The absolute path to the local file

        Returns:

        - `None`

        Raises:

        - `Exception`: If `filepath` is not a valid local file path
        - Any error raised by the blob client while downloading
        """
        validate_path(path=filepath, is_remote=False, is_folder=False)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        partial_path = filepath + ".part"
        try:
            with open(partial_path, "wb") as blob_file:
                self.blob_client.download_blob().readinto(blob_file)
            # Same folder, so the move does not cross file systems.
            os.replace(partial_path, filepath)
        finally:
            # Only still present when the download or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def delete(self) -> None:
        """
        Function:

        - Deletes a blob from the remote
        - Passes `delete_snapshots="include"` to the blob client

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.blob_client.delete_blob(delete_snapshots="include")
299
300
class MetaBlob(AZBlob):
    """
    An AZBlob bound to one local file that records the remote etag and the
    local md5 hash in a meta file, so unnecessary downloads can be skipped.
    """

    def __init__(
        self,
        blob_client,
        filepath: str,
        smart_sync: bool = False,
        remote_etag: Optional[str] = None,
        overwrite: bool = False,
    ):
        """
        Function:

        - Creates a MetaBlob object
        - Decides once, at construction time, whether `download()` will be skipped (`block_download`)

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client
        - `filepath` (str): The absolute path to the local file
        - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
            - Optional: Defaults to `False`
        - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
            - Optional: Defaults to the current etag of the remote blob
        - `overwrite` (bool): Whether to overwrite the local file if it already exists
            - Optional: Defaults to `False`
            - When `False`, an existing local file always blocks the download, regardless of `smart_sync`

        Returns:

        - The MetaBlob object

        Raises:

        - `Exception`: If `filepath` is not a valid local file path

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import MetaBlob
        myblob = MetaBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            ),
            filepath='/path/to/file.txt',
            smart_sync=True,
            overwrite=True
        )
        myblob.download()
        ```
        """
        super().__init__(blob_client)
        validate_path(path=filepath, is_remote=False, is_folder=False)
        self.filepath = filepath
        self.meta = MetaFile(filepath)
        if remote_etag is None:
            self.update_etag()
        else:
            self.remote_etag = remote_etag
        self.block_download = False
        if smart_sync and self.meta.data.get("etag") == self.remote_etag:
            # A meta file can carry an etag without an md5 entry; treat that
            # as "not verified" and download rather than fail on the lookup.
            has_md5 = self.meta.data.get("md5") is not None
            if has_md5 and self.meta.validate_md5():
                self.block_download = True
        if not overwrite and os.path.isfile(filepath):
            self.block_download = True

    def update_etag(self) -> None:
        """
        Function:

        - Updates the local etag with the current etag of the remote blob

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.remote_etag = self.blob_client.get_blob_properties().etag

    def update_meta(self) -> None:
        """
        Function:

        - Updates the local meta file with the known remote etag and the md5 hash of the local file

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.meta.data["etag"] = self.remote_etag
        self.meta.update(include_md5=True)

    def download(self) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file
        - Does nothing if the download was blocked when the object was created
        - Updates the local meta file after a successful download

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        if self.block_download:
            return
        super().download(filepath=self.filepath)
        self.update_meta()
def validate_path(path: str, is_remote: bool, is_folder: bool) -> None:
 5def validate_path(path: str, is_remote: bool, is_folder: bool) -> None:
 6    """
 7    Function:
 8
 9    Validates a path string
10
11    Parameters:
12
13    - `path` (str): The path to validate
14    - `is_remote` (bool): Whether the path is remote or local
15    - `is_folder` (bool): Whether the path is a folder or file
16
17    Returns:
18
19    - `None`
20
21    Raises:
22
23    - `Exception`: If the path is invalid
24
25    Example:
26
27    ```python
28    from azure_utils.blob_storage import validate_path
29    validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
30    # This will return `None`
31    # This will raise an exception if the path is invalid
32    ```
33    """
34    posix_path = str(pathlib.PurePosixPath(path))
35    if path.endswith("/") or path.endswith("\\"):
36        if posix_path != "/":
37            posix_path = posix_path + "/"
38    if is_remote:
39        if posix_path != path:
40            raise Exception(
41                f"Path Error: Remote storage uses posix paths. Received: `{path}`"
42            )
43    else:
44        if posix_path == "/":
45            raise Exception(
46                f"Path Error: Cannot use the root folder for local storage. Received: `{path}`"
47            )
48    if not posix_path.startswith("/"):
49        raise Exception(
50            f"Path Error: Path must be absolute. Received: `{path}`"
51        )
52    if is_folder and not posix_path.endswith("/"):
53        raise Exception(
54            f"Path Error: A folder is required for this path. You may need to include a trailing slash. Received: `{path}`"
55        )
56    if not is_folder and posix_path.endswith("/"):
57        raise Exception(
58            f"Path Error: A file is required for this path. Received: `{path}`"
59        )

Function:

Validates a path string

Parameters:

  • path (str): The path to validate
  • is_remote (bool): Whether the path is remote or local
  • is_folder (bool): Whether the path is a folder or file

Returns:

  • None

Raises:

  • Exception: If the path is invalid

Example:

from azure_utils.blob_storage import validate_path
validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
# This will return `None`
# This will raise an exception if the path is invalid
class MetaFile:
 62class MetaFile:
 63    def __init__(self, filepath: str):
 64        """
 65        Function:
 66
 67        - Creates a MetaFile object
 68        - Can be used to associate arbitrary meta data to any file
 69        - Includes a built in method to calculate the md5 hash of the file
 70
 71        Parameters:
 72
 73        - `filepath` (str): The path to the file
 74
 75        Returns:
 76
 77        - The MetaFile object
 78
 79        Example:
 80
 81        ```python
 82        from azure_utils.blob_storage import MetaFile
 83        meta_file = MetaFile(filepath='/path/to/file.txt')
 84        meta_file.data['my_meta_key'] = 'my_meta_value'
 85        meta_file.update()
 86        # This will create a file at `/path/to/.meta.file.txt`
 87        # The file will contain the following json:
 88        # {
 89        #     "my_meta_key": "my_meta_value",
 90        # }
 91        ```
 92        """
 93        validate_path(path=filepath, is_remote=False, is_folder=False)
 94        self.filepath = filepath
 95        self.meta_filepath = self.__get_meta_filepath__(filepath)
 96        self.data = (
 97            self.__get_data__() if os.path.isfile(self.meta_filepath) else {}
 98        )
 99
100    def __get_meta_filepath__(self, filepath: str) -> str:
101        """
102        Function:
103
104        - Gets the path to the meta file
105
106        Parameters:
107
108        - `filepath` (str): The path to the file
109
110        Returns:
111
112        - The path to the meta file
113
114        Example:
115
116        ```python
117        from azure_utils.blob_storage import MetaFile
118        meta_file = MetaFile(filepath='/path/to/file.txt')
119        meta_file.__get_meta_filepath__(filepath='/path/to/file.txt')
120        # This will return `/path/to/.meta.file.txt`
121        ```
122        """
123        return os.path.join(
124            os.path.dirname(filepath), ".meta." + os.path.basename(filepath)
125        )
126
127    def __get_data__(self) -> dict:
128        """
129        Function:
130
131        - Gets the data from the meta file
132
133        Parameters:
134
135        - `None`
136
137        Returns:
138
139        - The data from the meta file
140
141        Example:
142
143        ```python
144        from azure_utils.blob_storage import MetaFile
145        meta_file = MetaFile(filepath='/path/to/file.txt')
146        meta_file.__get_data__()
147        # This will return the data from the meta file
148        ```
149        """
150        with open(self.meta_filepath, "r") as meta_file:
151            return json.load(meta_file)
152
153    def __calc_md5__(self) -> str:
154        """
155        Function:
156
157        - Calculates the md5 hash of the file
158
159        Parameters:
160
161        - `None`
162
163        Returns:
164
165        - The md5 hash of the file
166
167        Example:
168
169        ```python
170        from azure_utils.blob_storage import MetaFile
171        meta_file = MetaFile(filepath='/path/to/file.txt')
172        meta_file.__calc_md5__()
173        # This will return the md5 hash of `/path/to/file.txt`
174        ```
175        """
176        if not os.path.isfile(self.filepath):
177            return None
178        with open(self.filepath, "rb") as blob_file:
179            return hashlib.md5(blob_file.read()).hexdigest()
180
181    def validate_md5(self) -> bool:
182        """
183
184        Function:
185
186        - Validates the md5 hash of the file
187
188        Parameters:
189
190        - `None`
191
192        Returns:
193
194        - Whether the md5 hash of the file is valid
195
196
197        """
198        return self.data["md5"] == self.__calc_md5__()
199
200    def update(self, include_md5: bool = True) -> None:
201        """
202        Function:
203
204        - Updates the meta file
205
206        Parameters:
207
208        - `include_md5` (bool): Whether to include the md5 hash of the file
209
210        Returns:
211
212        - `None`
213
214        Example:
215
216        ```python
217        from azure_utils.blob_storage import MetaFile
218        meta_file = MetaFile(filepath='/path/to/file.txt')
219        meta_file.data['my_meta_key'] = 'my_meta_value'
220        meta_file.update()
221        # This will create a file at `/path/to/.meta.file.txt`
222        # The file will contain the following json:
223        # {
224        #     "my_meta_key": "my_meta_value",
225        # }
226        ```
227        """
228        if include_md5:
229            self.data["md5"] = self.__calc_md5__()
230        with open(self.meta_filepath, "w") as meta_file:
231            json.dump(self.data, meta_file)
MetaFile(filepath: str)
63    def __init__(self, filepath: str):
64        """
65        Function:
66
67        - Creates a MetaFile object
68        - Can be used to associate arbitrary meta data to any file
69        - Includes a built in method to calculate the md5 hash of the file
70
71        Parameters:
72
73        - `filepath` (str): The path to the file
74
75        Returns:
76
77        - The MetaFile object
78
79        Example:
80
81        ```python
82        from azure_utils.blob_storage import MetaFile
83        meta_file = MetaFile(filepath='/path/to/file.txt')
84        meta_file.data['my_meta_key'] = 'my_meta_value'
85        meta_file.update()
86        # This will create a file at `/path/to/.meta.file.txt`
87        # The file will contain the following json:
88        # {
89        #     "my_meta_key": "my_meta_value",
90        # }
91        ```
92        """
93        validate_path(path=filepath, is_remote=False, is_folder=False)
94        self.filepath = filepath
95        self.meta_filepath = self.__get_meta_filepath__(filepath)
96        self.data = (
97            self.__get_data__() if os.path.isfile(self.meta_filepath) else {}
98        )

Function:

  • Creates a MetaFile object
  • Can be used to associate arbitrary meta data to any file
  • Includes a built in method to calculate the md5 hash of the file

Parameters:

  • filepath (str): The path to the file

Returns:

  • The MetaFile object

Example:

from azure_utils.blob_storage import MetaFile
meta_file = MetaFile(filepath='/path/to/file.txt')
meta_file.data['my_meta_key'] = 'my_meta_value'
meta_file.update()
# This will create a file at `/path/to/.meta.file.txt`
# The file will contain the following json:
# {
#     "my_meta_key": "my_meta_value",
# }
filepath
meta_filepath
data
def validate_md5(self) -> bool:
181    def validate_md5(self) -> bool:
182        """
183
184        Function:
185
186        - Validates the md5 hash of the file
187
188        Parameters:
189
190        - `None`
191
192        Returns:
193
194        - Whether the md5 hash of the file is valid
195
196
197        """
198        return self.data["md5"] == self.__calc_md5__()

Function:

  • Validates the md5 hash of the file

Parameters:

  • None

Returns:

  • Whether the md5 hash of the file is valid
def update(self, include_md5: bool = True) -> None:
200    def update(self, include_md5: bool = True) -> None:
201        """
202        Function:
203
204        - Updates the meta file
205
206        Parameters:
207
208        - `include_md5` (bool): Whether to include the md5 hash of the file
209
210        Returns:
211
212        - `None`
213
214        Example:
215
216        ```python
217        from azure_utils.blob_storage import MetaFile
218        meta_file = MetaFile(filepath='/path/to/file.txt')
219        meta_file.data['my_meta_key'] = 'my_meta_value'
220        meta_file.update()
221        # This will create a file at `/path/to/.meta.file.txt`
222        # The file will contain the following json:
223        # {
224        #     "my_meta_key": "my_meta_value",
225        # }
226        ```
227        """
228        if include_md5:
229            self.data["md5"] = self.__calc_md5__()
230        with open(self.meta_filepath, "w") as meta_file:
231            json.dump(self.data, meta_file)

Function:

  • Updates the meta file

Parameters:

  • include_md5 (bool): Whether to include the md5 hash of the file

Returns:

  • None

Example:

from azure_utils.blob_storage import MetaFile
meta_file = MetaFile(filepath='/path/to/file.txt')
meta_file.data['my_meta_key'] = 'my_meta_value'
meta_file.update()
# This will create a file at `/path/to/.meta.file.txt`
# The file will contain the following json:
# {
#     "my_meta_key": "my_meta_value",
# }
class AZBlob:
234class AZBlob:
235    def __init__(self, blob_client):
236        """
237        Function:
238
239        - Creates an AZBlob object
240
241        Parameters:
242
243        - `blob_client` (azure.storage.blob.BlobClient): The blob client
244
245        Returns:
246
247        - The AZBlob object
248
249        Example:
250
251        ```python
252        from azure.storage.blob import BlobClient
253        from azure_utils.blob_storage import AZBlob
254        myblob = AZBlob(
255            blob_client=BlobClient.from_connection_string(
256                conn_str="my_connection_string",
257                container_name="my_container",
258                blob_name="/path/to/my/blob/file.txt"
259            )
260        )
261        myblob.download(filepath='/path/to/file.txt')
262        ```
263        """
264        self.blob_client = blob_client
265
266    def download(self, filepath) -> None:
267        """
268        Function:
269
270        - Downloads a blob from the remote to a local file
271
272        Parameters:
273
274        - `filepath` (str): The path to the local file
275
276        Returns:
277
278        - `None`
279        """
280        validate_path(path=filepath, is_remote=False, is_folder=False)
281        os.makedirs(os.path.dirname(filepath), exist_ok=True)
282        with open(filepath, "wb") as blob_file:
283            self.blob_client.download_blob().readinto(blob_file)
284
285    def delete(self) -> None:
286        """
287        Function:
288
289        - Deletes a blob from the remote
290
291        Parameters:
292
293        - `None`
294
295        Returns:
296
297        - `None`
298        """
299        self.blob_client.delete_blob(delete_snapshots="include")
AZBlob(blob_client)
235    def __init__(self, blob_client):
236        """
237        Function:
238
239        - Creates an AZBlob object
240
241        Parameters:
242
243        - `blob_client` (azure.storage.blob.BlobClient): The blob client
244
245        Returns:
246
247        - The AZBlob object
248
249        Example:
250
251        ```python
252        from azure.storage.blob import BlobClient
253        from azure_utils.blob_storage import AZBlob
254        myblob = AZBlob(
255            blob_client=BlobClient.from_connection_string(
256                conn_str="my_connection_string",
257                container_name="my_container",
258                blob_name="/path/to/my/blob/file.txt"
259            )
260        )
261        myblob.download(filepath='/path/to/file.txt')
262        ```
263        """
264        self.blob_client = blob_client

Function:

  • Creates an AZBlob object

Parameters:

  • blob_client (azure.storage.blob.BlobClient): The blob client

Returns:

  • The AZBlob object

Example:

from azure.storage.blob import BlobClient
from azure_utils.blob_storage import AZBlob
myblob = AZBlob(
    blob_client=BlobClient.from_connection_string(
        conn_str="my_connection_string",
        container_name="my_container",
        blob_name="/path/to/my/blob/file.txt"
    )
)
myblob.download(filepath='/path/to/file.txt')
blob_client
def download(self, filepath) -> None:
266    def download(self, filepath) -> None:
267        """
268        Function:
269
270        - Downloads a blob from the remote to a local file
271
272        Parameters:
273
274        - `filepath` (str): The path to the local file
275
276        Returns:
277
278        - `None`
279        """
280        validate_path(path=filepath, is_remote=False, is_folder=False)
281        os.makedirs(os.path.dirname(filepath), exist_ok=True)
282        with open(filepath, "wb") as blob_file:
283            self.blob_client.download_blob().readinto(blob_file)

Function:

  • Downloads a blob from the remote to a local file

Parameters:

  • filepath (str): The path to the local file

Returns:

  • None
def delete(self) -> None:
285    def delete(self) -> None:
286        """
287        Function:
288
289        - Deletes a blob from the remote
290
291        Parameters:
292
293        - `None`
294
295        Returns:
296
297        - `None`
298        """
299        self.blob_client.delete_blob(delete_snapshots="include")

Function:

  • Deletes a blob from the remote

Parameters:

  • None

Returns:

  • None
class MetaBlob(AZBlob):
302class MetaBlob(AZBlob):
303    def __init__(
304        self,
305        blob_client,
306        filepath: str,
307        smart_sync: bool = False,
308        remote_etag: [str, None] = None,
309        overwrite: bool = False,
310    ):
311        """
312        Function:
313
314        - Creates a MetaBlob object
315
316        Parameters:
317
318        - `blob_client` (azure.storage.blob.BlobClient): The blob client
319        - `filepath` (str): The path to the local file
320        - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
321            - Optional: Defaults to `False`
322        - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
323            - Optional: Defaults to the current etag of the remote blob
324        - `overwrite` (bool): Whether to overwrite the local file if it already exists
325            - Optional: Defaults to `False`
326
327        Returns:
328
329        - The MetaBlob object
330
331        Example:
332
333        ```python
334
335        from azure.storage.blob import BlobClient
336        from azure_utils.blob_storage import MetaBlob
337        myblob = MetaBlob(
338            blob_client=BlobClient.from_connection_string(
339                conn_str="my_connection_string",
340                container_name="my_container",
341                blob_name="/path/to/my/blob/file.txt"
342            ),
343            filepath='/path/to/file.txt',
344            smart_sync=True,
345            overwrite=True
346        )
347        myblob.download()
348        """
349        super().__init__(blob_client)
350        validate_path(path=filepath, is_remote=False, is_folder=False)
351        self.filepath = filepath
352        self.meta = MetaFile(filepath)
353        if remote_etag is None:
354            self.update_etag()
355        else:
356            self.remote_etag = remote_etag
357        self.block_download = False
358        if smart_sync:
359            if self.meta.data.get("etag") == self.remote_etag:
360                if self.meta.validate_md5():
361                    self.block_download = True
362        if not overwrite and os.path.isfile(filepath):
363            self.block_download = True
364
365    def update_etag(self) -> None:
366        """
367        Function:
368
369        - Updates the local etag with the current etag of the remote blob
370
371        Parameters:
372
373        - `None`
374
375        Returns:
376
377        - `None`
378        """
379        self.remote_etag = self.blob_client.get_blob_properties().etag
380
381    def update_meta(self) -> None:
382        """
383        Function:
384
385        - Updates the local meta file
386
387        Parameters:
388
389        - `None`
390
391        Returns:
392
393        - `None`
394        """
395        self.meta.data["etag"] = self.remote_etag
396        self.meta.update(include_md5=True)
397
398    def download(self) -> None:
399        """
400        Function:
401
402        - Downloads a blob from the remote to a local file
403
404        Parameters:
405
406        - `None`
407
408        Returns:
409
410        - `None`
411        """
412        if self.block_download:
413            return
414        super().download(filepath=self.filepath)
415        self.update_meta()
MetaBlob( blob_client, filepath: str, smart_sync: bool = False, remote_etag: [<class 'str'>, None] = None, overwrite: bool = False)
303    def __init__(
304        self,
305        blob_client,
306        filepath: str,
307        smart_sync: bool = False,
308        remote_etag: [str, None] = None,
309        overwrite: bool = False,
310    ):
311        """
312        Function:
313
314        - Creates a MetaBlob object
315
316        Parameters:
317
318        - `blob_client` (azure.storage.blob.BlobClient): The blob client
319        - `filepath` (str): The path to the local file
320        - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
321            - Optional: Defaults to `False`
322        - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
323            - Optional: Defaults to the current etag of the remote blob
324        - `overwrite` (bool): Whether to overwrite the local file if it already exists
325            - Optional: Defaults to `False`
326
327        Returns:
328
329        - The MetaBlob object
330
331        Example:
332
333        ```python
334
335        from azure.storage.blob import BlobClient
336        from azure_utils.blob_storage import MetaBlob
337        myblob = MetaBlob(
338            blob_client=BlobClient.from_connection_string(
339                conn_str="my_connection_string",
340                container_name="my_container",
341                blob_name="/path/to/my/blob/file.txt"
342            ),
343            filepath='/path/to/file.txt',
344            smart_sync=True,
345            overwrite=True
346        )
347        myblob.download()
348        """
349        super().__init__(blob_client)
350        validate_path(path=filepath, is_remote=False, is_folder=False)
351        self.filepath = filepath
352        self.meta = MetaFile(filepath)
353        if remote_etag is None:
354            self.update_etag()
355        else:
356            self.remote_etag = remote_etag
357        self.block_download = False
358        if smart_sync:
359            if self.meta.data.get("etag") == self.remote_etag:
360                if self.meta.validate_md5():
361                    self.block_download = True
362        if not overwrite and os.path.isfile(filepath):
363            self.block_download = True

Function:

  • Creates a MetaBlob object

Parameters:

  • blob_client (azure.storage.blob.BlobClient): The blob client
  • filepath (str): The path to the local file
  • smart_sync (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
    • Optional: Defaults to False
  • remote_etag (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
    • Optional: Defaults to the current etag of the remote blob
  • overwrite (bool): Whether to overwrite the local file if it already exists
    • Optional: Defaults to False

Returns:

  • The MetaBlob object

Example:

from azure.storage.blob import BlobClient
from azure_utils.blob_storage import MetaBlob
myblob = MetaBlob(
    blob_client=BlobClient.from_connection_string(
        conn_str="my_connection_string",
        container_name="my_container",
        blob_name="/path/to/my/blob/file.txt"
    ),
    filepath='/path/to/file.txt',
    smart_sync=True,
    overwrite=True
)
myblob.download()

filepath
meta
block_download
def update_etag(self) -> None:
365    def update_etag(self) -> None:
366        """
367        Function:
368
369        - Updates the local etag with the current etag of the remote blob
370
371        Parameters:
372
373        - `None`
374
375        Returns:
376
377        - `None`
378        """
379        self.remote_etag = self.blob_client.get_blob_properties().etag

Function:

  • Updates the local etag with the current etag of the remote blob

Parameters:

  • None

Returns:

  • None
def update_meta(self) -> None:
381    def update_meta(self) -> None:
382        """
383        Function:
384
385        - Updates the local meta file
386
387        Parameters:
388
389        - `None`
390
391        Returns:
392
393        - `None`
394        """
395        self.meta.data["etag"] = self.remote_etag
396        self.meta.update(include_md5=True)

Function:

  • Updates the local meta file

Parameters:

  • None

Returns:

  • None
def download(self) -> None:
398    def download(self) -> None:
399        """
400        Function:
401
402        - Downloads a blob from the remote to a local file
403
404        Parameters:
405
406        - `None`
407
408        Returns:
409
410        - `None`
411        """
412        if self.block_download:
413            return
414        super().download(filepath=self.filepath)
415        self.update_meta()

Function:

  • Downloads a blob from the remote to a local file

Parameters:

  • None

Returns:

  • None
Inherited Members
AZBlob
blob_client
delete