azure_utils.utils.blob_storage_utils
import hashlib
import json
import os
import pathlib


def validate_path(path: str, is_remote: bool, is_folder: bool) -> None:
    """
    Function:

    - Validates a path string
    - The path is normalised with `pathlib.PurePosixPath`; a trailing slash is
      restored afterwards because it is what marks a path as a folder

    Parameters:

    - `path` (str): The path to validate
    - `is_remote` (bool): Whether the path is remote or local
        - Remote paths must already be in normalised posix form
        - Local paths must be absolute and must not be the root folder
    - `is_folder` (bool): Whether the path is a folder or file

    Returns:

    - `None`

    Raises:

    - `ValueError`: If the path is invalid (a subclass of `Exception`, so
      existing `except Exception` handlers keep working)

    Example:

    ```python
    from azure_utils.blob_storage import validate_path
    validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
    # This will return `None`
    # This will raise an exception if the path is invalid
    ```
    """
    posix_path = str(pathlib.PurePosixPath(path))
    # PurePosixPath drops trailing separators, so put one back (except for root).
    if path.endswith(("/", "\\")) and posix_path != "/":
        posix_path += "/"
    if is_remote:
        if posix_path != path:
            raise ValueError(
                f"Path Error: Remote storage uses posix paths. Received: `{path}`"
            )
    else:
        if posix_path == "/":
            raise ValueError(
                f"Path Error: Cannot use the root folder for local storage. Received: `{path}`"
            )
        if not posix_path.startswith("/"):
            raise ValueError(
                f"Path Error: Path must be absolute. Received: `{path}`"
            )
    if is_folder and not posix_path.endswith("/"):
        raise ValueError(
            f"Path Error: A folder is required for this path. You may need to include a trailing slash. Received: `{path}`"
        )
    if not is_folder and posix_path.endswith("/"):
        raise ValueError(
            f"Path Error: A file is required for this path. Received: `{path}`"
        )


class MetaFile:
    def __init__(self, filepath: str):
        """
        Function:

        - Creates a MetaFile object
        - Can be used to associate arbitrary meta data to any file
        - Includes a built in method to calculate the md5 hash of the file
        - Existing meta data is loaded if the meta file is already on disk

        Parameters:

        - `filepath` (str): The absolute local path to the file

        Returns:

        - The MetaFile object

        Raises:

        - `ValueError`: If `filepath` is not a valid local file path

        Example:

        ```python
        from azure_utils.blob_storage import MetaFile
        meta_file = MetaFile(filepath='/path/to/file.txt')
        meta_file.data['my_meta_key'] = 'my_meta_value'
        meta_file.update()
        # This will create a file at `/path/to/.meta.file.txt`
        # The file will contain `my_meta_key` plus an `md5` entry
        ```
        """
        validate_path(path=filepath, is_remote=False, is_folder=False)
        self.filepath = filepath
        self.meta_filepath = self.__get_meta_filepath__(filepath)
        self.data = (
            self.__get_data__() if os.path.isfile(self.meta_filepath) else {}
        )

    def __get_meta_filepath__(self, filepath: str) -> str:
        """
        Function:

        - Gets the path to the meta file (same folder, name prefixed with `.meta.`)

        Parameters:

        - `filepath` (str): The path to the file

        Returns:

        - The path to the meta file, e.g. `/path/to/.meta.file.txt`
        """
        return os.path.join(
            os.path.dirname(filepath), ".meta." + os.path.basename(filepath)
        )

    def __get_data__(self) -> dict:
        """
        Function:

        - Gets the data from the meta file

        Returns:

        - The parsed json content of the meta file
        """
        with open(self.meta_filepath, "r", encoding="utf-8") as meta_file:
            return json.load(meta_file)

    def __calc_md5__(self) -> "str | None":
        """
        Function:

        - Calculates the md5 hash of the file
        - The file is hashed in chunks so large files are not loaded into memory

        Returns:

        - The md5 hex digest of the file, or `None` if the file does not exist
        """
        if not os.path.isfile(self.filepath):
            return None
        digest = hashlib.md5()
        with open(self.filepath, "rb") as blob_file:
            for chunk in iter(lambda: blob_file.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def validate_md5(self) -> bool:
        """
        Function:

        - Validates the md5 hash of the file against the hash stored in the meta data

        Returns:

        - `True` only if the file exists and its hash equals the stored `md5`
        - `False` if the file is missing or no `md5` has been stored
        """
        current = self.__calc_md5__()
        # A missing file can never be valid, even if the stored hash is None.
        return current is not None and self.data.get("md5") == current

    def update(self, include_md5: bool = True) -> None:
        """
        Function:

        - Updates the meta file with the current content of `data`

        Parameters:

        - `include_md5` (bool): Whether to refresh the `md5` entry from the file first

        Returns:

        - `None`
        """
        if include_md5:
            self.data["md5"] = self.__calc_md5__()
        with open(self.meta_filepath, "w", encoding="utf-8") as meta_file:
            json.dump(self.data, meta_file)


class AZBlob:
    def __init__(self, blob_client):
        """
        Function:

        - Creates an AZBlob object

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client

        Returns:

        - The AZBlob object

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import AZBlob
        myblob = AZBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            )
        )
        myblob.download(filepath='/path/to/file.txt')
        ```
        """
        self.blob_client = blob_client

    def download(self, filepath) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file
        - Parent folders are created when missing
        - The data is written to `<filepath>.part` and moved into place only
          after the transfer succeeds, so a failed download never leaves a
          truncated file at `filepath`

        Parameters:

        - `filepath` (str): The path to the local file

        Returns:

        - `None`

        Raises:

        - `ValueError`: If `filepath` is not a valid local file path
        """
        validate_path(path=filepath, is_remote=False, is_folder=False)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        partial_path = filepath + ".part"
        try:
            with open(partial_path, "wb") as blob_file:
                self.blob_client.download_blob().readinto(blob_file)
            os.replace(partial_path, filepath)
        finally:
            # Only still present when the transfer or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def delete(self) -> None:
        """
        Function:

        - Deletes a blob from the remote, including its snapshots

        Returns:

        - `None`
        """
        self.blob_client.delete_blob(delete_snapshots="include")


class MetaBlob(AZBlob):
    def __init__(
        self,
        blob_client,
        filepath: str,
        smart_sync: bool = False,
        remote_etag: "str | None" = None,
        overwrite: bool = False,
    ):
        """
        Function:

        - Creates a MetaBlob object
        - Decides up front whether `download()` should be skipped (`block_download`)

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client
        - `filepath` (str): The path to the local file
        - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
            - Optional: Defaults to `False`
        - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
            - Optional: Defaults to the current etag of the remote blob
        - `overwrite` (bool): Whether to overwrite the local file if it already exists
            - Optional: Defaults to `False`

        Returns:

        - The MetaBlob object

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import MetaBlob
        myblob = MetaBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            ),
            filepath='/path/to/file.txt',
            smart_sync=True,
            overwrite=True
        )
        myblob.download()
        ```
        """
        super().__init__(blob_client)
        validate_path(path=filepath, is_remote=False, is_folder=False)
        self.filepath = filepath
        self.meta = MetaFile(filepath)
        if remote_etag is None:
            self.update_etag()
        else:
            self.remote_etag = remote_etag
        # The md5 check is only run when the etag already matches.
        already_synced = (
            smart_sync
            and self.meta.data.get("etag") == self.remote_etag
            and self.meta.validate_md5()
        )
        keep_existing = not overwrite and os.path.isfile(filepath)
        self.block_download = bool(already_synced or keep_existing)

    def update_etag(self) -> None:
        """
        Function:

        - Updates the local etag with the current etag of the remote blob

        Returns:

        - `None`
        """
        self.remote_etag = self.blob_client.get_blob_properties().etag

    def update_meta(self) -> None:
        """
        Function:

        - Updates the local meta file with the known remote etag and a fresh md5

        Returns:

        - `None`
        """
        self.meta.data["etag"] = self.remote_etag
        self.meta.update(include_md5=True)

    def download(self) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file and refreshes the meta file
        - Does nothing when the constructor decided the download is not needed

        Returns:

        - `None`
        """
        if self.block_download:
            return
        super().download(filepath=self.filepath)
        self.update_meta()
def validate_path(path: str, is_remote: bool, is_folder: bool) -> None:
    """
    Function:

    - Validates a path string
    - The path is normalised with `pathlib.PurePosixPath`; a trailing slash is
      restored afterwards because it is what marks a path as a folder

    Parameters:

    - `path` (str): The path to validate
    - `is_remote` (bool): Whether the path is remote or local
        - Remote paths must already be in normalised posix form
        - Local paths must be absolute and must not be the root folder
    - `is_folder` (bool): Whether the path is a folder or file

    Returns:

    - `None`

    Raises:

    - `ValueError`: If the path is invalid (a subclass of `Exception`, so
      existing `except Exception` handlers keep working)

    Example:

    ```python
    from azure_utils.blob_storage import validate_path
    validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
    # This will return `None`
    # This will raise an exception if the path is invalid
    ```
    """
    posix_path = str(pathlib.PurePosixPath(path))
    # PurePosixPath drops trailing separators, so put one back (except for root).
    if path.endswith(("/", "\\")) and posix_path != "/":
        posix_path += "/"
    if is_remote:
        if posix_path != path:
            raise ValueError(
                f"Path Error: Remote storage uses posix paths. Received: `{path}`"
            )
    else:
        if posix_path == "/":
            raise ValueError(
                f"Path Error: Cannot use the root folder for local storage. Received: `{path}`"
            )
        if not posix_path.startswith("/"):
            raise ValueError(
                f"Path Error: Path must be absolute. Received: `{path}`"
            )
    if is_folder and not posix_path.endswith("/"):
        raise ValueError(
            f"Path Error: A folder is required for this path. You may need to include a trailing slash. Received: `{path}`"
        )
    if not is_folder and posix_path.endswith("/"):
        raise ValueError(
            f"Path Error: A file is required for this path. Received: `{path}`"
        )
Function:
Validates a path string
Parameters:
path (str): The path to validate
is_remote (bool): Whether the path is remote or local
is_folder (bool): Whether the path is a folder or file
Returns:
None
Raises:
Exception
: If the path is invalid
Example:
from azure_utils.blob_storage import validate_path
validate_path(path='/path/to/folder/', is_remote=True, is_folder=True)
# This will return `None`
# This will raise an exception if the path is invalid
class MetaFile:
    def __init__(self, filepath: str):
        """
        Function:

        - Creates a MetaFile object
        - Can be used to associate arbitrary meta data to any file
        - Includes a built in method to calculate the md5 hash of the file
        - Existing meta data is loaded if the meta file is already on disk

        Parameters:

        - `filepath` (str): The absolute local path to the file

        Returns:

        - The MetaFile object

        Raises:

        - `Exception`: If `validate_path` rejects `filepath`

        Example:

        ```python
        from azure_utils.blob_storage import MetaFile
        meta_file = MetaFile(filepath='/path/to/file.txt')
        meta_file.data['my_meta_key'] = 'my_meta_value'
        meta_file.update()
        # This will create a file at `/path/to/.meta.file.txt`
        # The file will contain `my_meta_key` plus an `md5` entry
        ```
        """
        validate_path(path=filepath, is_remote=False, is_folder=False)
        self.filepath = filepath
        self.meta_filepath = self.__get_meta_filepath__(filepath)
        self.data = (
            self.__get_data__() if os.path.isfile(self.meta_filepath) else {}
        )

    def __get_meta_filepath__(self, filepath: str) -> str:
        """
        Function:

        - Gets the path to the meta file (same folder, name prefixed with `.meta.`)

        Parameters:

        - `filepath` (str): The path to the file

        Returns:

        - The path to the meta file, e.g. `/path/to/.meta.file.txt`
        """
        return os.path.join(
            os.path.dirname(filepath), ".meta." + os.path.basename(filepath)
        )

    def __get_data__(self) -> dict:
        """
        Function:

        - Gets the data from the meta file

        Returns:

        - The parsed json content of the meta file
        """
        with open(self.meta_filepath, "r", encoding="utf-8") as meta_file:
            return json.load(meta_file)

    def __calc_md5__(self) -> "str | None":
        """
        Function:

        - Calculates the md5 hash of the file
        - The file is hashed in chunks so large files are not loaded into memory

        Returns:

        - The md5 hex digest of the file, or `None` if the file does not exist
        """
        if not os.path.isfile(self.filepath):
            return None
        digest = hashlib.md5()
        with open(self.filepath, "rb") as blob_file:
            for chunk in iter(lambda: blob_file.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def validate_md5(self) -> bool:
        """
        Function:

        - Validates the md5 hash of the file against the hash stored in the meta data

        Returns:

        - `True` only if the file exists and its hash equals the stored `md5`
        - `False` if the file is missing or no `md5` has been stored
        """
        current = self.__calc_md5__()
        # A missing file can never be valid, even if the stored hash is None.
        return current is not None and self.data.get("md5") == current

    def update(self, include_md5: bool = True) -> None:
        """
        Function:

        - Updates the meta file with the current content of `data`

        Parameters:

        - `include_md5` (bool): Whether to refresh the `md5` entry from the file first

        Returns:

        - `None`
        """
        if include_md5:
            self.data["md5"] = self.__calc_md5__()
        with open(self.meta_filepath, "w", encoding="utf-8") as meta_file:
            json.dump(self.data, meta_file)
def __init__(self, filepath: str):
    """
    Function:

    - Builds a MetaFile for the given local file
    - Lets arbitrary meta data be attached to the file through `data`
    - Loads previously saved meta data when the meta file is already on disk,
      otherwise starts with an empty dict

    Parameters:

    - `filepath` (str): The path to the file

    Returns:

    - The MetaFile object

    Example:

    ```python
    from azure_utils.blob_storage import MetaFile
    meta_file = MetaFile(filepath='/path/to/file.txt')
    meta_file.data['my_meta_key'] = 'my_meta_value'
    meta_file.update()
    # This will create a file at `/path/to/.meta.file.txt`
    ```
    """
    validate_path(path=filepath, is_remote=False, is_folder=False)
    self.filepath = filepath
    meta_location = self.__get_meta_filepath__(filepath)
    self.meta_filepath = meta_location
    if os.path.isfile(meta_location):
        self.data = self.__get_data__()
    else:
        self.data = {}
Function:
- Creates a MetaFile object
- Can be used to associate arbitrary meta data to any file
- Includes a built in method to calculate the md5 hash of the file
Parameters:
filepath
(str): The path to the file
Returns:
- The MetaFile object
Example:
from azure_utils.blob_storage import MetaFile
meta_file = MetaFile(filepath='/path/to/file.txt')
meta_file.data['my_meta_key'] = 'my_meta_value'
meta_file.update()
# This will create a file at `/path/to/.meta.file.txt`
# The file will contain the following json:
# {
# "my_meta_key": "my_meta_value",
# }
def validate_md5(self) -> bool:
    """
    Function:

    - Validates the md5 hash of the file against the hash stored in the meta data

    Parameters:

    - `None`

    Returns:

    - `True` only if the file exists and its hash equals the stored `md5`
    - `False` if the file is missing or no `md5` has been stored
    """
    current = self.__calc_md5__()
    # A missing file can never be valid, even if the stored hash is None;
    # `.get` avoids a KeyError when the meta data has no `md5` entry yet.
    return current is not None and self.data.get("md5") == current
Function:
- Validates the md5 hash of the file
Parameters:
None
Returns:
- Whether the md5 hash of the file is valid
def update(self, include_md5: bool = True) -> None:
    """
    Function:

    - Writes the current content of `data` to the meta file as json

    Parameters:

    - `include_md5` (bool): Whether to store the md5 hash of the file under
      the `md5` key before writing

    Returns:

    - `None`

    Example:

    ```python
    from azure_utils.blob_storage import MetaFile
    meta_file = MetaFile(filepath='/path/to/file.txt')
    meta_file.data['my_meta_key'] = 'my_meta_value'
    meta_file.update()
    # This will create a file at `/path/to/.meta.file.txt`
    # The file will contain `my_meta_key` and, by default, `md5`
    ```
    """
    if include_md5:
        file_hash = self.__calc_md5__()
        self.data["md5"] = file_hash
    with open(self.meta_filepath, "w") as handle:
        json.dump(self.data, fp=handle)
Function:
- Updates the meta file
Parameters:
include_md5
(bool): Whether to include the md5 hash of the file
Returns:
None
Example:
from azure_utils.blob_storage import MetaFile
meta_file = MetaFile(filepath='/path/to/file.txt')
meta_file.data['my_meta_key'] = 'my_meta_value'
meta_file.update()
# This will create a file at `/path/to/.meta.file.txt`
# The file will contain the following json:
# {
# "my_meta_key": "my_meta_value",
# }
class AZBlob:
    def __init__(self, blob_client):
        """
        Function:

        - Creates an AZBlob object

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client

        Returns:

        - The AZBlob object

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import AZBlob
        myblob = AZBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            )
        )
        myblob.download(filepath='/path/to/file.txt')
        ```
        """
        self.blob_client = blob_client

    def download(self, filepath) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file
        - Parent folders are created when missing
        - The data is written to `<filepath>.part` and moved into place only
          after the transfer succeeds, so a failed download never leaves a
          truncated file at `filepath`

        Parameters:

        - `filepath` (str): The path to the local file

        Returns:

        - `None`

        Raises:

        - `Exception`: If `validate_path` rejects `filepath`
        """
        validate_path(path=filepath, is_remote=False, is_folder=False)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        partial_path = filepath + ".part"
        try:
            with open(partial_path, "wb") as blob_file:
                self.blob_client.download_blob().readinto(blob_file)
            os.replace(partial_path, filepath)
        finally:
            # Only still present when the transfer or the move failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def delete(self) -> None:
        """
        Function:

        - Deletes a blob from the remote, including its snapshots

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.blob_client.delete_blob(delete_snapshots="include")
235 def __init__(self, blob_client): 236 """ 237 Function: 238 239 - Creates an AZBlob object 240 241 Parameters: 242 243 - `blob_client` (azure.storage.blob.BlobClient): The blob client 244 245 Returns: 246 247 - The AZBlob object 248 249 Example: 250 251 ```python 252 from azure.storage.blob import BlobClient 253 from azure_utils.blob_storage import AZBlob 254 myblob = AZBlob( 255 blob_client=BlobClient.from_connection_string( 256 conn_str="my_connection_string", 257 container_name="my_container", 258 blob_name="/path/to/my/blob/file.txt" 259 ) 260 ) 261 myblob.download(filepath='/path/to/file.txt') 262 ``` 263 """ 264 self.blob_client = blob_client
Function:
- Creates an AZBlob object
Parameters:
blob_client
(azure.storage.blob.BlobClient): The blob client
Returns:
- The AZBlob object
Example:
from azure.storage.blob import BlobClient
from azure_utils.blob_storage import AZBlob
myblob = AZBlob(
blob_client=BlobClient.from_connection_string(
conn_str="my_connection_string",
container_name="my_container",
blob_name="/path/to/my/blob/file.txt"
)
)
myblob.download(filepath='/path/to/file.txt')
def download(self, filepath) -> None:
    """
    Function:

    - Downloads a blob from the remote to a local file
    - Parent folders are created when missing
    - The data is written to `<filepath>.part` and moved into place only
      after the transfer succeeds, so a failed download never leaves a
      truncated file at `filepath`

    Parameters:

    - `filepath` (str): The path to the local file

    Returns:

    - `None`

    Raises:

    - `Exception`: If `validate_path` rejects `filepath`
    """
    validate_path(path=filepath, is_remote=False, is_folder=False)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    partial_path = filepath + ".part"
    try:
        with open(partial_path, "wb") as blob_file:
            self.blob_client.download_blob().readinto(blob_file)
        os.replace(partial_path, filepath)
    finally:
        # Only still present when the transfer or the move failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
Function:
- Downloads a blob from the remote to a local file
Parameters:
filepath
(str): The path to the local file
Returns:
None
def delete(self) -> None:
    """
    Function:

    - Removes the blob from the remote, asking the client to include its snapshots

    Parameters:

    - `None`

    Returns:

    - `None`
    """
    client = self.blob_client
    client.delete_blob(delete_snapshots="include")
Function:
- Deletes a blob from the remote
Parameters:
None
Returns:
None
class MetaBlob(AZBlob):
    def __init__(
        self,
        blob_client,
        filepath: str,
        smart_sync: bool = False,
        remote_etag: "str | None" = None,
        overwrite: bool = False,
    ):
        """
        Function:

        - Creates a MetaBlob object
        - Decides up front whether `download()` should be skipped (`block_download`)

        Parameters:

        - `blob_client` (azure.storage.blob.BlobClient): The blob client
        - `filepath` (str): The path to the local file
        - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
            - Optional: Defaults to `False`
        - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
            - Optional: Defaults to the current etag of the remote blob
        - `overwrite` (bool): Whether to overwrite the local file if it already exists
            - Optional: Defaults to `False`

        Returns:

        - The MetaBlob object

        Example:

        ```python
        from azure.storage.blob import BlobClient
        from azure_utils.blob_storage import MetaBlob
        myblob = MetaBlob(
            blob_client=BlobClient.from_connection_string(
                conn_str="my_connection_string",
                container_name="my_container",
                blob_name="/path/to/my/blob/file.txt"
            ),
            filepath='/path/to/file.txt',
            smart_sync=True,
            overwrite=True
        )
        myblob.download()
        ```
        """
        super().__init__(blob_client)
        validate_path(path=filepath, is_remote=False, is_folder=False)
        self.filepath = filepath
        self.meta = MetaFile(filepath)
        if remote_etag is None:
            self.update_etag()
        else:
            self.remote_etag = remote_etag
        # The md5 check is only run when the etag already matches.
        already_synced = (
            smart_sync
            and self.meta.data.get("etag") == self.remote_etag
            and self.meta.validate_md5()
        )
        keep_existing = not overwrite and os.path.isfile(filepath)
        self.block_download = bool(already_synced or keep_existing)

    def update_etag(self) -> None:
        """
        Function:

        - Updates the local etag with the current etag of the remote blob

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.remote_etag = self.blob_client.get_blob_properties().etag

    def update_meta(self) -> None:
        """
        Function:

        - Updates the local meta file with the known remote etag and a fresh md5

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        self.meta.data["etag"] = self.remote_etag
        self.meta.update(include_md5=True)

    def download(self) -> None:
        """
        Function:

        - Downloads a blob from the remote to a local file and refreshes the meta file
        - Does nothing when the constructor decided the download is not needed

        Parameters:

        - `None`

        Returns:

        - `None`
        """
        if self.block_download:
            return
        super().download(filepath=self.filepath)
        self.update_meta()
def __init__(
    self,
    blob_client,
    filepath: str,
    smart_sync: bool = False,
    remote_etag: "str | None" = None,
    overwrite: bool = False,
):
    """
    Function:

    - Creates a MetaBlob object
    - Decides up front whether `download()` should be skipped (`block_download`)

    Parameters:

    - `blob_client` (azure.storage.blob.BlobClient): The blob client
    - `filepath` (str): The path to the local file
    - `smart_sync` (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
        - Optional: Defaults to `False`
    - `remote_etag` (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
        - Optional: Defaults to the current etag of the remote blob
    - `overwrite` (bool): Whether to overwrite the local file if it already exists
        - Optional: Defaults to `False`

    Returns:

    - The MetaBlob object

    Example:

    ```python
    from azure.storage.blob import BlobClient
    from azure_utils.blob_storage import MetaBlob
    myblob = MetaBlob(
        blob_client=BlobClient.from_connection_string(
            conn_str="my_connection_string",
            container_name="my_container",
            blob_name="/path/to/my/blob/file.txt"
        ),
        filepath='/path/to/file.txt',
        smart_sync=True,
        overwrite=True
    )
    myblob.download()
    ```
    """
    super().__init__(blob_client)
    validate_path(path=filepath, is_remote=False, is_folder=False)
    self.filepath = filepath
    self.meta = MetaFile(filepath)
    if remote_etag is None:
        self.update_etag()
    else:
        self.remote_etag = remote_etag
    # The md5 check is only run when the etag already matches.
    already_synced = (
        smart_sync
        and self.meta.data.get("etag") == self.remote_etag
        and self.meta.validate_md5()
    )
    keep_existing = not overwrite and os.path.isfile(filepath)
    self.block_download = bool(already_synced or keep_existing)
Function:
- Creates a MetaBlob object
Parameters:
blob_client (azure.storage.blob.BlobClient): The blob client
filepath (str): The path to the local file
smart_sync (bool): Whether to skip downloading if the remote etag and md5 hash match the local meta file
- Optional: Defaults to False
remote_etag (str): The remote etag. Used to avoid fetching the etag from the remote if it is already known
- Optional: Defaults to the current etag of the remote blob
overwrite (bool): Whether to overwrite the local file if it already exists
- Optional: Defaults to False
Returns:
- The MetaBlob object
Example:
```python
from azure.storage.blob import BlobClient
from azure_utils.blob_storage import MetaBlob
myblob = MetaBlob(
    blob_client=BlobClient.from_connection_string(
        conn_str="my_connection_string",
        container_name="my_container",
        blob_name="/path/to/my/blob/file.txt"
    ),
    filepath='/path/to/file.txt',
    smart_sync=True,
    overwrite=True
)
myblob.download()
```
def update_etag(self) -> None:
    """
    Function:

    - Fetches the blob properties from the remote and stores their etag
      as `self.remote_etag`

    Parameters:

    - `None`

    Returns:

    - `None`
    """
    properties = self.blob_client.get_blob_properties()
    self.remote_etag = properties.etag
Function:
- Updates the local etag with the current etag of the remote blob
Parameters:
None
Returns:
None
def update_meta(self) -> None:
    """
    Function:

    - Records the known remote etag under the `etag` key of the meta data
    - Writes the meta file with the md5 entry included

    Parameters:

    - `None`

    Returns:

    - `None`
    """
    meta = self.meta
    meta.data["etag"] = self.remote_etag
    meta.update(include_md5=True)
Function:
- Updates the local meta file
Parameters:
None
Returns:
None
def download(self) -> None:
    """
    Function:

    - Downloads the blob to `self.filepath` and then updates the meta file
    - Does nothing when `self.block_download` is set

    Parameters:

    - `None`

    Returns:

    - `None`
    """
    if not self.block_download:
        super().download(filepath=self.filepath)
        self.update_meta()
Function:
- Downloads a blob from the remote to a local file
Parameters:
None
Returns:
None