-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathupdate_mapping_and_upload_to_all_blobs.py
More file actions
117 lines (94 loc) · 4.05 KB
/
update_mapping_and_upload_to_all_blobs.py
File metadata and controls
117 lines (94 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# pylint: disable=too-many-locals
"""update_mapping_and_upload_to_all_blobs_if_file_updated"""
import json
import os
import hashlib
from datetime import date
from dotenv import load_dotenv
from azure.storage.blob import BlobServiceClient
from lib.absolute_path import absolute_path
from constants import (
BLOB_FILE_PATH,
FILE_MAPPING_JSON_PATH,
)
# Load variables from the .env file, overriding any values already set in the shell.
load_dotenv(override=True)
# Azure Storage connection strings for the three environments.
# NOTE(review): os.getenv returns None when the variable is unset — presumably the
# .env file always defines all three; verify, since a None here would make
# BlobServiceClient.from_connection_string fail later.
dev_connection_string = os.getenv("CONNECTION_STRING_DEV")
tst_connection_string = os.getenv("CONNECTION_STRING_TST")
prd_connection_string = os.getenv("CONNECTION_STRING_PRD")
def update_mapping_and_upload_to_all_blobs_if_file_updated(
    file_path: str,
    filename: str,
) -> dict[str, dict[str, str]]:
    """
    Updates the file mapping json dictionary and uploads the file to the dev,
    tst and prd Azure blob storage accounts if the file has changed.

    The local file is hashed (SHA-256) and compared against the hash of the
    blob currently stored in the dev account. On a difference (or when the
    file is not yet tracked) a new versioned blob name is created and the
    content is uploaded to all three environments.

    Args:
        file_path (str): The file path of file.
        filename (str): The file name of the file.

    Returns:
        dict: The updated file mapping dictionary.
    """
    # Container name is the second-to-last path segment
    # (currently not using this method for html files — text only).
    container_name = file_path.split("/")[-2]

    with open(absolute_path(FILE_MAPPING_JSON_PATH), "rb") as json_file:
        file_mapping = json.load(json_file)

    # Read the whole file once; everything below works on these bytes, so the
    # handle can be closed immediately (the original kept it open across the
    # network uploads and seek(0)-ed a handle it never re-read — dead code).
    with open(file_path, "rb") as data:
        file_content = data.read()
    hash_key_of_local_file = hashlib.sha256(file_content).hexdigest()

    dev_blob_service_client = BlobServiceClient.from_connection_string(
        dev_connection_string
    )

    if filename in file_mapping:  # file exists in blob storage
        blob_info = file_mapping[filename]
        # Blob paths look like "<container>/v_<version>_<date>_<filename>",
        # so the version is the second "_"-separated token.
        version = int(blob_info[BLOB_FILE_PATH].split("_")[1])
        blob_name = blob_info[BLOB_FILE_PATH].split("/")[-1]
        dev_blob_client = dev_blob_service_client.get_blob_client(
            container_name, blob_name
        )
        # Download the current dev blob and hash it for comparison.
        downloader = dev_blob_client.download_blob(
            max_concurrency=1, encoding="UTF-8"
        )
        blob_content = downloader.readall()
        hash_key_of_blob_file = hashlib.sha256(
            blob_content.encode("utf-8")
        ).hexdigest()
        if hash_key_of_local_file != hash_key_of_blob_file:
            new_version = version + 1
        else:
            # Fix: the message was an f-string with no interpolation and
            # never named the unchanged file.
            print(f"No changes to file {filename}")
            return file_mapping
    else:
        new_version = 1  # first upload of this file

    # Construct the new blob file name with the new version.
    # Fix: the filename placeholder was missing, so every file would have
    # been given the same literal suffix.
    new_blob_file_name = f"v_{new_version}_{date.today()}_{filename}"
    print(f"Uploading {new_blob_file_name}")

    # Upload the same content to every environment. One loop over the three
    # connection strings replaces the original's triplicated client setup.
    for connection_string in (
        dev_connection_string,
        tst_connection_string,
        prd_connection_string,
    ):
        blob_client = BlobServiceClient.from_connection_string(
            connection_string
        ).get_blob_client(container=container_name, blob=new_blob_file_name)
        blob_client.upload_blob(file_content, overwrite=True)
    print(f"Uploaded {new_blob_file_name}")

    # Record the new blob path so callers can persist the updated mapping.
    file_mapping[filename] = {
        BLOB_FILE_PATH: f"{container_name}/{new_blob_file_name}",
    }
    return file_mapping