Skip to content

Commit 3c89d81

Browse files
committed
ADDED: sharepoint file download
1 parent 89c9a55 commit 3c89d81

10 files changed

Lines changed: 222 additions & 57 deletions

File tree

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ pandas
44
openpyxl
55
PyCryptodome
66
Office365-REST-Python-Client
7-
pyfiglet
7+
pyfiglet
8+
requests

secrets/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*
2+
!.gitignore

src/filesystem.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,73 @@
66
PROOFS_OUTPUT_NAME, CONTRACTS_OUTPUT_NAME, RNTS_OUTPUT_NAME, RLCS_OUTPUT_NAME
77

88

9+
def read_env_var(var_name):
    """
    Reads an environment variable.

    Args:
        var_name (str): Name of the environment variable.

    Returns:
        str: The value of the environment variable, unmodified.

    Raises:
        KeyError: If the environment variable does not exist.
        ValueError: If the environment variable is empty or contains only whitespace.
    """
    # EAFP: a single lookup instead of a membership test followed by a read.
    try:
        value = os.environ[var_name]
    except KeyError:
        raise KeyError(f"The environment variable '{var_name}' does not exist.") from None

    # strip() makes a whitespace-only value count as empty, as documented.
    if not value.strip():
        raise ValueError(f"The environment variable '{var_name}' is empty.")

    return value
35+
36+
37+
def read_file_content(file_path):
    """
    Reads a file and rejects it when it holds no usable content.

    Args:
        file_path (str): Path of the file to read.

    Returns:
        str: The content of the file, unmodified.

    Raises:
        ValueError: If the file is empty or contains only whitespace.
        Exception: Whatever read_file raises for a missing or unreadable file.
    """
    content = read_file(file_path)

    # strip() makes a whitespace-only file (e.g. a lone newline) count as empty.
    if not content.strip():
        raise ValueError(f"The file '{file_path}' is empty.")

    return content
44+
45+
46+
def read_file(file_path):
    """
    Reads a text file and returns its content.

    The file is opened directly rather than pre-checked with os.path.exists /
    os.access, so there is no window in which the file can change between the
    check and the read. It is decoded as UTF-8 so the result does not depend
    on the locale of the machine running the code.

    Args:
        file_path (str): Path of the file to read.

    Returns:
        str: The content of the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        PermissionError: If the file cannot be read due to permission issues.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(f"The file '{file_path}' does not exist.") from err
    except PermissionError as err:
        raise PermissionError(f"The file '{file_path}' cannot be read. Check permissions.") from err
74+
75+
976
def ensure_output_gitignore():
1077
# Ensure existence of .gitignore
1178
gitignore_path = os.path.join(GENERAL_OUTPUT_FOLDER, '.gitignore')

src/secret.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import sys
2+
import os
3+
4+
from filesystem import read_file_content, read_env_var
5+
6+
7+
def read_secret(secret_name):
    """
    Retrieve a secret from predefined sources in order of priority.

    Sources, tried in this order; the first one that yields a value wins:
      1. the file /run/secrets/<secret_name>
      2. the file secrets/<secret_name> in the parent of this module's directory
      3. the environment variable <secret_name>

    Args:
        secret_name (str): Name of the secret (file name / variable name).

    Returns:
        str: The secret value from the first source that provides one.

    Raises:
        SystemExit: With exit code 1 when no source provides the secret. The
            reason each source failed is written to stderr first.
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sources = [
        lambda: read_file_content(f"/run/secrets/{secret_name}"),
        lambda: read_file_content(os.path.join(project_root, "secrets", secret_name)),
        lambda: read_env_var(secret_name),
    ]

    failures = []
    for source in sources:
        try:
            return source()
        except (OSError, KeyError, ValueError) as err:
            # Expected "not available here" outcomes: missing/unreadable file,
            # missing variable, empty value. Keep the reason for the final report
            # instead of discarding it.
            failures.append(f"{type(err).__name__}: {err}")

    print(f"Could not read {secret_name} from any source", file=sys.stderr)
    for failure in failures:
        print(f"  - {failure}", file=sys.stderr)
    sys.exit(1)
29+

src/sharepoint.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import os
2+
import time
3+
4+
import requests
5+
from requests.exceptions import HTTPError
6+
7+
from secret import read_secret
8+
9+
10+
11+
def get_access_token(tenant_id, client_id, client_secret, timeout=30):
    """
    Request an access token using the OAuth2 client-credentials grant.

    Posts to the tenant's login.microsoftonline.com v2.0 token endpoint with
    the Microsoft Graph '.default' scope.

    Args:
        tenant_id (str): Tenant identifier used in the token URL.
        client_id (str): Application (client) identifier.
        client_secret (str): Application secret.
        timeout (float | tuple): Seconds to wait for the server, passed to
            requests. Without it a stalled connection would block forever.

    Returns:
        str: The 'access_token' field of the JSON response.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an error status.
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If the response has no 'access_token' field.
    """
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    token_data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
        'scope': 'https://graph.microsoft.com/.default',
    }
    response = requests.post(token_url, data=token_data, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']
22+
23+
24+
def get_site_id(access_token, domain, site_name, timeout=30):
    """
    Look up the Microsoft Graph id of a SharePoint site.

    Args:
        access_token (str): Bearer token sent in the Authorization header.
        domain (str): SharePoint host name used in the URL.
        site_name (str): Site name, addressed as /sites/<site_name>.
        timeout (float | tuple): Seconds to wait for the server, passed to
            requests. Without it a stalled connection would block forever.

    Returns:
        str: The 'id' field of the JSON response.

    Raises:
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If the response has no 'id' field.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/sites/{site_name}"
    headers = {"Authorization": f"Bearer {access_token}"}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()['id']
30+
31+
32+
def get_drive_id(access_token, site_id, drive_name="Documents", timeout=30):
    """
    Find the id of a site's drive by its name.

    Args:
        access_token (str): Bearer token sent in the Authorization header.
        site_id (str): Graph id of the site whose drives are listed.
        drive_name (str): Exact 'name' of the drive to look for.
        timeout (float | tuple): Seconds to wait for the server, passed to
            requests. Without it a stalled connection would block forever.

    Returns:
        str: The 'id' of the first drive whose 'name' equals drive_name.

    Raises:
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If the server does not answer in time.
        LookupError: If no drive has that name. LookupError derives from
            Exception, so callers catching Exception keep working.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
    headers = {"Authorization": f"Bearer {access_token}"}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    for drive in response.json()['value']:
        if drive['name'] == drive_name:
            return drive['id']
    raise LookupError(f"Drive '{drive_name}' no encontrado.")
42+
43+
44+
def list_folder_contents(access_token, drive_id, path, timeout=30):
    """
    List every child item of a folder in a drive.

    Follows the '@odata.nextLink' of each response page so that folders with
    more children than fit in one page are returned completely.

    Args:
        access_token (str): Bearer token sent in the Authorization header.
        drive_id (str): Graph id of the drive.
        path (str): Folder path relative to the drive root. Leading and
            trailing slashes are ignored; an empty path lists the root itself.
        timeout (float | tuple): Seconds to wait for the server on each
            request, passed to requests.

    Returns:
        list: The concatenated 'value' entries of all response pages.

    Raises:
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If a response page has no 'value' field.
    """
    # NOTE(review): the path is inserted as-is; names containing '#' or '%'
    # may need percent-encoding - confirm against the callers' inputs.
    relative_path = (path or "").strip("/")
    if relative_path:
        url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{relative_path}:/children"
    else:
        # The path-based form is invalid with an empty path; address the root directly.
        url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"
    headers = {"Authorization": f"Bearer {access_token}"}

    items = []
    while url:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        page = response.json()
        items.extend(page['value'])
        # Absent on the last page, which ends the loop.
        url = page.get('@odata.nextLink')
    return items
50+
51+
52+
def download_file(access_token, drive_id, item_path, local_path, max_retries=5, timeout=30):
    """
    Download one file from a drive to a local path, retrying on HTTP 503.

    The content is streamed to disk in 8192-byte chunks. Only a 503 response
    triggers a retry; the wait grows linearly (2s, 4s, 6s, ...). Any other
    HTTP error is re-raised immediately.

    Args:
        access_token (str): Bearer token sent in the Authorization header.
        drive_id (str): Graph id of the drive.
        item_path (str): Path of the file relative to the drive root.
        local_path (str): Destination file path. Missing parent directories
            are created.
        max_retries (int): Number of retries after the first attempt.
        timeout (float | tuple): Seconds to wait for the server, passed to
            requests. Without it a stalled connection would block forever.

    Raises:
        requests.exceptions.HTTPError: For any HTTP error status other than 503.
        RuntimeError: If the server still answers 503 after max_retries retries.
    """
    url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{item_path}:/content"
    headers = {"Authorization": f"Bearer {access_token}"}

    backoff = 2  # seconds; multiplied by the retry number
    retry_count = 0

    while True:
        try:
            # The context manager closes the streamed response on every path,
            # so a failed attempt does not keep its connection checked out.
            with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
                response.raise_for_status()

                # dirname is '' for a bare file name, and os.makedirs('') raises.
                parent_dir = os.path.dirname(local_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            print(f"✅ Descargado: {item_path}")
            return  # Success

        except HTTPError as e:
            status_code = e.response.status_code if e.response is not None else None
            if status_code != 503:
                raise  # Not a 503: re-raise immediately

            # Give up before announcing or sleeping for a retry that would never run.
            if retry_count >= max_retries:
                raise RuntimeError(
                    f"❌ Fallo permanente al descargar '{item_path}' tras {max_retries} reintentos."
                ) from e

            retry_count += 1
            wait_time = backoff * retry_count
            print(
                f"⚠️ Error 503 en '{item_path}' - Reintentando en {wait_time}s (intento {retry_count}/{max_retries})...")
            time.sleep(wait_time)
86+
87+
88+
def download_folder_recursive(access_token, drive_id, remote_path, local_root):
    """
    Download a remote folder and all of its subfolders to a local directory.

    Each child reported by list_folder_contents is handled by kind: items
    with a 'folder' key are descended into, items with a 'file' key are
    downloaded with download_file. Items with neither key are skipped.

    Args:
        access_token (str): Bearer token forwarded to the Graph helpers.
        drive_id (str): Graph id of the drive.
        remote_path (str): Folder path relative to the drive root. Leading
            and trailing slashes are ignored.
        local_root (str): Local directory that mirrors remote_path.
    """
    # Normalise once so joined child paths never contain '//' or a leading '/'.
    base_path = (remote_path or "").strip("/")

    for item in list_folder_contents(access_token, drive_id, base_path):
        name = item['name']
        item_path = f"{base_path}/{name}" if base_path else name
        local_path = os.path.join(local_root, name)

        if 'folder' in item:
            download_folder_recursive(access_token, drive_id, item_path, local_path)
        elif 'file' in item:
            download_file(access_token, drive_id, item_path, local_path)
99+
100+
101+
def demo():
    """
    Download a SharePoint folder into the local 'descargas_input' directory.

    All connection settings are obtained through read_secret: TENANT_ID,
    CLIENT_ID, CLIENT_SECRET, SHAREPOINT_DOMAIN (e.g. 'asbtec.sharepoint.com'),
    SITE_NAME (e.g. 'RecursosHumanos') and SHAREPOINT_FOLDER.
    """
    # Secrets are read in this fixed order, all of them before any network call.
    secret_names = (
        'TENANT_ID',
        'CLIENT_ID',
        'CLIENT_SECRET',
        'SHAREPOINT_DOMAIN',
        'SITE_NAME',
        "SHAREPOINT_FOLDER",
    )
    tenant, app_id, app_secret, domain, site, remote_folder = (
        read_secret(secret_name) for secret_name in secret_names
    )

    local_folder = 'descargas_input'

    token = get_access_token(tenant, app_id, app_secret)
    site_identifier = get_site_id(token, domain, site)
    drive = get_drive_id(token, site_identifier)

    print("Comenzando descarga recursiva de SharePoint...")
    download_folder_recursive(token, drive, remote_folder, local_folder)
    print("✅ Descarga completada.")
119+
120+
121+
# Run the demo download only when this file is executed as a script.
if '__main__' == __name__:
    demo()

src/sharepoint/__init__.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/sharepoint/auth.py

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/sharepoint/demo.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

src/sharepoint/files.py

Whitespace-only changes.

src/sharepoint/lists.py

Whitespace-only changes.

0 commit comments

Comments
 (0)