import os
import time
from urllib.parse import quote

import requests
from requests.exceptions import HTTPError

from secret import read_secret
8+
9+
10+
def get_access_token(tenant_id, client_id, client_secret, timeout=30):
    """Request an app-only access token for Microsoft Graph.

    Posts an OAuth 2.0 ``client_credentials`` grant to the tenant's
    ``login.microsoftonline.com`` token endpoint, asking for the
    ``https://graph.microsoft.com/.default`` scope.

    Args:
        tenant_id: Tenant identifier interpolated into the token URL.
        client_id: Value sent as the ``client_id`` form field.
        client_secret: Value sent as the ``client_secret`` form field.
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            endpoint cannot block the caller indefinitely.

    Returns:
        The ``access_token`` entry of the JSON response body.

    Raises:
        requests.exceptions.HTTPError: If the endpoint answers with an
            error status.
        requests.exceptions.Timeout: If the endpoint does not answer within
            ``timeout`` seconds.
        KeyError: If the response JSON has no ``access_token`` entry.
    """
    token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    token_data = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
        'scope': 'https://graph.microsoft.com/.default',
    }
    response = requests.post(token_url, data=token_data, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']
22+
23+
def get_site_id(access_token, domain, site_name, timeout=30):
    """Look up the Microsoft Graph id of a SharePoint site.

    Queries ``/v1.0/sites/{domain}:/sites/{site_name}`` with a bearer token.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        domain: SharePoint host name interpolated into the URL.
        site_name: Site path segment placed after ``/sites/``.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The ``id`` entry of the JSON response body.

    Raises:
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If Graph does not answer within
            ``timeout`` seconds.
        KeyError: If the response JSON has no ``id`` entry.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{domain}:/sites/{site_name}"
    headers = {"Authorization": f"Bearer {access_token}"}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()['id']
30+
31+
def get_drive_id(access_token, site_id, drive_name="Documents", timeout=30):
    """Find the id of the drive (document library) with the given name.

    Lists ``/v1.0/sites/{site_id}/drives`` and returns the id of the first
    drive whose ``name`` equals ``drive_name``. Result pages are followed
    through ``@odata.nextLink`` so a match on a later page is not missed.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        site_id: Site identifier interpolated into the URL.
        drive_name: Exact drive name to look for.
        timeout: Timeout in seconds passed to each ``requests.get`` call.

    Returns:
        The ``id`` of the matching drive.

    Raises:
        LookupError: If no drive with that name exists on the site.
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If Graph does not answer within
            ``timeout`` seconds.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drives"
    headers = {"Authorization": f"Bearer {access_token}"}

    while url:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        page = response.json()
        for drive in page['value']:
            if drive['name'] == drive_name:
                return drive['id']
        # Absent on the last page, which ends the loop.
        url = page.get('@odata.nextLink')

    # LookupError is still an Exception, so existing broad handlers keep working.
    raise LookupError(f"Drive '{drive_name}' no encontrado.")
42+
43+
def list_folder_contents(access_token, drive_id, path, timeout=30):
    """List every child item of a folder in a drive.

    Addresses the folder by path (``/root:/{path}:/children``) and follows
    ``@odata.nextLink`` so that all result pages are collected, not only the
    first one. An empty path (or one made only of slashes) lists the drive
    root via ``/root/children``.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        drive_id: Drive identifier interpolated into the URL.
        path: Folder path relative to the drive root; leading and trailing
            slashes are ignored.
        timeout: Timeout in seconds passed to each ``requests.get`` call.

    Returns:
        A list with the ``value`` entries of all response pages.

    Raises:
        requests.exceptions.HTTPError: If Graph answers with an error status.
        requests.exceptions.Timeout: If Graph does not answer within
            ``timeout`` seconds.
        KeyError: If a response page has no ``value`` entry.
    """
    base = f"https://graph.microsoft.com/v1.0/drives/{drive_id}"
    folder = (path or "").strip("/")
    if folder:
        # Percent-encode so characters such as '#', '?' or '%' in folder
        # names are not interpreted as URL syntax; '/' stays a separator.
        url = f"{base}/root:/{quote(folder)}:/children"
    else:
        url = f"{base}/root/children"
    headers = {"Authorization": f"Bearer {access_token}"}

    items = []
    while url:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        page = response.json()
        items.extend(page['value'])
        # Absent on the last page, which ends the loop.
        url = page.get('@odata.nextLink')
    return items
50+
51+
def download_file(access_token, drive_id, item_path, local_path, max_retries=5, timeout=60):
    """Download one drive item to a local file, retrying on HTTP 503.

    Streams ``/root:/{item_path}:/content`` into ``local_path`` in 8 KiB
    chunks, creating the parent directory when there is one. A 503 response
    is retried up to ``max_retries`` times with a linearly growing wait
    (2 s, 4 s, 6 s, ...); any other HTTP error is re-raised immediately.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        drive_id: Drive identifier interpolated into the URL.
        item_path: Path of the file relative to the drive root.
        local_path: Destination file path on the local filesystem.
        max_retries: Number of retries allowed after the first attempt.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        requests.exceptions.HTTPError: For any HTTP error other than 503.
        RuntimeError: If every attempt ended in a 503 (chained to the last
            HTTPError), or if ``max_retries`` allows no attempt at all.
    """
    # Percent-encode so characters such as '#', '?' or '%' in file names are
    # not interpreted as URL syntax; '/' stays a separator.
    encoded_path = quote(item_path.strip("/"))
    url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{encoded_path}:/content"
    headers = {"Authorization": f"Bearer {access_token}"}

    backoff = 2  # seconds
    # dirname() is '' for a bare file name, and os.makedirs('') would raise.
    target_dir = os.path.dirname(local_path)
    last_error = None

    for attempt in range(max_retries + 1):
        try:
            # The context manager releases the streamed connection even when
            # the status check or the file write fails.
            with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
                response.raise_for_status()

                if target_dir:
                    os.makedirs(target_dir, exist_ok=True)
                with open(local_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            print(f"✅ Descargado: {item_path}")
            return  # Success.

        except HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            if status != 503:
                raise  # Only 503 is treated as transient.
            last_error = e
            if attempt == max_retries:
                break  # Out of retries: fail now instead of sleeping again.
            retry_count = attempt + 1
            wait_time = backoff * retry_count
            print(
                f"⚠️ Error 503 en '{item_path}' - Reintentando en {wait_time}s (intento {retry_count}/{max_retries})...")
            time.sleep(wait_time)

    raise RuntimeError(
        f"❌ Fallo permanente al descargar '{item_path}' tras {max_retries} reintentos."
    ) from last_error
86+
87+
def download_folder_recursive(access_token, drive_id, remote_path, local_root):
    """Download a remote folder and all of its subfolders.

    Lists ``remote_path`` with ``list_folder_contents``; items carrying a
    ``folder`` key are descended into, items carrying a ``file`` key are
    fetched with ``download_file``. Items with neither key are skipped. The
    local tree under ``local_root`` mirrors the remote item names.

    Args:
        access_token: Bearer token forwarded to the Graph helpers.
        drive_id: Drive identifier forwarded to the Graph helpers.
        remote_path: Folder path relative to the drive root; leading and
            trailing slashes are ignored.
        local_root: Local directory that receives the folder's contents.

    Returns:
        None.
    """
    # Normalise once so child paths never get a leading or doubled slash.
    base = (remote_path or "").strip("/")

    for item in list_folder_contents(access_token, drive_id, base):
        name = item['name']
        item_path = f"{base}/{name}" if base else name
        local_path = os.path.join(local_root, name)

        if 'folder' in item:
            download_folder_recursive(access_token, drive_id, item_path, local_path)
        elif 'file' in item:
            download_file(access_token, drive_id, item_path, local_path)
99+
100+
def demo():
    """Download a SharePoint folder into ``descargas_input``.

    Reads the tenant, client id, client secret, SharePoint domain
    (e.g. 'asbtec.sharepoint.com'), site name (e.g. 'RecursosHumanos') and
    remote folder through ``read_secret``, resolves the access token, site
    and drive, then downloads the folder recursively.
    """
    secret_names = (
        'TENANT_ID',
        'CLIENT_ID',
        'CLIENT_SECRET',
        'SHAREPOINT_DOMAIN',
        'SITE_NAME',
        "SHAREPOINT_FOLDER",
    )
    # Secrets are read in the order listed above.
    tenant, app_id, app_secret, domain, site, remote_folder = [
        read_secret(key) for key in secret_names
    ]
    local_folder = 'descargas_input'

    token = get_access_token(tenant, app_id, app_secret)
    site_ref = get_site_id(token, domain, site)
    drive_ref = get_drive_id(token, site_ref)

    print("Comenzando descarga recursiva de SharePoint...")
    download_folder_recursive(token, drive_ref, remote_folder, local_folder)
    print("✅ Descarga completada.")
119+
120+
# Script entry point: run the demo only on direct execution, not on import.
if __name__ == "__main__":
    demo()
0 commit comments