Skip to content

Commit 27c6383

Browse files
authored
Merge pull request #36 from MIT-Emerging-Talent/MLProject
Ml project
2 parents cd03f7e + 37953e2 commit 27c6383

18 files changed

+4732
-126
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
__pycache__
22
*.pytest_cache
3+
<<<<<<< HEAD
4+
.venv/
5+
6+
=======
37
.env/
8+
>>>>>>> origin/main
49
# .env
510
*.db
611
*.idea

.ls-lint.yml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ ls:
44
.md: snake_case | regex:[0-9A-Z\-]+
55
.txt: snake_case | kebab-case
66
.yml: snake_case | kebab-case
7+
.ipynb: snake_case
8+
.csv: snake_case
9+
.py: snake_case
710

811
.: # Root directories
912
.*/:
@@ -15,13 +18,28 @@ ignore:
1518
- .git
1619
- .github
1720
- .vscode
18-
- venv
1921
- .ruff_cache
2022
- .pytest_cache
2123
- __pycache__
24+
<<<<<<< HEAD
25+
- .venv
26+
- **/site-packages/**
27+
- 1_datasets
28+
- data
29+
- dist
30+
- build
31+
32+
rules:
33+
some_rule: "true" # string in quotes
34+
ignore:
35+
- ".venv/"
36+
37+
description: "Check code formatting: true/false"
38+
=======
2239
- .ls-lint.yml
2340
- .markdownlint.yml
2441

2542

2643
rules:
2744
some_rule: "true"
45+
>>>>>>> origin/main
-10.9 KB
Binary file not shown.

2_data_preparation/Extract_data_haiti_and_caribbean.ipynb

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,13 @@
187187
],
188188
"source": [
189189
"# Load the UN Tourism dataset of worldwide inbound tourist arrivals\n",
190-
"df = pd.read_excel (\"UN_Tourism_inbound_arrivals_10_2025.xlsx\")\n",
190+
"df = pd.read_excel(\"UN_Tourism_inbound_arrivals_10_2025.xlsx\")\n",
191191
"df.head()\n",
192192
"# Load the UN Tourism dataset of worldwide inbound tourist arrivals by purpose\n",
193193
"df_purpose = pd.read_excel(\"UN_Tourism_inbound_arrivals_by_purpose_10_2025.xlsx\")\n",
194194
"df_purpose.head()\n",
195195
"# Load the UN Tourism dataset of worldwide inbound tourist arrivals by transport\n",
196-
"df_transport = pd.read_excel (\"UN_Tourism_inbound_arrivals_by_transport_10_2025.xlsx\")\n",
196+
"df_transport = pd.read_excel(\"UN_Tourism_inbound_arrivals_by_transport_10_2025.xlsx\")\n",
197197
"df_transport.head()\n",
198198
"# Load dataset for inbound expenditure by tourists in the world\n",
199199
"df_expenditure = pd.read_excel(\"UN_Tourism_inbound_expenditure_10_2025.xlsx\")\n",
@@ -386,7 +386,9 @@
386386
"df_haiti_by_transport = df_transport[(df_transport[\"reporter_area_label\"] == \"Haiti\")]\n",
387387
"df_haiti_by_transport.head()\n",
388388
"# Filter the worldwide inbound tourist expenditure dataset to keep only the rows for Haiti\n",
389-
"df_haiti_by_expenditure = df_expenditure[(df_expenditure[\"reporter_area_label\"] == \"Haiti\")]\n",
389+
"df_haiti_by_expenditure = df_expenditure[\n",
390+
" (df_expenditure[\"reporter_area_label\"] == \"Haiti\")\n",
391+
"]\n",
390392
"df_haiti_by_expenditure.head()"
391393
]
392394
},
@@ -398,13 +400,19 @@
398400
"outputs": [],
399401
"source": [
400402
"# save the df_haiti in a csv file\n",
401-
"df_haiti.to_csv (\"UN_tourism_inbound_Arrivals_haiti.csv\",index = False)\n",
403+
"df_haiti.to_csv(\"UN_tourism_inbound_Arrivals_haiti.csv\", index=False)\n",
402404
"# save the df_haiti_by_purpose in a csv file\n",
403-
"df_haiti_by_purpose.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_purpose.csv\", index= False)\n",
405+
"df_haiti_by_purpose.to_csv(\n",
406+
" \"UN_tourism_inbound_Arrivals_haiti_by_purpose.csv\", index=False\n",
407+
")\n",
404408
"# save the df_haiti_by_transport in a csv file\n",
405-
"df_haiti_by_transport.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_transport.csv\", index= False)\n",
409+
"df_haiti_by_transport.to_csv(\n",
410+
" \"UN_tourism_inbound_Arrivals_haiti_by_transport.csv\", index=False\n",
411+
")\n",
406412
"# save the df_haiti_by_expenditure in a csv file\n",
407-
"df_haiti_by_expenditure.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_expenditure.csv\", index= False)"
413+
"df_haiti_by_expenditure.to_csv(\n",
414+
" \"UN_tourism_inbound_Arrivals_haiti_by_expenditure.csv\", index=False\n",
415+
")"
408416
]
409417
},
410418
{
@@ -612,10 +620,10 @@
612620
],
613621
"source": [
614622
"# df[\"reporter_area_label\"].unique()\n",
615-
"#df_haiti_by_purpose[\"year\"].unique()\n",
616-
"#df_haiti_by_purpose.isnull().sum()\n",
617-
"#df_haiti_by_transport.isnull().sum()\n",
618-
"#df_haiti_by_expenditure.isnull().sum()\n",
623+
"# df_haiti_by_purpose[\"year\"].unique()\n",
624+
"# df_haiti_by_purpose.isnull().sum()\n",
625+
"# df_haiti_by_transport.isnull().sum()\n",
626+
"# df_haiti_by_expenditure.isnull().sum()\n",
619627
"df_haiti_by_expenditure[\"year\"].unique()"
620628
]
621629
},
@@ -628,15 +636,37 @@
628636
"source": [
629637
"# create a list with the names of the Caribbean countries to extract data for the Caribbean region\n",
630638
"caribbean_countries = [\n",
631-
" \"Antigua and Barbuda\", \"Bahamas\", \"Barbados\", \"Belize\",\n",
632-
" \"Cuba\", \"Dominica\", \"Dominican Republic\", \"Grenada\",\n",
633-
" \"Jamaica\", \"Saint Kitts and Nevis\",\"Saint Lucia\",\"Haiti\",\n",
634-
" \"Saint Vincent and the Grenadines\", \"Trinidad and Tobago\",\n",
635-
" \"Anguilla\", \"Bermuda\", \"British Virgin Islands\", \"Cayman Islands\",\n",
636-
" \"Montserrat\", \"Turks and Caicos Islands\", \"Guadeloupe\", \"Martinique\",\n",
637-
" \"Aruba\", \"Curaçao\", \"Sint Maarten (Dutch part)\", \"Bonaire\",\n",
638-
" \"Saba\", \"St. Eustatius\", \"Puerto Rico\", \"United States Virgin Islands\"\n",
639-
"]\n"
639+
" \"Antigua and Barbuda\",\n",
640+
" \"Bahamas\",\n",
641+
" \"Barbados\",\n",
642+
" \"Belize\",\n",
643+
" \"Cuba\",\n",
644+
" \"Dominica\",\n",
645+
" \"Dominican Republic\",\n",
646+
" \"Grenada\",\n",
647+
" \"Jamaica\",\n",
648+
" \"Saint Kitts and Nevis\",\n",
649+
" \"Saint Lucia\",\n",
650+
" \"Haiti\",\n",
651+
" \"Saint Vincent and the Grenadines\",\n",
652+
" \"Trinidad and Tobago\",\n",
653+
" \"Anguilla\",\n",
654+
" \"Bermuda\",\n",
655+
" \"British Virgin Islands\",\n",
656+
" \"Cayman Islands\",\n",
657+
" \"Montserrat\",\n",
658+
" \"Turks and Caicos Islands\",\n",
659+
" \"Guadeloupe\",\n",
660+
" \"Martinique\",\n",
661+
" \"Aruba\",\n",
662+
" \"Curaçao\",\n",
663+
" \"Sint Maarten (Dutch part)\",\n",
664+
" \"Bonaire\",\n",
665+
" \"Saba\",\n",
666+
" \"St. Eustatius\",\n",
667+
" \"Puerto Rico\",\n",
668+
" \"United States Virgin Islands\",\n",
669+
"]"
640670
]
641671
},
642672
{
@@ -646,7 +676,7 @@
646676
"metadata": {},
647677
"outputs": [],
648678
"source": [
649-
"# filter the dataset to keep data only for the caribbean region \n",
679+
"# filter the dataset to keep data only for the caribbean region\n",
650680
"df_caribbean = df[df[\"reporter_area_label\"].isin(caribbean_countries)]"
651681
]
652682
},

0 commit comments

Comments
 (0)