Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
__pycache__
*.pytest_cache
<<<<<<< HEAD
.venv/

=======
.env/
>>>>>>> origin/main
# .env
*.db
*.idea
Expand Down
20 changes: 19 additions & 1 deletion .ls-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ ls:
.md: snake_case | regex:[0-9A-Z\-]+
.txt: snake_case | kebab-case
.yml: snake_case | kebab-case
.ipynb: snake_case
.csv: snake_case
.py: snake_case

.: # Répertoires racine
.*/:
Expand All @@ -15,13 +18,28 @@ ignore:
- .git
- .github
- .vscode
- venv
- .ruff_cache
- .pytest_cache
- __pycache__
<<<<<<< HEAD
- .venv
- **/site-packages/**
- 1_datasets
- data
- dist
- build

rules:
some_rule: "true" # string entre guillemets
ignore:
- ".venv/"

description: "Check code formatting: true/false"
=======
- .ls-lint.yml
- .markdownlint.yml


rules:
some_rule: "true"
>>>>>>> origin/main
Binary file modified 1_datasets/haiti_catalog_raw_data/1st dataset_draft.....xlsx
Binary file not shown.
72 changes: 51 additions & 21 deletions 2_data_preparation/Extract_data_haiti_and_caribbean.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,13 @@
],
"source": [
"# Load dataset for UN inbound tourist arrivals in the world\n",
"df = pd.read_excel (\"UN_Tourism_inbound_arrivals_10_2025.xlsx\")\n",
"df = pd.read_excel(\"UN_Tourism_inbound_arrivals_10_2025.xlsx\")\n",
"df.head()\n",
"# Load dataset for UN inbound tourist arrivals by purpose in the world\n",
"df_purpose = pd.read_excel(\"UN_Tourism_inbound_arrivals_by_purpose_10_2025.xlsx\")\n",
"df_purpose.head()\n",
"# Load dataset for UN inbound tourist arrivals by transport in the world\n",
"df_transport = pd.read_excel (\"UN_Tourism_inbound_arrivals_by_transport_10_2025.xlsx\")\n",
"df_transport = pd.read_excel(\"UN_Tourism_inbound_arrivals_by_transport_10_2025.xlsx\")\n",
"df_transport.head()\n",
"# Load dataset for inbound expenditure by tourists in the world\n",
"df_expenditure = pd.read_excel(\"UN_Tourism_inbound_expenditure_10_2025.xlsx\")\n",
Expand Down Expand Up @@ -386,7 +386,9 @@
"df_haiti_by_transport = df_transport[(df_transport[\"reporter_area_label\"] == \"Haiti\")]\n",
"df_haiti_by_transport.head()\n",
"# Filter the worldwide inbound tourist expenditure dataset to keep only the rows for Haiti\n",
"df_haiti_by_expenditure = df_expenditure[(df_expenditure[\"reporter_area_label\"] == \"Haiti\")]\n",
"df_haiti_by_expenditure = df_expenditure[\n",
" (df_expenditure[\"reporter_area_label\"] == \"Haiti\")\n",
"]\n",
"df_haiti_by_expenditure.head()"
]
},
Expand All @@ -398,13 +400,19 @@
"outputs": [],
"source": [
"# save the df_haiti in a csv file\n",
"df_haiti.to_csv (\"UN_tourism_inbound_Arrivals_haiti.csv\",index = False)\n",
"df_haiti.to_csv(\"UN_tourism_inbound_Arrivals_haiti.csv\", index=False)\n",
"# save the df_haiti_by_purpose in a csv file\n",
"df_haiti_by_purpose.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_purpose.csv\", index= False)\n",
"df_haiti_by_purpose.to_csv(\n",
" \"UN_tourism_inbound_Arrivals_haiti_by_purpose.csv\", index=False\n",
")\n",
"# save the df_haiti_by_transport in a csv file\n",
"df_haiti_by_transport.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_transport.csv\", index= False)\n",
"df_haiti_by_transport.to_csv(\n",
" \"UN_tourism_inbound_Arrivals_haiti_by_transport.csv\", index=False\n",
")\n",
"# save the df_haiti_by_expenditure in a csv file\n",
"df_haiti_by_expenditure.to_csv(\"UN_tourism_inbound_Arrivals_haiti_by_expenditure.csv\", index= False)"
"df_haiti_by_expenditure.to_csv(\n",
" \"UN_tourism_inbound_Arrivals_haiti_by_expenditure.csv\", index=False\n",
")"
]
},
{
Expand Down Expand Up @@ -612,10 +620,10 @@
],
"source": [
"# df[\"reporter_area_label\"].unique()\n",
"#df_haiti_by_purpose[\"year\"].unique()\n",
"#df_haiti_by_purpose.isnull().sum()\n",
"#df_haiti_by_transport.isnull().sum()\n",
"#df_haiti_by_expenditure.isnull().sum()\n",
"# df_haiti_by_purpose[\"year\"].unique()\n",
"# df_haiti_by_purpose.isnull().sum()\n",
"# df_haiti_by_transport.isnull().sum()\n",
"# df_haiti_by_expenditure.isnull().sum()\n",
"df_haiti_by_expenditure[\"year\"].unique()"
]
},
Expand All @@ -628,15 +636,37 @@
"source": [
"# create an array with the names of the Caribbean countries to extract data for the Caribbean region\n",
"caribbean_countries = [\n",
" \"Antigua and Barbuda\", \"Bahamas\", \"Barbados\", \"Belize\",\n",
" \"Cuba\", \"Dominica\", \"Dominican Republic\", \"Grenada\",\n",
" \"Jamaica\", \"Saint Kitts and Nevis\",\"Saint Lucia\",\"Haiti\",\n",
" \"Saint Vincent and the Grenadines\", \"Trinidad and Tobago\",\n",
" \"Anguilla\", \"Bermuda\", \"British Virgin Islands\", \"Cayman Islands\",\n",
" \"Montserrat\", \"Turks and Caicos Islands\", \"Guadeloupe\", \"Martinique\",\n",
" \"Aruba\", \"Curaçao\", \"Sint Maarten (Dutch part)\", \"Bonaire\",\n",
" \"Saba\", \"St. Eustatius\", \"Puerto Rico\", \"United States Virgin Islands\"\n",
"]\n"
" \"Antigua and Barbuda\",\n",
" \"Bahamas\",\n",
" \"Barbados\",\n",
" \"Belize\",\n",
" \"Cuba\",\n",
" \"Dominica\",\n",
" \"Dominican Republic\",\n",
" \"Grenada\",\n",
" \"Jamaica\",\n",
" \"Saint Kitts and Nevis\",\n",
" \"Saint Lucia\",\n",
" \"Haiti\",\n",
" \"Saint Vincent and the Grenadines\",\n",
" \"Trinidad and Tobago\",\n",
" \"Anguilla\",\n",
" \"Bermuda\",\n",
" \"British Virgin Islands\",\n",
" \"Cayman Islands\",\n",
" \"Montserrat\",\n",
" \"Turks and Caicos Islands\",\n",
" \"Guadeloupe\",\n",
" \"Martinique\",\n",
" \"Aruba\",\n",
" \"Curaçao\",\n",
" \"Sint Maarten (Dutch part)\",\n",
" \"Bonaire\",\n",
" \"Saba\",\n",
" \"St. Eustatius\",\n",
" \"Puerto Rico\",\n",
" \"United States Virgin Islands\",\n",
"]"
]
},
{
Expand All @@ -646,7 +676,7 @@
"metadata": {},
"outputs": [],
"source": [
"# filter the dataset to keep data only for the caribbean region \n",
"# filter the dataset to keep data only for the caribbean region\n",
"df_caribbean = df[df[\"reporter_area_label\"].isin(caribbean_countries)]"
]
},
Expand Down
Loading
Loading