diff --git a/.gitignore b/.gitignore index 258d77ac..99db0217 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ __pycache__ dvconfig.py ec2-create-instance.sh venv +dv_logo_*.png +*.DS_Store +sample.sh diff --git a/README.md b/README.md index 2c755113..82b1ddf0 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Activate the virtual environment you just created. Install dependencies into the virtual environment, especially [pyDataverse][]. - pip install -r requirements.txt + pip3 install -r requirements.txt Copy `dvconfig.py.sample` to `dvconfig.py` (see the `cp` command below) and add your API token (using your favorite text editor, which may not be `vi` as shown below). Note that the config file specifies which sample data will be created. @@ -35,12 +35,28 @@ Copy `dvconfig.py.sample` to `dvconfig.py` (see the `cp` command below) and add Note that the environment variable `$API_TOKEN` will override `api_token` in `dvconfig.py`. +## Adding a custom dataset with a specific number of files + +You can add a specific number of files to the dataset "Dataverse performance test dataset" with: + + python create_sample_custom_dataset.py + +You'll be prompted to specify the number of files you wish to create. The application will then generate the requested number of files, each one with the Dataverse logo in a randomly chosen color. These files will be in PNG format. It's important to complete this step before adding any data, as the dataset will otherwise be empty. + +If you experience the `OSError: no library called "cairo-2" was found` error, please set the following environment variable, as documented [here](https://github.com/Kozea/CairoSVG/issues/392#issuecomment-1927435606): + + export DYLD_LIBRARY_PATH="/opt/homebrew/opt/cairo/lib:$DYLD_LIBRARY_PATH" + ## Adding sample data Assuming you have already run the `source` and `cd` commands above, you should be able to run the following command to create sample data. 
python create_sample_data.py + https://github.com/Kozea/CairoSVG/issues/392#issuecomment-1927435606 + + export DYLD_LIBRARY_PATH="/opt/homebrew/opt/cairo/lib:$DYLD_LIBRARY_PATH" + All of the steps above may be automated in a fresh installation of Dataverse on an EC2 instance on AWS by downloading [ec2-create-instance.sh][] and [main.yaml][]. Edit main.yml to set `dataverse.sampledata.enabled: true` and adjust any other settings to your liking, then execute the script with the config file like this: curl -O https://raw.githubusercontent.com/GlobalDataverseCommunityConsortium/dataverse-ansible/master/ec2/ec2-create-instance.sh diff --git a/create_sample_custom_dataset.py b/create_sample_custom_dataset.py new file mode 100644 index 00000000..8247ede5 --- /dev/null +++ b/create_sample_custom_dataset.py @@ -0,0 +1,44 @@
+"""Generate PNG copies of the Dataverse logo, each in a random color.
+
+The files are written into the sample "performance-test" dataset folder.
+Run this before create_sample_data.py; the README notes that the dataset
+will otherwise be empty.
+"""
+
+import os
+import random
+import re
+
+import cairosvg
+
+SOURCE_SVG = 'dv_logo_hd.svg'
+TARGET_PATH = './data/dataverses/dataverse-performance-demo/datasets/performance-test/files'
+# Color in the source SVG that is swapped for a random one in every copy.
+LOGO_COLOR = re.compile(r'#c65b28')
+
+
+def main():
+    """Prompt for a file count and write that many recolored logo PNGs."""
+    answer = input('Number of files to generate: ')
+    try:
+        file_count = int(answer)
+    except ValueError:
+        raise SystemExit(f'Expected a whole number, got {answer!r}.') from None
+    if file_count < 0:
+        raise SystemExit('The number of files cannot be negative.')
+
+    with open(SOURCE_SVG, 'r') as file:
+        svg_code = file.read()
+
+    # The generated PNGs are git-ignored, so this folder may not exist yet.
+    os.makedirs(TARGET_PATH, exist_ok=True)
+
+    for iteration in range(file_count):
+        random_color = '#' + ''.join(random.choices('0123456789ABCDEF', k=6))
+        svg_code_tmp = LOGO_COLOR.sub(random_color, svg_code)
+        destination_path = f"{TARGET_PATH}/dv_logo_{iteration:05d}.png"
+        cairosvg.svg2png(bytestring=svg_code_tmp, write_to=destination_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/data/dataverses/dataverse-performance-demo/datasets/performance-test/performance-test.json b/data/dataverses/dataverse-performance-demo/datasets/performance-test/performance-test.json new file mode 100644 index 00000000..30320440 --- /dev/null +++ b/data/dataverses/dataverse-performance-demo/datasets/performance-test/performance-test.json @@ -0,0 +1,103 @@ +{ + "datasetVersion": { + "id": 4, + "datasetId": 12, + "datasetPersistentId": "doi:10.5072/FK2/JPT050", + "storageIdentifier": 
"file://10.5072/FK2/JPT050", + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "UNF": "UNF:6:VDyWtJrNd0VRwAumtzYA1Q==", + "lastUpdateTime": "2021-09-20T18:38:32Z", + "releaseTime": "2021-09-20T18:38:32Z", + "createTime": "2021-09-20T18:16:38Z", + "license": "CC0 1.0", + "termsOfUse": "CC0 Waiver", + "fileAccessRequest": false, + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Dataverse performance test dataset" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Juan Pablo Tosca Villanueva" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactName": { + "typeName": "datasetContactName", + "multiple": false, + "typeClass": "primitive", + "value": "Juan Pablo Tosca Villanueva" + }, + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "dataverse@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "This is a test dataset to measure the performance of the Dataverse software." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Social Sciences" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Admin, Dataverse" + }, + { + "typeName": "dateOfDeposit", + "multiple": false, + "typeClass": "primitive", + "value": "2024-05-05" + } + ] + } + }, + "citation": "IQSS, 2024, \"Dataverse performance test\", https://doi.org/10.5072/FK2/JPT050, Root, V1" + } + } \ No newline at end of file diff --git a/data/dataverses/dataverse-performance-demo/dataverse-performance-demo.json b/data/dataverses/dataverse-performance-demo/dataverse-performance-demo.json new file mode 100644 index 00000000..94c1862e --- /dev/null +++ b/data/dataverses/dataverse-performance-demo/dataverse-performance-demo.json @@ -0,0 +1,12 @@ +{ + "name": "Dataverse performance demo", + "alias": "dataverse-performance-demo", + "dataverseContacts": [ + { + "contactEmail": "juan_tosca@iq.harvard.edu" + } + ], + "affiliation": "Harvard University", + "description": "Demo created for performance testing", + "dataverseType": "RESEARCH_PROJECTS" +} diff --git a/dv_logo_hd.svg b/dv_logo_hd.svg new file mode 100644 index 00000000..2644512e --- /dev/null +++ b/dv_logo_hd.svg @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dvconfig.py.sample b/dvconfig.py.sample index 8f97f284..fb7583f4 100644 --- a/dvconfig.py.sample +++ b/dvconfig.py.sample @@ -25,6 +25,8 @@ sample_data = [ 'data/dataverses/open-source-at-harvard/datasets/open-source-at-harvard/open-source-at-harvard.json', 'data/dataverses/king/king.json', 'data/dataverses/king/datasets/cause-of-death/cause-of-death.json', +'data/dataverses/dataverse-performance-demo/dataverse-performance-demo.json', +'data/dataverses/dataverse-performance-demo/datasets/performance-test/performance-test.json', ] # put this back at line 6 once 
https://github.com/IQSS/dataverse/pull/6924 is merged diff --git a/requirements.txt b/requirements.txt index 4a1177ee..3c036a0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ pyDataverse==0.2.1 +CairoSVG==2.7.1 \ No newline at end of file