diff --git a/README.md b/README.md index 48e4f12..8d34bc8 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,20 @@ Available languages: - [Italian 4.9.4](https://github.com/IQSS/dataverse-docker/blob/master/dataversedock/dataverse-property-files/it-IT/) maintained by [Centro Interdipartimentale UniData](http://www.unidata.unimib.it) - [Hungarian, 4.9.4](https://github.com/IQSS/dataverse-docker/tree/master/dataversedock/dataverse-property-files/hu-HU) maintained by [TARKI](http://tarki.hu) +## Generating tools + +Bash script `generate.sh` helps to maintains up to date i18n files. It use en_US files to detect all files and keys from latest version et try to fill value for desired language from latest available version for this language. + +Usage: + +```bash +bash ./generate.sh fr_CA => generate fr_CA from most recent version +bash ./generate.sh fr_FR develop => generate fr_FR from develop branch +``` + +Process: + +1. Create properties files by copying files from en_US, rename then with language code, and removing value inside. +2. Extract files from most recent version for this language (branch dataverse-vXXX) or from the specified branch +3. Fill values by searching them in extracted files, in the same filename or in all files +4. Print "not found" keys (new keys) \ No newline at end of file diff --git a/generate.sh b/generate.sh new file mode 100644 index 0000000..6e80ce7 --- /dev/null +++ b/generate.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +TARGET_LANG=$1 +BASE_LANG="en_US" + +function usage(){ + echo "Usage: bash $0 LANG [FROM_BRANCH]" + echo "Genetate i18n properties files for LANG language, based on keys found en_US/*.properties and value from LANG from a previous dataverse-v4.XX branch or specific branch (liek develop)" + echo "Ex:" + echo " bash $0 fr_CA => generate fr_CA from most recent version" + echo " bash $0 fr_FR develop => generate fr_FR from develop branch" + exit 1 +} + + +if [ -z $TARGET_LANG ]; then + usage +fi + +if [ ! -d $BASE_LANG ]; then + echo "Base language directory is missing: [$BASE_LANG]" + usage +fi + +if [ -d $TARGET_LANG ]; then + echo "Target language directory already exists. Please delete it manually : rm -r $TARGET_LANG" + usage +fi + +FROM_BRANCH=$2 + +if [ -z $FROM_BRANCH ];then + # Detect branch with existing language file + for version in $(git branch | grep dataverse | sed "s/.*dataverse-\(.*\)/\\1/g"); do + if [[ "$(git show dataverse-$version: | grep -o $TARGET_LANG)" == "$TARGET_LANG" ]]; then + FROM_BRANCH=dataverse-$version + fi + done +else + # Check existing language file + if [[ "$(git show $FROM_BRANCH: | grep -o $TARGET_LANG)" != "$TARGET_LANG" ]]; then + FROM_BRANCH="" + fi +fi + +if [ -z $FROM_BRANCH ]; then + echo "Can not find suitable version branch for language $TARGET_LANG" + exit 1 +fi + + +echo "=== Generate $TARGET_LANG based on branch $FROM_BRANCH" + +# Copie reference language files (en_US) +cp -r $BASE_LANG $TARGET_LANG + +# Force sed to continue parsing when it hits an “invalid” character +export LANG=C +find $TARGET_LANG -name '*.properties' -exec sed -i "s/^\([^=]*=\).*$/\\1/g" {} \; + +for file in $TARGET_LANG/* ; do + #append new line (while read line bug if no newline at EOF) + echo >> $file + mv $file ${file/.properties/_$(echo $TARGET_LANG | awk -F '_' '{print $1}').properties} +done + + +# extract files from original branch +TMP_DIR=$(mktemp -d ) +for f in $(git show $FROM_BRANCH:$TARGET_LANG | grep properties); do + git show $FROM_BRANCH:$TARGET_LANG/$f > $TMP_DIR/$f +done + +# fill files +for f in $(find $TARGET_LANG -name '*.properties') +do + CURRENT_FILE=$(basename $f) + while read originalline + do + if [[ "$originalline" =~ ^[0-9a-zA-Z].*$ ]]; then + originalline=${originalline//u0020/\\u0020} + originalline=${originalline//\\/\\\\} + originalline=${originalline//&/\\&} + targetline="" + # First search in same fille + if [ -f $TMP_DIR/$CURRENT_FILE ]; then + targetline=$(grep -rh "^$originalline" $TMP_DIR/$CURRENT_FILE | head -n 1 ) + targetline=${targetline//\\/\\\\\\} + targetline=${targetline//|/\\|} + targetline=${targetline//\\n/\\\\n} + targetline=${targetline//u0020/\\u0020} + targetline=${targetline//&/\\&} + fi + if [[ ! -z $targetline ]]; then + sed -i "s|^${originalline}$|${targetline}|g" $f + else + # Second find in all file + targetline=$(grep -rh "^$originalline" $TMP_DIR/) + targetline=${targetline//\\/\\\\\\} + targetline=${targetline//|/\\|} + targetline=${targetline//\\n/\\\\n} + targetline=${targetline//u0020/\\u0020} + targetline=${targetline//&/\\&} + targetline_count=$(grep -rh "^$originalline" $TMP_DIR/ | wc -l) + if [[ ! -z $targetline && "$targetline_count" -eq "1" ]]; then + sed -i "s|^${originalline}$|${targetline}|g" $f + else + echo "Can not translate $f>$originalline" + fi + fi + fi + done < $f +done + +find $TARGET_LANG -name '*.properties' -exec sed -i "s/\\\\n/n/g" {} \; +find $TARGET_LANG -name '*.properties' -exec sed -i "s/\\\\u/u/g" {} \; + + +rm -rf $TMP_DIR \ No newline at end of file