diff --git a/README.md b/README.md index 6604a79b4..808593fb3 100644 --- a/README.md +++ b/README.md @@ -1986,6 +1986,33 @@ Since Puppet 4.0.0, you can use the + operator to achieve the same merge. $merged_hash = $hash1 + $hash2 +If merge is given a single `Iterable` (`Array`, `Hash`, etc.) it will call a given block with +up to three parameters, and merge each resulting Hash into the accumulated result. All other types +of values returned from the block (typically `undef`) are skipped (not merged). + +The codeblock can take 2 or three parameters: +* with two, it gets the current hash (as built to this point), and each value (for hash the value is a [key, value] tuple) +* with three, it gets the current hash (as built to this point), the key/index of each value, and then the value + +If the iterable is empty, or no hash was returned from the given block, an empty hash is returned. In the given block, a call to `next()` +will skip that entry, and a call to `break()` will end the iteration. + +*Example: counting occurrences of strings in an array* +```puppet +['a', 'b', 'c', 'c', 'd', 'b'].merge | $hsh, $v | { { $v => $hsh[$v].lest || { 0 } + 1 } } +# would result in { a => 1, b => 2, c => 2, d => 1 } +``` + +*Example: skipping values for entries that are longer than 1 char* + +```puppet +['a', 'b', 'c', 'c', 'd', 'b', 'blah', 'blah'].merge | $hsh, $v | { if $v =~ String[1,1] { { $v => $hsh[$v].lest || { 0 } + 1 } } } +# would result in { a => 1, b => 2, c => 2, d => 1 } since 'blah' is longer than 2 chars +``` + +The iterative `merge()` has an advantage over doing the same with a general `reduce()` in that the constructed hash +does not have to be copied in each iteration and thus will perform much better with large inputs. + *Type*: rvalue. #### `min` diff --git a/lib/puppet/functions/merge.rb b/lib/puppet/functions/merge.rb new file mode 100644 index 000000000..62ddf99d0 --- /dev/null +++ b/lib/puppet/functions/merge.rb @@ -0,0 +1,94 @@ +# Merges two or more hashes together or hashes resulting from iteration, and returns the resulting hash. +# +# @example Using merge() +# +# $hash1 = {'one' => 1, 'two', => 2} +# $hash2 = {'two' => 'dos', 'three', => 'tres'} +# $merged_hash = merge($hash1, $hash2) +# # The resulting hash is equivalent to: +# # $merged_hash = {'one' => 1, 'two' => 'dos', 'three' => 'tres'} +# +# When there is a duplicate key, the key in the rightmost hash will "win." +# +# Note that since Puppet 4.0.0 the same merge can be achieved with the + operator. +# +# $merged_hash = $hash1 + $hash2 +# +# If merge is given a single Iterable (Array, Hash, etc.) it will call a given block with +# up to three parameters, and merge each resulting Hash into the accumulated result. All other types +# of values returned from the block (typically undef) are skipped (not merged). +# +# The codeblock can take 2 or three parameters: +# * with two, it gets the current hash (as built to this point), and each value (for hash the value is a [key, value] tuple) +# * with three, it gets the current hash (as built to this point), the key/index of each value, and then the value +# +# If the iterable is empty, or no hash was returned from the given block, an empty hash is returned. In the given block, a call to `next()` +# will skip that entry, and a call to `break()` will end the iteration. +# +# @example counting occurrences of strings in an array +# ['a', 'b', 'c', 'c', 'd', 'b'].merge | $hsh, $v | { { $v => $hsh[$v].lest || { 0 } + 1 } } +# # would result in { a => 1, b => 2, c => 2, d => 1 } +# +# @example skipping values for entries that are longer than 1 char +# ['a', 'b', 'c', 'c', 'd', 'b', 'blah', 'blah'].merge | $hsh, $v | { if $v =~ String[1,1] { { $v => $hsh[$v].lest || { 0 } + 1 } } } +# # would result in { a => 1, b => 2, c => 2, d => 1 } since 'blah' is longer than 2 chars +# +# The iterative `merge()` has an advantage over doing the same with a general `reduce()` in that the constructed hash +# does not have to be copied in each iteration and thus will perform much better with large inputs. +# +Puppet::Functions.create_function(:merge) do + dispatch :merge2hashes do + repeated_param 'Variant[Hash, Undef, String[0,0]]', :args # this strange type is backwards compatible + return_type 'Hash' + end + + dispatch :merge_iterable3 do + repeated_param 'Iterable', :args + block_param 'Callable[3,3]', :block + return_type 'Hash' + end + + dispatch :merge_iterable2 do + repeated_param 'Iterable', :args + block_param 'Callable[2,2]', :block + return_type 'Hash' + end + + def merge2hashes(*hashes) + accumulator = {} + hashes.each { |h| accumulator.merge!(h) if h.is_a?(Hash) } + accumulator + end + + def merge_iterable2(iterable) + accumulator = {} + enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) + enum.each do |v| + r = yield(accumulator, v) + accumulator.merge!(r) if r.is_a?(Hash) + end + accumulator + end + + def merge_iterable3(iterable) + accumulator = {} + enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) + if enum.hash_style? + enum.each do |entry| + r = yield(accumulator, *entry) + accumulator.merge!(r) if r.is_a?(Hash) + end + else + begin + index = 0 + loop do + r = yield(accumulator, index, enum.next) + accumulator.merge!(r) if r.is_a?(Hash) + index += 1 + end + rescue StopIteration # rubocop:disable Lint/HandleExceptions + end + end + accumulator + end +end diff --git a/spec/functions/merge_spec.rb b/spec/functions/merge_spec.rb index 5f9747282..7bffaab85 100644 --- a/spec/functions/merge_spec.rb +++ b/spec/functions/merge_spec.rb @@ -2,20 +2,34 @@ describe 'merge' do it { is_expected.not_to eq(nil) } - it { is_expected.to run.with_params.and_raise_error(Puppet::ParseError, %r{wrong number of arguments}i) } - it { is_expected.to run.with_params({}, 'two').and_raise_error(Puppet::ParseError, %r{unexpected argument type String}) } - it { is_expected.to run.with_params({}, 1).and_raise_error(Puppet::ParseError, %r{unexpected argument type (Fixnum|Integer)}) } - it { is_expected.to run.with_params({ 'one' => 1, 'three' => { 'four' => 4 } }, 'two' => 'dos', 'three' => { 'five' => 5 }).and_return('one' => 1, 'three' => { 'five' => 5 }, 'two' => 'dos') } - it { - pending 'should not special case this' - is_expected.to run.with_params({}).and_return({}) + is_expected.to run \ + .with_params({}, 'two') \ + .and_raise_error( + ArgumentError, \ + Regexp.new(Regexp.escape("rejected: parameter 'args' expects a value of type Undef, Hash, or String[0, 0], got String")), + ) + } + it { + is_expected.to run \ + .with_params({}, 1) \ + .and_raise_error(ArgumentError, %r{parameter 'args' expects a value of type Undef, Hash, or String, got Integer}) + } + it { + is_expected.to run \ + .with_params({ 'one' => 1, 'three' => { 'four' => 4 } }, 'two' => 'dos', 'three' => { 'five' => 5 }) \ + .and_return('one' => 1, 'three' => { 'five' => 5 }, 'two' => 'dos') } + + it { is_expected.to run.with_params.and_return({}) } + it { is_expected.to run.with_params({}).and_return({}) } it { is_expected.to run.with_params({}, {}).and_return({}) } it { is_expected.to run.with_params({}, {}, {}).and_return({}) } + describe 'should accept empty strings as puppet undef' do it { is_expected.to run.with_params({}, '').and_return({}) } end + it { is_expected.to run.with_params({ 'key' => 'value' }, {}).and_return('key' => 'value') } it { is_expected.to run.with_params({}, 'key' => 'value').and_return('key' => 'value') } it { is_expected.to run.with_params({ 'key' => 'value1' }, 'key' => 'value2').and_return('key' => 'value2') } @@ -24,4 +38,43 @@ .with_params({ 'key1' => 'value1' }, { 'key2' => 'value2' }, 'key3' => 'value3') \ .and_return('key1' => 'value1', 'key2' => 'value2', 'key3' => 'value3') } + describe 'should accept iterable and merge produced hashes' do + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + .with_lambda { |_hsh, val| { val => val } } \ + .and_return(1 => 1, 2 => 2, 3 => 3) + } + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + .with_lambda { |_hsh, val| { val => val } unless val == 2 } \ + .and_return(1 => 1, 3 => 3) + } + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + # rubocop:disable Style/Semicolon + .with_lambda { |_hsh, val| raise StopIteration if val == 3; { val => val } } \ + .and_return(1 => 1, 2 => 2) + } + it { + is_expected.to run \ + .with_params(['a', 'b', 'b', 'c', 'b']) \ + .with_lambda { |hsh, val| { val => (hsh[val] || 0) + 1 } } \ + .and_return('a' => 1, 'b' => 3, 'c' => 1) + } + it { + is_expected.to run \ + .with_params(['a', 'b', 'c']) \ + .with_lambda { |_hsh, idx, val| { idx => val } } \ + .and_return(0 => 'a', 1 => 'b', 2 => 'c') + } + it { + is_expected.to run \ + .with_params('a' => 'A', 'b' => 'B', 'c' => 'C') \ + .with_lambda { |_hsh, key, val| { key => "#{key}#{val}" } } \ + .and_return('a' => 'aA', 'b' => 'bB', 'c' => 'cC') + } + end end