Skip to content

Commit 0ce187d

Browse files
authored
Merge pull request #16 from rzuckerm/base64-decode-errors
Add error checking to Base64 decode
2 parents 22b5124 + 142c3c1 commit 0ce187d

2 files changed

Lines changed: 123 additions & 22 deletions

File tree

include/std/base64.e

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,25 @@ constant aleph = {
2121
'0','1','2','3','4','5','6','7','8','9','+','/'
2222
}
2323

24-
--#
25-
--# - see also ccha in which inverted decode table is built.
26-
--#
24+
--# Base64 decode table, indexed by character code 1..256 (-1 marks an invalid character)
25+
constant ccha = {
26+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52,
29+
53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1,
30+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
31+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1,
32+
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
33+
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, -1,
34+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
36+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
42+
}
2743
--# Pad character is '=' (61)
2844
--#
2945
--# encoded output of encode is no more than 76 characters each.
@@ -154,16 +170,16 @@ end function
154170
-- # ##in## ~-- must be a simple sequence of length ##4## to ##76## .
155171
--
156172
-- Returns:
157-
-- A **sequence**, base256 decode of passed sequence.
158-
-- the length of data to decode must be a multiple of ##4## .
173+
-- A **sequence**, base256 decode of passed sequence if the length of
174+
-- the data to decode is a multiple of ##4## and the data is valid Base64.
175+
-- Otherwise, -1 is returned.
159176
--
160177
-- Comments:
161178
-- The calling program is expected to strip newlines and so on before calling.
162179
--
163180
public function decode(sequence in)
164-
integer len, oidx, case3, tmp
181+
integer len, oidx, case3, tmp, index
165182
sequence result
166-
sequence ccha
167183

168184
-- TODO: Surely this is not the most efficient way of doing this
169185
in = search:match_replace("\r\n", in, "")
@@ -172,23 +188,21 @@ public function decode(sequence in)
172188
if remainder(len, 4) != 0 then
173189
return -1
174190
end if
175-
191+
176192
oidx = (len / 4) * 3
177193
case3 = 3
178-
179-
while in[len] = '=' do --# should only happen 0 1 or 2 times
194+
195+
tmp = len
196+
while len > 0 and in[len] = '=' do --# should only happen 0 1 or 2 times
180197
oidx -= 1
181198
case3 = nc3[case3]
182199
len -= 1
183200
end while
184201

185-
--#
186-
--# invert aleph to a decode table
187-
--#
188-
ccha = repeat(0, 256)
189-
for i = 1 to 64 do
190-
ccha[aleph[i]] = i - 1
191-
end for
202+
if tmp - len > 2 --# too many pad characters
203+
then
204+
return -1
205+
end if
192206

193207
result = repeat('?', oidx)
194208
for i = oidx to 1 by -1 do
@@ -201,7 +215,19 @@ public function decode(sequence in)
201215
--# dmul = {4,16,64}
202216
--# ddiv = {16,4,1}
203217
--#
204-
tmp = remainder(ccha[in[len - 1]], drem[case3]) * dmul[case3]
218+
index = ccha[in[len - 1]]
219+
if index < 0 --# Invalid character
220+
then
221+
return -1
222+
end if
223+
224+
tmp = remainder(index, drem[case3]) * dmul[case3]
225+
index = ccha[in[len]]
226+
if index < 0 --# Invalid character
227+
then
228+
return -1
229+
end if
230+
205231
tmp += floor(ccha[in[len]] / ddiv[case3])
206232
result[i] = tmp
207233
len -= ldrop[case3]

tests/t_base64.e

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,88 @@ include std/unittest.e
33
include std/base64.e
44

55
sequence result, this
6+
object bad_result
67

78
result = encode("a")
9+
test_equal("1 char encode", result, "YQ==")
810
result = decode(result)
9-
test_equal("1 char conversion", result, "a")
11+
test_equal("1 char decode", result, "a")
1012

1113
result = encode("12")
14+
test_equal("2 char encode", result, "MTI=")
1215
result = decode(result)
13-
test_equal("2 char conversion", result, "12")
16+
test_equal("2 char decode", result, "12")
1417

1518
result = encode("XYZ")
19+
test_equal("3 char encode", result, "WFla")
1620
result = decode(result)
17-
test_equal("3 char conversion", result, "XYZ")
21+
test_equal("3 char decode", result, "XYZ")
22+
23+
constant encode_table = {
24+
"AQ==", --# 1
25+
"AgI=", --# 2
26+
"AwMD", --# 3
27+
"BAQEBA==", --# 4
28+
"BQUFBQU=", --# 5
29+
"BgYGBgYG", --# 6
30+
"BwcHBwcHBw==", --# 7
31+
"CAgICAgICAg=", --# 8
32+
"CQkJCQkJCQkJ", --# 9
33+
"CgoKCgoKCgoKCg==", --# 10
34+
"CwsLCwsLCwsLCws=", --# 11
35+
"DAwMDAwMDAwMDAwM", --# 12
36+
"DQ0NDQ0NDQ0NDQ0NDQ==", --# 13
37+
"Dg4ODg4ODg4ODg4ODg4=", --# 14
38+
"Dw8PDw8PDw8PDw8PDw8P", --# 15
39+
"EBAQEBAQEBAQEBAQEBAQEA==", --# 16
40+
"ERERERERERERERERERERERE=", --# 17
41+
"EhISEhISEhISEhISEhISEhIS", --# 18
42+
"ExMTExMTExMTExMTExMTExMTEw==", --# 19
43+
"FBQUFBQUFBQUFBQUFBQUFBQUFBQ=", --# 20
44+
"FRUVFRUVFRUVFRUVFRUVFRUVFRUV", --# 21
45+
"FhYWFhYWFhYWFhYWFhYWFhYWFhYWFg==", --# 22
46+
"FxcXFxcXFxcXFxcXFxcXFxcXFxcXFxc=", --# 23
47+
"GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY", --# 24
48+
"GRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGQ==", --# 25
49+
"GhoaGhoaGhoaGhoaGhoaGhoaGhoaGhoaGho=", --# 26
50+
"GxsbGxsbGxsbGxsbGxsbGxsbGxsbGxsbGxsb", --# 27
51+
"HBwcHBwcHBwcHBwcHBwcHBwcHBwcHBwcHBwcHA==", --# 28
52+
"HR0dHR0dHR0dHR0dHR0dHR0dHR0dHR0dHR0dHR0=", --# 29
53+
"Hh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e", --# 30
54+
"Hx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHw==", --# 31
55+
"ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICA=", --# 32
56+
"ISEhISEhISEhISEhISEhISEhISEhISEhISEhISEhISEh", --# 33
57+
"IiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIg==", --# 34
58+
"IyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyM=", --# 35
59+
"JCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQkJCQk", --# 36
60+
"JSUlJSUlJSUlJSUlJSUlJSUlJSUlJSUlJSUlJSUlJSUlJSUlJQ==", --# 37
61+
"JiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiY=", --# 38
62+
"JycnJycnJycnJycnJycnJycnJycnJycnJycnJycnJycnJycnJycn", --# 39
63+
"KCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKA==", --# 40
64+
"KSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSkpKSk=", --# 41
65+
"KioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioq", --# 42
66+
"KysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKw==", --# 43
67+
"LCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCwsLCw=", --# 44
68+
"LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0t", --# 45
69+
"Li4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLi4uLg==", --# 46
70+
"Ly8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8vLy8=", --# 47
71+
"MDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAw", --# 48
72+
"MTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMTExMQ==", --# 49
73+
"MjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjI=", --# 50
74+
"MzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMz", --# 51
75+
"NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0NA==", --# 52
76+
"NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU1NTU=", --# 53
77+
"NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2", --# 54
78+
"Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nzc3Nw==", --# 55
79+
"ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg4ODg=", --# 56
80+
"OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5OTk5" --# 57
81+
}
1882

1983
for i = 1 to 57 do
2084
this = repeat(i,i)
21-
result = decode(encode(this))
85+
result = encode(this)
86+
test_equal(sprintf("%d byte encode", { i }), result, encode_table[i])
87+
result = decode(result)
2288
test_equal(sprintf("%d byte conversion", { i }), result, this)
2389
end for
2490

@@ -44,5 +110,14 @@ encoded =
44110
test_equal("Thomas Hobbes' Leviathan encode", encoded, encode(msg, 76))
45111
test_equal("Thomas Hobbes' Leviathan decode", msg, decode(encoded))
46112

113+
bad_result = decode("aA=")
114+
test_equal("Length not a multiple of 4", bad_result, -1)
115+
116+
bad_result = decode("a===")
117+
test_equal("Too many pad characters", bad_result, -1)
118+
119+
bad_result = decode("YX!q")
120+
test_equal("Invalid base64 character", bad_result, -1)
121+
47122
test_report()
48123

0 commit comments

Comments
 (0)