-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathBase.ds
More file actions
272 lines (206 loc) · 7.74 KB
/
Base.ds
File metadata and controls
272 lines (206 loc) · 7.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
-- | Basic types and operators on Text objects.
module Data.Text.Base
export
{ -- Runtime operators.
copyTextLitToVector;
copyTextVecToVector;
-- Construction.
textOfChar;
textOfWord8;
textOfVector;
-- Conversion.
vectorOfText;
textVectorOfText;
-- Projections.
sizeOfText;
indexText;
-- Operators.
eqChar;
eqText;
-- Primitives.
ddcPrimMakeTextLit;
ddcPrimSizeOfTextLit;
ddcPrimIndexTextLit;
}
import Data.Text.Char
import Data.Numeric.Nat
import Data.Numeric.Bool
import Data.Numeric.Word8
import Data.Numeric.Word32
import Data.Function
import Data.Maybe
import Class.Numeric
import Class.Ord
import Class.Eq
-- | A TextLit is a boxed object that contains a pointer to literal
-- UTF-8 data in static memory.
import foreign boxed type
TextLit : Data
-- | Runtime operators for working with unboxed text literals.
import foreign c value
-- | Make a text literal.
-- The TextLit# type is a pointer to literal UTF-8 data in
-- static memory, which we pack into a boxed object.
ddcPrimMakeTextLit : TextLit# -> TextLit
-- | Get the size of a boxed text literal.
ddcPrimSizeOfTextLit : TextLit -> Nat
-- | Get a single byte from a boxed text literal.
-- This is a byte rather than a character,
-- as UTF-8 encoded characters can consist of multiple bytes.
ddcPrimIndexTextLit : TextLit -> Nat -> Word8
-- | Top level region containing text vectors.
-- All our dynamic character data is in this region.
import foreign abstract type
RegionText : Region
-- | Capabilities to allocate and read top-level text vectors.
import foreign abstract capability
capTopTextAlloc : Alloc RegionText
capTopTextRead : Read RegionText
where
-- | A TextVec is a boxed object that contains UTF-8 data in the
-- garbage collected heap.
type TextVec = Vector# RegionText Word8
-- Text -----------------------------------------------------------------------
-- CAREFUL: Don't change the order of these constructors.
-- Reflection of text values in the System.Runtime.Reflect depends on them
-- being in this specific order, so that we know what the tags of the
-- runtime objects are.
data Text
-- | Wrap a text literal into a text object.
-- The character data is stored in static memory.
= TextLit TextLit
-- | Wrap some character data into a text object.
-- The character data is stored on the heap.
| TextVec (Vector# RegionText Word8)
-- | Append two text objects.
| TextApp Text Text
-- Construction ---------------------------------------------------------------
-- | O(1). Wrap a single character into a text object.
-- We're only taking the lowest 8 bit bytes,
-- rather than doing proper UTF-8 encoding.
textOfChar (c: Word32): Text
= textOfWord8 (truncate# c)
-- | O(1). Wrap a single byte into a text object.
textOfWord8 (w8: Word8): Text
= TextVec
$ extend RegionText using mutable r1 in
do -- Allocate the vector to hold the data,
-- including an extra null terminator byte.
vec = vectorAlloc# {@r1} 2
-- Write the character.
vectorWrite# vec 0 w8
-- Write the null terminator.
vectorWrite# vec 1 0w8
vec
-- | O(1). Wrap a vector of UTF-8 data into a text object.
textOfVector (vec: Vector# RegionText Word8): Text
= TextVec vec
-- Conversion -----------------------------------------------------------------
textVectorOfText (tt: Text): Vector# RegionText Word8
= vectorOfText {@RegionText} tt
-- | Copy a Text object into a flat vector of UTF-8 data.
vectorOfText
{@r1: Region} (tt: Text)
: S (Alloc r1) (Vector# r1 Word8)
= extend r1 using mutable r2 in
do
-- Allocate a vector to hold all the data,
-- including an extra null terminator byte.
vec = vectorAlloc# {@r2} (add (sizeOfText tt) 1)
-- Copy the text data into the vector.
iEnd = copyTextToVector tt vec 0
-- Write the null terminator.
-- TODO(BL): check that the null byte is being written to the correct place.
vectorWrite# vec iEnd 0w8
vec
-- | Copy a text object to a mutable vector of UTF-8 data.
copyTextToVector
{@r: Region} (tt: Text) (vec: Vector# r Word8) (i0: Nat)
: S (Write r) Nat
= case tt of
TextLit lit
-> copyTextLitToVector lit vec i0 0 (ddcPrimSizeOfTextLit lit)
TextVec vec2
-> copyTextVecToVector vec2 vec i0 0 (vectorLength# vec2 - 1)
TextApp t1 t2
-> do i1 = copyTextToVector t1 vec i0
i2 = copyTextToVector t2 vec i1
i2
-- | Copy a text literal to a mutable vector of UTF-8 data.
copyTextLitToVector
{@r: Region} (tt: TextLit) (vec: Vector# r Word8)
(iDst iSrc nSrc: Nat)
: S (Write r) Nat
| iSrc >= nSrc = iDst
| otherwise
= do vectorWrite# vec iDst (ddcPrimIndexTextLit tt iSrc)
copyTextLitToVector
tt vec (iDst + 1) (iSrc + 1) nSrc
-- | Copy a text source vector to a mutable vector of UTF-8 data.
copyTextVecToVector
{@r1 r2: Region}
(vecSrc: Vector# r1 Word8) (vecDst: Vector# r2 Word8)
(iDst iSrc nSrc: Nat)
: S (Read r1 + Write r2) Nat
| iSrc >= nSrc = iDst
| otherwise
= do vectorWrite# vecDst iDst (vectorRead# vecSrc iSrc)
copyTextVecToVector
vecSrc vecDst (iDst + 1) (iSrc + 1) nSrc
-- Projections ----------------------------------------------------------------
-- | Get the size of the utf8 data in a Text object, in bytes.
--
-- * This is NOT the same as the length of the text string in characters,
-- as single characters can be encoded using multiple bytes.
--
sizeOfText (tt: Text): Nat
= case tt of
TextLit lit
-> ddcPrimSizeOfTextLit lit
-- The size of a text vector is the vector size minus
-- the null terminator byte.
TextVec vec
-> vectorLength# vec - 1
TextApp t1 t2
-> sizeOfText t1 + sizeOfText t2
-- | Get a single word8 character from a Text object.
indexText (tt: Text) (ix: Nat): Maybe Word8
= case tt of
TextLit lit
| ix >= ddcPrimSizeOfTextLit lit -> Nothing
| otherwise -> Just (ddcPrimIndexTextLit lit ix)
TextVec vec
| ix >= vectorLength# vec - 1 -> Nothing
| otherwise -> Just (vectorRead# vec ix)
TextApp t1 t2
-> case indexText t1 ix of
Just x -> Just x
Nothing -> indexText t2 (ix - sizeOfText t1)
-- Comparison -----------------------------------------------------------------
-- | Check if two characters are identical.
eqChar (c1 c2: Char): Bool
= eqText (textOfChar c1) (textOfChar c2)
-- | O(max len1 len2).
-- Check if two text objects represent the same characters.
eqText (tx1 tx2: Text): Bool
= constant r in
do
-- We copy both strings into new vectors before doing the comparison.
-- It would be better to do it in-place.
vec1 = vectorOfText {@r} tx1
len1 = vectorLength# vec1
vec2 = vectorOfText {@r} tx2
len2 = vectorLength# vec2
match
| len1 /= len2 = False
| len1 == 0 = True
| otherwise = go vec1 vec2 len1 0
where
go (vec1 vec2: Vector# r Word8)
(len: Nat) (ix: Nat): S (Read r) Bool
| ix >= (len - 1)
= True
| not (eq# (vectorRead# vec1 ix) (vectorRead# vec2 ix))
= False
| otherwise
= go vec1 vec2 len (ix + 1)