forked from highlightjs/highlight.js
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregex.js
More file actions
125 lines (116 loc) · 3.1 KB
/
regex.js
File metadata and controls
125 lines (116 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/**
* @param {string} value
* @returns {RegExp}
* */
export function escape(value) {
return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function source(re) {
if (!re) return null;
if (typeof re === "string") return re;
return re.source;
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function lookahead(re) {
return concat('(?=', re, ')');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
export function optional(re) {
return concat('(', re, ')?');
}
/**
* @param {...(RegExp | string) } args
* @returns {string}
*/
export function concat(...args) {
const joined = args.map((x) => source(x)).join("");
return joined;
}
/**
* Any of the passed expresssions may match
*
* Creates a huge this | this | that | that match
* @param {(RegExp | string)[] } args
* @returns {string}
*/
export function either(...args) {
const joined = '(' + args.map((x) => source(x)).join("|") + ")";
return joined;
}
/**
* @param {RegExp} re
* @returns {number}
*/
export function countMatchGroups(re) {
return (new RegExp(re.toString() + '|')).exec('').length - 1;
}
/**
* Does lexeme start with a regular expression match at the beginning
* @param {RegExp} re
* @param {string} lexeme
*/
export function startsWith(re, lexeme) {
var match = re && re.exec(lexeme);
return match && match.index === 0;
}
// join logically computes regexps.join(separator), but fixes the
// backreferences so they continue to match.
// it also places each individual regular expression into it's own
// match group, keeping track of the sequencing of those match groups
// is currently an exercise for the caller. :-)
/**
* @param {(string | RegExp)[]} regexps
* @param {string} separator
* @returns {string}
*/
export function join(regexps, separator = "|") {
// backreferenceRe matches an open parenthesis or backreference. To avoid
// an incorrect parse, it additionally matches the following:
// - [...] elements, where the meaning of parentheses and escapes change
// - other escape sequences, so we do not misparse escape sequences as
// interesting elements
// - non-matching or lookahead parentheses, which do not capture. These
// follow the '(' with a '?'.
var backreferenceRe = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;
var numCaptures = 0;
var ret = '';
for (var i = 0; i < regexps.length; i++) {
numCaptures += 1;
var offset = numCaptures;
var re = source(regexps[i]);
if (i > 0) {
ret += separator;
}
ret += "(";
while (re.length > 0) {
var match = backreferenceRe.exec(re);
if (match == null) {
ret += re;
break;
}
ret += re.substring(0, match.index);
re = re.substring(match.index + match[0].length);
if (match[0][0] === '\\' && match[1]) {
// Adjust the backreference.
ret += '\\' + String(Number(match[1]) + offset);
} else {
ret += match[0];
if (match[0] === '(') {
numCaptures++;
}
}
}
ret += ")";
}
return ret;
}