summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/UploadWizard/resources/mw.Escaper.js
blob: e57d42afaabfdbf5d144373ab940333c6da040af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
( function ( mw, OO ) {
	/**
	 * Recursively swaps placeholders found in `wikitext` for the content
	 * stored under them in `pending`.
	 *
	 * `pending` is consumed while restoring: every placeholder that has been
	 * expanded is deleted from it, so it can never be expanded a second time.
	 * Callers must therefore pass an object they own.
	 *
	 * @private
	 * @param {string} wikitext
	 * @param {Object} pending Map of placeholder => original content
	 * @return {string}
	 */
	function restorePending( wikitext, pending ) {
		var searchValues = Object.keys( pending ).map( mw.RegExp.escape ),
			searchRegex;

		// nothing (left) to restore; also avoids building the regex `()`,
		// which would match the empty string at every position
		if ( searchValues.length === 0 ) {
			return wikitext;
		}

		// turn search keys into a regular expression, allowing us to match
		// all of them at once
		searchRegex = new RegExp( '(' + searchValues.join( '|' ) + ')', 'g' );

		return wikitext.replace( searchRegex, function ( match ) {
			var replacement = pending[ match ];

			// we matched something that has no replacement (anymore), must be
			// valid user input that just happens to look like one of the
			// replacement values
			if ( replacement === undefined ) {
				return match;
			}

			// if we find the replacement itself matches a search value, we
			// also don't want to go recursive: nesting doesn't work like
			// that, it's just a coincidence where user input happens to
			// look just like a replacement value (e.g. `{{1}}`)
			if ( replacement in pending ) {
				return replacement;
			}

			// we must not replace this one again, to avoid getting stuck in
			// endless recursion
			delete pending[ match ];

			// go recursive, there may be more replacements nested down there
			return restorePending( replacement, pending );
		} );
	}

	mw.Escaper = {
		/**
		 * Escapes wikitext for use inside {{templates}}.
		 *
		 * @param {string} wikitext
		 * @return {string}
		 */
		escapeForTemplate: function ( wikitext ) {
			return this.escapePipes( wikitext );
		},

		/**
		 * Escapes pipe characters, which could be problematic when the content is
		 * inserted in a template.
		 *
		 * Pipes inside URLs are percent-encoded (`%7C`), pipes inside nested
		 * {{templates}} and [[links]] are left alone, all other pipes are
		 * replaced with `{{!}}`.
		 *
		 * @param {string} wikitext
		 * @return {string}
		 */
		escapePipes: function ( wikitext ) {
			var extractedTemplates, extractedLinks;

			// Pipes (`|`) must be escaped because we'll be inserting this
			// content into a template & pipes would mess up the syntax.
			// First, urlencode pipes inside links:
			wikitext = wikitext.replace( /\bhttps?:\/\/[^\s]+/g, function ( match ) {
				return match.replace( /\|/g, '%7C' );
			} );

			// Second, pipes can be valid inside other templates or links in
			// wikitext, so we'll first extract those from the content, then
			// replace the pipes, then restore the original (extracted) content:
			extractedTemplates = this.extractTemplates( wikitext );
			extractedLinks = this.extractLinks( extractedTemplates[ 0 ] );
			wikitext = extractedLinks[ 0 ].replace( /\|/g, '{{!}}' );
			// template keys look like {{n}}, link keys like [[n]], so merging
			// both maps can not overwrite anything
			return this.restoreExtracts( wikitext, $.extend( extractedTemplates[ 1 ], extractedLinks[ 1 ] ) );
		},

		/**
		 * Extract all {{templates}} from wikitext, replacing them with
		 * placeholder content in the form of {{0}}, {{1}}.
		 *
		 * Nested templates will safely be extracted by first replacing inner
		 * templates, then moving outwards, ensuring we don't get closing
		 * bracket mismatches.
		 *
		 * Restoring the content is as simple as feeding the returned content &
		 * replacements back into this.restoreExtracts.
		 *
		 * Known limitation: a literal numeric template that is directly
		 * preceded by another `{` (e.g. `{{{1}}}`) is extracted together with
		 * that brace and can still be mistaken for a placeholder on restore.
		 *
		 * @param {string} wikitext
		 * @return {Array} [{string} wikitext, {Object} replacements]
		 */
		extractTemplates: function ( wikitext ) {
			var extracts = {},
				previousExtracts = {},
				extractCount,
				// matches a template that does not contain another template:
				// * `[^\{]`: character can not be {
				// * `\{(?!\{)`: or if it is, it can't be followed by another {
				// this excludes template opening brackets: {{
				innermostRegex = /\{\{([^{]|\{(?!\{))*?\}\}/g,
				// same as above, but additionally allows:
				// * `\{\{[0-9]+\}\}`: a complete {{[0-9]+}} sequence,
				//   generated by an earlier run of the extraction
				nestedRegex = /\{\{([^{]|\{(?!\{)|\{\{[0-9]+\}\})*?\}\}/g,
				// The very first pass must not treat {{[0-9]+}} as placeholder:
				// none have been generated yet, so anything that looks like one
				// is user input. Extracting those on their own first makes
				// sure restoreExtracts never confuses them with placeholders
				// while expanding the template that surrounds them.
				regex = innermostRegex,
				callback = function ( match ) {
					var replacement = '{{' + Object.keys( extracts ).length + '}}';

					// safeguard for not replacing already-replaced matches
					// this makes sure that when real content contains something
					// like {{1}}, it will still be replaced, while {{1}}
					// generated by this code can be recognized & ignored
					if ( match in previousExtracts ) {
						return match;
					}

					extracts[ replacement ] = match;
					return replacement;
				};

			// Keep going until a pass no longer extracts anything. Comparing
			// the number of extracts (rather than the strings) is required
			// because replacing a literal {{0}} with placeholder {{0}} leaves
			// the string untouched even though outer templates still need to
			// be processed.
			do {
				extractCount = Object.keys( extracts ).length;
				previousExtracts = OO.copy( extracts );
				wikitext = wikitext.replace( regex, callback );
				regex = nestedRegex;
			} while ( Object.keys( extracts ).length !== extractCount );

			return [ wikitext, extracts ];
		},

		/**
		 * Extract all [[links]] from wikitext, replacing them with placeholder
		 * content in the form of [[0]], [[1]].
		 *
		 * Restoring the content is as simple as feeding the returned content &
		 * replacements back into this.restoreExtracts.
		 *
		 * @param {string} wikitext
		 * @return {Array} [{string} wikitext, {Object} replacements]
		 */
		extractLinks: function ( wikitext ) {
			var extracts = {};

			wikitext = wikitext.replace( /\[\[.*?\]\]/g, function ( match ) {
				var replacement = '[[' + Object.keys( extracts ).length + ']]';
				extracts[ replacement ] = match;
				return replacement;
			} );

			return [ wikitext, extracts ];
		},

		/**
		 * Restores content that was extracted from wikitext.
		 *
		 * The `replacements` object passed in is left untouched; bookkeeping
		 * of which placeholders have already been restored happens on a
		 * private copy.
		 *
		 * @param {string} wikitext
		 * @param {Object} replacements Map of placeholder => original content
		 * @return {string}
		 */
		restoreExtracts: function ( wikitext, replacements ) {
			var pending = {};

			// shallow copy: restoring deletes entries as it goes & we don't
			// want that to leak into the caller's object
			Object.keys( replacements ).forEach( function ( key ) {
				pending[ key ] = replacements[ key ];
			} );

			return restorePending( wikitext, pending );
		}
	};
}( mediaWiki, OO ) );