1 files changed, 205 insertions, 0 deletions
diff --git a/www/wiki/resources/src/mediawiki/mediawiki.String.js b/www/wiki/resources/src/mediawiki/mediawiki.String.js
new file mode 100644
index 00000000..5d9bef06
--- /dev/null
+++ b/www/wiki/resources/src/mediawiki/mediawiki.String.js
@@ -0,0 +1,205 @@
+( function () {
+
+	/**
+	 * @class mw.String
+	 * @singleton
+	 */
+
+	/**
+	 * Calculate the byte length of a string (accounting for UTF-8).
+	 *
+	 * @author Jan Paul Posma, 2011
+	 * @author Timo Tijhof, 2012
+	 * @author David Chan, 2013
+	 *
+	 * @param {string} str
+	 * @return {number}
+	 */
+	function byteLength( str ) {
+		// This basically figures out how many bytes a UTF-16 string (which is what js sees)
+		// will take in UTF-8 by replacing a 2 byte character with 2 *'s, etc, and counting that.
+		// Note, surrogate (\uD800-\uDFFF) characters are counted as 2 bytes, since there's two of them
+		// and the actual character takes 4 bytes in UTF-8 (2*2=4). Might not work perfectly in
+		// edge cases such as illegal sequences, but that should never happen.
+
+		// https://en.wikipedia.org/wiki/UTF-8#Description
+		// The mapping from UTF-16 code units to UTF-8 bytes is as follows:
+		// > Range 0000-007F: codepoints that become 1 byte of UTF-8
+		// > Range 0080-07FF: codepoints that become 2 bytes of UTF-8
+		// > Range 0800-D7FF: codepoints that become 3 bytes of UTF-8
+		// > Range D800-DFFF: Surrogates (each pair becomes 4 bytes of UTF-8)
+		// > Range E000-FFFF: codepoints that become 3 bytes of UTF-8 (continued)
+
+		return str
+			.replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
+			.replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
+			.length;
+	}
+
+	/**
+	 * Calculate the character length of a string (accounting for UTF-16 surrogates).
+	 *
+	 * @param {string} str
+	 * @return {number}
+	 */
+	function codePointLength( str ) {
+		return str
+			// Low surrogate + high surrogate pairs represent one character (codepoint) each
+			.replace( /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, '*' )
+			.length;
+	}
+
+	// Like String#charAt, but return the pair of UTF-16 surrogates for characters outside of BMP.
+	function codePointAt( string, offset, backwards ) {
+		// We don't need to check for offsets at the beginning or end of string,
+		// String#slice will simply return a shorter (or empty) substring.
+		var maybePair = backwards ?
+			string.slice( offset - 1, offset + 1 ) :
+			string.slice( offset, offset + 2 );
+		if ( /^[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( maybePair ) ) {
+			return maybePair;
+		} else {
+			return string.charAt( offset );
+		}
+	}
+
+	function trimLength( safeVal, newVal, length, lengthFn ) {
+		var startMatches, endMatches, matchesLen, inpParts, chopOff, oldChar, newChar,
+			oldVal = safeVal;
+
+		// Run the hook if one was provided, but only on the length
+		// assessment. The value itself is not to be affected by the hook.
+		if ( lengthFn( newVal ) <= length ) {
+			// Limit was not reached, just remember the new value
+			// and let the user continue.
+			return {
+				newVal: newVal,
+				trimmed: false
+			};
+		}
+
+		// Current input is longer than the active limit.
+		// Figure out what was added and limit the addition.
+		startMatches = 0;
+		endMatches = 0;
+
+		// It is important that we keep the search within the range of
+		// the shortest string's length.
+		// Imagine a user adds text that matches the end of the old value
+		// (e.g. "foo" -> "foofoo"). startMatches would be 3, but without
+		// limiting both searches to the shortest length, endMatches would
+		// also be 3.
+		matchesLen = Math.min( newVal.length, oldVal.length );
+
+		// Count same characters from the left, first.
+		// (if "foo" -> "foofoo", assume addition was at the end).
+		while ( startMatches < matchesLen ) {
+			oldChar = codePointAt( oldVal, startMatches, false );
+			newChar = codePointAt( newVal, startMatches, false );
+			if ( oldChar !== newChar ) {
+				break;
+			}
+			startMatches += oldChar.length;
+		}
+
+		while ( endMatches < ( matchesLen - startMatches ) ) {
+			oldChar = codePointAt( oldVal, oldVal.length - 1 - endMatches, true );
+			newChar = codePointAt( newVal, newVal.length - 1 - endMatches, true );
+			if ( oldChar !== newChar ) {
+				break;
+			}
+			endMatches += oldChar.length;
+		}
+
+		inpParts = [
+			// Same start
+			newVal.slice( 0, startMatches ),
+			// Inserted content
+			newVal.slice( startMatches, newVal.length - endMatches ),
+			// Same end
+			newVal.slice( newVal.length - endMatches )
+		];
+
+		// Chop off characters from the end of the "inserted content" string
+		// until the limit is statisfied.
+		// Make sure to stop when there is nothing to slice (T43450).
+		while ( lengthFn( inpParts.join( '' ) ) > length && inpParts[ 1 ].length > 0 ) {
+			// Do not chop off halves of surrogate pairs
+			chopOff = /[\uD800-\uDBFF][\uDC00-\uDFFF]$/.test( inpParts[ 1 ] ) ? 2 : 1;
+			inpParts[ 1 ] = inpParts[ 1 ].slice( 0, -chopOff );
+		}
+
+		return {
+			newVal: inpParts.join( '' ),
+			// For pathological lengthFn() that always returns a length greater than the limit, we might have
+			// ended up not trimming - check for this case to avoid infinite loops
+			trimmed: newVal !== inpParts.join( '' )
+		};
+	}
+
+	/**
+	 * Utility function to trim down a string, based on byteLimit
+	 * and given a safe start position. It supports insertion anywhere
+	 * in the string, so "foo" to "fobaro" if limit is 4 will result in
+	 * "fobo", not "foba". Basically emulating the native maxlength by
+	 * reconstructing where the insertion occurred.
+	 *
+	 * @param {string} safeVal Known value that was previously returned by this
+	 * function, if none, pass empty string.
+	 * @param {string} newVal New value that may have to be trimmed down.
+	 * @param {number} byteLimit Number of bytes the value may be in size.
+	 * @param {Function} [filterFn] Function to call on the string before assessing the length.
+	 * @return {Object}
+	 * @return {string} return.newVal
+	 * @return {boolean} return.trimmed
+	 */
+	function trimByteLength( safeVal, newVal, byteLimit, filterFn ) {
+		var lengthFn;
+		if ( filterFn ) {
+			lengthFn = function ( val ) {
+				return byteLength( filterFn( val ) );
+			};
+		} else {
+			lengthFn = byteLength;
+		}
+
+		return trimLength( safeVal, newVal, byteLimit, lengthFn );
+	}
+
+	/**
+	 * Utility function to trim down a string, based on codePointLimit
+	 * and given a safe start position. It supports insertion anywhere
+	 * in the string, so "foo" to "fobaro" if limit is 4 will result in
+	 * "fobo", not "foba". Basically emulating the native maxlength by
+	 * reconstructing where the insertion occurred.
+	 *
+	 * @param {string} safeVal Known value that was previously returned by this
+	 * function, if none, pass empty string.
+	 * @param {string} newVal New value that may have to be trimmed down.
+	 * @param {number} codePointLimit Number of characters the value may be in size.
+	 * @param {Function} [filterFn] Function to call on the string before assessing the length.
+	 * @return {Object}
+	 * @return {string} return.newVal
+	 * @return {boolean} return.trimmed
+	 */
+	function trimCodePointLength( safeVal, newVal, codePointLimit, filterFn ) {
+		var lengthFn;
+		if ( filterFn ) {
+			lengthFn = function ( val ) {
+				return codePointLength( filterFn( val ) );
+			};
+		} else {
+			lengthFn = codePointLength;
+		}
+
+		return trimLength( safeVal, newVal, codePointLimit, lengthFn );
+	}
+
+	module.exports = {
+		byteLength: byteLength,
+		codePointLength: codePointLength,
+		trimByteLength: trimByteLength,
+		trimCodePointLength: trimCodePointLength
+	};
+
+}() );