summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js')
-rw-r--r--www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js269
1 files changed, 269 insertions, 0 deletions
diff --git a/www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js b/www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js
new file mode 100644
index 00000000..25cc65e7
--- /dev/null
+++ b/www/wiki/extensions/MultimediaViewer/resources/mmv/mmv.HtmlUtils.js
@@ -0,0 +1,269 @@
+/*
+ * This file is part of the MediaWiki extension MediaViewer.
+ *
+ * MediaViewer is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MediaViewer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with MediaViewer. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+( function ( mw, $ ) {
+ var HUP, cache;
+
+ /**
+ * Shared cache between HtmlUtils instances to store the results of expensive text operations.
+ *
+ * @member mw.mmv.HtmlUtils
+ * @private
+ * @static
+ * @type {{text: Object.<string, string>, textWithLinks: Object.<string, string>, textWithTags: Object.<string, string>}}
+ */
+ cache = {
+ text: {},
+ textWithLinks: {},
+ textWithTags: {}
+ };
+
+ /**
+ * Helper class that does various HTML-to-text transformations
+ *
+ * @class mw.mmv.HtmlUtils
+ * @constructor
+ */
+ function HtmlUtils() {}
+ HUP = HtmlUtils.prototype;
+
+ /**
+ * Returns a jQuery node which contains the given HTML (wrapped into a `<div>` - this is
+ * necessary since an arbitrary HTML string might not have a jQuery representation).
+ *
+ * @param {string|HTMLElement|jQuery} html
+ * @return {jQuery}
+ */
+ HUP.wrapAndJquerify = function ( html ) {
+ if ( this.isJQueryOrHTMLElement( html ) ) {
+ return $( '<div>' ).append( $( html ).clone() );
+ } else if ( typeof html === 'string' ) {
+ return $( '<div>' + html + '</div>' );
+ } else {
+ mw.log.warn( 'wrapAndJquerify: unknown type', html );
+ throw new Error( 'wrapAndJquerify: unknown type' );
+ }
+ };
+
+ /**
+ * Returns true of the object is a jQuery object or an HTMLElement, false otherwise
+ *
+ * @param {string|HTMLElement|jQuery} html
+ * @return {boolean}
+ */
+ HUP.isJQueryOrHTMLElement = function ( html ) {
+ if ( html instanceof jQuery ) {
+ return true;
+ }
+
+ if ( window.HTMLElement ) {
+ if ( html instanceof HTMLElement ) {
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+ /**
+ * Filters display:none children of a node.
+ * The root element is never filtered, and generally ignored (i.e. whether the root element is
+ * visible won't affect the filtering).
+ * Works in place.
+ *
+ * @param {jQuery} $jq
+ */
+ HUP.filterInvisible = function ( $jq ) {
+ // We are not using :visible because
+ // 1) it would require appending $jq to the document which makes things complicated;
+ // 2) the main difference is that it looks for CSS rules hiding the element;
+ // since this function is intended to be used on html originating from a different
+ // document, possibly a different site, that would probably have unexpected results.
+ $jq
+ .find( '[style]' )
+ .filter( function () { return this.style.display === 'none'; } )
+ .remove();
+ };
+
+ /**
+ * Discards all nodes which do not match the whitelist,
+ * but keeps the text and whitelisted nodes inside them.
+ * Works in-place.
+ *
+ * @param {jQuery} $el
+ * @param {string} whitelist a jQuery selector string such as 'a, span, br'
+ */
+ HUP.whitelistHtml = function ( $el, whitelist ) {
+ var child, $prev,
+ $child = $el.children().first();
+
+ while ( $child && $child.length ) {
+ child = $child.get( 0 );
+
+ if ( child.nodeType !== child.ELEMENT_NODE ) {
+ return;
+ }
+
+ this.whitelistHtml( $child, whitelist );
+
+ if ( !$child.is( whitelist ) ) {
+ $prev = $child.prev();
+ $child.replaceWith( $child.contents() );
+ } else {
+ $prev = $child;
+ }
+
+ if ( $prev && $prev.length === 1 ) {
+ $child = $prev.next();
+ } else {
+ $child = $el.children().first();
+ }
+ }
+ };
+
+ /**
+ * Adds a whitespace to block elements. This is useful if you want to convert the contents
+ * to text and don't want words that are visually separate (e.g. table cells) to be fused.
+ * Works in-place.
+ *
+ * @param {jQuery} $el
+ */
+ HUP.appendWhitespaceToBlockElements = function ( $el ) {
+ // the list of what elements to add whitespace to is somewhat ad-hoc (not all of these
+ // are technically block-level elements, and a lot of block-level elements are missing)
+ // but will hopefully cover the common cases where text is fused together.
+ $el
+ .find( 'blockquote, dd, dl, dt, li, td' )
+ .before( ' ' )
+ .after( ' ' );
+ $el
+ .find( 'br, tr, p' )
+ .before( '\n' )
+ .after( '\n' );
+ };
+
+ /**
+ * Returns the HTML code for a jQuery element (only the first one if passed a set of elements).
+ * Unlike .html(), this includes HTML code for the outermost element; compare
+ * - `$('<div>').html() // ''`
+ * - `mw.mmv.HtmlUtils.jqueryToHtml( $('<div>') ) // '<div></div>'`
+ *
+ * @param {jQuery} $el
+ * @return {string}
+ */
+ HUP.jqueryToHtml = function ( $el ) {
+ // There are two possible implementations for this:
+ // 1) load innto a wrapper element and get its innerHTML;
+ // 2) use outerHTML.
+ // We go with 1) because it handles the case when a jQuery object contains something
+ // that is not an element (this can happen with e.g. $x.children() which returns text
+ // nodes as well).
+ return $( '<div>' ).append( $el ).html();
+ };
+
+ /**
+ * Cleans up superfluous whitespace.
+ * Given that the results will be displayed in a HTML environment, this doesn't have any real
+ * effect. It is mostly there to make testing easier.
+ *
+ * @protected
+ * @param {string} html a HTML (or plaintext) string
+ * @return {string}
+ */
+ HUP.mergeWhitespace = function ( html ) {
+ html = html.replace( /^\s+|\s+$/g, '' );
+ html = html.replace( /\s*\n\s*/g, '\n' );
+ html = html.replace( / {2,}/g, ' ' );
+ return html;
+ };
+
+ /**
+ * Returns the text content of a html string.
+ * Tries to give an approximation of what would be visible if the HTML would be displayed.
+ *
+ * @param {string} html
+ * @return {string}
+ */
+ HUP.htmlToText = function ( html ) {
+ var $html;
+ if ( !cache.text[ html ] ) {
+ $html = this.wrapAndJquerify( html );
+ this.filterInvisible( $html );
+ this.appendWhitespaceToBlockElements( $html );
+ cache.text[ html ] = this.mergeWhitespace( $html.text() );
+ }
+ return cache.text[ html ];
+ };
+
+ /**
+ * Returns the text content of a html string, with the `<a>`, `<i>`, `<b>` tags left intact.
+ * Tries to give an approximation of what would be visible if the HTML would be displayed.
+ *
+ * @param {string} html
+ * @return {string}
+ */
+ HUP.htmlToTextWithTags = function ( html ) {
+ var $html;
+ if ( !cache.textWithTags[ html ] ) {
+ $html = this.wrapAndJquerify( html );
+ this.filterInvisible( $html );
+ this.appendWhitespaceToBlockElements( $html );
+ this.whitelistHtml( $html, 'a, span, i, b, sup, sub' );
+ cache.textWithTags[ html ] = this.mergeWhitespace( $html.html() );
+ }
+ return cache.textWithTags[ html ];
+ };
+
+ /**
+ * Returns the text content of a html string, with the `<a>` tags left intact.
+ * Tries to give an approximation of what would be visible if the HTML would be displayed.
+ *
+ * @param {string} html
+ * @return {string}
+ */
+ HUP.htmlToTextWithLinks = function ( html ) {
+ var $html;
+ if ( !cache.textWithLinks[ html ] ) {
+ $html = this.wrapAndJquerify( html );
+ this.filterInvisible( $html );
+ this.appendWhitespaceToBlockElements( $html );
+ this.whitelistHtml( $html, 'a, span' );
+ cache.textWithLinks[ html ] = this.mergeWhitespace( $html.html() );
+ }
+ return cache.textWithLinks[ html ];
+ };
+
+ /**
+ * Generates HTML code for a link.
+ *
+ * @param {string} text Link text (plain text; will be sanitized)
+ * @param {Object} props Link attributes (should at a minumum include href; will be sanitized)
+ * @return {string}
+ */
+ HUP.makeLinkText = function ( text, props ) {
+ var key;
+ for ( key in props ) {
+ if ( !props.hasOwnProperty( key ) ) {
+ continue;
+ }
+ props[ key ] = this.htmlToText( props[ key ] );
+ }
+ return this.jqueryToHtml( $( '<a>' ).prop( props ).text( text ) );
+ };
+
+ mw.mmv.HtmlUtils = HtmlUtils;
+}( mediaWiki, jQuery ) );