summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php
first commit
Diffstat (limited to 'www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php')
-rw-r--r--www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php513
1 files changed, 513 insertions, 0 deletions
diff --git a/www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php b/www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php
new file mode 100644
index 00000000..82cb6aa7
--- /dev/null
+++ b/www/wiki/extensions/SyntaxHighlight_GeSHi/SyntaxHighlight_GeSHi.class.php
@@ -0,0 +1,513 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+use Symfony\Component\Process\ProcessBuilder;
+
+// @codingStandardsIgnoreStart
+class SyntaxHighlight_GeSHi {
+// @codingStandardsIgnoreEnd
+
+ /** @var const The maximum number of lines that may be selected for highlighting. **/
+ const HIGHLIGHT_MAX_LINES = 1000;
+
+ /** @var const Maximum input size for the highlighter (100 kB). **/
+ const HIGHLIGHT_MAX_BYTES = 102400;
+
+ /** @var const CSS class for syntax-highlighted code. **/
+ const HIGHLIGHT_CSS_CLASS = 'mw-highlight';
+
+ /** @var const Cache version. Increment whenever the HTML changes. */
+ const CACHE_VERSION = 1;
+
+ /** @var array Mapping of MIME-types to lexer names. **/
+ private static $mimeLexers = array(
+ 'text/javascript' => 'javascript',
+ 'application/json' => 'javascript',
+ 'text/xml' => 'xml',
+ );
+
+ public static function onSetup() {
+ global $wgPygmentizePath;
+
+ // If $wgPygmentizePath is unset, use the bundled copy.
+ if ( $wgPygmentizePath === false ) {
+ $wgPygmentizePath = __DIR__ . '/pygments/pygmentize';
+ }
+ }
+
+ /**
+ * Get the Pygments lexer name for a particular language.
+ *
+ * @param string $lang Language name.
+ * @return string|null Lexer name, or null if no matching lexer.
+ */
+ private static function getLexer( $lang ) {
+ static $lexers = null;
+
+ if ( $lang === null ) {
+ return null;
+ }
+
+ if ( !$lexers ) {
+ $lexers = require __DIR__ . '/SyntaxHighlight_GeSHi.lexers.php';
+ }
+
+ $lexer = strtolower( $lang );
+
+ if ( in_array( $lexer, $lexers ) ) {
+ return $lexer;
+ }
+
+ $geshi2pygments = SyntaxHighlightGeSHiCompat::getGeSHiToPygmentsMap();
+
+ // Check if this is a GeSHi lexer name for which there exists
+ // a compatible Pygments lexer with a different name.
+ if ( isset( $geshi2pygments[$lexer] ) ) {
+ $lexer = $geshi2pygments[$lexer];
+ if ( in_array( $lexer, $lexers ) ) {
+ return $lexer;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Register parser hook
+ *
+ * @param $parser Parser
+ * @return bool
+ */
+ public static function onParserFirstCallInit( Parser &$parser ) {
+ foreach ( array( 'source', 'syntaxhighlight' ) as $tag ) {
+ $parser->setHook( $tag, array( 'SyntaxHighlight_GeSHi', 'parserHook' ) );
+ }
+ }
+
+ /**
+ * Parser hook
+ *
+ * @param string $text
+ * @param array $args
+ * @param Parser $parser
+ * @return string
+ */
+ public static function parserHook( $text, $args = array(), $parser ) {
+ global $wgUseTidy;
+
+ // Replace strip markers (For e.g. {{#tag:syntaxhighlight|<nowiki>...}})
+ $out = $parser->mStripState->unstripNoWiki( $text );
+
+ // Don't trim leading spaces away, just the linefeeds
+ $out = preg_replace( '/^\n+/', '', rtrim( $out ) );
+
+ // Convert deprecated attributes
+ if ( isset( $args['enclose'] ) ) {
+ if ( $args['enclose'] === 'none' ) {
+ $args['inline'] = true;
+ }
+ unset( $args['enclose'] );
+ }
+
+ $lexer = isset( $args['lang'] ) ? $args['lang'] : '';
+
+ $result = self::highlight( $out, $lexer, $args );
+ if ( !$result->isGood() ) {
+ $parser->addTrackingCategory( 'syntaxhighlight-error-category' );
+ }
+ $out = $result->getValue();
+
+ // HTML Tidy will convert tabs to spaces incorrectly (bug 30930).
+ // But the conversion from tab to space occurs while reading the input,
+ // before the conversion from &#9; to tab, so we can armor it that way.
+ if ( $wgUseTidy ) {
+ $out = str_replace( "\t", '&#9;', $out );
+ }
+
+ // Allow certain HTML attributes
+ $htmlAttribs = Sanitizer::validateAttributes( $args, array( 'style', 'class', 'id', 'dir' ) );
+ if ( !isset( $htmlAttribs['class'] ) ) {
+ $htmlAttribs['class'] = self::HIGHLIGHT_CSS_CLASS;
+ } else {
+ $htmlAttribs['class'] .= ' ' . self::HIGHLIGHT_CSS_CLASS;
+ }
+ if ( !( isset( $htmlAttribs['dir'] ) && $htmlAttribs['dir'] === 'rtl' ) ) {
+ $htmlAttribs['dir'] = 'ltr';
+ }
+
+ if ( isset( $args['inline'] ) ) {
+ // Enforce inlineness. Stray newlines may result in unexpected list and paragraph processing
+ // (also known as doBlockLevels()).
+ $out = str_replace( "\n", ' ', $out );
+ $out = Html::rawElement( 'code', $htmlAttribs, $out );
+
+ } else {
+ // Not entirely sure what benefit this provides, but it was here already
+ $htmlAttribs['class'] .= ' ' . 'mw-content-' . $htmlAttribs['dir'];
+
+ // Unwrap Pygments output to provide our own wrapper. We can't just always use the 'nowrap'
+ // option (pass 'inline'), since it disables other useful things like line highlighting.
+ // Tolerate absence of quotes for Html::element() and wgWellFormedXml=false.
+ $m = array();
+ if ( preg_match( '/^<div class="?mw-highlight"?>(.*)<\/div>$/s', trim( $out ), $m ) ) {
+ $out = trim( $m[1] );
+ } else {
+ throw new MWException( 'Unexpected output from Pygments encountered' );
+ }
+
+ // Use 'nowiki' strip marker to prevent list processing (also known as doBlockLevels()).
+ // However, leave the wrapping <div/> outside to prevent <p/>-wrapping.
+ $marker = $parser->mUniqPrefix . '-syntaxhighlightinner-' .
+ sprintf( '%08X', $parser->mMarkerIndex++ ) . $parser::MARKER_SUFFIX;
+ $parser->mStripState->addNoWiki( $marker, $out );
+
+ $out = Html::openElement( 'div', $htmlAttribs ) .
+ $marker .
+ Html::closeElement( 'div' );
+ }
+
+ // Register CSS
+ $parser->getOutput()->addModuleStyles( 'ext.pygments' );
+
+ return $out;
+ }
+
+ /**
+ * Highlight a code-block using a particular lexer.
+ *
+ * @param string $code Code to highlight.
+ * @param string|null $lang Language name, or null to use plain markup.
+ * @param array $args Associative array of additional arguments.
+ * If it contains a 'line' key, the output will include line numbers.
+ * If it includes a 'highlight' key, the value will be parsed as a
+ * comma-separated list of lines and line-ranges to highlight.
+ * If it contains a 'start' key, the value will be used as the line at which to
+ * start highlighting.
+ * If it contains a 'inline' key, the output will not be wrapped in `<div><pre/></div>`.
+ * @return Status Status object, with HTML representing the highlighted
+ * code as its value.
+ */
+ public static function highlight( $code, $lang = null, $args = array() ) {
+ global $wgPygmentizePath;
+
+ $status = new Status;
+
+ $lexer = self::getLexer( $lang );
+ if ( $lexer === null && $lang !== null ) {
+ $status->warning( 'syntaxhighlight-error-unknown-language', $lang );
+ }
+
+ $length = strlen( $code );
+ if ( strlen( $code ) > self::HIGHLIGHT_MAX_BYTES ) {
+ $status->warning( 'syntaxhighlight-error-exceeds-size-limit',
+ $length, self::HIGHLIGHT_MAX_BYTES );
+ $lexer = null;
+ }
+
+ if ( wfShellExecDisabled() !== false ) {
+ $status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
+ wfWarn(
+ 'MediaWiki determined that it cannot invoke Pygments. ' .
+ 'As a result, SyntaxHighlight_GeSHi will not perform any syntax highlighting. ' .
+ 'See the debug log for details: ' .
+ 'https://www.mediawiki.org/wiki/Manual:$wgDebugLogFile'
+ );
+ $lexer = null;
+ }
+
+ $inline = isset( $args['inline'] );
+
+ if ( $lexer === null ) {
+ if ( $inline ) {
+ $status->value = htmlspecialchars( trim( $code ), ENT_NOQUOTES );
+ } else {
+ $pre = Html::element( 'pre', array(), $code );
+ $status->value = Html::rawElement( 'div', array( 'class' => self::HIGHLIGHT_CSS_CLASS ), $pre );
+ }
+ return $status;
+ }
+
+ $options = array(
+ 'cssclass' => self::HIGHLIGHT_CSS_CLASS,
+ 'encoding' => 'utf-8',
+ );
+
+ // Line numbers
+ if ( isset( $args['line'] ) ) {
+ $options['linenos'] = 'inline';
+ }
+
+ if ( $lexer === 'php' && strpos( $code, '<?php' ) === false ) {
+ $options['startinline'] = 1;
+ }
+
+ // Highlight specific lines
+ if ( isset( $args['highlight'] ) ) {
+ $lines = self::parseHighlightLines( $args['highlight'] );
+ if ( count( $lines ) ) {
+ $options['hl_lines'] = implode( ' ', $lines );
+ }
+ }
+
+ // Starting line number
+ if ( isset( $args['start'] ) && ctype_digit( $args['start'] ) ) {
+ $options['linenostart'] = (int)$args['start'];
+ }
+
+ if ( $inline ) {
+ $options['nowrap'] = 1;
+ }
+
+ $cache = ObjectCache::getMainWANInstance();
+ $cacheKey = self::makeCacheKey( $code, $lexer, $options );
+ $output = $cache->get( $cacheKey );
+
+ if ( $output === false ) {
+ $optionPairs = array();
+ foreach ( $options as $k => $v ) {
+ $optionPairs[] = "{$k}={$v}";
+ }
+ $builder = new ProcessBuilder();
+ $builder->setPrefix( $wgPygmentizePath );
+ $process = $builder
+ ->add( '-l' )->add( $lexer )
+ ->add( '-f' )->add( 'html' )
+ ->add( '-O' )->add( implode( ',', $optionPairs ) )
+ ->getProcess();
+
+ $process->setInput( $code );
+
+ /* Workaround for T151523 (buggy $process->getOutput()).
+ If/when this issue is fixed in HHVM or Symfony,
+ replace this with "$process->run(); $output = $process->getOutput();"
+ */
+ $output = '';
+ $process->run( function( $type, $capturedOutput ) use ( &$output ) {
+ $output .= $capturedOutput;
+ } );
+
+ if ( !$process->isSuccessful() ) {
+ $status->warning( 'syntaxhighlight-error-pygments-invocation-failure' );
+ wfWarn( 'Failed to invoke Pygments: ' . $process->getErrorOutput() );
+ $status->value = self::highlight( $code, null, $args )->getValue();
+ return $status;
+ }
+
+ $cache->set( $cacheKey, $output );
+ }
+
+ if ( $inline ) {
+ $output = trim( $output );
+ }
+
+ $status->value = $output;
+ return $status;
+
+ }
+
+ /**
+ * Construct a cache key for the results of a Pygments invocation.
+ *
+ * @param string $code Code to be highlighted.
+ * @param string $lexer Lexer name.
+ * @param array $options Options array.
+ * @return string Cache key.
+ */
+ private static function makeCacheKey( $code, $lexer, $options ) {
+ $optionString = FormatJson::encode( $options, false, FormatJson::ALL_OK );
+ $hash = md5( "{$code}|{$lexer}|{$optionString}|" . self::CACHE_VERSION );
+ if ( function_exists( 'wfGlobalCacheKey' ) ) {
+ return wfGlobalCacheKey( 'highlight', $hash );
+ } else {
+ return 'highlight:' . $hash;
+ }
+ }
+
+ /**
+ * Take an input specifying a list of lines to highlight, returning
+ * a raw list of matching line numbers.
+ *
+ * Input is comma-separated list of lines or line ranges.
+ *
+ * @param string $lineSpec
+ * @return int[] Line numbers.
+ */
+ protected static function parseHighlightLines( $lineSpec ) {
+ $lines = array();
+ $values = array_map( 'trim', explode( ',', $lineSpec ) );
+ foreach ( $values as $value ) {
+ if ( ctype_digit( $value ) ) {
+ $lines[] = (int)$value;
+ } elseif ( strpos( $value, '-' ) !== false ) {
+ list( $start, $end ) = array_map( 'trim', explode( '-', $value ) );
+ if ( self::validHighlightRange( $start, $end ) ) {
+ for ( $i = intval( $start ); $i <= $end; $i++ ) {
+ $lines[] = $i;
+ }
+ }
+ }
+ if ( count( $lines ) > self::HIGHLIGHT_MAX_LINES ) {
+ $lines = array_slice( $lines, 0, self::HIGHLIGHT_MAX_LINES );
+ break;
+ }
+ }
+ return $lines;
+ }
+
+ /**
+ * Validate a provided input range
+ * @param $start
+ * @param $end
+ * @return bool
+ */
+ protected static function validHighlightRange( $start, $end ) {
+ // Since we're taking this tiny range and producing a an
+ // array of every integer between them, it would be trivial
+ // to DoS the system by asking for a huge range.
+ // Impose an arbitrary limit on the number of lines in a
+ // given range to reduce the impact.
+ return
+ ctype_digit( $start ) &&
+ ctype_digit( $end ) &&
+ $start > 0 &&
+ $start < $end &&
+ $end - $start < self::HIGHLIGHT_MAX_LINES;
+ }
+
+ /**
+ * Hook into Content::getParserOutput to provide syntax highlighting for
+ * script content.
+ *
+ * @return bool
+ * @since MW 1.21
+ */
+ public static function onContentGetParserOutput( Content $content, Title $title,
+ $revId, ParserOptions $options, $generateHtml, ParserOutput &$output ) {
+
+ global $wgParser, $wgTextModelsToParse;
+
+ if ( !$generateHtml ) {
+ // Nothing special for us to do, let MediaWiki handle this.
+ return true;
+ }
+
+ // Determine the language
+ $extension = ExtensionRegistry::getInstance();
+ $models = $extension->getAttribute( 'SyntaxHighlightModels' );
+ $model = $content->getModel();
+ if ( !isset( $models[$model] ) ) {
+ // We don't care about this model, carry on.
+ return true;
+ }
+ $lexer = $models[$model];
+
+ // Hope that $wgSyntaxHighlightModels does not contain silly types.
+ $text = ContentHandler::getContentText( $content );
+ if ( !$text ) {
+ // Oops! Non-text content? Let MediaWiki handle this.
+ return true;
+ }
+
+ // Parse using the standard parser to get links etc. into the database, HTML is replaced below.
+ // We could do this using $content->fillParserOutput(), but alas it is 'protected'.
+ if ( $content instanceof TextContent && in_array( $model, $wgTextModelsToParse ) ) {
+ $output = $wgParser->parse( $text, $title, $options, true, true, $revId );
+ }
+
+ $status = self::highlight( $text, $lexer );
+ if ( !$status->isOK() ) {
+ return true;
+ }
+ $out = $status->getValue();
+
+ $output->addModuleStyles( 'ext.pygments' );
+ $output->setText( '<div dir="ltr">' . $out . '</div>' );
+
+ // Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
+ return false;
+ }
+
+ /**
+ * Hook to provide syntax highlighting for API pretty-printed output
+ *
+ * @param IContextSource $context
+ * @param string $text
+ * @param string $mime
+ * @param string $format
+ * @since MW 1.24
+ */
+ public static function onApiFormatHighlight( IContextSource $context, $text, $mime, $format ) {
+ if ( !isset( self::$mimeLexers[$mime] ) ) {
+ return true;
+ }
+
+ $lexer = self::$mimeLexers[$mime];
+ $status = self::highlight( $text, $lexer );
+ if ( !$status->isOK() ) {
+ return true;
+ }
+
+ $out = $status->getValue();
+ if ( preg_match( '/^<pre([^>]*)>/i', $out, $m ) ) {
+ $attrs = Sanitizer::decodeTagAttributes( $m[1] );
+ $attrs['class'] .= ' api-pretty-content';
+ $encodedAttrs = Sanitizer::safeEncodeTagAttributes( $attrs );
+ $out = '<pre' . $encodedAttrs. '>' . substr( $out, strlen( $m[0] ) );
+ }
+ $output = $context->getOutput();
+ $output->addModuleStyles( 'ext.pygments' );
+ $output->addHTML( '<div dir="ltr">' . $out . '</div>' );
+
+ // Inform MediaWiki that we have parsed this page and it shouldn't mess with it.
+ return false;
+ }
+
+ /**
+ * Reject parser cache values that are for GeSHi since those
+ * ResourceLoader modules no longer exist
+ *
+ * @param ParserOutput $parserOutput
+ * @param WikiPage|Article $page
+ * @param ParserOptions $popts
+ * @return bool
+ */
+ public static function onRejectParserCacheValue(
+ ParserOutput $parserOutput, $page, ParserOptions $popts
+ ) {
+ foreach ( $parserOutput->getModuleStyles() as $module ) {
+ if ( strpos( $module, 'ext.geshi.' ) === 0 ) {
+ $page->getTitle()->purgeSquid();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** Backward-compatibility shim for extensions. */
+ public static function prepare( $text, $lang ) {
+ wfDeprecated( __METHOD__ );
+ return new GeSHi( self::highlight( $text, $lang )->getValue() );
+ }
+
+ /** Backward-compatibility shim for extensions. */
+ public static function buildHeadItem( $geshi ) {
+ wfDeprecated( __METHOD__ );
+ $geshi->parse_code();
+ return '';
+ }
+}