summaryrefslogtreecommitdiff
path: root/www/wiki/tests/phpunit/includes/parser
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/tests/phpunit/includes/parser')
-rw-r--r--www/wiki/tests/phpunit/includes/parser/CoreParserFunctionsTest.php21
-rw-r--r--www/wiki/tests/phpunit/includes/parser/MagicVariableTest.php232
-rw-r--r--www/wiki/tests/phpunit/includes/parser/ParserIntegrationTest.php65
-rw-r--r--www/wiki/tests/phpunit/includes/parser/ParserMethodsTest.php185
-rw-r--r--www/wiki/tests/phpunit/includes/parser/ParserOptionsTest.php223
-rw-r--r--www/wiki/tests/phpunit/includes/parser/ParserOutputTest.php294
-rw-r--r--www/wiki/tests/phpunit/includes/parser/ParserPreloadTest.php95
-rw-r--r--www/wiki/tests/phpunit/includes/parser/PreprocessorTest.php294
-rw-r--r--www/wiki/tests/phpunit/includes/parser/SanitizerTest.php571
-rw-r--r--www/wiki/tests/phpunit/includes/parser/StripStateTest.php136
-rw-r--r--www/wiki/tests/phpunit/includes/parser/TagHooksTest.php134
-rw-r--r--www/wiki/tests/phpunit/includes/parser/TidyTest.php63
12 files changed, 2313 insertions, 0 deletions
diff --git a/www/wiki/tests/phpunit/includes/parser/CoreParserFunctionsTest.php b/www/wiki/tests/phpunit/includes/parser/CoreParserFunctionsTest.php
new file mode 100644
index 00000000..c6304477
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/CoreParserFunctionsTest.php
@@ -0,0 +1,21 @@
+<?php
+/**
+ * @group Database
+ * @covers CoreParserFunctions
+ */
+class CoreParserFunctionsTest extends MediaWikiTestCase {
+
+	/**
+	 * {{GENDER:}} must resolve the gender of a user both when the user name
+	 * is passed verbatim and when it has first been run through
+	 * wfEscapeWikiText().
+	 *
+	 * The test user is named "*Female" and has its gender option set to
+	 * 'female', so the expected expansion of "m|f|o" is 'f' in both cases.
+	 */
+	public function testGender() {
+		$user = User::createNew( '*Female' );
+		$user->setOption( 'gender', 'female' );
+		$user->saveSettings();
+
+		// Raw user name.
+		$msg = ( new RawMessage( '{{GENDER:*Female|m|f|o}}' ) )->parse();
+		$this->assertEquals( 'f', $msg, 'Works unescaped' );
+
+		// Escaped user name. The assertion must inspect $msg2; checking $msg
+		// again would leave the escaped case untested.
+		$escapedName = wfEscapeWikiText( '*Female' );
+		$msg2 = ( new RawMessage( '{{GENDER:' . $escapedName . '|m|f|o}}' ) )
+			->parse();
+		$this->assertEquals( 'f', $msg2, 'Works escaped' );
+	}
+
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/MagicVariableTest.php b/www/wiki/tests/phpunit/includes/parser/MagicVariableTest.php
new file mode 100644
index 00000000..86b496e2
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/MagicVariableTest.php
@@ -0,0 +1,232 @@
+<?php
+/**
+ * This file is intended to test magic variables in the parser
+ * It was inspired by Raymond & Matěj Grabovský commenting about r66200
+ *
+ * As of February 2011, it only tests some revision and date related
+ * magic variables.
+ *
+ * @author Antoine Musso
+ * @copyright Copyright © 2011, Antoine Musso
+ * @file
+ */
+
+/**
+ * @group Database
+ * @covers Parser::getVariableValue
+ */
+class MagicVariableTest extends MediaWikiTestCase {
+	/**
+	 * Parser instance under test; a fresh one is created in setUp().
+	 * @var Parser
+	 */
+	private $testParser = null;
+
+	/**
+	 * Magic words whose value the parser returns as type integer.
+	 * Magic words are usually returned as a string for i18n (some languages,
+	 * e.g. Persian, use their own numerals), but a few explicitly return
+	 * an integer.
+	 * @see MagicVariableTest::assertMagic()
+	 */
+	private $expectedAsInteger = [
+		'revisionday',
+		'revisionmonth1',
+	];
+
+	/** Set up a basic English-language parser object bound to a title. */
+	protected function setUp() {
+		parent::setUp();
+
+		$contLang = Language::factory( 'en' );
+		$this->setMwGlobals( [
+			'wgLanguageCode' => 'en',
+			'wgContLang' => $contLang,
+		] );
+
+		$this->testParser = new Parser();
+		$this->testParser->Options( ParserOptions::newFromUserAndLang( new User, $contLang ) );
+
+		# initialize parser output
+		$this->testParser->clearState();
+
+		# Needs a title to do magic word stuff
+		$title = Title::newFromText( 'Tests' );
+		# Else it needs a db connection just to check if it's a redirect
+		# (when deciding the page language).
+		$title->mRedirect = false;
+
+		$this->testParser->setTitle( $title );
+	}
+
+	/**
+	 * Build a data provider yielding every integer from 1 to $num.
+	 *
+	 * @param int $num Upper limit for numbers (inclusive)
+	 * @return array List of single-element argument arrays: [ [ 1 ], [ 2 ], ... [ $num ] ]
+	 */
+	private static function createProviderUpTo( $num ) {
+		$ret = [];
+		for ( $i = 1; $i <= $num; $i++ ) {
+			$ret[] = [ $i ];
+		}
+
+		return $ret;
+	}
+
+	/**
+	 * @return array Month numbers 1..12, one per data set (as integers)
+	 */
+	public static function provideMonths() {
+		return self::createProviderUpTo( 12 );
+	}
+
+	/**
+	 * @return array Day numbers 1..31, one per data set (as integers)
+	 */
+	public static function provideDays() {
+		return self::createProviderUpTo( 31 );
+	}
+
+	# ############## TESTS #############################################
+	# @todo FIXME:
+	# - those got copy pasted, we can probably make them cleaner
+	# - tests are lacking useful messages
+
+	# day
+
+	/** @dataProvider provideDays */
+	public function testCurrentdayIsUnPadded( $day ) {
+		$this->assertUnPadded( 'currentday', $day );
+	}
+
+	/** @dataProvider provideDays */
+	public function testCurrentdaytwoIsZeroPadded( $day ) {
+		$this->assertZeroPadded( 'currentday2', $day );
+	}
+
+	/** @dataProvider provideDays */
+	public function testLocaldayIsUnPadded( $day ) {
+		$this->assertUnPadded( 'localday', $day );
+	}
+
+	/** @dataProvider provideDays */
+	public function testLocaldaytwoIsZeroPadded( $day ) {
+		$this->assertZeroPadded( 'localday2', $day );
+	}
+
+	# month
+
+	/** @dataProvider provideMonths */
+	public function testCurrentmonthIsZeroPadded( $month ) {
+		$this->assertZeroPadded( 'currentmonth', $month );
+	}
+
+	/** @dataProvider provideMonths */
+	public function testCurrentmonthoneIsUnPadded( $month ) {
+		$this->assertUnPadded( 'currentmonth1', $month );
+	}
+
+	/** @dataProvider provideMonths */
+	public function testLocalmonthIsZeroPadded( $month ) {
+		$this->assertZeroPadded( 'localmonth', $month );
+	}
+
+	/** @dataProvider provideMonths */
+	public function testLocalmonthoneIsUnPadded( $month ) {
+		$this->assertUnPadded( 'localmonth1', $month );
+	}
+
+	# revision day
+
+	/** @dataProvider provideDays */
+	public function testRevisiondayIsUnPadded( $day ) {
+		$this->assertUnPadded( 'revisionday', $day );
+	}
+
+	/** @dataProvider provideDays */
+	public function testRevisiondaytwoIsZeroPadded( $day ) {
+		$this->assertZeroPadded( 'revisionday2', $day );
+	}
+
+	# revision month
+
+	/** @dataProvider provideMonths */
+	public function testRevisionmonthIsZeroPadded( $month ) {
+		$this->assertZeroPadded( 'revisionmonth', $month );
+	}
+
+	/** @dataProvider provideMonths */
+	public function testRevisionmonthoneIsUnPadded( $month ) {
+		$this->assertUnPadded( 'revisionmonth1', $month );
+	}
+
+	# ############## HELPERS ############################################
+
+	/**
+	 * Assertion helper expecting a magic output which is zero padded
+	 * to two digits ('%02d').
+	 * @param string $magic Magic variable name
+	 * @param int $value Month or day
+	 */
+	public function assertZeroPadded( $magic, $value ) {
+		$this->assertMagicPadding( $magic, $value, '%02d' );
+	}
+
+	/**
+	 * Assertion helper expecting a magic output which is unpadded ('%d').
+	 * @param string $magic Magic variable name
+	 * @param int $value Month or day
+	 */
+	public function assertUnPadded( $magic, $value ) {
+		$this->assertMagicPadding( $magic, $value, '%d' );
+	}
+
+	/**
+	 * Main assertion helper for magic variables padding.
+	 *
+	 * Sets both the parser timestamp and the revision timestamp to a date in
+	 * 2010 derived from $value, then checks that $magic yields $value
+	 * formatted with $format.
+	 *
+	 * @param string $magic Magic variable name
+	 * @param mixed $value Month or day
+	 * @param string $format Sprintf format for $value
+	 */
+	private function assertMagicPadding( $magic, $value, $format ) {
+		# Initialize parser timestamp as year 2010 at 12h34 56s.
+		# Both the month and the day fields are derived from $value, so the
+		# month has to be wrapped into 1..12. A plain "$value % 12" would
+		# yield month 00 for day 24; wrap with an offset instead so that
+		# 12 and 24 both map to December. Values 1..12 are left untouched.
+		# Days 29, 30 and 31 map to May, June and July, which all have
+		# enough days for the resulting date to exist.
+		$month = ( ( $value - 1 ) % 12 ) + 1;
+
+		$this->setParserTS(
+			sprintf( '2010%02d%02d123456', $month, $value )
+		);
+
+		# please keep the following commented line of code. It helps debugging.
+		// print "\nDEBUG (value $value):" . sprintf( '2010%02d%02d123456', $month, $value ) . "\n";
+
+		# format expectation and test it
+		$expected = sprintf( $format, $value );
+		$this->assertMagic( $expected, $magic );
+	}
+
+	/**
+	 * Helper to set the parser timestamp and revision timestamp
+	 * @param string $ts Timestamp as 14 digits (YYYYMMDDHHMMSS)
+	 */
+	private function setParserTS( $ts ) {
+		$this->testParser->Options()->setTimestamp( $ts );
+		$this->testParser->mRevisionTimestamp = $ts;
+	}
+
+	/**
+	 * Assertion helper to test a magic variable output.
+	 *
+	 * The comparison is strict (assertSame), so for magic words listed in
+	 * $expectedAsInteger the expectation is cast to int first.
+	 *
+	 * @param string|int $expected
+	 * @param string $magic
+	 */
+	private function assertMagic( $expected, $magic ) {
+		// Strict lookup: magic word names are always strings.
+		if ( in_array( $magic, $this->expectedAsInteger, true ) ) {
+			$expected = (int)$expected;
+		}
+
+		# Generate a message for the assertion
+		$msg = sprintf( "Magic %s should be <%s:%s>",
+			$magic,
+			$expected,
+			gettype( $expected )
+		);
+
+		$this->assertSame(
+			$expected,
+			$this->testParser->getVariableValue( $magic ),
+			$msg
+		);
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/ParserIntegrationTest.php b/www/wiki/tests/phpunit/includes/parser/ParserIntegrationTest.php
new file mode 100644
index 00000000..91653b5d
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/ParserIntegrationTest.php
@@ -0,0 +1,65 @@
+<?php
+use Wikimedia\ScopedCallback;
+
+/**
+ * This is the TestCase subclass for running a single parser test via the
+ * ParserTestRunner integration test system.
+ *
+ * Note: the following groups are not used by PHPUnit.
+ * The list in ParserTestFileSuite::__construct() is used instead.
+ *
+ * @group large
+ * @group Database
+ * @group Parser
+ * @group ParserTests
+ *
+ * @covers Parser
+ * @covers BlockLevelPass
+ * @covers CoreParserFunctions
+ * @covers CoreTagHooks
+ * @covers Sanitizer
+ * @covers Preprocessor
+ * @covers Preprocessor_DOM
+ * @covers Preprocessor_Hash
+ * @covers DateFormatter
+ * @covers LinkHolderArray
+ * @covers StripState
+ * @covers ParserOptions
+ * @covers ParserOutput
+ */
+class ParserIntegrationTest extends PHPUnit\Framework\TestCase {
+
+	use MediaWikiCoversValidator;
+
+	/** @var array Description of the single parser test to run */
+	private $ptTest;
+
+	/** @var ParserTestRunner Runner that executes the test */
+	private $ptRunner;
+
+	/** @var ScopedCallback Teardown handle returned by the runner's staticSetup() */
+	private $ptTeardownScope;
+
+	/**
+	 * @param ParserTestRunner $runner
+	 * @param string $fileName Parser test file the test comes from; only its
+	 *  base name is used, for the data set label
+	 * @param array $test Test description; its 'desc' entry is used in the label
+	 */
+	public function __construct( $runner, $fileName, $test ) {
+		$label = basename( $fileName ) . ': ' . $test['desc'];
+		parent::__construct( 'testParse', [ '[details omitted]' ], $label );
+
+		$this->ptTest = $test;
+		$this->ptRunner = $runner;
+	}
+
+	/**
+	 * Run the wrapped parser test and compare expected against actual output.
+	 */
+	public function testParse() {
+		$recorder = $this->ptRunner->getRecorder();
+		$recorder->setTestCase( $this );
+
+		$outcome = $this->ptRunner->runTest( $this->ptTest );
+		$this->assertEquals( $outcome->expected, $outcome->actual );
+	}
+
+	/** Perform the runner's static setup and remember its teardown handle. */
+	public function setUp() {
+		$this->ptTeardownScope = $this->ptRunner->staticSetup();
+	}
+
+	/** Consume the teardown handle, if setUp() produced one. */
+	public function tearDown() {
+		if ( !$this->ptTeardownScope ) {
+			return;
+		}
+		ScopedCallback::consume( $this->ptTeardownScope );
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/ParserMethodsTest.php b/www/wiki/tests/phpunit/includes/parser/ParserMethodsTest.php
new file mode 100644
index 00000000..d2ed4415
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/ParserMethodsTest.php
@@ -0,0 +1,185 @@
+<?php
+
+/**
+ * @group Database
+ * @covers Parser
+ * @covers BlockLevelPass
+ */
+class ParserMethodsTest extends MediaWikiLangTestCase {
+
+	/**
+	 * @return array Data sets of [ wikitext before save, expected wikitext after save ]
+	 */
+	public static function providePreSaveTransform() {
+		return [
+			[ 'hello this is ~~~',
+				"hello this is [[Special:Contributions/127.0.0.1|127.0.0.1]]",
+			],
+			[ 'hello \'\'this\'\' is <nowiki>~~~</nowiki>',
+				'hello \'\'this\'\' is <nowiki>~~~</nowiki>',
+			],
+		];
+	}
+
+	/**
+	 * Pre-save transform run for a user named "127.0.0.1".
+	 *
+	 * @dataProvider providePreSaveTransform
+	 */
+	public function testPreSaveTransform( $text, $expected ) {
+		global $wgParser;
+
+		$pageTitle = Title::newFromText( str_replace( '::', '__', __METHOD__ ) );
+
+		$ipUser = new User();
+		$ipUser->setName( "127.0.0.1" );
+		$options = ParserOptions::newFromUser( $ipUser );
+
+		$actual = $wgParser->preSaveTransform( $text, $pageTitle, $ipUser, $options );
+
+		$this->assertEquals( $expected, $actual );
+	}
+
+	/**
+	 * @return array Data sets of [ HTML input, expected output of stripOuterParagraph() ]
+	 */
+	public static function provideStripOuterParagraph() {
+		// This mimics the most common use case (stripping paragraphs generated by the parser).
+		$rawMessage = new RawMessage( "Message text." );
+
+		$cases = [];
+		$cases[] = [
+			"<p>Text.</p>",
+			"Text.",
+		];
+		$cases[] = [
+			"<p class='foo'>Text.</p>",
+			"<p class='foo'>Text.</p>",
+		];
+		$cases[] = [
+			"<p>Text.\n</p>\n",
+			"Text.",
+		];
+		$cases[] = [
+			"<p>Text.</p><p>More text.</p>",
+			"<p>Text.</p><p>More text.</p>",
+		];
+		$cases[] = [
+			$rawMessage->parse(),
+			"Message text.",
+		];
+
+		return $cases;
+	}
+
+	/**
+	 * @dataProvider provideStripOuterParagraph
+	 */
+	public function testStripOuterParagraph( $text, $expected ) {
+		$stripped = Parser::stripOuterParagraph( $text );
+		$this->assertEquals( $expected, $stripped );
+	}
+
+	/**
+	 * @expectedException MWException
+	 * @expectedExceptionMessage Parser state cleared while parsing.
+	 * Did you call Parser::parse recursively?
+	 */
+	public function testRecursiveParse() {
+		global $wgParser;
+
+		$fooTitle = Title::newFromText( 'foo' );
+		$options = new ParserOptions;
+
+		// The tag hook re-enters Parser::parse() on the same parser instance.
+		$wgParser->setHook( 'recursivecallparser', [ $this, 'helperParserFunc' ] );
+		$wgParser->parse( '<recursivecallparser>baz</recursivecallparser>', $fooTitle, $options );
+	}
+
+	/**
+	 * Tag hook used by testRecursiveParse(): calls parse() on the parser
+	 * it was handed.
+	 *
+	 * @param string $input Tag content
+	 * @param array $args Tag attributes (unused)
+	 * @param Parser $parser
+	 * @return string
+	 */
+	public function helperParserFunc( $input, $args, $parser ) {
+		$fooTitle = Title::newFromText( 'foo' );
+		$options = new ParserOptions;
+		$parser->parse( $input, $fooTitle, $options );
+
+		return 'bar';
+	}
+
+	public function testCallParserFunction() {
+		global $wgParser;
+
+		// Normal parses test passing PPNodes. Test passing an array.
+		$pageTitle = Title::newFromText( str_replace( '::', '__', __METHOD__ ) );
+		$wgParser->startExternalParse( $pageTitle, new ParserOptions(), Parser::OT_HTML );
+		$rootFrame = $wgParser->getPreprocessor()->newFrame();
+
+		$tagArgs = [ 'pre', 'foo', 'style' => 'margin-left: 1.6em' ];
+		$result = $wgParser->callParserFunction( $rootFrame, '#tag', $tagArgs );
+		$result['text'] = $wgParser->mStripState->unstripBoth( $result['text'] );
+
+		$expected = [
+			'found' => true,
+			'text' => '<pre style="margin-left: 1.6em">foo</pre>',
+		];
+		$this->assertSame(
+			$expected,
+			$result,
+			'callParserFunction works for {{#tag:pre|foo|style=margin-left: 1.6em}}'
+		);
+	}
+
+	/**
+	 * @covers Parser
+	 * @covers ParserOutput::getSections
+	 */
+	public function testGetSections() {
+		global $wgParser;
+
+		$pageTitle = Title::newFromText( str_replace( '::', '__', __METHOD__ ) );
+		$wikitext = "==foo==\n<h2>bar</h2>\n==baz==\n";
+		$output = $wgParser->parse( $wikitext, $pageTitle, new ParserOptions() );
+		$dbKey = $pageTitle->getPrefixedDBkey();
+
+		// Section created from wikitext heading syntax.
+		$fooSection = [
+			'toclevel' => 1,
+			'level' => '2',
+			'line' => 'foo',
+			'number' => '1',
+			'index' => '1',
+			'fromtitle' => $dbKey,
+			'byteoffset' => 0,
+			'anchor' => 'foo',
+		];
+		// Section created from a literal <h2> tag.
+		$barSection = [
+			'toclevel' => 1,
+			'level' => '2',
+			'line' => 'bar',
+			'number' => '2',
+			'index' => '',
+			'fromtitle' => false,
+			'byteoffset' => null,
+			'anchor' => 'bar',
+		];
+		$bazSection = [
+			'toclevel' => 1,
+			'level' => '2',
+			'line' => 'baz',
+			'number' => '3',
+			'index' => '2',
+			'fromtitle' => $dbKey,
+			'byteoffset' => 21,
+			'anchor' => 'baz',
+		];
+
+		$this->assertSame(
+			[ $fooSection, $barSection, $bazSection ],
+			$output->getSections(),
+			'getSections() with proper value when <h2> is used'
+		);
+	}
+
+	/**
+	 * @dataProvider provideNormalizeLinkUrl
+	 */
+	public function testNormalizeLinkUrl( $explanation, $url, $expected ) {
+		$normalized = Parser::normalizeLinkUrl( $url );
+		$this->assertEquals( $expected, $normalized, $explanation );
+	}
+
+	/**
+	 * @return array Data sets of [ explanation, input URL, expected normalized URL ]
+	 */
+	public static function provideNormalizeLinkUrl() {
+		return [
+			[
+				'Escaping of unsafe characters',
+				'http://example.org/foo bar?param[]="value"&param[]=valüe',
+				'http://example.org/foo%20bar?param%5B%5D=%22value%22&param%5B%5D=val%C3%BCe',
+			],
+			[
+				'Case normalization of percent-encoded characters',
+				'http://example.org/%ab%cD%Ef%FF',
+				'http://example.org/%AB%CD%EF%FF',
+			],
+			[
+				'Unescaping of safe characters',
+				'http://example.org/%3C%66%6f%6F%3E?%3C%66%6f%6F%3E#%3C%66%6f%6F%3E',
+				'http://example.org/%3Cfoo%3E?%3Cfoo%3E#%3Cfoo%3E',
+			],
+			[
+				'Context-sensitive replacement of sometimes-safe characters',
+				'http://example.org/%23%2F%3F%26%3D%2B%3B?%23%2F%3F%26%3D%2B%3B#%23%2F%3F%26%3D%2B%3B',
+				'http://example.org/%23%2F%3F&=+;?%23/?%26%3D%2B%3B#%23/?&=+;',
+			],
+		];
+	}
+
+	// @todo Add tests for cleanSig() / cleanSigInSig(), getSection(),
+	// replaceSection(), getPreloadText()
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/ParserOptionsTest.php b/www/wiki/tests/phpunit/includes/parser/ParserOptionsTest.php
new file mode 100644
index 00000000..e2ed1d57
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/ParserOptionsTest.php
@@ -0,0 +1,223 @@
+<?php
+
+use Wikimedia\TestingAccessWrapper;
+use Wikimedia\ScopedCallback;
+
+/**
+ * @covers ParserOptions
+ */
+class ParserOptionsTest extends MediaWikiTestCase {
+
+	/**
+	 * Reset the static state of ParserOptions (defaults, lazy options and
+	 * cache-key flags) to the fixed values these tests rely on.
+	 */
+	private static function clearCache() {
+		$access = TestingAccessWrapper::newFromClass( ParserOptions::class );
+
+		$access->defaults = null;
+		$access->lazyOptions = [
+			'dateformat' => [ ParserOptions::class, 'initDateFormat' ],
+		];
+		$access->inCacheKey = array_fill_keys(
+			[
+				'dateformat',
+				'numberheadings',
+				'thumbsize',
+				'stubthreshold',
+				'printable',
+				'userlang',
+			],
+			true
+		);
+	}
+
+	protected function setUp() {
+		global $wgHooks;
+
+		parent::setUp();
+		self::clearCache();
+
+		// Start with no PageRenderingHash handlers; keep all other hooks.
+		$hooks = [ 'PageRenderingHash' => [] ] + $wgHooks;
+		$this->setMwGlobals( [
+			'wgRenderHashAppend' => '',
+			'wgHooks' => $hooks,
+		] );
+	}
+
+	protected function tearDown() {
+		self::clearCache();
+		parent::tearDown();
+	}
+
+	/**
+	 * @dataProvider provideIsSafeToCache
+	 * @param bool $expect Expected value
+	 * @param array $options Options to set
+	 */
+	public function testIsSafeToCache( $expect, $options ) {
+		$parserOptions = ParserOptions::newCanonical();
+		foreach ( $options as $key => $setting ) {
+			$parserOptions->setOption( $key, $setting );
+		}
+
+		$this->assertSame( $expect, $parserOptions->isSafeToCache() );
+	}
+
+	/**
+	 * @return array Named data sets of [ expected isSafeToCache(), options to set ]
+	 */
+	public static function provideIsSafeToCache() {
+		return [
+			'No overrides' => [ true, [] ],
+			'In-key options are ok' => [ true, [
+				'thumbsize' => 1e100,
+				'printable' => false,
+			] ],
+			'Non-in-key options are not ok' => [ false, [
+				'removeComments' => false,
+			] ],
+			'Non-in-key options are not ok (2)' => [ false, [
+				'wrapclass' => 'foobar',
+			] ],
+			'Canonical override, not default (1)' => [ true, [
+				'tidy' => true,
+			] ],
+			'Canonical override, not default (2)' => [ false, [
+				'tidy' => false,
+			] ],
+		];
+	}
+
+	/**
+	 * @dataProvider provideOptionsHash
+	 * @param array $usedOptions Used options
+	 * @param string $expect Expected value
+	 * @param array $options Options to set
+	 * @param array $globals Globals to set
+	 */
+	public function testOptionsHash( $usedOptions, $expect, $options, $globals = [] ) {
+		global $wgHooks;
+
+		// Hooks supplied by the data set win; everything else falls back to
+		// an empty PageRenderingHash list plus the current global hooks.
+		$fallbackHooks = [ 'PageRenderingHash' => [] ] + $wgHooks;
+		$globals += [
+			'wgHooks' => [],
+		];
+		$globals['wgHooks'] += $fallbackHooks;
+		$this->setMwGlobals( $globals );
+
+		$parserOptions = ParserOptions::newCanonical();
+		foreach ( $options as $key => $setting ) {
+			$parserOptions->setOption( $key, $setting );
+		}
+
+		$this->assertSame( $expect, $parserOptions->optionsHash( $usedOptions ) );
+	}
+
+	/**
+	 * @return array Named data sets of
+	 *  [ used options, expected hash, options to set, (optional) globals to set ]
+	 */
+	public static function provideOptionsHash() {
+		$used = [ 'thumbsize', 'printable' ];
+
+		$access = TestingAccessWrapper::newFromClass( ParserOptions::class );
+		$access->getDefaults();
+		$cacheKeyNames = array_keys( $access->inCacheKey );
+		$lazyNames = array_keys( $access->lazyOptions );
+		$allUsableOptions = array_diff( $cacheKeyNames, $lazyNames );
+
+		$cases = [];
+		$cases['Canonical options, nothing used'] = [ [], 'canonical', [] ];
+		$cases['Canonical options, used some options'] = [ $used, 'canonical', [] ];
+		$cases['Used some options, non-default values'] = [
+			$used,
+			'printable=1!thumbsize=200',
+			[
+				'thumbsize' => 200,
+				'printable' => true,
+			]
+		];
+		$cases['Canonical options, used all non-lazy options'] = [ $allUsableOptions, 'canonical', [] ];
+		$cases['Canonical options, nothing used, but with hooks and $wgRenderHashAppend'] = [
+			[],
+			'canonical!wgRenderHashAppend!onPageRenderingHash',
+			[],
+			[
+				'wgRenderHashAppend' => '!wgRenderHashAppend',
+				'wgHooks' => [ 'PageRenderingHash' => [ [ __CLASS__ . '::onPageRenderingHash' ] ] ],
+			]
+		];
+
+		return $cases;
+	}
+
+	/**
+	 * PageRenderingHash handler registered by provideOptionsHash().
+	 * @param string &$confstr Hash string, appended to in place
+	 */
+	public static function onPageRenderingHash( &$confstr ) {
+		$confstr .= '!onPageRenderingHash';
+	}
+
+	/**
+	 * @expectedException InvalidArgumentException
+	 * @expectedExceptionMessage Unknown parser option bogus
+	 */
+	public function testGetInvalidOption() {
+		ParserOptions::newCanonical()->getOption( 'bogus' );
+	}
+
+	/**
+	 * @expectedException InvalidArgumentException
+	 * @expectedExceptionMessage Unknown parser option bogus
+	 */
+	public function testSetInvalidOption() {
+		ParserOptions::newCanonical()->setOption( 'bogus', true );
+	}
+
+	public function testMatches() {
+		$access = TestingAccessWrapper::newFromClass( ParserOptions::class );
+		$savedDefaults = $access->defaults;
+		$savedLazy = $access->lazyOptions;
+		// Restores the static state saved above when consumed.
+		$restore = new ScopedCallback( function () use ( $access, $savedDefaults, $savedLazy ) {
+			$access->defaults = $savedDefaults;
+			$access->lazyOptions = $savedLazy;
+		} );
+
+		$first = ParserOptions::newCanonical();
+		$second = ParserOptions::newCanonical();
+		$this->assertTrue( $first->matches( $second ) );
+
+		// Differing limit-report settings are still expected to match.
+		$first->enableLimitReport( true );
+		$second->enableLimitReport( false );
+		$this->assertTrue( $first->matches( $second ) );
+
+		$second->setTidy( !$second->getTidy() );
+		$this->assertFalse( $first->matches( $second ) );
+
+		// Register a lazy option whose callback returns a new value on each call.
+		$optionName = __METHOD__;
+		$counter = 0;
+		$access->defaults += [ $optionName => null ];
+		$access->lazyOptions += [ $optionName => function () use ( &$counter ) {
+			return ++$counter;
+		} ];
+		$first = ParserOptions::newCanonical();
+		$second = ParserOptions::newCanonical();
+		$this->assertFalse( $first->matches( $second ) );
+
+		ScopedCallback::consume( $restore );
+	}
+
+	public function testAllCacheVaryingOptions() {
+		global $wgHooks;
+
+		// $wgHooks is already saved in self::setUp(), so we can modify it freely here
+		$wgHooks['ParserOptionsRegister'] = [];
+		$builtIn = [
+			'dateformat', 'numberheadings', 'printable', 'stubthreshold',
+			'thumbsize', 'userlang'
+		];
+		$this->assertSame( $builtIn, ParserOptions::allCacheVaryingOptions() );
+
+		self::clearCache();
+
+		// Register three extra options; only 'foo' is flagged true in $inCacheKey.
+		$wgHooks['ParserOptionsRegister'][] = function ( &$defaults, &$inCacheKey ) {
+			$defaults += [
+				'foo' => 'foo',
+				'bar' => 'bar',
+				'baz' => 'baz',
+			];
+			$inCacheKey += [
+				'foo' => true,
+				'bar' => false,
+			];
+		};
+		$withFoo = [
+			'dateformat', 'foo', 'numberheadings', 'printable', 'stubthreshold',
+			'thumbsize', 'userlang'
+		];
+		$this->assertSame( $withFoo, ParserOptions::allCacheVaryingOptions() );
+	}
+
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/ParserOutputTest.php b/www/wiki/tests/phpunit/includes/parser/ParserOutputTest.php
new file mode 100644
index 00000000..b08ba6c4
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/ParserOutputTest.php
@@ -0,0 +1,294 @@
+<?php
+
+/**
+ * @group Database
+ * ^--- trigger DB shadowing because we are using Title magic
+ */
+class ParserOutputTest extends MediaWikiTestCase {
+
+	/**
+	 * @return array Data sets of [ expected result, server, URL ]
+	 */
+	public static function provideIsLinkInternal() {
+		$http = 'http://example.org';
+		$https = 'https://example.org';
+		$protocolRelative = '//example.org';
+
+		return [
+			// Different domains
+			[ false, $http, 'http://mediawiki.org' ],
+			// Same domains
+			[ true, $http, $http ],
+			[ true, $https, $https ],
+			[ true, $protocolRelative, $protocolRelative ],
+			// Same domain different cases
+			[ true, $http, 'http://EXAMPLE.ORG' ],
+			// Paths, queries, and fragments are not relevant
+			[ true, $http, 'http://example.org/wiki/Main_Page' ],
+			[ true, $http, 'http://example.org?my=query' ],
+			[ true, $http, 'http://example.org#its-a-fragment' ],
+			// Different protocols
+			[ false, $http, $https ],
+			[ false, $https, $http ],
+			// Protocol relative servers always match http and https links
+			[ true, $protocolRelative, $http ],
+			[ true, $protocolRelative, $https ],
+			// But they don't match strange things like this
+			[ false, $protocolRelative, 'irc://example.org' ],
+		];
+	}
+
+	/**
+	 * Check ParserOutput::isLinkInternal() against each server/URL pair.
+	 * @dataProvider provideIsLinkInternal
+	 * @covers ParserOutput::isLinkInternal
+	 */
+	public function testIsLinkInternal( $shouldMatch, $server, $url ) {
+		$isInternal = ParserOutput::isLinkInternal( $server, $url );
+		$this->assertEquals( $shouldMatch, $isInternal );
+	}
+
+	/**
+	 * Extension data can be stored per key, read back, and cleared by
+	 * storing null, without disturbing other keys.
+	 *
+	 * @covers ParserOutput::setExtensionData
+	 * @covers ParserOutput::getExtensionData
+	 */
+	public function testExtensionData() {
+		$output = new ParserOutput();
+
+		// A stored key reads back; a key never set reads as null.
+		$output->setExtensionData( "one", "Foo" );
+		$this->assertEquals( "Foo", $output->getExtensionData( "one" ) );
+		$this->assertNull( $output->getExtensionData( "spam" ) );
+
+		// Adding a second key leaves the first intact.
+		$output->setExtensionData( "two", "Bar" );
+		$this->assertEquals( "Foo", $output->getExtensionData( "one" ) );
+		$this->assertEquals( "Bar", $output->getExtensionData( "two" ) );
+
+		// Storing null clears only that key.
+		$output->setExtensionData( "one", null );
+		$this->assertNull( $output->getExtensionData( "one" ) );
+		$this->assertEquals( "Bar", $output->getExtensionData( "two" ) );
+	}
+
+	/**
+	 * Properties can be set, overwritten and unset, and getProperty() and
+	 * getProperties() agree after each step.
+	 *
+	 * @covers ParserOutput::setProperty
+	 * @covers ParserOutput::getProperty
+	 * @covers ParserOutput::unsetProperty
+	 * @covers ParserOutput::getProperties
+	 */
+	public function testProperties() {
+		$po = new ParserOutput();
+
+		// PHPUnit's assertEquals() takes ( $expected, $actual ); keeping that
+		// order makes failure messages report the right value as "expected".
+		$po->setProperty( 'foo', 'val' );
+
+		$properties = $po->getProperties();
+		$this->assertEquals( 'val', $po->getProperty( 'foo' ) );
+		$this->assertEquals( 'val', $properties['foo'] );
+
+		// Setting the same property again overwrites the value.
+		$po->setProperty( 'foo', 'second val' );
+
+		$properties = $po->getProperties();
+		$this->assertEquals( 'second val', $po->getProperty( 'foo' ) );
+		$this->assertEquals( 'second val', $properties['foo'] );
+
+		// After unsetting, the getter reports false and the key is gone.
+		$po->unsetProperty( 'foo' );
+
+		$properties = $po->getProperties();
+		$this->assertEquals( false, $po->getProperty( 'foo' ) );
+		$this->assertArrayNotHasKey( 'foo', $properties );
+	}
+
+	/**
+	 * Render stored parser text through getText() with the given options,
+	 * using fixed article and script paths.
+	 *
+	 * @covers ParserOutput::getText
+	 * @dataProvider provideGetText
+	 * @param array $options Options to getText()
+	 * @param string $text Parser text
+	 * @param string $expect Expected output
+	 */
+	public function testGetText( $options, $text, $expect ) {
+		$pathGlobals = [
+			'wgArticlePath' => '/wiki/$1',
+			'wgScriptPath' => '/w',
+			'wgScript' => '/w/index.php',
+		];
+		$this->setMwGlobals( $pathGlobals );
+
+		$output = new ParserOutput( $text );
+		$this->assertSame( $expect, $output->getText( $options ) );
+	}
+
+ public static function provideGetText() {
+ // phpcs:disable Generic.Files.LineLength
+ $text = <<<EOF
+<div class="mw-parser-output"><p>Test document.
+</p>
+<mw:toc><div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Section_1"><span class="tocnumber">1</span> <span class="toctext">Section 1</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#Section_2"><span class="tocnumber">2</span> <span class="toctext">Section 2</span></a>
+<ul>
+<li class="toclevel-2 tocsection-3"><a href="#Section_2.1"><span class="tocnumber">2.1</span> <span class="toctext">Section 2.1</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1 tocsection-4"><a href="#Section_3"><span class="tocnumber">3</span> <span class="toctext">Section 3</span></a></li>
+</ul>
+</div>
+</mw:toc>
+<h2><span class="mw-headline" id="Section_1">Section 1</span><mw:editsection page="Test Page" section="1">Section 1</mw:editsection></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span><mw:editsection page="Test Page" section="2">Section 2</mw:editsection></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span><mw:editsection page="Test Page" section="3">Section 2.1</mw:editsection></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span><mw:editsection page="Test Page" section="4">Section 3</mw:editsection></h2>
+<p>Three
+</p></div>
+EOF;
+
+ $dedupText = <<<EOF
+<p>This is a test document.</p>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
+<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
+<style data-mw-deduplicate="duplicate1">.Same-attribute-different-content {}</style>
+<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
+<style>.Duplicate1 {}</style>
+EOF;
+
+ return [
+ 'No options' => [
+ [], $text, <<<EOF
+<div class="mw-parser-output"><p>Test document.
+</p>
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Section_1"><span class="tocnumber">1</span> <span class="toctext">Section 1</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#Section_2"><span class="tocnumber">2</span> <span class="toctext">Section 2</span></a>
+<ul>
+<li class="toclevel-2 tocsection-3"><a href="#Section_2.1"><span class="tocnumber">2.1</span> <span class="toctext">Section 2.1</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1 tocsection-4"><a href="#Section_3"><span class="tocnumber">3</span> <span class="toctext">Section 3</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=2" title="Edit section: Section 2">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=3" title="Edit section: Section 2.1">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Three
+</p></div>
+EOF
+ ],
+ 'Disable section edit links' => [
+ [ 'enableSectionEditLinks' => false ], $text, <<<EOF
+<div class="mw-parser-output"><p>Test document.
+</p>
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Section_1"><span class="tocnumber">1</span> <span class="toctext">Section 1</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#Section_2"><span class="tocnumber">2</span> <span class="toctext">Section 2</span></a>
+<ul>
+<li class="toclevel-2 tocsection-3"><a href="#Section_2.1"><span class="tocnumber">2.1</span> <span class="toctext">Section 2.1</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1 tocsection-4"><a href="#Section_3"><span class="tocnumber">3</span> <span class="toctext">Section 3</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Section_1">Section 1</span></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span></h2>
+<p>Three
+</p></div>
+EOF
+ ],
+ 'Disable TOC' => [
+ [ 'allowTOC' => false ], $text, <<<EOF
+<div class="mw-parser-output"><p>Test document.
+</p>
+
+<h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=2" title="Edit section: Section 2">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=3" title="Edit section: Section 2.1">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Three
+</p></div>
+EOF
+ ],
+ 'Unwrap text' => [
+ [ 'unwrap' => true ], $text, <<<EOF
+<p>Test document.
+</p>
+<div id="toc" class="toc"><div class="toctitle"><h2>Contents</h2></div>
+<ul>
+<li class="toclevel-1 tocsection-1"><a href="#Section_1"><span class="tocnumber">1</span> <span class="toctext">Section 1</span></a></li>
+<li class="toclevel-1 tocsection-2"><a href="#Section_2"><span class="tocnumber">2</span> <span class="toctext">Section 2</span></a>
+<ul>
+<li class="toclevel-2 tocsection-3"><a href="#Section_2.1"><span class="tocnumber">2.1</span> <span class="toctext">Section 2.1</span></a></li>
+</ul>
+</li>
+<li class="toclevel-1 tocsection-4"><a href="#Section_3"><span class="tocnumber">3</span> <span class="toctext">Section 3</span></a></li>
+</ul>
+</div>
+
+<h2><span class="mw-headline" id="Section_1">Section 1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=1" title="Edit section: Section 1">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>One
+</p>
+<h2><span class="mw-headline" id="Section_2">Section 2</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=2" title="Edit section: Section 2">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Two
+</p>
+<h3><span class="mw-headline" id="Section_2.1">Section 2.1</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=3" title="Edit section: Section 2.1">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
+<p>Two point one
+</p>
+<h2><span class="mw-headline" id="Section_3">Section 3</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Test_Page&amp;action=edit&amp;section=4" title="Edit section: Section 3">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<p>Three
+</p>
+EOF
+ ],
+ 'Unwrap without a mw-parser-output wrapper' => [
+ [ 'unwrap' => true ], '<div class="foobar">Content</div>', '<div class="foobar">Content</div>'
+ ],
+ 'Unwrap with extra comment at end' => [
+ [ 'unwrap' => true ], '<div class="mw-parser-output"><p>Test document.</p></div>
+<!-- Saved in parser cache... -->', '<p>Test document.</p>
+<!-- Saved in parser cache... -->'
+ ],
+ 'Style deduplication' => [
+ [], $dedupText, <<<EOF
+<p>This is a test document.</p>
+<style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"/>
+<style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"/>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2"/>
+<style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style>
+<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"/>
+<style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style>
+<style>.Duplicate1 {}</style>
+EOF
+ ],
+ 'Style deduplication disabled' => [
+ [ 'deduplicateStyles' => false ], $dedupText, $dedupText
+ ],
+ ];
+ // phpcs:enable
+ }
+
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/ParserPreloadTest.php b/www/wiki/tests/phpunit/includes/parser/ParserPreloadTest.php
new file mode 100644
index 00000000..77073955
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/ParserPreloadTest.php
@@ -0,0 +1,95 @@
+<?php
+/**
+ * Basic tests for Parser::getPreloadText
+ * @author Antoine Musso
+ *
+ * @covers Parser
+ * @covers StripState
+ *
+ * @covers Preprocessor_DOM
+ * @covers PPDStack
+ * @covers PPDStackElement
+ * @covers PPDPart
+ * @covers PPFrame_DOM
+ * @covers PPTemplateFrame_DOM
+ * @covers PPCustomFrame_DOM
+ * @covers PPNode_DOM
+ *
+ * @covers Preprocessor_Hash
+ * @covers PPDStack_Hash
+ * @covers PPDStackElement_Hash
+ * @covers PPDPart_Hash
+ * @covers PPFrame_Hash
+ * @covers PPTemplateFrame_Hash
+ * @covers PPCustomFrame_Hash
+ * @covers PPNode_Hash_Tree
+ * @covers PPNode_Hash_Text
+ * @covers PPNode_Hash_Array
+ * @covers PPNode_Hash_Attr
+ */
+class ParserPreloadTest extends MediaWikiTestCase {
+	/**
+	 * @var Parser Parser under test; rebuilt for every test in setUp()
+	 */
+	private $testParser;
+	/**
+	 * @var ParserOptions Options set on the parser and passed to getPreloadText()
+	 */
+	private $testParserOptions;
+	/**
+	 * @var Title Title passed to getPreloadText() alongside the text
+	 */
+	private $title;
+
+	/** Build a fresh parser, its options and the title before each test. */
+	protected function setUp() {
+		global $wgContLang;
+
+		parent::setUp();
+		$this->testParserOptions = ParserOptions::newFromUserAndLang( new User, $wgContLang );
+		$this->testParser = new Parser();
+		$this->testParser->Options( $this->testParserOptions );
+		$this->testParser->clearState();
+		$this->title = Title::newFromText( 'Preload Test' );
+	}
+
+	/** Release all three fixtures from setUp(), parser options included. */
+	protected function tearDown() {
+		parent::tearDown();
+		unset( $this->testParser, $this->testParserOptions, $this->title );
+	}
+
+	/** Text without markup is returned unchanged. */
+	public function testPreloadSimpleText() {
+		$this->assertPreloaded( 'simple', 'simple' );
+	}
+
+	/** A <pre> block comes back as written, not stripped (T29467). */
+	public function testPreloadedPreIsUnstripped() {
+		$wikitext = '<pre>monospaced</pre>';
+		$this->assertPreloaded(
+			$wikitext, $wikitext, '<pre> in preloaded text must be unstripped (T29467)'
+		);
+	}
+
+	/** A <nowiki> block comes back as written, not stripped (T29467). */
+	public function testPreloadedNowikiIsUnstripped() {
+		$wikitext = '<nowiki>[[Dummy title]]</nowiki>';
+		$this->assertPreloaded(
+			$wikitext, $wikitext, '<nowiki> in preloaded text must be unstripped (T29467)'
+		);
+	}
+
+	/**
+	 * Assert that getPreloadText() turns $text into $expected.
+	 * @param string $expected Text getPreloadText() should return
+	 * @param string $text Wikitext passed to getPreloadText()
+	 * @param string $msg Failure message for the assertion
+	 */
+	protected function assertPreloaded( $expected, $text, $msg = '' ) {
+		$actual = $this->testParser->getPreloadText(
+			$text, $this->title, $this->testParserOptions
+		);
+		$this->assertEquals( $expected, $actual, $msg );
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/PreprocessorTest.php b/www/wiki/tests/phpunit/includes/parser/PreprocessorTest.php
new file mode 100644
index 00000000..c415b586
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/PreprocessorTest.php
@@ -0,0 +1,294 @@
+<?php
+
+/**
+ * @covers Preprocessor
+ *
+ * @covers Preprocessor_DOM
+ * @covers PPDStack
+ * @covers PPDStackElement
+ * @covers PPDPart
+ * @covers PPFrame_DOM
+ * @covers PPTemplateFrame_DOM
+ * @covers PPCustomFrame_DOM
+ * @covers PPNode_DOM
+ *
+ * @covers Preprocessor_Hash
+ * @covers PPDStack_Hash
+ * @covers PPDStackElement_Hash
+ * @covers PPDPart_Hash
+ * @covers PPFrame_Hash
+ * @covers PPTemplateFrame_Hash
+ * @covers PPCustomFrame_Hash
+ * @covers PPNode_Hash_Tree
+ * @covers PPNode_Hash_Text
+ * @covers PPNode_Hash_Array
+ * @covers PPNode_Hash_Attr
+ */
+class PreprocessorTest extends MediaWikiTestCase {
+ protected $mTitle = 'Page title';
+ protected $mPPNodeCount = 0;
+ /**
+ * @var ParserOptions
+ */
+ protected $mOptions;
+ /**
+ * @var array
+ */
+ protected $mPreprocessors;
+
+ protected static $classNames = [
+ Preprocessor_DOM::class,
+ Preprocessor_Hash::class
+ ];
+
+	protected function setUp() {
+		global $wgContLang;
+		parent::setUp();
+		$this->mOptions = ParserOptions::newFromUserAndLang( new User, $wgContLang );
+		// One instance per implementation, keyed by class name; each receives this test case.
+		$this->mPreprocessors = [];
+		foreach ( self::$classNames as $implementation ) {
+			$this->mPreprocessors[$implementation] = new $implementation( $this );
+		}
+	}
+
+	public function getStripList() { // tag names provideCases() expects to come out as <ext> nodes
+		return [ 'gallery', 'display map' /* Used by Maps, see r80025 CR */, '/foo' ];
+	}
+
+	/** Cross every test case with every preprocessor class, class name first. */
+	protected static function addClassArg( $testCases ) {
+		$expanded = [];
+		foreach ( self::$classNames as $preprocessorClass ) {
+			foreach ( $testCases as $arguments ) {
+				$expanded[] = array_merge( [ $preprocessorClass ], $arguments );
+			}
+		}
+		return $expanded;
+	}
+
+ public static function provideCases() {
+ // phpcs:disable Generic.Files.LineLength
+ return self::addClassArg( [ // rows: [ wikitext, expected XML ]; addClassArg() prepends the class name
+ [ "Foo", "<root>Foo</root>" ],
+ [ "<!-- Foo -->", "<root><comment>&lt;!-- Foo --&gt;</comment></root>" ],
+ [ "<!-- Foo --><!-- Bar -->", "<root><comment>&lt;!-- Foo --&gt;</comment><comment>&lt;!-- Bar --&gt;</comment></root>" ],
+ [ "<!-- Foo --> <!-- Bar -->", "<root><comment>&lt;!-- Foo --&gt;</comment> <comment>&lt;!-- Bar --&gt;</comment></root>" ],
+ [ "<!-- Foo --> \n <!-- Bar -->", "<root><comment>&lt;!-- Foo --&gt;</comment> \n <comment>&lt;!-- Bar --&gt;</comment></root>" ],
+ [ "<!-- Foo --> \n <!-- Bar -->\n", "<root><comment>&lt;!-- Foo --&gt;</comment> \n<comment> &lt;!-- Bar --&gt;\n</comment></root>" ],
+ [ "<!-- Foo --> <!-- Bar -->\n", "<root><comment>&lt;!-- Foo --&gt;</comment> <comment>&lt;!-- Bar --&gt;</comment>\n</root>" ],
+ [ "<!-->Bar", "<root><comment>&lt;!--&gt;Bar</comment></root>" ],
+ [ "<!-- Comment -- comment", "<root><comment>&lt;!-- Comment -- comment</comment></root>" ],
+ [ "== Foo ==\n <!-- Bar -->\n== Baz ==\n", "<root><h level=\"2\" i=\"1\">== Foo ==</h>\n<comment> &lt;!-- Bar --&gt;\n</comment><h level=\"2\" i=\"2\">== Baz ==</h>\n</root>" ],
+ [ "<gallery/>", "<root><ext><name>gallery</name><attr></attr></ext></root>" ],
+ [ "Foo <gallery/> Bar", "<root>Foo <ext><name>gallery</name><attr></attr></ext> Bar</root>" ],
+ [ "<gallery></gallery>", "<root><ext><name>gallery</name><attr></attr><inner></inner><close>&lt;/gallery&gt;</close></ext></root>" ],
+ [ "<foo> <gallery></gallery>", "<root>&lt;foo&gt; <ext><name>gallery</name><attr></attr><inner></inner><close>&lt;/gallery&gt;</close></ext></root>" ],
+ [ "<foo> <gallery><gallery></gallery>", "<root>&lt;foo&gt; <ext><name>gallery</name><attr></attr><inner>&lt;gallery&gt;</inner><close>&lt;/gallery&gt;</close></ext></root>" ],
+ [ "<noinclude> Foo bar </noinclude>", "<root><ignore>&lt;noinclude&gt;</ignore> Foo bar <ignore>&lt;/noinclude&gt;</ignore></root>" ],
+ [ "<noinclude>\n{{Foo}}\n</noinclude>", "<root><ignore>&lt;noinclude&gt;</ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore>&lt;/noinclude&gt;</ignore></root>" ],
+ [ "<noinclude>\n{{Foo}}\n</noinclude>\n", "<root><ignore>&lt;noinclude&gt;</ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore>&lt;/noinclude&gt;</ignore>\n</root>" ],
+ [ "<gallery>foo bar", "<root>&lt;gallery&gt;foo bar</root>" ],
+ [ "<{{foo}}>", "<root>&lt;<template><title>foo</title></template>&gt;</root>" ],
+ [ "<{{{foo}}}>", "<root>&lt;<tplarg><title>foo</title></tplarg>&gt;</root>" ],
+ [ "<gallery></gallery</gallery>", "<root><ext><name>gallery</name><attr></attr><inner>&lt;/gallery</inner><close>&lt;/gallery&gt;</close></ext></root>" ],
+ [ "=== Foo === ", "<root><h level=\"3\" i=\"1\">=== Foo === </h></root>" ],
+ [ "==<!-- -->= Foo === ", "<root><h level=\"2\" i=\"1\">==<comment>&lt;!-- --&gt;</comment>= Foo === </h></root>" ],
+ [ "=== Foo ==<!-- -->= ", "<root><h level=\"1\" i=\"1\">=== Foo ==<comment>&lt;!-- --&gt;</comment>= </h></root>" ],
+ [ "=== Foo ===<!-- -->\n", "<root><h level=\"3\" i=\"1\">=== Foo ===<comment>&lt;!-- --&gt;</comment></h>\n</root>" ],
+ [ "=== Foo ===<!-- --> <!-- -->\n", "<root><h level=\"3\" i=\"1\">=== Foo ===<comment>&lt;!-- --&gt;</comment> <comment>&lt;!-- --&gt;</comment></h>\n</root>" ],
+ [ "== Foo ==\n== Bar == \n", "<root><h level=\"2\" i=\"1\">== Foo ==</h>\n<h level=\"2\" i=\"2\">== Bar == </h>\n</root>" ],
+ [ "===========", "<root><h level=\"5\" i=\"1\">===========</h></root>" ],
+ [ "Foo\n=\n==\n=\n", "<root>Foo\n=\n==\n=\n</root>" ],
+ [ "{{Foo}}", "<root><template><title>Foo</title></template></root>" ],
+ [ "\n{{Foo}}", "<root>\n<template lineStart=\"1\"><title>Foo</title></template></root>" ],
+ [ "{{Foo|bar}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part></template></root>" ],
+ [ "{{Foo|bar}}a", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part></template>a</root>" ],
+ [ "{{Foo|bar|baz}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name index=\"2\" /><value>baz</value></part></template></root>" ],
+ [ "{{Foo|1=bar}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part></template></root>" ],
+ [ "{{Foo|=bar}}", "<root><template><title>Foo</title><part><name></name>=<value>bar</value></part></template></root>" ],
+ [ "{{Foo|bar=baz}}", "<root><template><title>Foo</title><part><name>bar</name>=<value>baz</value></part></template></root>" ],
+ [ "{{Foo|{{bar}}=baz}}", "<root><template><title>Foo</title><part><name><template><title>bar</title></template></name>=<value>baz</value></part></template></root>" ],
+ [ "{{Foo|1=bar|baz}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part><part><name index=\"1\" /><value>baz</value></part></template></root>" ],
+ [ "{{Foo|1=bar|2=baz}}", "<root><template><title>Foo</title><part><name>1</name>=<value>bar</value></part><part><name>2</name>=<value>baz</value></part></template></root>" ],
+ [ "{{Foo|bar|foo=baz}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name>foo</name>=<value>baz</value></part></template></root>" ],
+ [ "{{{1}}}", "<root><tplarg><title>1</title></tplarg></root>" ],
+ [ "{{{1|}}}", "<root><tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg></root>" ],
+ [ "{{{Foo}}}", "<root><tplarg><title>Foo</title></tplarg></root>" ],
+ [ "{{{Foo|}}}", "<root><tplarg><title>Foo</title><part><name index=\"1\" /><value></value></part></tplarg></root>" ],
+ [ "{{{Foo|bar|baz}}}", "<root><tplarg><title>Foo</title><part><name index=\"1\" /><value>bar</value></part><part><name index=\"2\" /><value>baz</value></part></tplarg></root>" ],
+ [ "{<!-- -->{Foo}}", "<root>{<comment>&lt;!-- --&gt;</comment>{Foo}}</root>" ],
+ [ "{{{{Foobar}}}}", "<root>{<tplarg><title>Foobar</title></tplarg>}</root>" ],
+ [ "{{{ {{Foo}} }}}", "<root><tplarg><title> <template><title>Foo</title></template> </title></tplarg></root>" ],
+ [ "{{ {{{Foo}}} }}", "<root><template><title> <tplarg><title>Foo</title></tplarg> </title></template></root>" ],
+ [ "{{{{{Foo}}}}}", "<root><template><title><tplarg><title>Foo</title></tplarg></title></template></root>" ],
+ [ "{{{{{Foo}} }}}", "<root><tplarg><title><template><title>Foo</title></template> </title></tplarg></root>" ],
+ [ "{{{{{{Foo}}}}}}", "<root><tplarg><title><tplarg><title>Foo</title></tplarg></title></tplarg></root>" ],
+ [ "{{{{{{Foo}}}}}", "<root>{<template><title><tplarg><title>Foo</title></tplarg></title></template></root>" ],
+ [ "[[[Foo]]", "<root>[[[Foo]]</root>" ],
+ [ "{{Foo|[[[[bar]]|baz]]}}", "<root><template><title>Foo</title><part><name index=\"1\" /><value>[[[[bar]]|baz]]</value></part></template></root>" ], // This test is important, since it means the difference between having the [[ rule stacked or not
+ [ "{{Foo|[[[[bar]|baz]]}}", "<root>{{Foo|[[[[bar]|baz]]}}</root>" ],
+ [ "{{Foo|Foo [[[[bar]|baz]]}}", "<root>{{Foo|Foo [[[[bar]|baz]]}}</root>" ],
+ [ "Foo <display map>Bar</display map >Baz", "<root>Foo <ext><name>display map</name><attr></attr><inner>Bar</inner><close>&lt;/display map &gt;</close></ext>Baz</root>" ],
+ [ "Foo <display map foo>Bar</display map >Baz", "<root>Foo <ext><name>display map</name><attr> foo</attr><inner>Bar</inner><close>&lt;/display map &gt;</close></ext>Baz</root>" ],
+ [ "Foo <gallery bar=\"baz\" />", "<root>Foo <ext><name>gallery</name><attr> bar=&quot;baz&quot; </attr></ext></root>" ],
+ [ "Foo <gallery bar=\"1\" baz=2 />", "<root>Foo <ext><name>gallery</name><attr> bar=&quot;1&quot; baz=2 </attr></ext></root>" ],
+ [ "</foo>Foo<//foo>", "<root><ext><name>/foo</name><attr></attr><inner>Foo</inner><close>&lt;//foo&gt;</close></ext></root>" ], # Worth blacklisting IMHO
+ [ "{{#ifexpr: ({{{1|1}}} = 2) | Foo | Bar }}", "<root><template><title>#ifexpr: (<tplarg><title>1</title><part><name index=\"1\" /><value>1</value></part></tplarg> = 2) </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> Bar </value></part></template></root>" ],
+ [ "{{#if: {{{1|}}} | Foo | {{Bar}} }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> <template><title>Bar</title></template> </value></part></template></root>" ],
+ [ "{{#if: {{{1|}}} | Foo | [[Bar]] }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> Foo </value></part><part><name index=\"2\" /><value> [[Bar]] </value></part></template></root>" ],
+ [ "{{#if: {{{1|}}} | [[Foo]] | Bar }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> [[Foo]] </value></part><part><name index=\"2\" /><value> Bar </value></part></template></root>" ],
+ [ "{{#if: {{{1|}}} | 1 | {{#if: {{{1|}}} | 2 | 3 }} }}", "<root><template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> 1 </value></part><part><name index=\"2\" /><value> <template><title>#if: <tplarg><title>1</title><part><name index=\"1\" /><value></value></part></tplarg> </title><part><name index=\"1\" /><value> 2 </value></part><part><name index=\"2\" /><value> 3 </value></part></template> </value></part></template></root>" ],
+ [ "{{ {{Foo}}", "<root>{{ <template><title>Foo</title></template></root>" ],
+ [ "{{Foobar {{Foo}} {{Bar}} {{Baz}} ", "<root>{{Foobar <template><title>Foo</title></template> <template><title>Bar</title></template> <template><title>Baz</title></template> </root>" ],
+ [ "[[Foo]] |", "<root>[[Foo]] |</root>" ],
+ [ "{{Foo|Bar|", "<root>{{Foo|Bar|</root>" ],
+ [ "[[Foo]", "<root>[[Foo]</root>" ],
+ [ "[[Foo|Bar]", "<root>[[Foo|Bar]</root>" ],
+ [ "{{Foo| [[Bar] }}", "<root>{{Foo| [[Bar] }}</root>" ],
+ [ "{{Foo| [[Bar|Baz] }}", "<root>{{Foo| [[Bar|Baz] }}</root>" ],
+ [ "{{Foo|bar=[[baz]}}", "<root>{{Foo|bar=[[baz]}}</root>" ],
+ [ "{{foo|", "<root>{{foo|</root>" ],
+ [ "{{foo|}", "<root>{{foo|}</root>" ],
+ [ "{{foo|} }}", "<root><template><title>foo</title><part><name index=\"1\" /><value>} </value></part></template></root>" ],
+ [ "{{foo|bar=|}", "<root>{{foo|bar=|}</root>" ],
+ [ "{{Foo|} Bar=", "<root>{{Foo|} Bar=</root>" ],
+ [ "{{Foo|} Bar=}}", "<root><template><title>Foo</title><part><name>} Bar</name>=<value></value></part></template></root>" ],
+ // QuoteQuran is not inlined here; provideFiles() lists it and testPreprocessorOutputFiles() reads it from a fixture file.
+ ] );
+ // phpcs:enable
+ }
+
+	/**
+	 * Render $wikiText as a preprocessor XML string, using the instance that
+	 * setUp() stored under $className.
+	 *
+	 * @param string $className Key into $this->mPreprocessors
+	 * @param string $wikiText Wikitext to preprocess
+	 * @return string XML; passed through normalizeXml() unless it comes from saveXML()
+	 */
+	protected function preprocessToXml( $className, $wikiText ) {
+		$impl = $this->mPreprocessors[$className];
+		if ( method_exists( $impl, 'preprocessToXml' ) ) {
+			// The implementation can produce XML text by itself.
+			return $this->normalizeXml( $impl->preprocessToXml( $wikiText ) );
+		}
+
+		$tree = $impl->preprocessToObj( $wikiText );
+		if ( !is_callable( [ $tree, 'saveXML' ] ) ) {
+			return $this->normalizeXml( $tree->__toString() );
+		}
+		return $tree->saveXML();
+	}
+
+	/**
+	 * Normalize XML string to the form that a DOMDocument saves out.
+	 *
+	 * @param string $xml XML text, either an expectation or preprocessor output
+	 * @return string $xml without the space in " />", with attribute-less <x/> expanded, without <equals>
+	 */
+	protected function normalizeXml( $xml ) {
+		// Drop the space before "/>", then turn attribute-less <x/> into <x></x>
+		$unpadded = str_replace( ' />', '/>', $xml );
+		$expanded = preg_replace( '!<([a-z]+)/>!', '<$1></$1>', $unpadded );
+		// Remove <equals> tags, which only occur in Preprocessor_Hash and
+		// have no semantic value
+		return preg_replace( '!</?equals>!', '', $expanded );
+	}
+
+	/** @dataProvider provideCases */
+	public function testPreprocessorOutput( $className, $wikiText, $expectedXml ) {
+		// The expectation is normalized here; preprocessToXml() takes care of its own output.
+		$expected = $this->normalizeXml( $expectedXml );
+		$actual = $this->preprocessToXml( $className, $wikiText );
+		$this->assertEquals( $expected, $actual );
+	}
+
+	/** Base names of the more complex fixtures taken out of wiki articles. */
+	public static function provideFiles() {
+		// phpcs:disable Generic.Files.LineLength
+		// The trailing comment on each row records where that fixture came from.
+		$fixtureNames = [
+			[ "QuoteQuran" ], # https://en.wikipedia.org/w/index.php?title=Template:QuoteQuran/sandbox&oldid=237348988 GFDL + CC BY-SA by Striver
+			[ "Factorial" ], # https://en.wikipedia.org/w/index.php?title=Template:Factorial&oldid=98548758 GFDL + CC BY-SA by Polonium
+			[ "All_system_messages" ], # https://tl.wiktionary.org/w/index.php?title=Suleras:All_system_messages&oldid=2765 GPL text generated by MediaWiki
+			[ "Fundraising" ], # https://tl.wiktionary.org/w/index.php?title=MediaWiki:Sitenotice&oldid=5716 GFDL + CC BY-SA, copied there by Sky Harbor.
+			[ "NestedTemplates" ], # T29936
+		];
+		// phpcs:enable
+		return self::addClassArg( $fixtureNames );
+	}
+
+	/**
+	 * Preprocess "<name>.txt" and compare the result with "<name>.expected".
+	 * @dataProvider provideFiles
+	 */
+	public function testPreprocessorOutputFiles( $className, $filename ) {
+		$fixtureDir = __DIR__ . "/../../../parser/preprocess";
+		$source = file_get_contents( "$fixtureDir/$filename.txt" );
+		$actualXml = $this->preprocessToXml( $className, $source );
+		$expectedPath = "$fixtureDir/$filename.expected";
+		if ( !file_exists( $expectedPath ) ) {
+			// No expectation yet: store the output beside the fixtures and stop here.
+			$dumpPath = tempnam( $fixtureDir, "$filename." );
+			file_put_contents( $dumpPath, $actualXml );
+			$this->markTestIncomplete( "File $expectedPath missing. Output stored as $dumpPath" );
+		}
+		$expectedXml = $this->normalizeXml( file_get_contents( $expectedPath ) );
+		$this->assertEquals( $expectedXml, $actualXml );
+	}
+
+ /**
+ * Heading lines followed by comments, from T30642 · https://phabricator.wikimedia.org/T30642
+ */
+ public static function provideHeadings() {
+ // phpcs:disable Generic.Files.LineLength
+ return self::addClassArg( [
+ /* These should become headings: */
+ [ "== h ==<!--c1-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--><!--c2-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--><!--c2-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--><!--c2--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--><!--c2--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--> <!--c2-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--> <!--c2--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--><!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--><!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--><!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--> <!--c2--><!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--><!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--> <!--c2--> <!--c3-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--><!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--><!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--><!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--> <!--c2--><!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--><!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h == <!--c1--> <!--c2--> <!--c3--> ", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> <comment>&lt;!--c3--&gt;</comment> </h></root>" ],
+ [ "== h ==<!--c1--> <!--c2-->", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment></h></root>" ],
+ [ "== h == <!--c1--> <!--c2-->", "<root><h level=\"2\" i=\"1\">== h == <comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment></h></root>" ],
+ [ "== h ==<!--c1--> <!--c2--> ", "<root><h level=\"2\" i=\"1\">== h ==<comment>&lt;!--c1--&gt;</comment> <comment>&lt;!--c2--&gt;</comment> </h></root>" ],
+
+ /* These are not working: the expected XML below contains no <h> element */
+ [ "== h == x <!--c1--><!--c2--><!--c3--> ", "<root>== h == x <comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </root>" ],
+ [ "== h ==<!--c1--> x <!--c2--><!--c3--> ", "<root>== h ==<comment>&lt;!--c1--&gt;</comment> x <comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> </root>" ],
+ [ "== h ==<!--c1--><!--c2--><!--c3--> x ", "<root>== h ==<comment>&lt;!--c1--&gt;</comment><comment>&lt;!--c2--&gt;</comment><comment>&lt;!--c3--&gt;</comment> x </root>" ],
+ ] );
+ // phpcs:enable
+ }
+
+	/** @dataProvider provideHeadings */
+	public function testHeadings( $className, $wikiText, $expectedXml ) {
+		// Each row's expected XML either wraps the line in an <h> element or leaves it as plain text.
+		$wanted = $this->normalizeXml( $expectedXml );
+		$produced = $this->preprocessToXml( $className, $wikiText );
+		$this->assertEquals( $wanted, $produced );
+	}
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/SanitizerTest.php b/www/wiki/tests/phpunit/includes/parser/SanitizerTest.php
new file mode 100644
index 00000000..35b81fb9
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/SanitizerTest.php
@@ -0,0 +1,571 @@
+<?php
+
+/**
+ * @todo Tests covering decodeCharReferences can be refactored into a single
+ * method and dataprovider.
+ *
+ * @group Sanitizer
+ */
+class SanitizerTest extends MediaWikiTestCase {
+
+ /**
+ * Drop the MWTidy singleton after every test; several tests in this class
+ * replace it through MWTidy::setInstance( false ).
+ */
+ protected function tearDown() {
+ MWTidy::destroySingleton();
+ parent::tearDown();
+ }
+
+ /**
+ * A named entity is decoded into its UTF-8 byte sequence.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testDecodeNamedEntities() {
+ $decoded = Sanitizer::decodeCharReferences( '&eacute;cole' );
+ $this->assertEquals( "\xc3\xa9cole", $decoded, 'decode named entities' );
+ }
+
+ /**
+ * Hexadecimal and decimal numeric references are both decoded.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testDecodeNumericEntities() {
+ $input = "&#x108;io bonas dans l'&#233;cole!";
+ $expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";
+ $this->assertEquals( $expected, Sanitizer::decodeCharReferences( $input ), 'decode numeric entities' );
+ }
+
+ /**
+ * A numeric and a named reference in the same string are both decoded.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testDecodeMixedEntities() {
+ $input = "&#x108;io bonas dans l'&eacute;cole!";
+ $expected = "\xc4\x88io bonas dans l'\xc3\xa9cole!";
+ $this->assertEquals(
+ $expected,
+ Sanitizer::decodeCharReferences( $input ),
+ 'decode mixed numeric/named entities'
+ );
+ }
+
+ /**
+ * Decoding happens exactly once: an escaped ampersand followed by what
+ * looks like a reference yields the literal reference text.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testDecodeMixedComplexEntities() {
+ $input = "&#x108;io bonas dans l'&eacute;cole! (mais pas &amp;#x108;io dans l'&#38;eacute;cole)";
+ $expected = "\xc4\x88io bonas dans l'\xc3\xa9cole! (mais pas &#x108;io dans l'&eacute;cole)";
+ $this->assertEquals(
+ $expected,
+ Sanitizer::decodeCharReferences( $input ),
+ 'decode mixed complex entities'
+ );
+ }
+
+ /**
+ * A bare ampersand that starts no reference is returned unchanged.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testInvalidAmpersand() {
+ $text = 'a & b';
+ $this->assertEquals( $text, Sanitizer::decodeCharReferences( $text ), 'Invalid ampersand' );
+ }
+
+ /**
+ * An unknown named entity is returned unchanged.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testInvalidEntities() {
+ $text = '&foo;';
+ $this->assertEquals( $text, Sanitizer::decodeCharReferences( $text ), 'Invalid named entity' );
+ }
+
+ /**
+ * The numeric reference used here must decode to the UTF-8 replacement character.
+ *
+ * @covers Sanitizer::decodeCharReferences
+ */
+ public function testInvalidNumberedEntities() {
+ $decoded = Sanitizer::decodeCharReferences( "&#88888888888888;" );
+ $this->assertEquals( UtfNormal\Constants::UTF8_REPLACEMENT, $decoded, 'Invalid numbered entity' );
+ }
+
+ /**
+ * With Tidy switched off, an opening HTML5 tag is either escaped or kept
+ * and closed by Sanitizer::removeHTMLtags().
+ *
+ * @covers Sanitizer::removeHTMLtags
+ * @dataProvider provideHtml5Tags
+ *
+ * @param string $tag Name of an HTML5 element (ie: 'video')
+ * @param bool $escaped Whether sanitizer let the tag in or escape it (ie: '&lt;video&gt;')
+ */
+ public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
+ MWTidy::setInstance( false );
+
+ // Escaped tags come back entity-encoded; accepted tags get a closing
+ // tag and a trailing newline appended.
+ $expected = $escaped ? "&lt;$tag&gt;" : "<$tag></$tag>\n";
+ $this->assertEquals( $expected, Sanitizer::removeHTMLtags( "<$tag>" ) );
+ }
+
+ /**
+ * Provide HTML5 tags.
+ *
+ * Each row is [ tag name, whether the tag is expected to be escaped ].
+ */
+ public static function provideHtml5Tags() {
+ return [
+ // false: we want to keep the tag
+ [ 'data', false ],
+ [ 'mark', false ],
+ [ 'time', false ],
+ // true: we want the tag to be escaped
+ [ 'video', true ],
+ ];
+ }
+
+ /**
+ * Data for testRemoveHTMLtags.
+ *
+ * Each row is [ input HTML, expected output, assertion message ].
+ * Declared public static like the other providers in this class; the
+ * method uses no instance state.
+ */
+ public static function dataRemoveHTMLtags() {
+ return [
+ // former testSelfClosingTag
+ [
+ '<div>Hello world</div />',
+ '<div>Hello world</div>',
+ 'Self-closing closing div'
+ ],
+ // Make sure special nested HTML5 semantics are not broken
+ // https://html.spec.whatwg.org/multipage/semantics.html#the-kbd-element
+ [
+ '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
+ '<kbd><kbd>Shift</kbd>+<kbd>F3</kbd></kbd>',
+ 'Nested <kbd>.'
+ ],
+ // https://html.spec.whatwg.org/multipage/semantics.html#the-sub-and-sup-elements
+ [
+ '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
+ '<var>x<sub><var>i</var></sub></var>, <var>y<sub><var>i</var></sub></var>',
+ 'Nested <var>.'
+ ],
+ // https://html.spec.whatwg.org/multipage/semantics.html#the-dfn-element
+ [
+ '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
+ '<dfn><abbr title="Garage Door Opener">GDO</abbr></dfn>',
+ '<abbr> inside <dfn>',
+ ],
+ ];
+ }
+
+ /**
+ * With Tidy switched off, removeHTMLtags() must turn $input into $output.
+ *
+ * @dataProvider dataRemoveHTMLtags
+ * @covers Sanitizer::removeHTMLtags
+ */
+ public function testRemoveHTMLtags( $input, $output, $msg = null ) {
+ MWTidy::setInstance( false );
+ $sanitized = Sanitizer::removeHTMLtags( $input );
+ $this->assertEquals( $output, $sanitized, $msg );
+ }
+
+ /**
+ * decodeTagAttributes() must parse the attribute string into the expected map.
+ *
+ * @dataProvider provideTagAttributesToDecode
+ * @covers Sanitizer::decodeTagAttributes
+ */
+ public function testDecodeTagAttributes( $expected, $attributes, $message = '' ) {
+ $decoded = Sanitizer::decodeTagAttributes( $attributes );
+ $this->assertEquals( $expected, $decoded, $message );
+ }
+
+ /**
+ * Data for testDecodeTagAttributes.
+ *
+ * Each row is [ expected attribute map, raw attribute string, assertion message ].
+ * An empty expected map means the attribute must be rejected.
+ */
+ public static function provideTagAttributesToDecode() {
+ return [
+ [ [ 'foo' => 'bar' ], 'foo=bar', 'Unquoted attribute' ],
+ [ [ 'עברית' => 'bar' ], 'עברית=bar', 'Non-Latin attribute' ],
+ [ [ '६' => 'bar' ], '६=bar', 'Devanagari number' ],
+ [ [ '搭𨋢' => 'bar' ], '搭𨋢=bar', 'Non-BMP character' ],
+ [ [], 'ńgh=bar', 'Combining accent is not allowed' ],
+ [ [ 'foo' => 'bar' ], ' foo = bar ', 'Spaced attribute' ],
+ [ [ 'foo' => 'bar' ], 'foo="bar"', 'Double-quoted attribute' ],
+ [ [ 'foo' => 'bar' ], 'foo=\'bar\'', 'Single-quoted attribute' ],
+ [
+ [ 'foo' => 'bar', 'baz' => 'foo' ],
+ 'foo=\'bar\' baz="foo"',
+ 'Several attributes'
+ ],
+ [
+ [ 'foo' => 'bar', 'baz' => 'foo' ],
+ 'foo=\'bar\' baz="foo"',
+ 'Several attributes'
+ ],
+ [
+ [ 'foo' => 'bar', 'baz' => 'foo' ],
+ 'foo=\'bar\' baz="foo"',
+ 'Several attributes'
+ ],
+ [ [ ':foo' => 'bar' ], ':foo=\'bar\'', 'Leading :' ],
+ [ [ '_foo' => 'bar' ], '_foo=\'bar\'', 'Leading _' ],
+ [ [ 'foo' => 'bar' ], 'Foo=\'bar\'', 'Leading capital' ],
+ [ [ 'foo' => 'BAR' ], 'FOO=BAR', 'Attribute keys are normalized to lowercase' ],
+
+ # Invalid beginning
+ [ [], '-foo=bar', 'Leading - is forbidden' ],
+ [ [], '.foo=bar', 'Leading . is forbidden' ],
+ [ [ 'foo-bar' => 'bar' ], 'foo-bar=bar', 'A - is allowed inside the attribute' ],
+ [ [ 'foo-' => 'bar' ], 'foo-=bar', 'A - is allowed as last character' ],
+ [ [ 'foo.bar' => 'baz' ], 'foo.bar=baz', 'A . is allowed inside the attribute' ],
+ [ [ 'foo.' => 'baz' ], 'foo.=baz', 'A . is allowed as last character' ],
+ [ [ 'foo6' => 'baz' ], 'foo6=baz', 'Numbers are allowed' ],
+
+ # This bit is more relaxed than XML rules, but some extensions use
+ # it, like ProofreadPage (see T29539)
+ [ [ '1foo' => 'baz' ], '1foo=baz', 'Leading numbers are allowed' ],
+ [ [], 'foo$=baz', 'Symbols are not allowed' ],
+ [ [], 'foo@=baz', 'Symbols are not allowed' ],
+ [ [], 'foo~=baz', 'Symbols are not allowed' ],
+ [
+ [ 'foo' => '1[#^`*%w/(' ],
+ 'foo=1[#^`*%w/(',
+ 'All kind of characters are allowed as values'
+ ],
+ // The value contains a single quote and is wrapped in double quotes
+ [
+ [ 'foo' => '1[#^`*%\'w/(' ],
+ 'foo="1[#^`*%\'w/("',
+ 'Single quotes are allowed if quoted by double quotes'
+ ],
+ // The value contains a double quote and is wrapped in single quotes
+ [
+ [ 'foo' => '1[#^`*%"w/(' ],
+ 'foo=\'1[#^`*%"w/(\'',
+ 'Double quotes are allowed if quoted by single quotes'
+ ],
+ [ [ 'foo' => '&"' ], 'foo=&amp;&quot;', 'Special chars can be provided as entities' ],
+ [ [ 'foo' => '&foobar;' ], 'foo=&foobar;', 'Entity-like items are accepted' ],
+ ];
+ }
+
+ /**
+ * fixTagAttributes() must return the deprecated attribute unchanged,
+ * prefixed with a single space.
+ *
+ * @dataProvider provideDeprecatedAttributes
+ * @covers Sanitizer::fixTagAttributes
+ */
+ public function testDeprecatedAttributesUnaltered( $inputAttr, $inputEl, $message = '' ) {
+ $expected = ' ' . $inputAttr;
+ $fixed = Sanitizer::fixTagAttributes( $inputAttr, $inputEl );
+ $this->assertEquals( $expected, $fixed, $message );
+ }
+
+ /**
+ * Data for testDeprecatedAttributesUnaltered: attribute strings paired with
+ * the element they are applied to. No row supplies the optional message.
+ */
+ public static function provideDeprecatedAttributes() {
+ /** [ <attribute>, <element>, [message] ] */
+ return [
+ [ 'clear="left"', 'br' ],
+ [ 'clear="all"', 'br' ],
+ [ 'width="100"', 'td' ],
+ [ 'nowrap="true"', 'td' ],
+ [ 'nowrap=""', 'td' ],
+ [ 'align="right"', 'td' ],
+ [ 'align="center"', 'table' ],
+ [ 'align="left"', 'tr' ],
+ [ 'align="center"', 'div' ],
+ [ 'align="left"', 'h1' ],
+ [ 'align="left"', 'p' ],
+ ];
+ }
+
+ /**
+ * checkCss() must turn the given CSS into the expected string.
+ *
+ * @dataProvider provideCssCommentsFixtures
+ * @covers Sanitizer::checkCss
+ */
+ public function testCssCommentsChecking( $expected, $css, $message = '' ) {
+ $checked = Sanitizer::checkCss( $css );
+ $this->assertEquals( $expected, $checked, $message );
+ }
+
+ /**
+ * Data for testCssCommentsChecking.
+ *
+ * The first rows exercise comment handling; the rows expecting
+ * '/* insecure input *' . '/' list CSS that checkCss() must reject.
+ */
+ public static function provideCssCommentsFixtures() {
+ /** [ <expected>, <css>, [message] ] */
+ return [
+ // Valid comments spanning entire input
+ [ '/**/', '/**/' ],
+ [ '/* comment */', '/* comment */' ],
+ // Weird stuff
+ [ ' ', '/****/' ],
+ [ ' ', '/* /* */' ],
+ [ 'display: block;', "display:/* foo */block;" ],
+ [ 'display: block;', "display:\\2f\\2a foo \\2a\\2f block;",
+ 'Backslash-escaped comments must be stripped (T30450)' ],
+ [ '', '/* unfinished comment structure',
+ 'Remove anything after a comment-start token' ],
+ [ '', "\\2f\\2a unifinished comment'",
+ 'Remove anything after a backslash-escaped comment-start token' ],
+ [
+ '/* insecure input */',
+ 'filter: progid:DXImageTransform.Microsoft.AlphaImageLoader'
+ . '(src=\'asdf.png\',sizingMethod=\'scale\');'
+ ],
+ [
+ '/* insecure input */',
+ '-ms-filter: "progid:DXImageTransform.Microsoft.AlphaImageLoader'
+ . '(src=\'asdf.png\',sizingMethod=\'scale\')";'
+ ],
+ [ '/* insecure input */', 'width: expression(1+1);' ],
+ [ '/* insecure input */', 'background-image: image(asdf.png);' ],
+ [ '/* insecure input */', 'background-image: -webkit-image(asdf.png);' ],
+ [ '/* insecure input */', 'background-image: -moz-image(asdf.png);' ],
+ [ '/* insecure input */', 'background-image: image-set("asdf.png" 1x, "asdf.png" 2x);' ],
+ [
+ '/* insecure input */',
+ 'background-image: -webkit-image-set("asdf.png" 1x, "asdf.png" 2x);'
+ ],
+ [
+ '/* insecure input */',
+ 'background-image: -moz-image-set("asdf.png" 1x, "asdf.png" 2x);'
+ ],
+ [ '/* insecure input */', 'foo: attr( title, url );' ],
+ [ '/* insecure input */', 'foo: attr( title url );' ],
+ [ '/* insecure input */', 'foo: var(--evil-attribute)' ],
+ ];
+ }
+
+ /**
+ * escapeHtmlAllowEntities() must turn $html into $expected.
+ *
+ * @dataProvider provideEscapeHtmlAllowEntities
+ * @covers Sanitizer::escapeHtmlAllowEntities
+ */
+ public function testEscapeHtmlAllowEntities( $expected, $html ) {
+ $escaped = Sanitizer::escapeHtmlAllowEntities( $html );
+ $this->assertEquals( $expected, $escaped );
+ }
+
+ /**
+ * Data for testEscapeHtmlAllowEntities: [ expected output, input HTML ].
+ */
+ public static function provideEscapeHtmlAllowEntities() {
+ return [
+ [ 'foo', 'foo' ],
+ [ 'a¡b', 'a&#161;b' ],
+ [ 'foo&#039;bar', "foo'bar" ],
+ [ '&lt;script&gt;foo&lt;/script&gt;', '<script>foo</script>' ],
+ ];
+ }
+
+ /**
+ * Test Sanitizer::escapeId with the 'noninitial' and 'legacy' options.
+ *
+ * @dataProvider provideEscapeId
+ * @covers Sanitizer::escapeId
+ */
+ public function testEscapeId( $input, $output ) {
+ $options = [ 'noninitial', 'legacy' ];
+ $escaped = Sanitizer::escapeId( $input, $options );
+ $this->assertEquals( $output, $escaped );
+ }
+
+ /**
+ * Data for testEscapeId: [ raw id, expected escaped id ].
+ */
+ public static function provideEscapeId() {
+ return [
+ [ '+', '.2B' ],
+ [ '&', '.26' ],
+ [ '=', '.3D' ],
+ [ ':', ':' ],
+ [ ';', '.3B' ],
+ [ '@', '.40' ],
+ [ '$', '.24' ],
+ [ '-_.', '-_.' ],
+ [ '!', '.21' ],
+ [ '*', '.2A' ],
+ [ '/', '.2F' ],
+ [ '[]', '.5B.5D' ],
+ [ '<>', '.3C.3E' ],
+ [ '\'', '.27' ],
+ [ '§', '.C2.A7' ],
+ [ 'Test:A & B/Here', 'Test:A_.26_B.2FHere' ],
+ [ 'A&B&amp;C&amp;amp;D&amp;amp;amp;E', 'A.26B.26amp.3BC.26amp.3Bamp.3BD.26amp.3Bamp.3Bamp.3BE' ],
+ ];
+ }
+
+ /**
+ * Test escapeIdReferenceList for consistency with escapeIdForAttribute
+ *
+ * The reference value is built from escapeIdForAttribute() and passed as the
+ * "expected" argument, so a failure report labels the output of
+ * escapeIdReferenceList() as the actual value.
+ *
+ * @dataProvider provideEscapeIdReferenceList
+ * @covers Sanitizer::escapeIdReferenceList
+ */
+ public function testEscapeIdReferenceList( $referenceList, $id1, $id2 ) {
+ $expected = Sanitizer::escapeIdForAttribute( $id1 )
+ . ' '
+ . Sanitizer::escapeIdForAttribute( $id2 );
+ $this->assertEquals(
+ $expected,
+ Sanitizer::escapeIdReferenceList( $referenceList )
+ );
+ }
+
+ /**
+ * Data for testEscapeIdReferenceList: a space-separated list of two ids
+ * followed by the same two ids given individually.
+ */
+ public static function provideEscapeIdReferenceList() {
+ /** [ <reference list>, <individual id 1>, <individual id 2> ] */
+ return [
+ [ 'foo bar', 'foo', 'bar' ],
+ [ '#1 #2', '#1', '#2' ],
+ [ '+1 +2', '+1', '+2' ],
+ ];
+ }
+
+ /**
+ * isReservedDataAttribute() must return exactly the expected boolean.
+ *
+ * @dataProvider provideIsReservedDataAttribute
+ * @covers Sanitizer::isReservedDataAttribute
+ */
+ public function testIsReservedDataAttribute( $attr, $expected ) {
+ $isReserved = Sanitizer::isReservedDataAttribute( $attr );
+ $this->assertSame( $expected, $isReserved );
+ }
+
+ /**
+ * Data for testIsReservedDataAttribute: [ attribute name, expected result ].
+ */
+ public static function provideIsReservedDataAttribute() {
+ return [
+ [ 'foo', false ],
+ [ 'data', false ],
+ [ 'data-foo', false ],
+ [ 'data-mw', true ],
+ [ 'data-ooui', true ],
+ [ 'data-parsoid', true ],
+ [ 'data-mw-foo', true ],
+ [ 'data-ooui-foo', true ],
+ [ 'data-mwfoo', true ], // could be false but this is how it's implemented currently
+ ];
+ }
+
+ /**
+ * Calls Sanitizer::escapeIdFor<$stuff>() under the given fragment mode
+ * configuration and compares the result with $expected.
+ *
+ * @dataProvider provideEscapeIdForStuff
+ *
+ * @covers Sanitizer::escapeIdForAttribute()
+ * @covers Sanitizer::escapeIdForLink()
+ * @covers Sanitizer::escapeIdForExternalInterwiki()
+ * @covers Sanitizer::escapeIdInternal()
+ *
+ * @param string $stuff
+ * @param string[] $config
+ * @param string $id
+ * @param string|false $expected
+ * @param int|null $mode
+ */
+ public function testEscapeIdForStuff( $stuff, array $config, $id, $expected, $mode = null ) {
+ // The last entry of $config is the external interwiki mode; what is left
+ // after popping it becomes $wgFragmentMode.
+ $interwikiMode = array_pop( $config );
+ $this->setMwGlobals( [
+ 'wgFragmentMode' => $config,
+ 'wgExternalInterwikiFragmentMode' => $interwikiMode,
+ ] );
+ $callback = [ 'Sanitizer', "escapeIdFor{$stuff}" ];
+ $actual = call_user_func( $callback, $id, $mode );
+ self::assertEquals( $expected, $actual );
+ }
+
+ /**
+ * Data for testEscapeIdForStuff.
+ *
+ * Each row is [ method suffix, config, input id, expected result, optional mode ].
+ * Declared static like the other providers in this class; the method uses
+ * no instance state.
+ */
+ public static function provideEscapeIdForStuff() {
+ // Test inputs and outputs
+ $text = 'foo тест_#%!\'()[]:<>&&amp;&amp;amp;';
+ $legacyEncoded = 'foo_.D1.82.D0.B5.D1.81.D1.82_.23.25.21.27.28.29.5B.5D:.3C.3E' .
+ '.26.26amp.3B.26amp.3Bamp.3B';
+ $html5Encoded = 'foo_тест_#%!\'()[]:<>&&amp;&amp;amp;';
+ $html5Experimental = 'foo_тест_!_()[]:<>_amp;_amp;amp;';
+
+ // Settings: last element is $wgExternalInterwikiFragmentMode, the rest is $wgFragmentMode
+ $legacy = [ 'legacy', 'legacy' ];
+ $legacyNew = [ 'legacy', 'html5', 'legacy' ];
+ $newLegacy = [ 'html5', 'legacy', 'legacy' ];
+ $new = [ 'html5', 'legacy' ];
+ $allNew = [ 'html5', 'html5' ];
+ $experimentalLegacy = [ 'html5-legacy', 'legacy', 'legacy' ];
+ $newExperimental = [ 'html5', 'html5-legacy', 'legacy' ];
+
+ return [
+ // Pure legacy: how MW worked before 2017
+ [ 'Attribute', $legacy, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $legacy, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $legacy, $text, $legacyEncoded ],
+ [ 'ExternalInterwiki', $legacy, $text, $legacyEncoded ],
+
+ // Transition to a new world: legacy links with HTML5 fallback
+ [ 'Attribute', $legacyNew, $text, $legacyEncoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $legacyNew, $text, $html5Encoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $legacyNew, $text, $legacyEncoded ],
+ [ 'ExternalInterwiki', $legacyNew, $text, $legacyEncoded ],
+
+ // New world: HTML5 links, legacy fallbacks
+ [ 'Attribute', $newLegacy, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $newLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $newLegacy, $text, $html5Encoded ],
+ [ 'ExternalInterwiki', $newLegacy, $text, $legacyEncoded ],
+
+ // Distant future: no legacy fallbacks, but still linking to legacy wikis
+ [ 'Attribute', $new, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $new, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $new, $text, $html5Encoded ],
+ [ 'ExternalInterwiki', $new, $text, $legacyEncoded ],
+
+ // Just before the heat death of universe: external interwikis are also HTML5 \m/
+ [ 'Attribute', $allNew, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $allNew, $text, false, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $allNew, $text, $html5Encoded ],
+ [ 'ExternalInterwiki', $allNew, $text, $html5Encoded ],
+
+ // Someone flipped $wgExperimentalHtmlIds on
+ [ 'Attribute', $experimentalLegacy, $text, $html5Experimental, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $experimentalLegacy, $text, $legacyEncoded, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $experimentalLegacy, $text, $html5Experimental ],
+ [ 'ExternalInterwiki', $experimentalLegacy, $text, $legacyEncoded ],
+
+ // Migration from $wgExperimentalHtmlIds to modern HTML5
+ [ 'Attribute', $newExperimental, $text, $html5Encoded, Sanitizer::ID_PRIMARY ],
+ [ 'Attribute', $newExperimental, $text, $html5Experimental, Sanitizer::ID_FALLBACK ],
+ [ 'Link', $newExperimental, $text, $html5Encoded ],
+ [ 'ExternalInterwiki', $newExperimental, $text, $legacyEncoded ],
+ ];
+ }
+
+ /**
+ * stripAllTags() must reduce $input to the expected plain text.
+ *
+ * @dataProvider provideStripAllTags
+ *
+ * @covers Sanitizer::stripAllTags()
+ * @covers RemexStripTagHandler
+ *
+ * @param string $input
+ * @param string $expected
+ */
+ public function testStripAllTags( $input, $expected ) {
+ $stripped = Sanitizer::stripAllTags( $input );
+ $this->assertEquals( $expected, $stripped );
+ }
+
+ /**
+ * Data for testStripAllTags: [ input HTML, expected plain text ].
+ *
+ * Declared static like the other providers in this class; the method uses
+ * no instance state.
+ */
+ public static function provideStripAllTags() {
+ return [
+ [ '<p>Foo</p>', 'Foo' ],
+ [ '<p id="one">Foo</p><p id="two">Bar</p>', 'FooBar' ],
+ [ "<p>Foo</p>\n<p>Bar</p>", 'Foo Bar' ],
+ [ '<p>Hello &lt;strong&gt; wor&#x6c;&#100; caf&eacute;</p>', 'Hello <strong> world café' ],
+ [
+ '<p><small data-foo=\'bar"&lt;baz>quux\'><a href="./Foo">Bar</a></small> Whee!</p>',
+ 'Bar Whee!'
+ ],
+ [ '1<span class="<?php">2</span>3', '123' ],
+ [ '1<span class="<?">2</span>3', '123' ],
+ ];
+ }
+
+ /**
+ * With 'boom!' as the only configured fragment mode, escaping an id must throw.
+ *
+ * @expectedException InvalidArgumentException
+ * @covers Sanitizer::escapeIdInternal()
+ */
+ public function testInvalidFragmentThrows() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 'boom!' ] ] );
+ Sanitizer::escapeIdForAttribute( 'This should throw' );
+ }
+
+ /**
+ * The configured mode array has no element at index 0;
+ * escapeIdForAttribute() must then throw.
+ *
+ * @expectedException UnexpectedValueException
+ * @covers Sanitizer::escapeIdForAttribute()
+ */
+ public function testNoPrimaryFragmentModeThrows() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 666 => 'html5' ] ] );
+ Sanitizer::escapeIdForAttribute( 'This should throw' );
+ }
+
+ /**
+ * The configured mode array has no element at index 0;
+ * escapeIdForLink() must then throw.
+ *
+ * @expectedException UnexpectedValueException
+ * @covers Sanitizer::escapeIdForLink()
+ */
+ public function testNoPrimaryFragmentModeThrows2() {
+ $this->setMwGlobals( [ 'wgFragmentMode' => [ 666 => 'html5' ] ] );
+ Sanitizer::escapeIdForLink( 'This should throw' );
+ }
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/StripStateTest.php b/www/wiki/tests/phpunit/includes/parser/StripStateTest.php
new file mode 100644
index 00000000..0f4f6e0f
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/StripStateTest.php
@@ -0,0 +1,136 @@
+<?php
+
+/**
+ * @covers StripState
+ */
+class StripStateTest extends MediaWikiTestCase {
+ /**
+ * Switch the content language to 'qqx' for every test.
+ *
+ * NOTE(review): presumably this makes messages render as "(key: params)",
+ * the shape getWarning() builds - confirm.
+ */
+ public function setUp() {
+ parent::setUp();
+ $this->setContentLang( 'qqx' );
+ }
+
+ /**
+ * Build a new strip marker; a counter kept in a static local makes each
+ * call return a different marker.
+ *
+ * @return string Parser marker prefix, '-blah-', the counter as 8 upper-case
+ * hex digits, and the parser marker suffix
+ */
+ private function getMarker() {
+ // Start at 0 explicitly instead of relying on post-incrementing null.
+ static $i = 0;
+ return Parser::MARKER_PREFIX . '-blah-' . sprintf( '%08X', $i++ ) . Parser::MARKER_SUFFIX;
+ }
+
+ /**
+ * Build the error span the tests expect for a warning message.
+ *
+ * @param string $message Message key
+ * @param string|int $max Value shown after the key (empty by default)
+ * @return string
+ */
+ private static function getWarning( $message, $max = '' ) {
+ return '<span class="error">(' . $message . ': ' . $max . ')</span>';
+ }
+
+ /**
+ * A nowiki marker must pass through unstripGeneral() untouched and only be
+ * replaced by unstripNoWiki().
+ */
+ public function testAddNoWiki() {
+ $state = new StripState;
+ $marker = $this->getMarker();
+ $state->addNoWiki( $marker, '<>' );
+
+ $afterGeneral = $state->unstripGeneral( "x{$marker}y" );
+ // Had the marker been expanded already, this would remove its '<'
+ $withoutLt = str_replace( '<', '', $afterGeneral );
+ $result = $state->unstripNoWiki( $withoutLt );
+
+ $this->assertSame( 'x<>y', $result );
+ }
+
+ /**
+ * A general marker must pass through unstripNoWiki() untouched and only be
+ * replaced by unstripGeneral().
+ */
+ public function testAddGeneral() {
+ $state = new StripState;
+ $marker = $this->getMarker();
+ $state->addGeneral( $marker, '<>' );
+
+ $afterNoWiki = $state->unstripNoWiki( "x{$marker}y" );
+ // Had the marker been expanded already, this would remove its '<'
+ $withoutLt = str_replace( '<', '', $afterNoWiki );
+ $result = $state->unstripGeneral( $withoutLt );
+
+ $this->assertSame( 'x<>y', $result );
+ }
+
+ /**
+ * unstripBoth() must replace a nowiki marker and a general marker in one call.
+ */
+ public function testUnstripBoth() {
+ $state = new StripState;
+ $noWikiMarker = $this->getMarker();
+ $generalMarker = $this->getMarker();
+ $state->addNoWiki( $noWikiMarker, '<1>' );
+ $state->addGeneral( $generalMarker, '<2>' );
+
+ $input = str_replace( '<', '', "x{$noWikiMarker}{$generalMarker}y" );
+ $result = $state->unstripBoth( $input );
+
+ $this->assertSame( 'x<1><2>y', $result );
+ }
+
+ /**
+ * Data for testUnstripRecursive: [ nesting depth, expected unstripped text ].
+ * The last row expects a depth warning in place of the innermost level.
+ */
+ public static function provideUnstripRecursive() {
+ return [
+ [ 0, 'text' ],
+ [ 1, '=text=' ],
+ [ 2, '==text==' ],
+ [ 3, '==' . self::getWarning( 'unstrip-depth-warning', 2 ) . '==' ],
+ ];
+ }
+
+ /**
+ * Nest $depth markers inside each other and unstrip with a depth limit of 2.
+ *
+ * @dataProvider provideUnstripRecursive
+ */
+ public function testUnstripRecursive( $depth, $expected ) {
+ $state = new StripState( null, [ 'depthLimit' => 2 ] );
+ $wikitext = 'text';
+ for ( $level = 0; $level < $depth; $level++ ) {
+ // Wrap the previous level in '=' and hide it behind a new marker
+ $marker = $this->getMarker();
+ $state->addNoWiki( $marker, "={$wikitext}=" );
+ $wikitext = $marker;
+ }
+ $this->assertSame( $expected, $state->unstripNoWiki( $wikitext ) );
+ }
+
+ /**
+ * A marker whose replacement is the marker itself must produce the loop warning.
+ */
+ public function testUnstripLoop() {
+ $state = new StripState( null, [ 'depthLimit' => 2 ] );
+ $marker = $this->getMarker();
+ $state->addNoWiki( $marker, $marker );
+ $result = $state->unstripNoWiki( $marker );
+ $expected = self::getWarning( 'parser-unstrip-loop-warning' );
+ $this->assertSame( $expected, $result );
+ }
+
+ /**
+ * Data for testUnstripSize: [ nesting depth, expected unstripped text ].
+ * The last row expects the size warning twice.
+ */
+ public static function provideUnstripSize() {
+ return [
+ [ 0, 'x' ],
+ [ 1, 'xx' ],
+ [ 2, str_repeat( self::getWarning( 'unstrip-size-warning', 5 ), 2 ) ]
+ ];
+ }
+
+ /**
+ * Double the text at each of $depth nesting levels and unstrip with a
+ * size limit of 5.
+ *
+ * @dataProvider provideUnstripSize
+ */
+ public function testUnstripSize( $depth, $expected ) {
+ $state = new StripState( null, [ 'sizeLimit' => 5 ] );
+ $wikitext = 'x';
+ for ( $level = 0; $level < $depth; $level++ ) {
+ // Each level references the previous text twice through one marker
+ $marker = $this->getMarker();
+ $state->addNoWiki( $marker, $wikitext );
+ $wikitext = "$marker$marker";
+ }
+ $this->assertSame( $expected, $state->unstripNoWiki( $wikitext ) );
+ }
+
+ /**
+ * Data for testGetLimitReport: yields the nesting depths 1, 2 and 3.
+ *
+ * Declared static like the other providers in this class; the method uses
+ * no instance state.
+ */
+ public static function provideGetLimitReport() {
+ for ( $i = 1; $i < 4; $i++ ) {
+ yield [ $i ];
+ }
+ }
+
+ /**
+ * After unstripping $depth nested levels, the limit report must contain the
+ * expected depth and size figures together with their limits.
+ *
+ * @dataProvider provideGetLimitReport
+ */
+ public function testGetLimitReport( $depth ) {
+ $maxSize = 100000;
+ $state = new StripState( null, [ 'depthLimit' => 5, 'sizeLimit' => $maxSize ] );
+ $wikitext = 'x';
+ for ( $level = 0; $level < $depth; $level++ ) {
+ $marker = $this->getMarker();
+ $state->addNoWiki( $marker, $wikitext );
+ $wikitext = "$marker$marker";
+ }
+ // Only the call matters here; the unstripped text itself is not inspected.
+ $state->unstripNoWiki( $wikitext );
+
+ // Index the report entries by message key
+ $byMessage = [];
+ foreach ( $state->getLimitReport() as list( $key, $args ) ) {
+ $byMessage[$key] = $args;
+ }
+
+ $this->assertSame( [ $depth - 1, 5 ], $byMessage['limitreport-unstrip-depth'] );
+ $markerLength = strlen( $this->getMarker() );
+ $expectedSize = $markerLength * 2 * ( pow( 2, $depth ) - 2 ) + pow( 2, $depth );
+ $this->assertSame( [ $expectedSize, $maxSize ], $byMessage['limitreport-unstrip-size'] );
+ }
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/TagHooksTest.php b/www/wiki/tests/phpunit/includes/parser/TagHooksTest.php
new file mode 100644
index 00000000..bc09adc8
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/TagHooksTest.php
@@ -0,0 +1,134 @@
+<?php
+
+/**
+ * @group Database
+ * @group Parser
+ *
+ * @covers Parser
+ * @covers BlockLevelPass
+ * @covers StripState
+ *
+ * @covers Preprocessor_DOM
+ * @covers PPDStack
+ * @covers PPDStackElement
+ * @covers PPDPart
+ * @covers PPFrame_DOM
+ * @covers PPTemplateFrame_DOM
+ * @covers PPCustomFrame_DOM
+ * @covers PPNode_DOM
+ *
+ * @covers Preprocessor_Hash
+ * @covers PPDStack_Hash
+ * @covers PPDStackElement_Hash
+ * @covers PPDPart_Hash
+ * @covers PPFrame_Hash
+ * @covers PPTemplateFrame_Hash
+ * @covers PPCustomFrame_Hash
+ * @covers PPNode_Hash_Tree
+ * @covers PPNode_Hash_Text
+ * @covers PPNode_Hash_Array
+ * @covers PPNode_Hash_Attr
+ */
+class TagHooksTest extends MediaWikiTestCase {
+ /**
+ * Tag names used by testTagHooks and testFunctionTagHooks, which expect
+ * parsing to succeed for each of them.
+ */
+ public static function provideValidNames() {
+ return [
+ [ 'foo' ],
+ [ 'foo-bar' ],
+ [ 'foo_bar' ],
+ [ 'FOO-BAR' ],
+ [ 'foo bar' ]
+ ];
+ }
+
+ /**
+ * Tag names used by testBadTagHooks and testBadFunctionTagHooks, which
+ * expect an MWException for each of them.
+ */
+ public static function provideBadNames() {
+ return [
+ [ "foo<bar" ],
+ [ "foo>bar" ],
+ [ "foo\nbar" ],
+ [ "foo\rbar" ],
+ ];
+ }
+
+ /**
+ * Parser options for a fresh User object and the content language.
+ *
+ * @return ParserOptions
+ */
+ private function getParserOptions() {
+ global $wgContLang;
+ return ParserOptions::newFromUserAndLang( new User, $wgContLang );
+ }
+
+ /**
+ * A hook registered with setHook() must replace the tag and its content
+ * with the callback result ('One' is the ROT13 of 'Bar').
+ *
+ * @dataProvider provideValidNames
+ */
+ public function testTagHooks( $tag ) {
+ global $wgParserConf;
+ $parser = new Parser( $wgParserConf );
+ $parser->setHook( $tag, [ $this, 'tagCallback' ] );
+
+ $wikitext = "Foo<$tag>Bar</$tag>Baz";
+ $title = Title::newFromText( 'Test' );
+ $output = $parser->parse( $wikitext, $title, $this->getParserOptions() );
+ $html = $output->getText( [ 'unwrap' => true ] );
+ $this->assertEquals( "<p>FooOneBaz\n</p>", $html );
+
+ $parser->mPreprocessor = null; # Break the Parser <-> Preprocessor cycle
+ }
+
+ /**
+ * Registering and using a hook for an invalid tag name must raise an MWException.
+ *
+ * @dataProvider provideBadNames
+ * @expectedException MWException
+ */
+ public function testBadTagHooks( $tag ) {
+ global $wgParserConf;
+ $parser = new Parser( $wgParserConf );
+ $parser->setHook( $tag, [ $this, 'tagCallback' ] );
+
+ $wikitext = "Foo<$tag>Bar</$tag>Baz";
+ $title = Title::newFromText( 'Test' );
+ $parser->parse( $wikitext, $title, $this->getParserOptions() );
+
+ // Reaching this line means nothing above threw
+ $this->fail( 'Exception not thrown.' );
+ }
+
+ /**
+ * A hook registered with setFunctionTagHook() must replace the tag and its
+ * content with the callback result ('One' is the ROT13 of 'Bar').
+ *
+ * @dataProvider provideValidNames
+ */
+ public function testFunctionTagHooks( $tag ) {
+ global $wgParserConf;
+ $parser = new Parser( $wgParserConf );
+ $parser->setFunctionTagHook( $tag, [ $this, 'functionTagCallback' ], 0 );
+
+ $wikitext = "Foo<$tag>Bar</$tag>Baz";
+ $title = Title::newFromText( 'Test' );
+ $output = $parser->parse( $wikitext, $title, $this->getParserOptions() );
+ $html = $output->getText( [ 'unwrap' => true ] );
+ $this->assertEquals( "<p>FooOneBaz\n</p>", $html );
+
+ $parser->mPreprocessor = null; # Break the Parser <-> Preprocessor cycle
+ }
+
+ /**
+ * Registering and using a function tag hook for an invalid tag name must
+ * raise an MWException.
+ *
+ * @dataProvider provideBadNames
+ * @expectedException MWException
+ */
+ public function testBadFunctionTagHooks( $tag ) {
+ global $wgParserConf;
+ $parser = new Parser( $wgParserConf );
+ $callback = [ $this, 'functionTagCallback' ];
+ $parser->setFunctionTagHook( $tag, $callback, Parser::SFH_OBJECT_ARGS );
+
+ $wikitext = "Foo<$tag>Bar</$tag>Baz";
+ $title = Title::newFromText( 'Test' );
+ $parser->parse( $wikitext, $title, $this->getParserOptions() );
+
+ // Reaching this line means nothing above threw
+ $this->fail( 'Exception not thrown.' );
+ }
+
+ /**
+ * Hook callback registered through setHook(): returns the ROT13 of the tag content.
+ *
+ * @param string $text Tag content
+ * @param array $params Unused
+ * @param Parser $parser Unused
+ * @return string
+ */
+ function tagCallback( $text, $params, $parser ) {
+ $rotated = str_rot13( $text );
+ return $rotated;
+ }
+
+ /**
+ * Hook callback registered through setFunctionTagHook(): returns the ROT13 of $code.
+ *
+ * @param Parser &$parser Unused
+ * @param mixed $frame Unused
+ * @param string $code Tag content
+ * @param mixed $attribs Unused
+ * @return string
+ */
+ function functionTagCallback( &$parser, $frame, $code, $attribs ) {
+ $rotated = str_rot13( $code );
+ return $rotated;
+ }
+}
diff --git a/www/wiki/tests/phpunit/includes/parser/TidyTest.php b/www/wiki/tests/phpunit/includes/parser/TidyTest.php
new file mode 100644
index 00000000..be5125c7
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/parser/TidyTest.php
@@ -0,0 +1,63 @@
+<?php
+
+/**
+ * @group Parser
+ */
+class TidyTest extends MediaWikiTestCase {
+
+ /**
+ * Skip every test in this class when MWTidy reports that it is not enabled.
+ */
+ protected function setUp() {
+ parent::setUp();
+ if ( MWTidy::isEnabled() ) {
+ return;
+ }
+ $this->markTestSkipped( 'Tidy not found' );
+ }
+
+ /**
+ * Run $text through MWTidy and compare it with $expected, ignoring <p>
+ * tags, surrounding whitespace and carriage returns.
+ *
+ * @dataProvider provideTestWrapping
+ */
+ public function testTidyWrapping( $expected, $text, $msg = '' ) {
+ $tidied = MWTidy::tidy( $text );
+ // Where Tidy decides to put its <p>s is irrelevant here, so drop them
+ $withoutParagraphs = trim( preg_replace( '#</?p>#', '', $tidied ) );
+ // Remove carriage returns so Windows line endings compare equal
+ $normalized = str_replace( "\r", '', $withoutParagraphs );
+ $this->assertEquals( $expected, $normalized, $msg );
+ }
+
+ /**
+ * Data for testTidyWrapping: [ expected output, input, assertion message ].
+ * The MathML fixture is a nowdoc, so it is used verbatim as both input and
+ * expected output.
+ */
+ public static function provideTestWrapping() {
+ $testMathML = <<<'MathML'
+<math xmlns="http://www.w3.org/1998/Math/MathML">
+ <mrow>
+ <mi>a</mi>
+ <mo>&InvisibleTimes;</mo>
+ <msup>
+ <mi>x</mi>
+ <mn>2</mn>
+ </msup>
+ <mo>+</mo>
+ <mi>b</mi>
+ <mo>&InvisibleTimes; </mo>
+ <mi>x</mi>
+ <mo>+</mo>
+ <mi>c</mi>
+ </mrow>
+ </math>
+MathML;
+ return [
+ [
+ '<mw:editsection page="foo" section="bar">foo</mw:editsection>',
+ '<mw:editsection page="foo" section="bar">foo</mw:editsection>',
+ '<mw:editsection> should survive tidy'
+ ],
+ [
+ '<editsection page="foo" section="bar">foo</editsection>',
+ '<editsection page="foo" section="bar">foo</editsection>',
+ '<editsection> should survive tidy'
+ ],
+ [ '<mw:toc>foo</mw:toc>', '<mw:toc>foo</mw:toc>', '<mw:toc> should survive tidy' ],
+ [ "<link foo=\"bar\" />foo", '<link foo="bar"/>foo', '<link> should survive tidy' ],
+ [ "<meta foo=\"bar\" />foo", '<meta foo="bar"/>foo', '<meta> should survive tidy' ],
+ [ $testMathML, $testMathML, '<math> should survive tidy' ],
+ ];
+ }
+}