diff options
Diffstat (limited to 'www/wiki/tests/phpunit/includes/tidy/BalancerTest.php')
-rw-r--r-- | www/wiki/tests/phpunit/includes/tidy/BalancerTest.php | 169 |
1 files changed, 169 insertions, 0 deletions
<?php

/**
 * Runs MediaWiki\Tidy\Balancer against the html5lib tree-construction
 * cases stored in html5lib-tests.json (next to this file), plus one
 * MediaWiki-specific round-trip case for the tidy-compatible mode.
 */
class BalancerTest extends MediaWikiTestCase {

	/**
	 * Anything that needs to happen before your tests should go here.
	 */
	protected function setUp() {
		// Be sure to call the parent setup and teardown functions.
		// This makes sure that all the various cleanup and restorations
		// happen as they should (including the restoration for setMwGlobals).
		parent::setUp();
	}

	/**
	 * Balance $input and compare the serialized result with $expected.
	 *
	 * @covers MediaWiki\Tidy\Balancer
	 * @covers MediaWiki\Tidy\BalanceSets
	 * @covers MediaWiki\Tidy\BalanceElement
	 * @covers MediaWiki\Tidy\BalanceStack
	 * @covers MediaWiki\Tidy\BalanceMarker
	 * @covers MediaWiki\Tidy\BalanceActiveFormattingElements
	 * @dataProvider provideBalancerTests
	 *
	 * @param string $description Message shown when the assertion fails
	 * @param string $input HTML handed to the balancer
	 * @param string $expected Expected balanced HTML
	 * @param bool $useTidy Value passed as the balancer's 'tidyCompat' option
	 */
	public function testBalancer( $description, $input, $expected, $useTidy ) {
		$balancer = new MediaWiki\Tidy\Balancer( [
			'strict' => false, /* not strict */
			'allowedHtmlElements' => null, /* no sanitization */
			'tidyCompat' => $useTidy, /* tidy-compatible mode only when requested */
			'allowComments' => true, /* comment parsing */
		] );
		$output = $balancer->balance( $input );

		// Ignore self-closing tags: rewrite "<x />" as "<x>" so that the
		// comparison does not depend on how void elements are serialized.
		$output = preg_replace( '/\s*\/>/', '>', $output );

		$this->assertEquals( $expected, $output, $description );
	}

	/**
	 * Data provider for testBalancer().
	 *
	 * @return array[] Rows of [ description, input, expected, useTidy ]
	 * @throws RuntimeException If html5lib-tests.json cannot be read or parsed
	 */
	public static function provideBalancerTests() {
		// Munge the html5lib cases into the format phpunit expects for
		// providers, filtering out HTML constructs which the balancer
		// doesn't support.
		$tests = [];
		foreach ( self::loadHtml5libTests() as $filename => $cases ) {
			foreach ( $cases as $case ) {
				$test = self::buildTestCase( $filename, $case );
				if ( $test !== null ) {
					$tests[] = $test;
				}
			}
		}

		# Some additional tests for mediawiki-specific features
		$tests[] = [
			'Round-trip serialization for <pre>/<listing>/<textarea>',
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			"<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
			true # use the tidy-compatible mode
		];

		return $tests;
	}

	/**
	 * Load and decode the html5lib test cases stored next to this file.
	 *
	 * Fails loudly instead of letting a missing or corrupt fixture shrink
	 * the provider down to the single hand-written case.
	 *
	 * @return array Map of source file name => list of test cases
	 * @throws RuntimeException If the fixture is unreadable or is not a JSON
	 *  array/object
	 */
	private static function loadHtml5libTests() {
		$path = __DIR__ . '/html5lib-tests.json';
		$contents = is_readable( $path ) ? file_get_contents( $path ) : false;
		if ( $contents === false ) {
			throw new RuntimeException( "Unable to read balancer fixture: $path" );
		}

		$json = json_decode( $contents, true );
		if ( !is_array( $json ) ) {
			throw new RuntimeException(
				"Balancer fixture $path did not decode to an array (" .
				json_last_error_msg() . ')'
			);
		}

		return $json;
	}

	/**
	 * Turn one html5lib case into a provider row, or reject it.
	 *
	 * @param string $filename Name of the html5lib file the case came from
	 * @param array $case Decoded test case; reads 'data' and 'document'
	 * @return array|null [ description, input, expected, useTidy ], or null
	 *  when the case exercises something the balancer does not support
	 */
	private static function buildTestCase( $filename, $case ) {
		$okre = "~ \A
			(?i:<!DOCTYPE\ html>)?
			<html><head></head><body>
			.*
			</body></html>
		\z ~xs";
		if ( !preg_match( $okre, $case['document']['html'] ) ) {
			// Skip tests which involve stuff in the <head> or
			// weird doctypes.
			return null;
		}

		// The expected output is not carved out of the full document.
		// Instead we use a separate field in the test case, which reports
		// how domino would parse this case in a no-quirks <body> context.
		// (The original test case may have had a different context, or
		// relied on quirks mode.)
		$html = $case['document']['noQuirksBodyHtml'];
		// Normalize case of the SVG name "foreignObject".
		$html = str_replace( 'foreignObject', 'foreignobject', $html );
		// Normalize case of the MathML name "definitionURL".
		$html = str_replace( 'definitionURL', 'definitionurl', $html );

		if (
			isset( $case['document']['props']['comment'] ) &&
			preg_match( ',<!--[^>]*<,', $html )
		) {
			// Skip tests which include HTML comments containing
			// the < character, which we don't support.
			return null;
		}
		if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
			// Skip tests involving <![CDATA[ ]]> quoting.
			return null;
		}
		if (
			stripos( $case['data'], '<!DOCTYPE' ) !== false &&
			stripos( $case['data'], '<!DOCTYPE html>' ) === false
		) {
			// Skip tests involving unusual doctypes.
			return null;
		}
		$literalre = "~ <rdar: | < /? (
			html | head | body | frame | frameset | plaintext
		) > ~xi";
		if ( preg_match( $literalre, $case['data'] ) ) {
			// Skip tests involving some literal tags, which are
			// unsupported but don't show up in the expected output.
			return null;
		}
		if (
			isset( $case['document']['props']['tags']['iframe'] ) ||
			isset( $case['document']['props']['tags']['noembed'] ) ||
			isset( $case['document']['props']['tags']['noscript'] ) ||
			isset( $case['document']['props']['tags']['script'] ) ||
			isset( $case['document']['props']['tags']['svg script'] ) ||
			isset( $case['document']['props']['tags']['svg title'] ) ||
			isset( $case['document']['props']['tags']['title'] ) ||
			isset( $case['document']['props']['tags']['xmp'] )
		) {
			// Skip tests with unsupported tags which *do* show
			// up in the expected output.
			return null;
		}
		// NOTE(review): the lone "-" and the repeated "&" alternative in the
		// second pattern look like HTML entities that were decoded somewhere
		// along the way (possibly "&#45" and "&AMP") -- confirm against the
		// upstream file before relying on this filter.
		if (
			$filename === 'entities01.dat' ||
			$filename === 'entities02.dat' ||
			preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
			preg_match( '/^(&|&#|&#X|&#x|-|&x-test|&)$/', $case['data'] )
		) {
			// Skip tests involving entity encoding.
			return null;
		}
		if (
			isset( $case['document']['props']['tagWithLt'] ) ||
			isset( $case['document']['props']['attrWithFunnyChar'] ) ||
			preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
			preg_match( ':</p<p>:', $case['data'] ) ||
			preg_match( ':<b &=&>|<p/x/y/z>:', $case['data'] )
		) {
			// Skip tests with funny tag or attribute names,
			// which are really tests of the HTML tokenizer, not
			// the tree builder.
			return null;
		}
		if (
			preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
		) {
			// The Sanitizer normalizes whitespace in attribute
			// values, which makes this test case invalid.
			return null;
		}
		if ( $filename === 'plain-text-unsafe.dat' ) {
			// Skip tests with ASCII null, etc.
			return null;
		}

		// The doctype is not part of what gets balanced; drop it from the input.
		$data = preg_replace(
			'~<!DOCTYPE html>~i', '', $case['data']
		);

		return [
			$filename, # use better description?
			$data,
			$html,
			false # strict HTML5 compat mode, no tidy
		];
	}
}