summaryrefslogtreecommitdiff
path: root/www/wiki/tests/phpunit/includes/tidy/BalancerTest.php
diff options
context:
space:
mode:
Diffstat (limited to 'www/wiki/tests/phpunit/includes/tidy/BalancerTest.php')
-rw-r--r--www/wiki/tests/phpunit/includes/tidy/BalancerTest.php169
1 files changed, 169 insertions, 0 deletions
diff --git a/www/wiki/tests/phpunit/includes/tidy/BalancerTest.php b/www/wiki/tests/phpunit/includes/tidy/BalancerTest.php
new file mode 100644
index 00000000..8a4f662a
--- /dev/null
+++ b/www/wiki/tests/phpunit/includes/tidy/BalancerTest.php
@@ -0,0 +1,169 @@
+<?php
+
+class BalancerTest extends MediaWikiTestCase {
+
+ /**
+ * Anything that needs to happen before your tests should go here.
+ */
+ protected function setUp() {
+ // Be sure to do call the parent setup and teardown functions.
+ // This makes sure that all the various cleanup and restorations
+ // happen as they should (including the restoration for setMwGlobals).
+ parent::setUp();
+ }
+
+ /**
+ * @covers MediaWiki\Tidy\Balancer
+ * @covers MediaWiki\Tidy\BalanceSets
+ * @covers MediaWiki\Tidy\BalanceElement
+ * @covers MediaWiki\Tidy\BalanceStack
+ * @covers MediaWiki\Tidy\BalanceMarker
+ * @covers MediaWiki\Tidy\BalanceActiveFormattingElements
+ * @dataProvider provideBalancerTests
+ */
+ public function testBalancer( $description, $input, $expected, $useTidy ) {
+ $balancer = new MediaWiki\Tidy\Balancer( [
+ 'strict' => false, /* not strict */
+ 'allowedHtmlElements' => null, /* no sanitization */
+ 'tidyCompat' => $useTidy, /* standard parser */
+ 'allowComments' => true, /* comment parsing */
+ ] );
+ $output = $balancer->balance( $input );
+
+ // Ignore self-closing tags
+ $output = preg_replace( '/\s*\/>/', '>', $output );
+
+ $this->assertEquals( $expected, $output, $description );
+ }
+
+ public static function provideBalancerTests() {
+ // Get the tests from html5lib-tests.json
+ $json = json_decode( file_get_contents(
+ __DIR__ . '/html5lib-tests.json'
+ ), true );
+ // Munge this slightly into the format phpunit expects
+ // for providers, and filter out HTML constructs which
+ // the balancer doesn't support.
+ $tests = [];
+ $okre = "~ \A
+ (?i:<!DOCTYPE\ html>)?
+ <html><head></head><body>
+ .*
+ </body></html>
+ \z ~xs";
+ foreach ( $json as $filename => $cases ) {
+ foreach ( $cases as $case ) {
+ $html = $case['document']['html'];
+ if ( !preg_match( $okre, $html ) ) {
+ // Skip tests which involve stuff in the <head> or
+ // weird doctypes.
+ continue;
+ }
+ // We used to do this:
+ // $html = substr( $html, strlen( $start ), -strlen( $end ) );
+ // But now we use a different field in the test case,
+ // which reports how domino would parse this case in a
+ // no-quirks <body> context. (The original test case may
+ // have had a different context, or relied on quirks mode.)
+ $html = $case['document']['noQuirksBodyHtml'];
+ // Normalize case of SVG attributes.
+ $html = str_replace( 'foreignObject', 'foreignobject', $html );
+ // Normalize case of MathML attributes.
+ $html = str_replace( 'definitionURL', 'definitionurl', $html );
+
+ if (
+ isset( $case['document']['props']['comment'] ) &&
+ preg_match( ',<!--[^>]*<,', $html )
+ ) {
+ // Skip tests which include HTML comments containing
+ // the < character, which we don't support.
+ continue;
+ }
+ if ( strpos( $case['data'], '<![CDATA[' ) !== false ) {
+ // Skip tests involving <![CDATA[ ]]> quoting.
+ continue;
+ }
+ if (
+ stripos( $case['data'], '<!DOCTYPE' ) !== false &&
+ stripos( $case['data'], '<!DOCTYPE html>' ) === false
+ ) {
+ // Skip tests involving unusual doctypes.
+ continue;
+ }
+ $literalre = "~ <rdar: | < /? (
+ html | head | body | frame | frameset | plaintext
+ ) > ~xi";
+ if ( preg_match( $literalre, $case['data'] ) ) {
+ // Skip tests involving some literal tags, which are
+ // unsupported but don't show up in the expected output.
+ continue;
+ }
+ if (
+ isset( $case['document']['props']['tags']['iframe'] ) ||
+ isset( $case['document']['props']['tags']['noembed'] ) ||
+ isset( $case['document']['props']['tags']['noscript'] ) ||
+ isset( $case['document']['props']['tags']['script'] ) ||
+ isset( $case['document']['props']['tags']['svg script'] ) ||
+ isset( $case['document']['props']['tags']['svg title'] ) ||
+ isset( $case['document']['props']['tags']['title'] ) ||
+ isset( $case['document']['props']['tags']['xmp'] )
+ ) {
+ // Skip tests with unsupported tags which *do* show
+ // up in the expected output.
+ continue;
+ }
+ if (
+ $filename === 'entities01.dat' ||
+ $filename === 'entities02.dat' ||
+ preg_match( '/&([a-z]+|#x[0-9A-F]+);/i', $case['data'] ) ||
+ preg_match( '/^(&|&#|&#X|&#x|&#45|&x-test|&AMP)$/', $case['data'] )
+ ) {
+ // Skip tests involving entity encoding.
+ continue;
+ }
+ if (
+ isset( $case['document']['props']['tagWithLt'] ) ||
+ isset( $case['document']['props']['attrWithFunnyChar'] ) ||
+ preg_match( ':^(</b test|<di|<foo bar=qux/>)$:', $case['data'] ) ||
+ preg_match( ':</p<p>:', $case['data'] ) ||
+ preg_match( ':<b &=&amp>|<p/x/y/z>:', $case['data'] )
+ ) {
+ // Skip tests with funny tag or attribute names,
+ // which are really tests of the HTML tokenizer, not
+ // the tree builder.
+ continue;
+ }
+ if (
+ preg_match( ':encoding=" text/html "|type=" hidden":', $case['data'] )
+ ) {
+ // The Sanitizer normalizes whitespace in attribute
+ // values, which makes this test case invalid.
+ continue;
+ }
+ if ( $filename === 'plain-text-unsafe.dat' ) {
+ // Skip tests with ASCII null, etc.
+ continue;
+ }
+ $data = preg_replace(
+ '~<!DOCTYPE html>~i', '', $case['data']
+ );
+ $tests[] = [
+ $filename, # use better description?
+ $data,
+ $html,
+ false # strict HTML5 compat mode, no tidy
+ ];
+ }
+ }
+
+ # Some additional tests for mediawiki-specific features
+ $tests[] = [
+ 'Round-trip serialization for <pre>/<listing>/<textarea>',
+ "<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
+ "<pre>\n\na</pre><listing>\n\nb</listing><textarea>\n\nc</textarea>",
+ true # use the tidy-compatible mode
+ ];
+
+ return $tests;
+ }
+}