summaryrefslogtreecommitdiff
path: root/www/wiki/includes/tidy/RemexDriver.php
blob: e02af88fd9f2bdf05dee00878c9079c20c7f5df1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
<?php

namespace MediaWiki\Tidy;

use RemexHtml\Serializer\Serializer;
use RemexHtml\Tokenizer\Tokenizer;
use RemexHtml\TreeBuilder\Dispatcher;
use RemexHtml\TreeBuilder\TreeBuilder;
use RemexHtml\TreeBuilder\TreeMutationTracer;

/**
 * Tidy driver that re-serializes an HTML fragment through the RemexHtml
 * tokenizer / tree-builder / serializer pipeline.
 */
class RemexDriver extends TidyDriverBase {
	/** @var mixed Value of the 'treeMutationTrace' option; used as a boolean switch */
	private $trace;

	/** @var mixed Value of the 'pwrap' option; used as a boolean switch */
	private $pwrap;

	/**
	 * @param array $config Driver configuration. Keys read by this class:
	 *   - treeMutationTrace: if truthy, a TreeMutationTracer is inserted into
	 *     the handler chain and its messages are sent to wfDebug(). Default false.
	 *   - pwrap: if truthy, a RemexCompatMunger is placed in front of the
	 *     serializer. Default true.
	 *   The configuration, with these defaults filled in, is passed on to the
	 *   parent constructor.
	 */
	public function __construct( array $config ) {
		$defaults = [
			'treeMutationTrace' => false,
			'pwrap' => true
		];
		// Array union: keys supplied by the caller take precedence over defaults.
		$config = $config + $defaults;

		$this->trace = $config['treeMutationTrace'];
		$this->pwrap = $config['pwrap'];

		parent::__construct( $config );
	}

	/**
	 * Parse $text as the contents of an HTML <body> fragment and return the
	 * serializer's output for it.
	 *
	 * @param string $text Markup to process
	 * @return mixed Whatever Serializer::getResult() yields (presumably the
	 *   cleaned-up HTML string -- confirm against RemexHtml)
	 */
	public function tidy( $text ) {
		$serializer = new Serializer( new RemexCompatFormatter );

		// The handler chain is assembled back to front: the serializer is the
		// final sink, and each optional stage wraps whatever comes after it.
		$handler = $this->pwrap
			? new RemexCompatMunger( $serializer )
			: $serializer;

		if ( $this->trace ) {
			$handler = new TreeMutationTracer( $handler, function ( $msg ) {
				wfDebug( "RemexHtml: $msg" );
			} );
		}

		$dispatcher = new Dispatcher(
			new TreeBuilder( $handler, [
				'ignoreErrors' => true,
				'ignoreNulls' => true,
			] )
		);

		$tokenizer = new Tokenizer( $dispatcher, $text, [
			'ignoreErrors' => true,
			'ignoreCharRefs' => true,
			'ignoreNulls' => true,
			'skipPreprocess' => true,
		] );

		// Fragment mode: tokenize as though inside an HTML-namespace <body>.
		$fragmentOptions = [
			'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
			'fragmentName' => 'body'
		];
		$tokenizer->execute( $fragmentOptions );

		// The result is read from the serializer itself, not from any wrapper.
		return $serializer->getResult();
	}
}