diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/diff |
first commit
Diffstat (limited to 'www/wiki/includes/diff')
-rw-r--r-- | www/wiki/includes/diff/ArrayDiffFormatter.php | 82 | ||||
-rw-r--r-- | www/wiki/includes/diff/ComplexityException.php | 30 | ||||
-rw-r--r-- | www/wiki/includes/diff/DairikiDiff.php | 334 | ||||
-rw-r--r-- | www/wiki/includes/diff/DiffEngine.php | 841 | ||||
-rw-r--r-- | www/wiki/includes/diff/DiffFormatter.php | 254 | ||||
-rw-r--r-- | www/wiki/includes/diff/DifferenceEngine.php | 1558 | ||||
-rw-r--r-- | www/wiki/includes/diff/TableDiffFormatter.php | 215 | ||||
-rw-r--r-- | www/wiki/includes/diff/UnifiedDiffFormatter.php | 84 | ||||
-rw-r--r-- | www/wiki/includes/diff/WordAccumulator.php | 105 | ||||
-rw-r--r-- | www/wiki/includes/diff/WordLevelDiff.php | 139 |
10 files changed, 3642 insertions, 0 deletions
diff --git a/www/wiki/includes/diff/ArrayDiffFormatter.php b/www/wiki/includes/diff/ArrayDiffFormatter.php new file mode 100644 index 00000000..70a963ba --- /dev/null +++ b/www/wiki/includes/diff/ArrayDiffFormatter.php @@ -0,0 +1,82 @@ +<?php +/** + * Portions taken from phpwiki-1.3.3. + * + * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> + * You may copy this code freely under the conditions of the GPL. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + */ + +/** + * A pseudo-formatter that just passes along the Diff::$edits array + * @ingroup DifferenceEngine + */ +class ArrayDiffFormatter extends DiffFormatter { + + /** + * @param Diff $diff A Diff object. + * + * @return array[] List of associative arrays, each describing a difference. 
+ */ + public function format( $diff ) { + $oldline = 1; + $newline = 1; + $retval = []; + foreach ( $diff->getEdits() as $edit ) { + switch ( $edit->getType() ) { + case 'add': + foreach ( $edit->getClosing() as $line ) { + $retval[] = [ + 'action' => 'add', + 'new' => $line, + 'newline' => $newline++ + ]; + } + break; + case 'delete': + foreach ( $edit->getOrig() as $line ) { + $retval[] = [ + 'action' => 'delete', + 'old' => $line, + 'oldline' => $oldline++, + ]; + } + break; + case 'change': + foreach ( $edit->getOrig() as $key => $line ) { + $retval[] = [ + 'action' => 'change', + 'old' => $line, + 'new' => $edit->getClosing( $key ), + 'oldline' => $oldline++, + 'newline' => $newline++, + ]; + } + break; + case 'copy': + $oldline += count( $edit->getOrig() ); + $newline += count( $edit->getOrig() ); + } + } + + return $retval; + } + +} diff --git a/www/wiki/includes/diff/ComplexityException.php b/www/wiki/includes/diff/ComplexityException.php new file mode 100644 index 00000000..10ca964a --- /dev/null +++ b/www/wiki/includes/diff/ComplexityException.php @@ -0,0 +1,30 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + */ + +namespace MediaWiki\Diff; + +use Exception; + +class ComplexityException extends Exception { + public function __construct() { + parent::__construct( 'Diff is too complex to generate' ); + } +} diff --git a/www/wiki/includes/diff/DairikiDiff.php b/www/wiki/includes/diff/DairikiDiff.php new file mode 100644 index 00000000..d76af31a --- /dev/null +++ b/www/wiki/includes/diff/DairikiDiff.php @@ -0,0 +1,334 @@ +<?php +/** + * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) + * + * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> + * You may copy this code freely under the conditions of the GPL. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + * @defgroup DifferenceEngine DifferenceEngine + */ + +/** + * The base class for all other DiffOp classes. + * + * The classes that extend DiffOp are: DiffOpCopy, DiffOpDelete, DiffOpAdd and + * DiffOpChange. FakeDiffOp also extends DiffOp, but it is not located in this file. 
+ * + * @private + * @ingroup DifferenceEngine + */ +abstract class DiffOp { + + /** + * @var string + */ + public $type; + + /** + * @var string[] + */ + public $orig; + + /** + * @var string[] + */ + public $closing; + + /** + * @return string + */ + public function getType() { + return $this->type; + } + + /** + * @return string[] + */ + public function getOrig() { + return $this->orig; + } + + /** + * @param int $i + * @return string[]|string|null + */ + public function getClosing( $i = null ) { + if ( $i === null ) { + return $this->closing; + } + if ( array_key_exists( $i, $this->closing ) ) { + return $this->closing[$i]; + } + return null; + } + + abstract public function reverse(); + + /** + * @return int + */ + public function norig() { + return $this->orig ? count( $this->orig ) : 0; + } + + /** + * @return int + */ + public function nclosing() { + return $this->closing ? count( $this->closing ) : 0; + } +} + +/** + * Extends DiffOp. Used to mark strings that have been + * copied from one string array to the other. + * + * @private + * @ingroup DifferenceEngine + */ +class DiffOpCopy extends DiffOp { + public $type = 'copy'; + + public function __construct( $orig, $closing = false ) { + if ( !is_array( $closing ) ) { + $closing = $orig; + } + $this->orig = $orig; + $this->closing = $closing; + } + + /** + * @return DiffOpCopy + */ + public function reverse() { + return new DiffOpCopy( $this->closing, $this->orig ); + } +} + +/** + * Extends DiffOp. Used to mark strings that have been + * deleted from the first string array. + * + * @private + * @ingroup DifferenceEngine + */ +class DiffOpDelete extends DiffOp { + public $type = 'delete'; + + public function __construct( $lines ) { + $this->orig = $lines; + $this->closing = false; + } + + /** + * @return DiffOpAdd + */ + public function reverse() { + return new DiffOpAdd( $this->orig ); + } +} + +/** + * Extends DiffOp. Used to mark strings that have been + * added from the first string array. 
+ * + * @private + * @ingroup DifferenceEngine + */ +class DiffOpAdd extends DiffOp { + public $type = 'add'; + + public function __construct( $lines ) { + $this->closing = $lines; + $this->orig = false; + } + + /** + * @return DiffOpDelete + */ + public function reverse() { + return new DiffOpDelete( $this->closing ); + } +} + +/** + * Extends DiffOp. Used to mark strings that have been + * changed from the first string array (both added and subtracted). + * + * @private + * @ingroup DifferenceEngine + */ +class DiffOpChange extends DiffOp { + public $type = 'change'; + + public function __construct( $orig, $closing ) { + $this->orig = $orig; + $this->closing = $closing; + } + + /** + * @return DiffOpChange + */ + public function reverse() { + return new DiffOpChange( $this->closing, $this->orig ); + } +} + +/** + * Class representing a 'diff' between two sequences of strings. + * @todo document + * @private + * @ingroup DifferenceEngine + */ +class Diff { + + /** + * @var DiffOp[] + */ + public $edits; + + /** + * @var int If this diff complexity is exceeded, a ComplexityException is thrown + * 0 means no limit. + */ + protected $bailoutComplexity = 0; + + /** + * Computes diff between sequences of strings. + * + * @param string[] $from_lines An array of strings. + * Typically these are lines from a file. + * @param string[] $to_lines An array of strings. + * @throws \MediaWiki\Diff\ComplexityException + */ + public function __construct( $from_lines, $to_lines ) { + $eng = new DiffEngine; + $eng->setBailoutComplexity( $this->bailoutComplexity ); + $this->edits = $eng->diff( $from_lines, $to_lines ); + } + + /** + * @return DiffOp[] + */ + public function getEdits() { + return $this->edits; + } + + /** + * Compute reversed Diff. + * + * SYNOPSIS: + * + * $diff = new Diff($lines1, $lines2); + * $rev = $diff->reverse(); + * + * @return Object A Diff object representing the inverse of the + * original diff. 
+	 */
+	public function reverse() {
+		// Objects are handles, so a plain assignment would alias $this:
+		// resetting the alias' edit list would wipe $this->edits before the
+		// loop below could read it, yielding an empty diff and destroying
+		// the original. Clone so the receiver is left untouched.
+		$rev = clone $this;
+		$rev->edits = [];
+		/** @var DiffOp $edit */
+		foreach ( $this->edits as $edit ) {
+			// Each DiffOp::reverse() returns a new op, so nothing is shared
+			// between the original edit list and the reversed one.
+			$rev->edits[] = $edit->reverse();
+		}
+
+		return $rev;
+	}
+
+	/**
+	 * Check for empty diff.
+	 *
+	 * @return bool True if two sequences were identical.
+	 */
+	public function isEmpty() {
+		foreach ( $this->edits as $edit ) {
+			if ( $edit->type != 'copy' ) {
+				return false;
+			}
+		}
+
+		return true;
+	}
+
+	/**
+	 * Compute the length of the Longest Common Subsequence (LCS).
+	 *
+	 * This is mostly for diagnostic purposes.
+	 *
+	 * @return int The length of the LCS.
+	 */
+	public function lcs() {
+		$lcs = 0;
+		foreach ( $this->edits as $edit ) {
+			if ( $edit->type == 'copy' ) {
+				$lcs += count( $edit->orig );
+			}
+		}
+
+		return $lcs;
+	}
+
+	/**
+	 * Get the original set of lines.
+	 *
+	 * This reconstructs the $from_lines parameter passed to the
+	 * constructor.
+	 *
+	 * @return string[] The original sequence of strings.
+	 */
+	public function orig() {
+		$lines = [];
+
+		foreach ( $this->edits as $edit ) {
+			if ( $edit->orig ) {
+				array_splice( $lines, count( $lines ), 0, $edit->orig );
+			}
+		}
+
+		return $lines;
+	}
+
+	/**
+	 * Get the closing set of lines.
+	 *
+	 * This reconstructs the $to_lines parameter passed to the
+	 * constructor.
+	 *
+	 * @return string[] The sequence of strings.
+ */ + public function closing() { + $lines = []; + + foreach ( $this->edits as $edit ) { + if ( $edit->closing ) { + array_splice( $lines, count( $lines ), 0, $edit->closing ); + } + } + + return $lines; + } +} + +/** + * @deprecated Alias for WordAccumulator, to be soon removed + */ +class HWLDFWordAccumulator extends MediaWiki\Diff\WordAccumulator { +} diff --git a/www/wiki/includes/diff/DiffEngine.php b/www/wiki/includes/diff/DiffEngine.php new file mode 100644 index 00000000..53378e58 --- /dev/null +++ b/www/wiki/includes/diff/DiffEngine.php @@ -0,0 +1,841 @@ +<?php +/** + * New version of the difference engine + * + * Copyright © 2008 Guy Van den Broeck <guy@guyvdb.eu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + */ +use MediaWiki\Diff\ComplexityException; + +/** + * This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which + * in turn is based on Myers' "An O(ND) difference algorithm and its variations" + * (http://citeseer.ist.psu.edu/myers86ond.html) with range compression (see Wu et al.'s + * "An O(NP) Sequence Comparison Algorithm"). + * + * This implementation supports an upper bound on the execution time. 
+ * + * Some ideas (and a bit of code) are from analyze.c, from GNU + * diffutils-2.7, which can be found at: + * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz + * + * Complexity: O((M + N)D) worst case time, O(M + N + D^2) expected time, O(M + N) space + * + * @author Guy Van den Broeck, Geoffrey T. Dairiki, Tim Starling + * @ingroup DifferenceEngine + */ +class DiffEngine { + + // Input variables + private $from; + private $to; + private $m; + private $n; + + private $tooLong; + private $powLimit; + + protected $bailoutComplexity = 0; + + // State variables + private $maxDifferences; + private $lcsLengthCorrectedForHeuristic = false; + + // Output variables + public $length; + public $removed; + public $added; + public $heuristicUsed; + + function __construct( $tooLong = 2000000, $powLimit = 1.45 ) { + $this->tooLong = $tooLong; + $this->powLimit = $powLimit; + } + + /** + * Performs diff + * + * @param string[] $from_lines + * @param string[] $to_lines + * @throws ComplexityException + * + * @return DiffOp[] + */ + public function diff( $from_lines, $to_lines ) { + // Diff and store locally + $this->diffInternal( $from_lines, $to_lines ); + + // Merge edits when possible + $this->shiftBoundaries( $from_lines, $this->removed, $this->added ); + $this->shiftBoundaries( $to_lines, $this->added, $this->removed ); + + // Compute the edit operations. + $n_from = count( $from_lines ); + $n_to = count( $to_lines ); + + $edits = []; + $xi = $yi = 0; + while ( $xi < $n_from || $yi < $n_to ) { + assert( $yi < $n_to || $this->removed[$xi] ); + assert( $xi < $n_from || $this->added[$yi] ); + + // Skip matching "snake". + $copy = []; + while ( $xi < $n_from && $yi < $n_to + && !$this->removed[$xi] && !$this->added[$yi] + ) { + $copy[] = $from_lines[$xi++]; + ++$yi; + } + if ( $copy ) { + $edits[] = new DiffOpCopy( $copy ); + } + + // Find deletes & adds. 
+ $delete = []; + while ( $xi < $n_from && $this->removed[$xi] ) { + $delete[] = $from_lines[$xi++]; + } + + $add = []; + while ( $yi < $n_to && $this->added[$yi] ) { + $add[] = $to_lines[$yi++]; + } + + if ( $delete && $add ) { + $edits[] = new DiffOpChange( $delete, $add ); + } elseif ( $delete ) { + $edits[] = new DiffOpDelete( $delete ); + } elseif ( $add ) { + $edits[] = new DiffOpAdd( $add ); + } + } + + return $edits; + } + + /** + * Sets the complexity (in comparison operations) that can't be exceeded + * @param int $value + */ + public function setBailoutComplexity( $value ) { + $this->bailoutComplexity = $value; + } + + /** + * Adjust inserts/deletes of identical lines to join changes + * as much as possible. + * + * We do something when a run of changed lines include a + * line at one end and has an excluded, identical line at the other. + * We are free to choose which identical line is included. + * `compareseq' usually chooses the one at the beginning, + * but usually it is cleaner to consider the following identical line + * to be the "change". + * + * This is extracted verbatim from analyze.c (GNU diffutils-2.7). + * + * @param string[] $lines + * @param string[] $changed + * @param string[] $other_changed + */ + private function shiftBoundaries( array $lines, array &$changed, array $other_changed ) { + $i = 0; + $j = 0; + + assert( count( $lines ) == count( $changed ) ); + $len = count( $lines ); + $other_len = count( $other_changed ); + + while ( 1 ) { + /* + * Scan forwards to find beginning of another run of changes. + * Also keep track of the corresponding point in the other file. + * + * Throughout this code, $i and $j are adjusted together so that + * the first $i elements of $changed and the first $j elements + * of $other_changed both contain the same number of zeros + * (unchanged lines). + * Furthermore, $j is always kept so that $j == $other_len or + * $other_changed[$j] == false. 
+ */ + while ( $j < $other_len && $other_changed[$j] ) { + $j++; + } + + while ( $i < $len && !$changed[$i] ) { + assert( $j < $other_len && !$other_changed[$j] ); + $i++; + $j++; + while ( $j < $other_len && $other_changed[$j] ) { + $j++; + } + } + + if ( $i == $len ) { + break; + } + + $start = $i; + + // Find the end of this run of changes. + while ( ++$i < $len && $changed[$i] ) { + continue; + } + + do { + /* + * Record the length of this run of changes, so that + * we can later determine whether the run has grown. + */ + $runlength = $i - $start; + + /* + * Move the changed region back, so long as the + * previous unchanged line matches the last changed one. + * This merges with previous changed regions. + */ + while ( $start > 0 && $lines[$start - 1] == $lines[$i - 1] ) { + $changed[--$start] = 1; + $changed[--$i] = false; + while ( $start > 0 && $changed[$start - 1] ) { + $start--; + } + assert( $j > 0 ); + while ( $other_changed[--$j] ) { + continue; + } + assert( $j >= 0 && !$other_changed[$j] ); + } + + /* + * Set CORRESPONDING to the end of the changed run, at the last + * point where it corresponds to a changed run in the other file. + * CORRESPONDING == LEN means no such point has been found. + */ + $corresponding = $j < $other_len ? $i : $len; + + /* + * Move the changed region forward, so long as the + * first changed line matches the following unchanged one. + * This merges with following changed regions. + * Do this second, so that if there are no merges, + * the changed region is moved forward as far as possible. 
+ */ + while ( $i < $len && $lines[$start] == $lines[$i] ) { + $changed[$start++] = false; + $changed[$i++] = 1; + while ( $i < $len && $changed[$i] ) { + $i++; + } + + assert( $j < $other_len && !$other_changed[$j] ); + $j++; + if ( $j < $other_len && $other_changed[$j] ) { + $corresponding = $i; + while ( $j < $other_len && $other_changed[$j] ) { + $j++; + } + } + } + } while ( $runlength != $i - $start ); + + /* + * If possible, move the fully-merged run of changes + * back to a corresponding run in the other file. + */ + while ( $corresponding < $i ) { + $changed[--$start] = 1; + $changed[--$i] = 0; + assert( $j > 0 ); + while ( $other_changed[--$j] ) { + continue; + } + assert( $j >= 0 && !$other_changed[$j] ); + } + } + } + + /** + * @param string[] $from + * @param string[] $to + * @throws ComplexityException + */ + protected function diffInternal( array $from, array $to ) { + // remember initial lengths + $m = count( $from ); + $n = count( $to ); + + $this->heuristicUsed = false; + + // output + $removed = $m > 0 ? array_fill( 0, $m, true ) : []; + $added = $n > 0 ? 
array_fill( 0, $n, true ) : []; + + // reduce the complexity for the next step (intentionally done twice) + // remove common tokens at the start + $i = 0; + while ( $i < $m && $i < $n && $from[$i] === $to[$i] ) { + $removed[$i] = $added[$i] = false; + unset( $from[$i], $to[$i] ); + ++$i; + } + + // remove common tokens at the end + $j = 1; + while ( $i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j] ) { + $removed[$m - $j] = $added[$n - $j] = false; + unset( $from[$m - $j], $to[$n - $j] ); + ++$j; + } + + $this->from = $newFromIndex = $this->to = $newToIndex = []; + + // remove tokens not in both sequences + $shared = []; + foreach ( $from as $key ) { + $shared[$key] = false; + } + + foreach ( $to as $index => &$el ) { + if ( array_key_exists( $el, $shared ) ) { + // keep it + $this->to[] = $el; + $shared[$el] = true; + $newToIndex[] = $index; + } + } + foreach ( $from as $index => &$el ) { + if ( $shared[$el] ) { + // keep it + $this->from[] = $el; + $newFromIndex[] = $index; + } + } + + unset( $shared, $from, $to ); + + $this->m = count( $this->from ); + $this->n = count( $this->to ); + + if ( $this->bailoutComplexity > 0 && $this->m * $this->n > $this->bailoutComplexity ) { + throw new ComplexityException(); + } + + $this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : []; + $this->added = $this->n > 0 ? 
array_fill( 0, $this->n, true ) : []; + + if ( $this->m == 0 || $this->n == 0 ) { + $this->length = 0; + } else { + $this->maxDifferences = ceil( ( $this->m + $this->n ) / 2.0 ); + if ( $this->m * $this->n > $this->tooLong ) { + // limit complexity to D^POW_LIMIT for long sequences + $this->maxDifferences = floor( pow( $this->maxDifferences, $this->powLimit - 1.0 ) ); + wfDebug( "Limiting max number of differences to $this->maxDifferences\n" ); + } + + /* + * The common prefixes and suffixes are always part of some LCS, include + * them now to reduce our search space + */ + $max = min( $this->m, $this->n ); + for ( $forwardBound = 0; $forwardBound < $max + && $this->from[$forwardBound] === $this->to[$forwardBound]; + ++$forwardBound + ) { + $this->removed[$forwardBound] = $this->added[$forwardBound] = false; + } + + $backBoundL1 = $this->m - 1; + $backBoundL2 = $this->n - 1; + + while ( $backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound + && $this->from[$backBoundL1] === $this->to[$backBoundL2] + ) { + $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false; + } + + $temp = array_fill( 0, $this->m + $this->n + 1, 0 ); + $V = [ $temp, $temp ]; + $snake = [ 0, 0, 0 ]; + + $this->length = $forwardBound + $this->m - $backBoundL1 - 1 + + $this->lcs_rec( + $forwardBound, + $backBoundL1, + $forwardBound, + $backBoundL2, + $V, + $snake + ); + } + + $this->m = $m; + $this->n = $n; + + $this->length += $i + $j - 1; + + foreach ( $this->removed as $key => &$removed_elem ) { + if ( !$removed_elem ) { + $removed[$newFromIndex[$key]] = false; + } + } + foreach ( $this->added as $key => &$added_elem ) { + if ( !$added_elem ) { + $added[$newToIndex[$key]] = false; + } + } + $this->removed = $removed; + $this->added = $added; + } + + function diff_range( $from_lines, $to_lines ) { + // Diff and store locally + $this->diff( $from_lines, $to_lines ); + unset( $from_lines, $to_lines ); + + $ranges = []; + $xi = $yi = 0; + while ( $xi < $this->m || $yi < 
$this->n ) { + // Matching "snake". + while ( $xi < $this->m && $yi < $this->n + && !$this->removed[$xi] + && !$this->added[$yi] + ) { + ++$xi; + ++$yi; + } + // Find deletes & adds. + $xstart = $xi; + while ( $xi < $this->m && $this->removed[$xi] ) { + ++$xi; + } + + $ystart = $yi; + while ( $yi < $this->n && $this->added[$yi] ) { + ++$yi; + } + + if ( $xi > $xstart || $yi > $ystart ) { + $ranges[] = new RangeDifference( $xstart, $xi, $ystart, $yi ); + } + } + + return $ranges; + } + + private function lcs_rec( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { + // check that both sequences are non-empty + if ( $bottoml1 > $topl1 || $bottoml2 > $topl2 ) { + return 0; + } + + $d = $this->find_middle_snake( $bottoml1, $topl1, $bottoml2, + $topl2, $V, $snake ); + + // need to store these so we don't lose them when they're + // overwritten by the recursion + $len = $snake[2]; + $startx = $snake[0]; + $starty = $snake[1]; + + // the middle snake is part of the LCS, store it + for ( $i = 0; $i < $len; ++$i ) { + $this->removed[$startx + $i] = $this->added[$starty + $i] = false; + } + + if ( $d > 1 ) { + return $len + + $this->lcs_rec( $bottoml1, $startx - 1, $bottoml2, + $starty - 1, $V, $snake ) + + $this->lcs_rec( $startx + $len, $topl1, $starty + $len, + $topl2, $V, $snake ); + } elseif ( $d == 1 ) { + /* + * In this case the sequences differ by exactly 1 line. We have + * already saved all the lines after the difference in the for loop + * above, now we need to save all the lines before the difference. 
+ */ + $max = min( $startx - $bottoml1, $starty - $bottoml2 ); + for ( $i = 0; $i < $max; ++$i ) { + $this->removed[$bottoml1 + $i] = + $this->added[$bottoml2 + $i] = false; + } + + return $max + $len; + } + + return $len; + } + + private function find_middle_snake( $bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake ) { + $from = &$this->from; + $to = &$this->to; + $V0 = &$V[0]; + $V1 = &$V[1]; + $snake0 = &$snake[0]; + $snake1 = &$snake[1]; + $snake2 = &$snake[2]; + $bottoml1_min_1 = $bottoml1 - 1; + $bottoml2_min_1 = $bottoml2 - 1; + $N = $topl1 - $bottoml1_min_1; + $M = $topl2 - $bottoml2_min_1; + $delta = $N - $M; + $maxabsx = $N + $bottoml1; + $maxabsy = $M + $bottoml2; + $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); + + // value_to_add_forward: a 0 or 1 that we add to the start + // offset to make it odd/even + if ( ( $M & 1 ) == 1 ) { + $value_to_add_forward = 1; + } else { + $value_to_add_forward = 0; + } + + if ( ( $N & 1 ) == 1 ) { + $value_to_add_backward = 1; + } else { + $value_to_add_backward = 0; + } + + $start_forward = -$M; + $end_forward = $N; + $start_backward = -$N; + $end_backward = $M; + + $limit_min_1 = $limit - 1; + $limit_plus_1 = $limit + 1; + + $V0[$limit_plus_1] = 0; + $V1[$limit_min_1] = $N; + $limit = min( $this->maxDifferences, ceil( ( $N + $M ) / 2 ) ); + + if ( ( $delta & 1 ) == 1 ) { + for ( $d = 0; $d <= $limit; ++$d ) { + $start_diag = max( $value_to_add_forward + $start_forward, -$d ); + $end_diag = min( $end_forward, $d ); + $value_to_add_forward = 1 - $value_to_add_forward; + + // compute forward furthest reaching paths + for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { + if ( $k == -$d || ( $k < $d + && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) + ) { + $x = $V0[$limit_plus_1 + $k]; + } else { + $x = $V0[$limit_min_1 + $k] + 1; + } + + $absx = $snake0 = $x + $bottoml1; + $absy = $snake1 = $x - $k + $bottoml2; + + while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { + 
++$absx; + ++$absy; + } + $x = $absx - $bottoml1; + + $snake2 = $absx - $snake0; + $V0[$limit + $k] = $x; + if ( $k >= $delta - $d + 1 && $k <= $delta + $d - 1 + && $x >= $V1[$limit + $k - $delta] + ) { + return 2 * $d - 1; + } + + // check to see if we can cut down the diagonal range + if ( $x >= $N && $end_forward > $k - 1 ) { + $end_forward = $k - 1; + } elseif ( $absy - $bottoml2 >= $M ) { + $start_forward = $k + 1; + $value_to_add_forward = 0; + } + } + + $start_diag = max( $value_to_add_backward + $start_backward, -$d ); + $end_diag = min( $end_backward, $d ); + $value_to_add_backward = 1 - $value_to_add_backward; + + // compute backward furthest reaching paths + for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { + if ( $k == $d + || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) + ) { + $x = $V1[$limit_min_1 + $k]; + } else { + $x = $V1[$limit_plus_1 + $k] - 1; + } + + $y = $x - $k - $delta; + + $snake2 = 0; + while ( $x > 0 && $y > 0 + && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] + ) { + --$x; + --$y; + ++$snake2; + } + $V1[$limit + $k] = $x; + + // check to see if we can cut down our diagonal range + if ( $x <= 0 ) { + $start_backward = $k + 1; + $value_to_add_backward = 0; + } elseif ( $y <= 0 && $end_backward > $k - 1 ) { + $end_backward = $k - 1; + } + } + } + } else { + for ( $d = 0; $d <= $limit; ++$d ) { + $start_diag = max( $value_to_add_forward + $start_forward, -$d ); + $end_diag = min( $end_forward, $d ); + $value_to_add_forward = 1 - $value_to_add_forward; + + // compute forward furthest reaching paths + for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { + if ( $k == -$d + || ( $k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k] ) + ) { + $x = $V0[$limit_plus_1 + $k]; + } else { + $x = $V0[$limit_min_1 + $k] + 1; + } + + $absx = $snake0 = $x + $bottoml1; + $absy = $snake1 = $x - $k + $bottoml2; + + while ( $absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy] ) { + ++$absx; + ++$absy; 
+ } + $x = $absx - $bottoml1; + $snake2 = $absx - $snake0; + $V0[$limit + $k] = $x; + + // check to see if we can cut down the diagonal range + if ( $x >= $N && $end_forward > $k - 1 ) { + $end_forward = $k - 1; + } elseif ( $absy - $bottoml2 >= $M ) { + $start_forward = $k + 1; + $value_to_add_forward = 0; + } + } + + $start_diag = max( $value_to_add_backward + $start_backward, -$d ); + $end_diag = min( $end_backward, $d ); + $value_to_add_backward = 1 - $value_to_add_backward; + + // compute backward furthest reaching paths + for ( $k = $start_diag; $k <= $end_diag; $k += 2 ) { + if ( $k == $d + || ( $k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k] ) + ) { + $x = $V1[$limit_min_1 + $k]; + } else { + $x = $V1[$limit_plus_1 + $k] - 1; + } + + $y = $x - $k - $delta; + + $snake2 = 0; + while ( $x > 0 && $y > 0 + && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1] + ) { + --$x; + --$y; + ++$snake2; + } + $V1[$limit + $k] = $x; + + if ( $k >= -$delta - $d && $k <= $d - $delta + && $x <= $V0[$limit + $k + $delta] + ) { + $snake0 = $bottoml1 + $x; + $snake1 = $bottoml2 + $y; + + return 2 * $d; + } + + // check to see if we can cut down our diagonal range + if ( $x <= 0 ) { + $start_backward = $k + 1; + $value_to_add_backward = 0; + } elseif ( $y <= 0 && $end_backward > $k - 1 ) { + $end_backward = $k - 1; + } + } + } + } + /* + * computing the true LCS is too expensive, instead find the diagonal + * with the most progress and pretend a midle snake of length 0 occurs + * there. + */ + + $most_progress = self::findMostProgress( $M, $N, $limit, $V ); + + $snake0 = $bottoml1 + $most_progress[0]; + $snake1 = $bottoml2 + $most_progress[1]; + $snake2 = 0; + wfDebug( "Computing the LCS is too expensive. Using a heuristic.\n" ); + $this->heuristicUsed = true; + + return 5; /* + * HACK: since we didn't really finish the LCS computation + * we don't really know the length of the SES. We don't do + * anything with the result anyway, unless it's <=1. 
We know + * for a fact SES > 1 so 5 is as good a number as any to + * return here + */ + } + + private static function findMostProgress( $M, $N, $limit, $V ) { + $delta = $N - $M; + + if ( ( $M & 1 ) == ( $limit & 1 ) ) { + $forward_start_diag = max( -$M, -$limit ); + } else { + $forward_start_diag = max( 1 - $M, -$limit ); + } + + $forward_end_diag = min( $N, $limit ); + + if ( ( $N & 1 ) == ( $limit & 1 ) ) { + $backward_start_diag = max( -$N, -$limit ); + } else { + $backward_start_diag = max( 1 - $N, -$limit ); + } + + $backward_end_diag = -min( $M, $limit ); + + $temp = [ 0, 0, 0 ]; + + $max_progress = array_fill( 0, ceil( max( $forward_end_diag - $forward_start_diag, + $backward_end_diag - $backward_start_diag ) / 2 ), $temp ); + $num_progress = 0; // the 1st entry is current, it is initialized + // with 0s + + // first search the forward diagonals + for ( $k = $forward_start_diag; $k <= $forward_end_diag; $k += 2 ) { + $x = $V[0][$limit + $k]; + $y = $x - $k; + if ( $x > $N || $y > $M ) { + continue; + } + + $progress = $x + $y; + if ( $progress > $max_progress[0][2] ) { + $num_progress = 0; + $max_progress[0][0] = $x; + $max_progress[0][1] = $y; + $max_progress[0][2] = $progress; + } elseif ( $progress == $max_progress[0][2] ) { + ++$num_progress; + $max_progress[$num_progress][0] = $x; + $max_progress[$num_progress][1] = $y; + $max_progress[$num_progress][2] = $progress; + } + } + + $max_progress_forward = true; // initially the maximum + // progress is in the forward + // direction + + // now search the backward diagonals + for ( $k = $backward_start_diag; $k <= $backward_end_diag; $k += 2 ) { + $x = $V[1][$limit + $k]; + $y = $x - $k - $delta; + if ( $x < 0 || $y < 0 ) { + continue; + } + + $progress = $N - $x + $M - $y; + if ( $progress > $max_progress[0][2] ) { + $num_progress = 0; + $max_progress_forward = false; + $max_progress[0][0] = $x; + $max_progress[0][1] = $y; + $max_progress[0][2] = $progress; + } elseif ( $progress == $max_progress[0][2] && 
!$max_progress_forward ) { + ++$num_progress; + $max_progress[$num_progress][0] = $x; + $max_progress[$num_progress][1] = $y; + $max_progress[$num_progress][2] = $progress; + } + } + + // return the middle diagonal with maximal progress. + return $max_progress[(int)floor( $num_progress / 2 )]; + } + + /** + * @return mixed + */ + public function getLcsLength() { + if ( $this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic ) { + $this->lcsLengthCorrectedForHeuristic = true; + $this->length = $this->m - array_sum( $this->added ); + } + + return $this->length; + } + +} + +/** + * Alternative representation of a set of changes, by the index + * ranges that are changed. + * + * @ingroup DifferenceEngine + */ +class RangeDifference { + + /** @var int */ + public $leftstart; + + /** @var int */ + public $leftend; + + /** @var int */ + public $leftlength; + + /** @var int */ + public $rightstart; + + /** @var int */ + public $rightend; + + /** @var int */ + public $rightlength; + + function __construct( $leftstart, $leftend, $rightstart, $rightend ) { + $this->leftstart = $leftstart; + $this->leftend = $leftend; + $this->leftlength = $leftend - $leftstart; + $this->rightstart = $rightstart; + $this->rightend = $rightend; + $this->rightlength = $rightend - $rightstart; + } + +} diff --git a/www/wiki/includes/diff/DiffFormatter.php b/www/wiki/includes/diff/DiffFormatter.php new file mode 100644 index 00000000..07124c02 --- /dev/null +++ b/www/wiki/includes/diff/DiffFormatter.php @@ -0,0 +1,254 @@ +<?php +/** + * Base for diff rendering classes. Portions taken from phpwiki-1.3.3. + * + * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> + * You may copy this code freely under the conditions of the GPL. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + */ + +/** + * Base class for diff formatters + * + * This class formats the diff in classic diff format. + * It is intended that this class be customized via inheritance, + * to obtain fancier outputs. + * @todo document + * @ingroup DifferenceEngine + */ +abstract class DiffFormatter { + + /** @var int Number of leading context "lines" to preserve. + * + * This should be left at zero for this class, but subclasses + * may want to set this to other values. + */ + protected $leadingContextLines = 0; + + /** @var int Number of trailing context "lines" to preserve. + * + * This should be left at zero for this class, but subclasses + * may want to set this to other values. + */ + protected $trailingContextLines = 0; + + /** @var string The output buffer; holds the output while it is built. */ + private $result = ''; + + /** + * Format a diff. + * + * @param Diff $diff + * + * @return string The formatted output. 
+ */ + public function format( $diff ) { + $xi = $yi = 1; + $block = false; + $context = []; + + $nlead = $this->leadingContextLines; + $ntrail = $this->trailingContextLines; + + $this->startDiff(); + + // Initialize $x0 and $y0 to prevent IDEs from getting confused. + $x0 = $y0 = 0; + foreach ( $diff->edits as $edit ) { + if ( $edit->type == 'copy' ) { + if ( is_array( $block ) ) { + if ( count( $edit->orig ) <= $nlead + $ntrail ) { + $block[] = $edit; + } else { + if ( $ntrail ) { + $context = array_slice( $edit->orig, 0, $ntrail ); + $block[] = new DiffOpCopy( $context ); + } + $this->block( $x0, $ntrail + $xi - $x0, + $y0, $ntrail + $yi - $y0, + $block ); + $block = false; + } + } + $context = $edit->orig; + } else { + if ( !is_array( $block ) ) { + $context = array_slice( $context, count( $context ) - $nlead ); + $x0 = $xi - count( $context ); + $y0 = $yi - count( $context ); + $block = []; + if ( $context ) { + $block[] = new DiffOpCopy( $context ); + } + } + $block[] = $edit; + } + + if ( $edit->orig ) { + $xi += count( $edit->orig ); + } + if ( $edit->closing ) { + $yi += count( $edit->closing ); + } + } + + if ( is_array( $block ) ) { + $this->block( $x0, $xi - $x0, + $y0, $yi - $y0, + $block ); + } + + $end = $this->endDiff(); + + return $end; + } + + /** + * @param int $xbeg + * @param int $xlen + * @param int $ybeg + * @param int $ylen + * @param array &$edits + * + * @throws MWException If the edit type is not known. 
+ */ + protected function block( $xbeg, $xlen, $ybeg, $ylen, &$edits ) { + $this->startBlock( $this->blockHeader( $xbeg, $xlen, $ybeg, $ylen ) ); + foreach ( $edits as $edit ) { + if ( $edit->type == 'copy' ) { + $this->context( $edit->orig ); + } elseif ( $edit->type == 'add' ) { + $this->added( $edit->closing ); + } elseif ( $edit->type == 'delete' ) { + $this->deleted( $edit->orig ); + } elseif ( $edit->type == 'change' ) { + $this->changed( $edit->orig, $edit->closing ); + } else { + throw new MWException( "Unknown edit type: {$edit->type}" ); + } + } + $this->endBlock(); + } + + protected function startDiff() { + $this->result = ''; + } + + /** + * Writes a string to the output buffer. + * + * @param string $text + */ + protected function writeOutput( $text ) { + $this->result .= $text; + } + + /** + * @return string + */ + protected function endDiff() { + $val = $this->result; + $this->result = ''; + + return $val; + } + + /** + * @param int $xbeg + * @param int $xlen + * @param int $ybeg + * @param int $ylen + * + * @return string + */ + protected function blockHeader( $xbeg, $xlen, $ybeg, $ylen ) { + if ( $xlen > 1 ) { + $xbeg .= ',' . ( $xbeg + $xlen - 1 ); + } + if ( $ylen > 1 ) { + $ybeg .= ',' . ( $ybeg + $ylen - 1 ); + } + + return $xbeg . ( $xlen ? ( $ylen ? 'c' : 'd' ) : 'a' ) . $ybeg; + } + + /** + * Called at the start of a block of connected edits. + * This default implementation writes the header and a newline to the output buffer. + * + * @param string $header + */ + protected function startBlock( $header ) { + $this->writeOutput( $header . "\n" ); + } + + /** + * Called at the end of a block of connected edits. + * This default implementation does nothing. + */ + protected function endBlock() { + } + + /** + * Writes all (optionally prefixed) lines to the output buffer, separated by newlines. 
+ * + * @param string[] $lines + * @param string $prefix + */ + protected function lines( $lines, $prefix = ' ' ) { + foreach ( $lines as $line ) { + $this->writeOutput( "$prefix $line\n" ); + } + } + + /** + * @param string[] $lines + */ + protected function context( $lines ) { + $this->lines( $lines ); + } + + /** + * @param string[] $lines + */ + protected function added( $lines ) { + $this->lines( $lines, '>' ); + } + + /** + * @param string[] $lines + */ + protected function deleted( $lines ) { + $this->lines( $lines, '<' ); + } + + /** + * Writes the two sets of lines to the output buffer, separated by "---" and a newline. + * + * @param string[] $orig + * @param string[] $closing + */ + protected function changed( $orig, $closing ) { + $this->deleted( $orig ); + $this->writeOutput( "---\n" ); + $this->added( $closing ); + } + +} diff --git a/www/wiki/includes/diff/DifferenceEngine.php b/www/wiki/includes/diff/DifferenceEngine.php new file mode 100644 index 00000000..8f57c578 --- /dev/null +++ b/www/wiki/includes/diff/DifferenceEngine.php @@ -0,0 +1,1558 @@ +<?php +/** + * User interface for the difference engine. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup DifferenceEngine + */ +use MediaWiki\MediaWikiServices; +use MediaWiki\Shell\Shell; + +/** + * @todo document + * @ingroup DifferenceEngine + */ +class DifferenceEngine extends ContextSource { + /** + * Constant to indicate diff cache compatibility. + * Bump this when changing the diff formatting in a way that + * fixes important bugs or such to force cached diff views to + * clear. + */ + const DIFF_VERSION = '1.12'; + + /** @var int */ + public $mOldid; + + /** @var int */ + public $mNewid; + + private $mOldTags; + private $mNewTags; + + /** @var Content */ + public $mOldContent; + + /** @var Content */ + public $mNewContent; + + /** @var Language */ + protected $mDiffLang; + + /** @var Title */ + public $mOldPage; + + /** @var Title */ + public $mNewPage; + + /** @var Revision */ + public $mOldRev; + + /** @var Revision */ + public $mNewRev; + + /** @var bool Have the revisions IDs been loaded */ + private $mRevisionsIdsLoaded = false; + + /** @var bool Have the revisions been loaded */ + public $mRevisionsLoaded = false; + + /** @var int How many text blobs have been loaded, 0, 1 or 2? */ + public $mTextLoaded = 0; + + /** @var bool Was the diff fetched from cache? */ + public $mCacheHit = false; + + /** + * Set this to true to add debug info to the HTML output. + * Warning: this may cause RSS readers to spuriously mark articles as "new" + * (T22601) + */ + public $enableDebugComment = false; + + /** @var bool If true, line X is not displayed when X is 1, for example + * to increase readability and conserve space with many small diffs. 
+ */ + protected $mReducedLineNumbers = false; + + /** @var string Link to action=markpatrolled */ + protected $mMarkPatrolledLink = null; + + /** @var bool Show rev_deleted content if allowed */ + protected $unhide = false; + + /** @var bool Refresh the diff cache */ + protected $mRefreshCache = false; + + /**#@-*/ + + /** + * @param IContextSource $context Context to use, anything else will be ignored + * @param int $old Old ID we want to show and diff with. + * @param string|int $new Either revision ID or 'prev' or 'next'. Default: 0. + * @param int $rcid Deprecated, no longer used! + * @param bool $refreshCache If set, refreshes the diff cache + * @param bool $unhide If set, allow viewing deleted revs + */ + public function __construct( $context = null, $old = 0, $new = 0, $rcid = 0, + $refreshCache = false, $unhide = false + ) { + if ( $context instanceof IContextSource ) { + $this->setContext( $context ); + } + + wfDebug( "DifferenceEngine old '$old' new '$new' rcid '$rcid'\n" ); + + $this->mOldid = $old; + $this->mNewid = $new; + $this->mRefreshCache = $refreshCache; + $this->unhide = $unhide; + } + + /** + * @param bool $value + */ + public function setReducedLineNumbers( $value = true ) { + $this->mReducedLineNumbers = $value; + } + + /** + * @return Language + */ + public function getDiffLang() { + if ( $this->mDiffLang === null ) { + # Default language in which the diff text is written. 
+ $this->mDiffLang = $this->getTitle()->getPageLanguage(); + } + + return $this->mDiffLang; + } + + /** + * @return bool + */ + public function wasCacheHit() { + return $this->mCacheHit; + } + + /** + * @return int + */ + public function getOldid() { + $this->loadRevisionIds(); + + return $this->mOldid; + } + + /** + * @return bool|int + */ + public function getNewid() { + $this->loadRevisionIds(); + + return $this->mNewid; + } + + /** + * Look up a special:Undelete link to the given deleted revision id, + * as a workaround for being unable to load deleted diffs in currently. + * + * @param int $id Revision ID + * + * @return string|bool Link HTML or false + */ + public function deletedLink( $id ) { + if ( $this->getUser()->isAllowed( 'deletedhistory' ) ) { + $dbr = wfGetDB( DB_REPLICA ); + $arQuery = Revision::getArchiveQueryInfo(); + $row = $dbr->selectRow( + $arQuery['tables'], + array_merge( $arQuery['fields'], [ 'ar_namespace', 'ar_title' ] ), + [ 'ar_rev_id' => $id ], + __METHOD__, + [], + $arQuery['joins'] + ); + if ( $row ) { + $rev = Revision::newFromArchiveRow( $row ); + $title = Title::makeTitleSafe( $row->ar_namespace, $row->ar_title ); + + return SpecialPage::getTitleFor( 'Undelete' )->getFullURL( [ + 'target' => $title->getPrefixedText(), + 'timestamp' => $rev->getTimestamp() + ] ); + } + } + + return false; + } + + /** + * Build a wikitext link toward a deleted revision, if viewable. 
+ * + * @param int $id Revision ID + * + * @return string Wikitext fragment + */ + public function deletedIdMarker( $id ) { + $link = $this->deletedLink( $id ); + if ( $link ) { + return "[$link $id]"; + } else { + return (string)$id; + } + } + + private function showMissingRevision() { + $out = $this->getOutput(); + + $missing = []; + if ( $this->mOldRev === null || + ( $this->mOldRev && $this->mOldContent === null ) + ) { + $missing[] = $this->deletedIdMarker( $this->mOldid ); + } + if ( $this->mNewRev === null || + ( $this->mNewRev && $this->mNewContent === null ) + ) { + $missing[] = $this->deletedIdMarker( $this->mNewid ); + } + + $out->setPageTitle( $this->msg( 'errorpagetitle' ) ); + $msg = $this->msg( 'difference-missing-revision' ) + ->params( $this->getLanguage()->listToText( $missing ) ) + ->numParams( count( $missing ) ) + ->parseAsBlock(); + $out->addHTML( $msg ); + } + + public function showDiffPage( $diffOnly = false ) { + # Allow frames except in certain special cases + $out = $this->getOutput(); + $out->allowClickjacking(); + $out->setRobotPolicy( 'noindex,nofollow' ); + + // Allow extensions to add any extra output here + Hooks::run( 'DifferenceEngineShowDiffPage', [ $out ] ); + + if ( !$this->loadRevisionData() ) { + if ( Hooks::run( 'DifferenceEngineShowDiffPageMaybeShowMissingRevision', [ $this ] ) ) { + $this->showMissingRevision(); + } + return; + } + + $user = $this->getUser(); + $permErrors = $this->mNewPage->getUserPermissionsErrors( 'read', $user ); + if ( $this->mOldPage ) { # mOldPage might not be set, see below. 
+ $permErrors = wfMergeErrorArrays( $permErrors, + $this->mOldPage->getUserPermissionsErrors( 'read', $user ) ); + } + if ( count( $permErrors ) ) { + throw new PermissionsError( 'read', $permErrors ); + } + + $rollback = ''; + + $query = []; + # Carry over 'diffonly' param via navigation links + if ( $diffOnly != $user->getBoolOption( 'diffonly' ) ) { + $query['diffonly'] = $diffOnly; + } + # Cascade unhide param in links for easy deletion browsing + if ( $this->unhide ) { + $query['unhide'] = 1; + } + + # Check if one of the revisions is deleted/suppressed + $deleted = $suppressed = false; + $allowed = $this->mNewRev->userCan( Revision::DELETED_TEXT, $user ); + + $revisionTools = []; + + # mOldRev is false if the difference engine is called with a "vague" query for + # a diff between a version V and its previous version V' AND the version V + # is the first version of that article. In that case, V' does not exist. + if ( $this->mOldRev === false ) { + $out->setPageTitle( $this->msg( 'difference-title', $this->mNewPage->getPrefixedText() ) ); + $samePage = true; + $oldHeader = ''; + // Allow extensions to change the $oldHeader variable + Hooks::run( 'DifferenceEngineOldHeaderNoOldRev', [ &$oldHeader ] ); + } else { + Hooks::run( 'DiffViewHeader', [ $this, $this->mOldRev, $this->mNewRev ] ); + + if ( $this->mNewPage->equals( $this->mOldPage ) ) { + $out->setPageTitle( $this->msg( 'difference-title', $this->mNewPage->getPrefixedText() ) ); + $samePage = true; + } else { + $out->setPageTitle( $this->msg( 'difference-title-multipage', + $this->mOldPage->getPrefixedText(), $this->mNewPage->getPrefixedText() ) ); + $out->addSubtitle( $this->msg( 'difference-multipage' ) ); + $samePage = false; + } + + if ( $samePage && $this->mNewPage->quickUserCan( 'edit', $user ) ) { + if ( $this->mNewRev->isCurrent() && $this->mNewPage->userCan( 'rollback', $user ) ) { + $rollbackLink = Linker::generateRollback( $this->mNewRev, $this->getContext() ); + if ( $rollbackLink ) { + 
$out->preventClickjacking(); + $rollback = '   ' . $rollbackLink; + } + } + + if ( !$this->mOldRev->isDeleted( Revision::DELETED_TEXT ) && + !$this->mNewRev->isDeleted( Revision::DELETED_TEXT ) + ) { + $undoLink = Html::element( 'a', [ + 'href' => $this->mNewPage->getLocalURL( [ + 'action' => 'edit', + 'undoafter' => $this->mOldid, + 'undo' => $this->mNewid + ] ), + 'title' => Linker::titleAttrib( 'undo' ), + ], + $this->msg( 'editundo' )->text() + ); + $revisionTools['mw-diff-undo'] = $undoLink; + } + } + + # Make "previous revision link" + if ( $samePage && $this->mOldRev->getPrevious() ) { + $prevlink = Linker::linkKnown( + $this->mOldPage, + $this->msg( 'previousdiff' )->escaped(), + [ 'id' => 'differences-prevlink' ], + [ 'diff' => 'prev', 'oldid' => $this->mOldid ] + $query + ); + } else { + $prevlink = ' '; + } + + if ( $this->mOldRev->isMinor() ) { + $oldminor = ChangesList::flag( 'minor' ); + } else { + $oldminor = ''; + } + + $ldel = $this->revisionDeleteLink( $this->mOldRev ); + $oldRevisionHeader = $this->getRevisionHeader( $this->mOldRev, 'complete' ); + $oldChangeTags = ChangeTags::formatSummaryRow( $this->mOldTags, 'diff', $this->getContext() ); + + $oldHeader = '<div id="mw-diff-otitle1"><strong>' . $oldRevisionHeader . '</strong></div>' . + '<div id="mw-diff-otitle2">' . + Linker::revUserTools( $this->mOldRev, !$this->unhide ) . '</div>' . + '<div id="mw-diff-otitle3">' . $oldminor . + Linker::revComment( $this->mOldRev, !$diffOnly, !$this->unhide ) . $ldel . '</div>' . + '<div id="mw-diff-otitle5">' . $oldChangeTags[0] . '</div>' . + '<div id="mw-diff-otitle4">' . $prevlink . 
'</div>'; + + // Allow extensions to change the $oldHeader variable + Hooks::run( 'DifferenceEngineOldHeader', [ $this, &$oldHeader, $prevlink, $oldminor, + $diffOnly, $ldel, $this->unhide ] ); + + if ( $this->mOldRev->isDeleted( Revision::DELETED_TEXT ) ) { + $deleted = true; // old revisions text is hidden + if ( $this->mOldRev->isDeleted( Revision::DELETED_RESTRICTED ) ) { + $suppressed = true; // also suppressed + } + } + + # Check if this user can see the revisions + if ( !$this->mOldRev->userCan( Revision::DELETED_TEXT, $user ) ) { + $allowed = false; + } + } + + $out->addJsConfigVars( [ + 'wgDiffOldId' => $this->mOldid, + 'wgDiffNewId' => $this->mNewid, + ] ); + + # Make "next revision link" + # Skip next link on the top revision + if ( $samePage && !$this->mNewRev->isCurrent() ) { + $nextlink = Linker::linkKnown( + $this->mNewPage, + $this->msg( 'nextdiff' )->escaped(), + [ 'id' => 'differences-nextlink' ], + [ 'diff' => 'next', 'oldid' => $this->mNewid ] + $query + ); + } else { + $nextlink = ' '; + } + + if ( $this->mNewRev->isMinor() ) { + $newminor = ChangesList::flag( 'minor' ); + } else { + $newminor = ''; + } + + # Handle RevisionDelete links... + $rdel = $this->revisionDeleteLink( $this->mNewRev ); + + # Allow extensions to define their own revision tools + Hooks::run( 'DiffRevisionTools', + [ $this->mNewRev, &$revisionTools, $this->mOldRev, $user ] ); + $formattedRevisionTools = []; + // Put each one in parentheses (poor man's button) + foreach ( $revisionTools as $key => $tool ) { + $toolClass = is_string( $key ) ? $key : 'mw-diff-tool'; + $element = Html::rawElement( + 'span', + [ 'class' => $toolClass ], + $this->msg( 'parentheses' )->rawParams( $tool )->escaped() + ); + $formattedRevisionTools[] = $element; + } + $newRevisionHeader = $this->getRevisionHeader( $this->mNewRev, 'complete' ) . + ' ' . 
implode( ' ', $formattedRevisionTools ); + $newChangeTags = ChangeTags::formatSummaryRow( $this->mNewTags, 'diff', $this->getContext() ); + + $newHeader = '<div id="mw-diff-ntitle1"><strong>' . $newRevisionHeader . '</strong></div>' . + '<div id="mw-diff-ntitle2">' . Linker::revUserTools( $this->mNewRev, !$this->unhide ) . + " $rollback</div>" . + '<div id="mw-diff-ntitle3">' . $newminor . + Linker::revComment( $this->mNewRev, !$diffOnly, !$this->unhide ) . $rdel . '</div>' . + '<div id="mw-diff-ntitle5">' . $newChangeTags[0] . '</div>' . + '<div id="mw-diff-ntitle4">' . $nextlink . $this->markPatrolledLink() . '</div>'; + + // Allow extensions to change the $newHeader variable + Hooks::run( 'DifferenceEngineNewHeader', [ $this, &$newHeader, $formattedRevisionTools, + $nextlink, $rollback, $newminor, $diffOnly, $rdel, $this->unhide ] ); + + if ( $this->mNewRev->isDeleted( Revision::DELETED_TEXT ) ) { + $deleted = true; // new revisions text is hidden + if ( $this->mNewRev->isDeleted( Revision::DELETED_RESTRICTED ) ) { + $suppressed = true; // also suppressed + } + } + + # If the diff cannot be shown due to a deleted revision, then output + # the diff header and links to unhide (if available)... + if ( $deleted && ( !$this->unhide || !$allowed ) ) { + $this->showDiffStyle(); + $multi = $this->getMultiNotice(); + $out->addHTML( $this->addHeader( '', $oldHeader, $newHeader, $multi ) ); + if ( !$allowed ) { + $msg = $suppressed ? 'rev-suppressed-no-diff' : 'rev-deleted-no-diff'; + # Give explanation for why revision is not visible + $out->wrapWikiMsg( "<div id='mw-$msg' class='mw-warning plainlinks'>\n$1\n</div>\n", + [ $msg ] ); + } else { + # Give explanation and add a link to view the diff... + $query = $this->getRequest()->appendQueryValue( 'unhide', '1' ); + $link = $this->getTitle()->getFullURL( $query ); + $msg = $suppressed ? 
'rev-suppressed-unhide-diff' : 'rev-deleted-unhide-diff'; + $out->wrapWikiMsg( + "<div id='mw-$msg' class='mw-warning plainlinks'>\n$1\n</div>\n", + [ $msg, $link ] + ); + } + # Otherwise, output a regular diff... + } else { + # Add deletion notice if the user is viewing deleted content + $notice = ''; + if ( $deleted ) { + $msg = $suppressed ? 'rev-suppressed-diff-view' : 'rev-deleted-diff-view'; + $notice = "<div id='mw-$msg' class='mw-warning plainlinks'>\n" . + $this->msg( $msg )->parse() . + "</div>\n"; + } + $this->showDiff( $oldHeader, $newHeader, $notice ); + if ( !$diffOnly ) { + $this->renderNewRevision(); + } + } + } + + /** + * Build a link to mark a change as patrolled. + * + * Returns empty string if there's either no revision to patrol or the user is not allowed to. + * Side effect: When the patrol link is build, this method will call + * OutputPage::preventClickjacking() and load mediawiki.page.patrol.ajax. + * + * @return string HTML or empty string + */ + public function markPatrolledLink() { + if ( $this->mMarkPatrolledLink === null ) { + $linkInfo = $this->getMarkPatrolledLinkInfo(); + // If false, there is no patrol link needed/allowed + if ( !$linkInfo ) { + $this->mMarkPatrolledLink = ''; + } else { + $this->mMarkPatrolledLink = ' <span class="patrollink" data-mw="interface">[' . + Linker::linkKnown( + $this->mNewPage, + $this->msg( 'markaspatrolleddiff' )->escaped(), + [], + [ + 'action' => 'markpatrolled', + 'rcid' => $linkInfo['rcid'], + ] + ) . ']</span>'; + // Allow extensions to change the markpatrolled link + Hooks::run( 'DifferenceEngineMarkPatrolledLink', [ $this, + &$this->mMarkPatrolledLink, $linkInfo['rcid'] ] ); + } + } + return $this->mMarkPatrolledLink; + } + + /** + * Returns an array of meta data needed to build a "mark as patrolled" link and + * adds the mediawiki.page.patrol.ajax to the output. 
+ * + * @return array|false An array of meta data for a patrol link (rcid only) + * or false if no link is needed + */ + protected function getMarkPatrolledLinkInfo() { + global $wgUseRCPatrol, $wgEnableAPI, $wgEnableWriteAPI; + + $user = $this->getUser(); + + // Prepare a change patrol link, if applicable + if ( + // Is patrolling enabled and the user allowed to? + $wgUseRCPatrol && $this->mNewPage->quickUserCan( 'patrol', $user ) && + // Only do this if the revision isn't more than 6 hours older + // than the Max RC age (6h because the RC might not be cleaned out regularly) + RecentChange::isInRCLifespan( $this->mNewRev->getTimestamp(), 21600 ) + ) { + // Look for an unpatrolled change corresponding to this diff + $db = wfGetDB( DB_REPLICA ); + $change = RecentChange::newFromConds( + [ + 'rc_timestamp' => $db->timestamp( $this->mNewRev->getTimestamp() ), + 'rc_this_oldid' => $this->mNewid, + 'rc_patrolled' => RecentChange::PRC_UNPATROLLED + ], + __METHOD__ + ); + + if ( $change && !$change->getPerformer()->equals( $user ) ) { + $rcid = $change->getAttribute( 'rc_id' ); + } else { + // None found or the page has been created by the current user. 
+ // If the user could patrol this it already would be patrolled + $rcid = 0; + } + + // Allow extensions to possibly change the rcid here + // For example the rcid might be set to zero due to the user + // being the same as the performer of the change but an extension + // might still want to show it under certain conditions + Hooks::run( 'DifferenceEngineMarkPatrolledRCID', [ &$rcid, $this, $change, $user ] ); + + // Build the link + if ( $rcid ) { + $this->getOutput()->preventClickjacking(); + if ( $wgEnableAPI && $wgEnableWriteAPI + && $user->isAllowed( 'writeapi' ) + ) { + $this->getOutput()->addModules( 'mediawiki.page.patrol.ajax' ); + } + + return [ + 'rcid' => $rcid, + ]; + } + } + + // No mark as patrolled link applicable + return false; + } + + /** + * @param Revision $rev + * + * @return string + */ + protected function revisionDeleteLink( $rev ) { + $link = Linker::getRevDeleteLink( $this->getUser(), $rev, $rev->getTitle() ); + if ( $link !== '' ) { + $link = '   ' . $link . ' '; + } + + return $link; + } + + /** + * Show the new revision of the page. + */ + public function renderNewRevision() { + $out = $this->getOutput(); + $revHeader = $this->getRevisionHeader( $this->mNewRev ); + # Add "current version as of X" title + $out->addHTML( "<hr class='diff-hr' id='mw-oldid' /> + <h2 class='diff-currentversion-title'>{$revHeader}</h2>\n" ); + # Page content may be handled by a hooked call instead... 
+ if ( Hooks::run( 'ArticleContentOnDiff', [ $this, $out ] ) ) { + $this->loadNewText(); + $out->setRevisionId( $this->mNewid ); + $out->setRevisionTimestamp( $this->mNewRev->getTimestamp() ); + $out->setArticleFlag( true ); + + if ( !Hooks::run( 'ArticleContentViewCustom', + [ $this->mNewContent, $this->mNewPage, $out ] ) + ) { + // Handled by extension + } else { + // Normal page + if ( $this->getTitle()->equals( $this->mNewPage ) ) { + // If the Title stored in the context is the same as the one + // of the new revision, we can use its associated WikiPage + // object. + $wikiPage = $this->getWikiPage(); + } else { + // Otherwise we need to create our own WikiPage object + $wikiPage = WikiPage::factory( $this->mNewPage ); + } + + $parserOutput = $this->getParserOutput( $wikiPage, $this->mNewRev ); + + # WikiPage::getParserOutput() should not return false, but just in case + if ( $parserOutput ) { + // Allow extensions to change parser output here + if ( Hooks::run( 'DifferenceEngineRenderRevisionAddParserOutput', + [ $this, $out, $parserOutput, $wikiPage ] ) + ) { + $out->addParserOutput( $parserOutput, [ + 'enableSectionEditLinks' => $this->mNewRev->isCurrent() + && $this->mNewRev->getTitle()->quickUserCan( 'edit', $this->getUser() ), + ] ); + } + } + } + } + + // Allow extensions to optionally not show the final patrolled link + if ( Hooks::run( 'DifferenceEngineRenderRevisionShowFinalPatrolLink' ) ) { + # Add redundant patrol link on bottom... 
+ $out->addHTML( $this->markPatrolledLink() ); + } + } + + /** + * @param WikiPage $page + * @param Revision $rev + * + * @return ParserOutput|bool False if the revision was not found + */ + protected function getParserOutput( WikiPage $page, Revision $rev ) { + $parserOptions = $page->makeParserOptions( $this->getContext() ); + $parserOutput = $page->getParserOutput( $parserOptions, $rev->getId() ); + + return $parserOutput; + } + + /** + * Get the diff text, send it to the OutputPage object + * Returns false if the diff could not be generated, otherwise returns true + * + * @param string|bool $otitle Header for old text or false + * @param string|bool $ntitle Header for new text or false + * @param string $notice HTML between diff header and body + * + * @return bool + */ + public function showDiff( $otitle, $ntitle, $notice = '' ) { + // Allow extensions to affect the output here + Hooks::run( 'DifferenceEngineShowDiff', [ $this ] ); + + $diff = $this->getDiff( $otitle, $ntitle, $notice ); + if ( $diff === false ) { + $this->showMissingRevision(); + + return false; + } else { + $this->showDiffStyle(); + $this->getOutput()->addHTML( $diff ); + + return true; + } + } + + /** + * Add style sheets for diff display. + */ + public function showDiffStyle() { + $this->getOutput()->addModuleStyles( 'mediawiki.diff.styles' ); + } + + /** + * Get complete diff table, including header + * + * @param string|bool $otitle Header for old text or false + * @param string|bool $ntitle Header for new text or false + * @param string $notice HTML between diff header and body + * + * @return mixed + */ + public function getDiff( $otitle, $ntitle, $notice = '' ) { + $body = $this->getDiffBody(); + if ( $body === false ) { + return false; + } + + $multi = $this->getMultiNotice(); + // Display a message when the diff is empty + if ( $body === '' ) { + $notice .= '<div class="mw-diff-empty">' . + $this->msg( 'diff-empty' )->parse() . 
+ "</div>\n"; + } + + return $this->addHeader( $body, $otitle, $ntitle, $multi, $notice ); + } + + /** + * Get the diff table body, without header + * + * @return mixed (string/false) + */ + public function getDiffBody() { + $this->mCacheHit = true; + // Check if the diff should be hidden from this user + if ( !$this->loadRevisionData() ) { + return false; + } elseif ( $this->mOldRev && + !$this->mOldRev->userCan( Revision::DELETED_TEXT, $this->getUser() ) + ) { + return false; + } elseif ( $this->mNewRev && + !$this->mNewRev->userCan( Revision::DELETED_TEXT, $this->getUser() ) + ) { + return false; + } + // Short-circuit + if ( $this->mOldRev === false || ( $this->mOldRev && $this->mNewRev + && $this->mOldRev->getId() == $this->mNewRev->getId() ) + ) { + if ( Hooks::run( 'DifferenceEngineShowEmptyOldContent', [ $this ] ) ) { + return ''; + } + } + // Cacheable? + $key = false; + $cache = ObjectCache::getMainWANInstance(); + if ( $this->mOldid && $this->mNewid ) { + // Check if subclass is still using the old way + // for backwards-compatibility + $key = $this->getDiffBodyCacheKey(); + if ( $key === null ) { + $key = call_user_func_array( + [ $cache, 'makeKey' ], + $this->getDiffBodyCacheKeyParams() + ); + } + + // Try cache + if ( !$this->mRefreshCache ) { + $difftext = $cache->get( $key ); + if ( $difftext ) { + wfIncrStats( 'diff_cache.hit' ); + $difftext = $this->localiseDiff( $difftext ); + $difftext .= "\n<!-- diff cache key $key -->\n"; + + return $difftext; + } + } // don't try to load but save the result + } + $this->mCacheHit = false; + + // Loadtext is permission safe, this just clears out the diff + if ( !$this->loadText() ) { + return false; + } + + $difftext = $this->generateContentDiffBody( $this->mOldContent, $this->mNewContent ); + + // Avoid PHP 7.1 warning from passing $this by reference + $diffEngine = $this; + + // Save to cache for 7 days + if ( !Hooks::run( 'AbortDiffCache', [ &$diffEngine ] ) ) { + wfIncrStats( 'diff_cache.uncacheable' ); + 
} elseif ( $key !== false && $difftext !== false ) { + wfIncrStats( 'diff_cache.miss' ); + $cache->set( $key, $difftext, 7 * 86400 ); + } else { + wfIncrStats( 'diff_cache.uncacheable' ); + } + // localise line numbers and title attribute text + if ( $difftext !== false ) { + $difftext = $this->localiseDiff( $difftext ); + } + + return $difftext; + } + + /** + * Returns the cache key for diff body text or content. + * + * @deprecated since 1.31, use getDiffBodyCacheKeyParams() instead + * @since 1.23 + * + * @throws MWException + * @return string|null + */ + protected function getDiffBodyCacheKey() { + return null; + } + + /** + * Get the cache key parameters + * + * Subclasses can replace the first element in the array to something + * more specific to the type of diff (e.g. "inline-diff"), or append + * if the cache should vary on more things. Overriding entirely should + * be avoided. + * + * @since 1.31 + * + * @return array + * @throws MWException + */ + protected function getDiffBodyCacheKeyParams() { + if ( !$this->mOldid || !$this->mNewid ) { + throw new MWException( 'mOldid and mNewid must be set to get diff cache key.' ); + } + + $engine = $this->getEngine(); + $params = [ + 'diff', + $engine, + self::DIFF_VERSION, + "old-{$this->mOldid}", + "rev-{$this->mNewid}" + ]; + + if ( $engine === 'wikidiff2' ) { + $params[] = phpversion( 'wikidiff2' ); + $params[] = $this->getConfig()->get( 'WikiDiff2MovedParagraphDetectionCutoff' ); + } + + return $params; + } + + /** + * Generate a diff, no caching. + * + * This implementation uses generateTextDiffBody() to generate a diff based on the default + * serialization of the given Content objects. This will fail if $old or $new are not + * instances of TextContent. + * + * Subclasses may override this to provide a different rendering for the diff, + * perhaps taking advantage of the content's native form. This is required for all content + * models that are not text based. 
/**
 * Generate a diff, no caching.
 *
 * Serialises both Content objects and delegates to generateTextDiffBody().
 * Only TextContent instances are accepted; subclasses handling other
 * content models are expected to override this method.
 *
 * @since 1.21
 *
 * @param Content $old Old content
 * @param Content $new New content
 *
 * @throws MWException If old or new content is not an instance of TextContent.
 * @return bool|string
 */
public function generateContentDiffBody( Content $old, Content $new ) {
	// $old is checked first so the exception names the same object as before.
	foreach ( [ $old, $new ] as $content ) {
		if ( !( $content instanceof TextContent ) ) {
			throw new MWException( "Diff not implemented for " . get_class( $content ) . "; " .
				"override generateContentDiffBody to fix this." );
		}
	}

	return $this->generateTextDiffBody( $old->serialize(), $new->serialize() );
}

/**
 * Generate a diff, no caching
 *
 * Wraps textDiff() with timing instrumentation and, for large inputs,
 * runs it through a PoolCounter keyed on the md5 of both texts.
 *
 * @todo move this to TextDifferenceEngine, make DifferenceEngine abstract. At some point.
 *
 * @param string $otext Old text, must be already segmented
 * @param string $ntext New text, must be already segmented
 *
 * @return bool|string
 */
public function generateTextDiffBody( $otext, $ntext ) {
	$diff = function () use ( $otext, $ntext ) {
		$start = microtime( true );

		$result = $this->textDiff( $otext, $ntext );

		$elapsedMs = intval( ( microtime( true ) - $start ) * 1000 );
		MediaWikiServices::getInstance()->getStatsdDataFactory()->timing( 'diff_time', $elapsedMs );
		// Log requests slower than 99th percentile
		if ( $elapsedMs > 100 && $this->mOldPage && $this->mNewPage ) {
			wfDebugLog( 'diff',
				"$elapsedMs ms diff: {$this->mOldid} -> {$this->mNewid} {$this->mNewPage}" );
		}

		return $result;
	};

	/**
	 * @param Status $status
	 * @throws FatalError
	 */
	$error = function ( $status ) {
		throw new FatalError( $status->getWikiText() );
	};

	// Use PoolCounter if the diff looks like it can be expensive
	if ( strlen( $otext ) + strlen( $ntext ) > 20000 ) {
		$work = new PoolCounterWorkViaCallback( 'diff',
			md5( $otext ) . md5( $ntext ),
			[ 'doWork' => $diff, 'error' => $error ]
		);
		return $work->execute();
	}

	return $diff();
}

/**
 * Process $wgExternalDiffEngine and get a sane, usable engine
 *
 * Note: this normalises (writes to) the global as a side effect.
 *
 * @return bool|string 'wikidiff2', path to an executable, or false
 */
private function getEngine() {
	global $wgExternalDiffEngine;
	// We use the global here instead of Config because we write to the value,
	// and Config is not mutable.
	if ( $wgExternalDiffEngine == 'wikidiff' || $wgExternalDiffEngine == 'wikidiff3' ) {
		wfDeprecated( "\$wgExternalDiffEngine = '{$wgExternalDiffEngine}'", '1.27' );
		$wgExternalDiffEngine = false;
	} elseif ( $wgExternalDiffEngine == 'wikidiff2' ) {
		// Same as above, but with no deprecation warnings
		$wgExternalDiffEngine = false;
	} elseif ( !is_string( $wgExternalDiffEngine ) && $wgExternalDiffEngine !== false ) {
		// And prevent people from shooting themselves in the foot...
		wfWarn( '$wgExternalDiffEngine is set to a non-string value, forcing it to false' );
		$wgExternalDiffEngine = false;
	}

	if ( is_string( $wgExternalDiffEngine ) && is_executable( $wgExternalDiffEngine ) ) {
		return $wgExternalDiffEngine;
	} elseif ( $wgExternalDiffEngine === false && function_exists( 'wikidiff2_do_diff' ) ) {
		return 'wikidiff2';
	} else {
		// Native PHP
		return false;
	}
}

/**
 * Generates diff, to be wrapped internally in a logging/instrumentation
 *
 * Dispatches to one of three back ends depending on getEngine():
 * the wikidiff2 extension, an external executable, or the native PHP differ.
 *
 * @param string $otext Old text, must be already segmented
 * @param string $ntext New text, must be already segmented
 * @throws Exception If the external diff command exits with a non-zero code.
 * @return bool|string Diff body, or false if temporary files could not be created
 */
protected function textDiff( $otext, $ntext ) {
	global $wgContLang;

	$otext = str_replace( "\r\n", "\n", $otext );
	$ntext = str_replace( "\r\n", "\n", $ntext );

	$engine = $this->getEngine();

	// Better external diff engine, the 2 may some day be dropped
	// This one does the escaping and segmenting itself
	if ( $engine === 'wikidiff2' ) {
		$wikidiff2Version = phpversion( 'wikidiff2' );
		if (
			$wikidiff2Version !== false &&
			version_compare( $wikidiff2Version, '1.5.0', '>=' )
		) {
			$text = wikidiff2_do_diff(
				$otext,
				$ntext,
				2,
				$this->getConfig()->get( 'WikiDiff2MovedParagraphDetectionCutoff' )
			);
		} else {
			// Don't pass the 4th parameter for compatibility with older versions of wikidiff2
			$text = wikidiff2_do_diff(
				$otext,
				$ntext,
				2
			);

			// Log a warning in case the configuration value is set to not silently ignore it
			if ( $this->getConfig()->get( 'WikiDiff2MovedParagraphDetectionCutoff' ) > 0 ) {
				wfLogWarning( '$wgWikiDiff2MovedParagraphDetectionCutoff is set but has no
					effect since the used version of WikiDiff2 does not support it.' );
			}
		}

		$text .= $this->debug( 'wikidiff2' );

		return $text;
	} elseif ( $engine !== false ) {
		# Diff via the shell
		$tmpDir = wfTempDir();
		$tempName1 = tempnam( $tmpDir, 'diff_' );
		$tempName2 = tempnam( $tmpDir, 'diff_' );
		$tempFile1 = false;
		$tempFile2 = false;

		try {
			if ( $tempName1 === false || $tempName2 === false ) {
				return false;
			}
			$tempFile1 = fopen( $tempName1, "w" );
			if ( !$tempFile1 ) {
				return false;
			}
			$tempFile2 = fopen( $tempName2, "w" );
			if ( !$tempFile2 ) {
				return false;
			}
			fwrite( $tempFile1, $otext );
			fwrite( $tempFile2, $ntext );
			// Close before running the command so the data is flushed to disk.
			fclose( $tempFile1 );
			$tempFile1 = false;
			fclose( $tempFile2 );
			$tempFile2 = false;

			$cmd = [ $engine, $tempName1, $tempName2 ];
			$result = Shell::command( $cmd )
				->execute();
			$exitCode = $result->getExitCode();
			if ( $exitCode !== 0 ) {
				throw new Exception( "External diff command returned code {$exitCode}. Stderr: "
					. wfEscapeWikiText( $result->getStderr() )
				);
			}
			$difftext = $result->getStdout();
			$difftext .= $this->debug( "external $engine" );

			return $difftext;
		} finally {
			// Always release handles and remove the temp files, including on the
			// early-return and exception paths, which previously leaked them.
			foreach ( [ $tempFile1, $tempFile2 ] as $handle ) {
				if ( is_resource( $handle ) ) {
					fclose( $handle );
				}
			}
			foreach ( [ $tempName1, $tempName2 ] as $name ) {
				if ( $name !== false && file_exists( $name ) ) {
					unlink( $name );
				}
			}
		}
	}

	# Native PHP diff
	$ota = explode( "\n", $wgContLang->segmentForDiff( $otext ) );
	$nta = explode( "\n", $wgContLang->segmentForDiff( $ntext ) );
	$diffs = new Diff( $ota, $nta );
	$formatter = new TableDiffFormatter();
	$difftext = $wgContLang->unsegmentForDiff( $formatter->format( $diffs ) );
	$difftext .= $this->debug( 'native PHP' );

	return $difftext;
}

/**
 * Generate a debug comment indicating diff generating time,
 * server node, and generator backend.
 *
 * Returns an empty string when $this->enableDebugComment is falsy.
 *
 * @param string $generator What diff engine was used
 *
 * @return string HTML comment, or ''
 */
protected function debug( $generator = "internal" ) {
	global $wgShowHostnames;
	if ( !$this->enableDebugComment ) {
		return '';
	}
	$data = [ $generator ];
	if ( $wgShowHostnames ) {
		$data[] = wfHostname();
	}
	$data[] = wfTimestamp( TS_DB );

	return "<!-- diff generator: " .
		implode( " ", array_map( "htmlspecialchars", $data ) ) .
		" -->\n";
}

/**
 * Localise diff output
 *
 * Always localises line numbers; tooltips for moved paragraphs are only
 * added when the wikidiff2 engine is in use at version 1.5.1 or later.
 *
 * @param string $text
 * @return string
 */
private function localiseDiff( $text ) {
	$text = $this->localiseLineNumbers( $text );
	if ( $this->getEngine() === 'wikidiff2' &&
		version_compare( phpversion( 'wikidiff2' ), '1.5.1', '>=' )
	) {
		$text = $this->addLocalisedTitleTooltips( $text );
	}
	return $text;
}

/**
 * Replace line numbers with the text in the user's language
 *
 * @param string $text Diff HTML containing <!--LINE n--> placeholders
 *
 * @return mixed
 */
public function localiseLineNumbers( $text ) {
	return preg_replace_callback(
		'/<!--LINE (\d+)-->/',
		[ $this, 'localiseLineNumbersCb' ],
		$text
	);
}

/**
 * preg_replace_callback handler for localiseLineNumbers().
 *
 * @param array $matches $matches[1] is the line number as a string
 * @return string Escaped 'lineno' message, or '' for line 1 when
 *   mReducedLineNumbers is set
 */
public function localiseLineNumbersCb( $matches ) {
	if ( $matches[1] === '1' && $this->mReducedLineNumbers ) {
		return '';
	}

	return $this->msg( 'lineno' )->numParams( $matches[1] )->escaped();
}

/**
 * Add title attributes for tooltips on moved paragraph indicators
 *
 * @param string $text
 * @return string
 */
private function addLocalisedTitleTooltips( $text ) {
	return preg_replace_callback(
		'/class="mw-diff-movedpara-(left|right)"/',
		[ $this, 'addLocalisedTitleTooltipsCb' ],
		$text
	);
}

/**
 * preg_replace_callback handler for addLocalisedTitleTooltips().
 *
 * @param array $matches $matches[1] is 'left' or 'right'
 * @return string The matched class attribute followed by a title attribute
 */
private function addLocalisedTitleTooltipsCb( array $matches ) {
	$key = $matches[1] === 'right' ?
		'diff-paragraph-moved-toold' :
		'diff-paragraph-moved-tonew';
	return $matches[0] . ' title="' . $this->msg( $key )->escaped() . '"';
}

/**
 * If there are revisions between the ones being compared, return a note saying so.
 *
 * @return string HTML notice, or '' when not applicable
 */
public function getMultiNotice() {
	if ( !is_object( $this->mOldRev ) || !is_object( $this->mNewRev ) ) {
		return '';
	} elseif ( !$this->mOldPage->equals( $this->mNewPage ) ) {
		// Comparing two different pages? Count would be meaningless.
		return '';
	}

	if ( $this->mOldRev->getTimestamp() > $this->mNewRev->getTimestamp() ) {
		$oldRev = $this->mNewRev; // flip
		$newRev = $this->mOldRev; // flip
	} else { // normal case
		$oldRev = $this->mOldRev;
		$newRev = $this->mNewRev;
	}

	// Sanity: don't show the notice if too many rows must be scanned
	// @todo show some special message for that case
	$nEdits = $this->mNewPage->countRevisionsBetween( $oldRev, $newRev, 1000 );
	if ( $nEdits > 0 && $nEdits <= 1000 ) {
		$limit = 100; // use diff-multi-manyusers if too many users
		$users = $this->mNewPage->getAuthorsBetween( $oldRev, $newRev, $limit );
		$numUsers = count( $users );

		if ( $numUsers == 1 && $users[0] == $newRev->getUserText( Revision::RAW ) ) {
			$numUsers = 0; // special case to say "by the same user" instead of "by one other user"
		}

		return self::intermediateEditsMsg( $nEdits, $numUsers, $limit );
	}

	return ''; // nothing
}

/**
 * Get a notice about how many intermediate edits and users there are
 *
 * @param int $numEdits
 * @param int $numUsers 0 selects the "same user" message
 * @param int $limit User count above which the "many users" message is used
 *
 * @return string
 */
public static function intermediateEditsMsg( $numEdits, $numUsers, $limit ) {
	if ( $numUsers === 0 ) {
		$msg = 'diff-multi-sameuser';
	} elseif ( $numUsers > $limit ) {
		$msg = 'diff-multi-manyusers';
		$numUsers = $limit;
	} else {
		$msg = 'diff-multi-otherusers';
	}

	return wfMessage( $msg )->numParams( $numEdits, $numUsers )->parse();
}
/**
 * Get a header for a specified revision.
 *
 * @param Revision $rev
 * @param string $complete 'complete' to get the header wrapped depending
 *   on the visibility of the revision, with a link to edit the page.
 *
 * @return string HTML fragment
 */
public function getRevisionHeader( Revision $rev, $complete = '' ) {
	$viewer = $this->getUser();
	$uiLang = $this->getLanguage();
	$rawTimestamp = $rev->getTimestamp();

	$msgKey = $rev->isCurrent() ? 'currentrev-asof' : 'revisionasof';
	$header = $this->msg(
		$msgKey,
		$uiLang->userTimeAndDate( $rawTimestamp, $viewer ),
		$uiLang->userDate( $rawTimestamp, $viewer ),
		$uiLang->userTime( $rawTimestamp, $viewer )
	)->escaped();

	// Plain mode: just the escaped "revision as of ..." text.
	if ( $complete !== 'complete' ) {
		return $header;
	}

	$title = $rev->getTitle();
	$header = Linker::linkKnown( $title, $header, [], [ 'oldid' => $rev->getId() ] );

	// Viewer may not see the text: mark the whole header as deleted, no edit link.
	if ( !$rev->userCan( Revision::DELETED_TEXT, $viewer ) ) {
		return Html::rawElement( 'span', [ 'class' => 'history-deleted' ], $header );
	}

	$editQuery = [ 'action' => 'edit' ];
	if ( !$rev->isCurrent() ) {
		$editQuery['oldid'] = $rev->getId();
	}

	$linkMsgKey = $title->quickUserCan( 'edit', $viewer ) ? 'editold' : 'viewsourceold';
	$linkText = $this->msg( $linkMsgKey )->escaped();
	$editLink = $this->msg( 'parentheses' )->rawParams(
		Linker::linkKnown( $title, $linkText, [], $editQuery ) )->escaped();
	$header .= ' ' . Html::rawElement(
		'span',
		[ 'class' => 'mw-diff-edit' ],
		$editLink
	);

	if ( $rev->isDeleted( Revision::DELETED_TEXT ) ) {
		$header = Html::rawElement(
			'span',
			[ 'class' => 'history-deleted' ],
			$header
		);
	}

	return $header;
}
/**
 * Add the header to a diff body
 *
 * @param string $diff Diff body
 * @param string $otitle Old revision header
 * @param string $ntitle New revision header
 * @param string $multi Notice telling user that there are intermediate
 *   revisions between the ones being compared
 * @param string $notice Other notices, e.g. that user is viewing deleted content
 *
 * @return string Complete <table> element
 */
public function addHeader( $diff, $otitle, $ntitle, $multi = '', $notice = '' ) {
	// shared.css sets diff in interface language/dir, but the actual content
	// is often in a different language, mostly the page content language/dir
	$tableAttribs = [
		'class' => [ 'diff', 'diff-contentalign-' . $this->getDiffLang()->alignStart() ],
		'data-mw' => 'interface',
	];
	$header = Html::openElement( 'table', $tableAttribs );
	$userLang = htmlspecialchars( $this->getLanguage()->getHtmlCode() );

	if ( !$diff && !$otitle ) {
		// Single-column layout: only the new revision title is shown.
		$header .= "
		<tr class=\"diff-title\" lang=\"{$userLang}\">
		<td class=\"diff-ntitle\">{$ntitle}</td>
		</tr>";
		$multiColspan = 1;
	} else {
		if ( $diff ) { // Safari/Chrome show broken output if cols not used
			$header .= "
			<col class=\"diff-marker\" />
			<col class=\"diff-content\" />
			<col class=\"diff-marker\" />
			<col class=\"diff-content\" />";
			$colspan = 2;
			$multiColspan = 4;
		} else {
			$colspan = 1;
			$multiColspan = 2;
		}
		if ( $otitle || $ntitle ) {
			$header .= "
			<tr class=\"diff-title\" lang=\"{$userLang}\">
			<td colspan=\"$colspan\" class=\"diff-otitle\">{$otitle}</td>
			<td colspan=\"$colspan\" class=\"diff-ntitle\">{$ntitle}</td>
			</tr>";
		}
	}

	// Optional full-width rows, emitted in this order: multi, then notice.
	if ( $multi != '' ) {
		$header .= "<tr><td colspan=\"{$multiColspan}\" " .
			"class=\"diff-multi\" lang=\"{$userLang}\">{$multi}</td></tr>";
	}
	if ( $notice != '' ) {
		$header .= "<tr><td colspan=\"{$multiColspan}\" " .
			"class=\"diff-notice\" lang=\"{$userLang}\">{$notice}</td></tr>";
	}

	return $header . $diff . "</table>";
}

/**
 * Use specified text instead of loading from the database
 *
 * Marks both text and revision data as loaded so later load*() calls
 * return early.
 *
 * @param Content $oldContent
 * @param Content $newContent
 * @since 1.21
 */
public function setContent( Content $oldContent, Content $newContent ) {
	$this->mOldContent = $oldContent;
	$this->mNewContent = $newContent;

	$this->mTextLoaded = 2;
	$this->mRevisionsLoaded = true;
}

/**
 * Set the language in which the diff text is written
 * (Defaults to page content language).
 * @param Language|string $lang
 * @since 1.19
 */
public function setTextLanguage( $lang ) {
	$this->mDiffLang = wfGetLangObj( $lang );
}

/**
 * Maps a revision pair definition as accepted by DifferenceEngine constructor
 * to a pair of actual integers representing revision ids.
 *
 * @param int $old Revision id, e.g. from URL parameter 'oldid'
 * @param int|string $new Revision id or strings 'next' or 'prev', e.g. from URL parameter 'diff'
 *
 * @return array List of two revision ids (int|false), older first, later second.
 *     Zero signifies invalid argument passed.
 *     false signifies that there is no previous/next revision ($old is the oldest/newest one).
 */
public function mapDiffPrevNext( $old, $new ) {
	switch ( true ) {
		case $new === 'prev':
			// Show diff between revision $old and the previous one. Get previous one from DB.
			$newid = intval( $old );
			$oldid = $this->getTitle()->getPreviousRevisionID( $newid );
			break;
		case $new === 'next':
			// Show diff between revision $old and the next one. Get next one from DB.
			$oldid = intval( $old );
			$newid = $this->getTitle()->getNextRevisionID( $oldid );
			break;
		default:
			$oldid = intval( $old );
			$newid = intval( $new );
	}

	return [ $oldid, $newid ];
}

/**
 * Load revision IDs
 *
 * Resolves 'prev'/'next' into concrete ids once, then lets the
 * NewDifferenceEngine hook adjust them.
 */
private function loadRevisionIds() {
	if ( $this->mRevisionsIdsLoaded ) {
		return;
	}

	$this->mRevisionsIdsLoaded = true;

	$requestedOld = $this->mOldid;
	$requestedNew = $this->mNewid;

	list( $this->mOldid, $this->mNewid ) = self::mapDiffPrevNext( $requestedOld, $requestedNew );
	if ( $requestedNew === 'next' && $this->mNewid === false ) {
		# if no result, NewId points to the newest old revision. The only newer
		# revision is cur, which is "0".
		$this->mNewid = 0;
	}

	Hooks::run(
		'NewDifferenceEngine',
		[ $this->getTitle(), &$this->mOldid, &$this->mNewid, $requestedOld, $requestedNew ]
	);
}
/**
 * Load revision metadata for the specified articles. If newid is 0, then compare
 * the old article in oldid to the current article; if oldid is 0, then
 * compare the current article to the immediately previous one (ignoring the
 * value of newid).
 *
 * If oldid is false, leave the corresponding revision object set
 * to false. This is impossible via ordinary user input, and is provided for
 * API convenience.
 *
 * @return bool False if the new revision, or a requested old revision,
 *   could not be loaded
 */
public function loadRevisionData() {
	if ( $this->mRevisionsLoaded ) {
		return true;
	}

	// Whether it succeeds or fails, we don't want to try again
	$this->mRevisionsLoaded = true;

	$this->loadRevisionIds();

	// Load the new revision object
	$this->mNewRev = $this->mNewid
		? Revision::newFromId( $this->mNewid )
		: Revision::newFromTitle(
			$this->getTitle(),
			false,
			Revision::READ_NORMAL
		);

	if ( !$this->mNewRev instanceof Revision ) {
		return false;
	}

	// Update the new revision ID in case it was 0 (makes life easier doing UI stuff)
	$this->mNewid = $this->mNewRev->getId();
	$this->mNewPage = $this->mNewRev->getTitle();

	// Load the old revision object
	$this->mOldRev = false;
	if ( $this->mOldid ) {
		$this->mOldRev = Revision::newFromId( $this->mOldid );
	} elseif ( $this->mOldid === 0 ) {
		$previous = $this->mNewRev->getPrevious();
		// No previous revision: both become false, to show as first-version only.
		$this->mOldid = $previous ? $previous->getId() : false;
		$this->mOldRev = $previous ?: false;
	} /* elseif ( $this->mOldid === false ) leave mOldRev false; */

	// null (as opposed to false) means an explicit old id failed to load.
	if ( $this->mOldRev === null ) {
		return false;
	}

	if ( $this->mOldRev ) {
		$this->mOldPage = $this->mOldRev->getTitle();
	}

	// Load tags information for both revisions
	$dbr = wfGetDB( DB_REPLICA );
	$this->mOldTags = $this->mOldid !== false
		? $dbr->selectField(
			'tag_summary',
			'ts_tags',
			[ 'ts_rev_id' => $this->mOldid ],
			__METHOD__
		)
		: false;
	$this->mNewTags = $dbr->selectField(
		'tag_summary',
		'ts_tags',
		[ 'ts_rev_id' => $this->mNewid ],
		__METHOD__
	);

	return true;
}
/**
 * Load the text of the revisions, as well as revision data.
 *
 * @return bool False if revision data or either content could not be loaded
 */
public function loadText() {
	if ( $this->mTextLoaded == 2 ) {
		return true;
	}

	// Whether it succeeds or fails, we don't want to try again
	$this->mTextLoaded = 2;

	if ( !$this->loadRevisionData() ) {
		return false;
	}

	$viewer = $this->getUser();

	if ( $this->mOldRev ) {
		$this->mOldContent = $this->mOldRev->getContent( Revision::FOR_THIS_USER, $viewer );
		if ( $this->mOldContent === null ) {
			return false;
		}
	}

	if ( $this->mNewRev ) {
		$this->mNewContent = $this->mNewRev->getContent( Revision::FOR_THIS_USER, $viewer );
		// The hook runs before the null check, so handlers see a null content too.
		Hooks::run( 'DifferenceEngineLoadTextAfterNewContentIsLoaded', [ $this ] );
		if ( $this->mNewContent === null ) {
			return false;
		}
	}

	return true;
}

/**
 * Load the text of the new revision, not the old one
 *
 * @return bool False if revision data could not be loaded
 */
public function loadNewText() {
	if ( $this->mTextLoaded >= 1 ) {
		return true;
	}

	$this->mTextLoaded = 1;

	if ( !$this->loadRevisionData() ) {
		return false;
	}

	$this->mNewContent = $this->mNewRev->getContent(
		Revision::FOR_THIS_USER,
		$this->getUser()
	);

	Hooks::run( 'DifferenceEngineAfterLoadNewText', [ $this ] );

	return true;
}
/**
 * MediaWiki default table style diff formatter
 *
 * Emits one <tr> per diff line with a marker cell and a content cell for
 * each side. Line-number headers are written as <!--LINE n--> placeholders.
 *
 * @todo document
 * @private
 * @ingroup DifferenceEngine
 */
class TableDiffFormatter extends DiffFormatter {

	public function __construct() {
		$this->leadingContextLines = 2;
		$this->trailingContextLines = 2;
	}

	/**
	 * Make leading, trailing and repeated spaces survive HTML whitespace
	 * collapsing by pairing them with a non-breaking space entity.
	 *
	 * @param string $msg
	 *
	 * @return mixed
	 */
	public static function escapeWhiteSpace( $msg ) {
		// The entity is spelled out rather than embedded as a raw U+00A0
		// character, which is invisible and easily lost by tooling.
		$msg = preg_replace( '/^ /m', '&#160; ', $msg );
		$msg = preg_replace( '/ $/m', ' &#160;', $msg );
		$msg = preg_replace( '/  /', '&#160; ', $msg );

		return $msg;
	}

	/**
	 * @param int $xbeg
	 * @param int $xlen
	 * @param int $ybeg
	 * @param int $ylen
	 *
	 * @return string
	 */
	protected function blockHeader( $xbeg, $xlen, $ybeg, $ylen ) {
		// '<!--LINE \d+ -->' get replaced by a localised line number
		// in DifferenceEngine::localiseLineNumbers
		$r = '<tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l' .
			$xbeg .
			'" ><!--LINE ' .
			$xbeg .
			"--></td>\n" .
			'<td colspan="2" class="diff-lineno"><!--LINE ' .
			$ybeg .
			"--></td></tr>\n";

		return $r;
	}

	/**
	 * Writes the header to the output buffer.
	 *
	 * @param string $header
	 */
	protected function startBlock( $header ) {
		$this->writeOutput( $header );
	}

	/**
	 * Intentionally empty: the table format emits nothing at block end.
	 */
	protected function endBlock() {
	}

	/**
	 * Intentionally empty: this formatter writes rows via added(),
	 * deleted(), context() and changed() instead.
	 *
	 * @param string[] $lines
	 * @param string $prefix
	 * @param string $color
	 */
	protected function lines( $lines, $prefix = ' ', $color = 'white' ) {
	}

	/**
	 * HTML-escape parameter before calling this
	 *
	 * @param string $line
	 *
	 * @return string
	 */
	protected function addedLine( $line ) {
		return $this->wrapLine( '+', 'diff-addedline', $line );
	}

	/**
	 * HTML-escape parameter before calling this
	 *
	 * @param string $line
	 *
	 * @return string
	 */
	protected function deletedLine( $line ) {
		// &#8722; is the MINUS SIGN entity.
		return $this->wrapLine( '&#8722;', 'diff-deletedline', $line );
	}

	/**
	 * HTML-escape parameter before calling this
	 *
	 * @param string $line
	 *
	 * @return string
	 */
	protected function contextLine( $line ) {
		return $this->wrapLine( '&#160;', 'diff-context', $line );
	}

	/**
	 * @param string $marker Content of the marker cell
	 * @param string $class CSS class applied to the content cell
	 * @param string $line Already HTML-escaped line content
	 *
	 * @return string Two <td> cells
	 */
	protected function wrapLine( $marker, $class, $line ) {
		if ( $line !== '' ) {
			// The <div> wrapper is needed for 'overflow: auto' style to scroll properly
			$line = Xml::tags( 'div', null, $this->escapeWhiteSpace( $line ) );
		}

		return "<td class='diff-marker'>$marker</td><td class='$class'>$line</td>";
	}

	/**
	 * @return string Placeholder cell spanning the marker and content columns
	 */
	protected function emptyLine() {
		return '<td colspan="2">&#160;</td>';
	}

	/**
	 * Writes all lines to the output buffer, each enclosed in <tr>.
	 *
	 * @param string[] $lines
	 */
	protected function added( $lines ) {
		foreach ( $lines as $line ) {
			$this->writeOutput( '<tr>' . $this->emptyLine() .
				$this->addedLine( '<ins class="diffchange">' .
					htmlspecialchars( $line ) . '</ins>' ) . "</tr>\n" );
		}
	}

	/**
	 * Writes all lines to the output buffer, each enclosed in <tr>.
	 *
	 * @param string[] $lines
	 */
	protected function deleted( $lines ) {
		foreach ( $lines as $line ) {
			$this->writeOutput( '<tr>' . $this->deletedLine( '<del class="diffchange">' .
					htmlspecialchars( $line ) . '</del>' ) .
				$this->emptyLine() . "</tr>\n" );
		}
	}

	/**
	 * Writes all lines to the output buffer, each enclosed in <tr>.
	 *
	 * @param string[] $lines
	 */
	protected function context( $lines ) {
		foreach ( $lines as $line ) {
			$this->writeOutput( '<tr>' .
				$this->contextLine( htmlspecialchars( $line ) ) .
				$this->contextLine( htmlspecialchars( $line ) ) . "</tr>\n" );
		}
	}

	/**
	 * Writes the two sets of lines to the output buffer, each enclosed in <tr>.
	 *
	 * @param string[] $orig
	 * @param string[] $closing
	 */
	protected function changed( $orig, $closing ) {
		$diff = new WordLevelDiff( $orig, $closing );
		$del = $diff->orig();
		$add = $diff->closing();

		# Notice that WordLevelDiff returns HTML-escaped output.
		# Hence, we will be calling addedLine/deletedLine without HTML-escaping.

		$ndel = count( $del );
		$nadd = count( $add );
		$n = max( $ndel, $nadd );
		for ( $i = 0; $i < $n; $i++ ) {
			// Pad the shorter side with empty cells so both columns stay aligned.
			$delLine = $i < $ndel ? $this->deletedLine( $del[$i] ) : $this->emptyLine();
			$addLine = $i < $nadd ? $this->addedLine( $add[$i] ) : $this->emptyLine();
			$this->writeOutput( "<tr>{$delLine}{$addLine}</tr>\n" );
		}
	}

}
/**
 * A formatter that outputs unified diffs
 *
 * Each output line is the original line prefixed with ' ', '+' or '-'.
 *
 * @ingroup DifferenceEngine
 */
class UnifiedDiffFormatter extends DiffFormatter {

	/** @var int */
	protected $leadingContextLines = 2;

	/** @var int */
	protected $trailingContextLines = 2;

	/**
	 * Write every line with the given one-character prefix.
	 *
	 * @param string[] $lines
	 * @param string $prefix
	 */
	protected function lines( $lines, $prefix = ' ' ) {
		foreach ( $lines as $text ) {
			$this->writeOutput( "{$prefix}{$text}\n" );
		}
	}

	/**
	 * @param string[] $lines Lines present only in the new text
	 */
	protected function added( $lines ) {
		$this->lines( $lines, '+' );
	}

	/**
	 * @param string[] $lines Lines present only in the old text
	 */
	protected function deleted( $lines ) {
		$this->lines( $lines, '-' );
	}

	/**
	 * A change is rendered as the removed lines followed by the added lines.
	 *
	 * @param string[] $orig
	 * @param string[] $closing
	 */
	protected function changed( $orig, $closing ) {
		$this->deleted( $orig );
		$this->added( $closing );
	}

	/**
	 * @param int $xbeg
	 * @param int $xlen
	 * @param int $ybeg
	 * @param int $ylen
	 *
	 * @return string Hunk header in "@@ -a,b +c,d @@" form
	 */
	protected function blockHeader( $xbeg, $xlen, $ybeg, $ylen ) {
		return "@@ -$xbeg,$xlen +$ybeg,$ylen @@";
	}

}
namespace MediaWiki\Diff;

/**
 * Stores, escapes and formats the results of word-level diff
 *
 * Words are appended with addWords(); consecutive words sharing a tag are
 * grouped and wrapped in a single <ins>/<del> element. A word starting with
 * "\n" ends the current output line.
 *
 * @private
 * @ingroup DifferenceEngine
 */
class WordAccumulator {
	public $insClass = ' class="diffchange diffchange-inline"';
	public $delClass = ' class="diffchange diffchange-inline"';

	/** @var string[] Completed output lines */
	private $lines = [];
	/** @var string HTML of the line currently being built */
	private $line = '';
	/** @var string Unescaped text of the current same-tag group */
	private $group = '';
	/** @var string Tag of the current group: 'ins', 'del' or '' */
	private $tag = '';

	/**
	 * Escape the pending group, append it to the current line wrapped in
	 * its tag, and start a new group with $new_tag.
	 *
	 * @param string $new_tag
	 */
	private function flushGroup( $new_tag ) {
		if ( $this->group !== '' ) {
			if ( $this->tag == 'ins' ) {
				$this->line .= "<ins{$this->insClass}>" . htmlspecialchars( $this->group ) . '</ins>';
			} elseif ( $this->tag == 'del' ) {
				$this->line .= "<del{$this->delClass}>" . htmlspecialchars( $this->group ) . '</del>';
			} else {
				$this->line .= htmlspecialchars( $this->group );
			}
		}
		$this->group = '';
		$this->tag = $new_tag;
	}

	/**
	 * Flush the pending group and finish the current line.
	 *
	 * @param string $new_tag
	 */
	private function flushLine( $new_tag ) {
		$this->flushGroup( $new_tag );
		if ( $this->line != '' ) {
			array_push( $this->lines, $this->line );
		} else {
			# make empty lines visible by inserting an NBSP
			# (explicit entity: a raw invisible character is easily
			# degraded to a plain space, which HTML would collapse)
			array_push( $this->lines, '&#160;' );
		}
		$this->line = '';
	}

	/**
	 * @param string[] $words
	 * @param string $tag 'ins', 'del' or '' for unchanged text
	 */
	public function addWords( $words, $tag = '' ) {
		if ( $tag != $this->tag ) {
			$this->flushGroup( $tag );
		}

		foreach ( $words as $word ) {
			// new-line should only come as first char of word.
			if ( $word == '' ) {
				continue;
			}
			if ( $word[0] == "\n" ) {
				$this->flushLine( $tag );
				// Cast: substr() returns false instead of '' before PHP 8
				// when the word is just "\n".
				$word = (string)substr( $word, 1 );
			}
			assert( strpos( $word, "\n" ) === false );
			$this->group .= $word;
		}
	}

	/**
	 * Finish the last line and return everything accumulated so far.
	 *
	 * @return string[]
	 */
	public function getLines() {
		$this->flushLine( '~done' );

		return $this->lines;
	}
}
use MediaWiki\Diff\ComplexityException;
use MediaWiki\Diff\WordAccumulator;

/**
 * Performs a word-level diff on several lines
 *
 * The diff is computed on "stripped" words (without the trailing
 * whitespace character); the edits are then re-populated with the
 * unstripped words so the rendered output keeps its spacing.
 *
 * @ingroup DifferenceEngine
 */
class WordLevelDiff extends \Diff {
	/**
	 * @inheritDoc
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * @param string[] $linesBefore
	 * @param string[] $linesAfter
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
		list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The edit must hold WORD arrays: the loop below uses the element
			// counts as word counts when slicing, so passing the line arrays
			// here would truncate the output to the first few words.
			$this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
		}

		// Swap the stripped words in each edit for the original words,
		// walking both word lists in step with the edits.
		$xi = 0;
		$yi = 0;
		foreach ( $this->edits as $edit ) {
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $xi, count( $edit->orig ) );
				$xi += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $yi, count( $edit->closing ) );
				$yi += count( $edit->closing );
			}
		}
	}

	/**
	 * Split lines into words, inserting a "\n" word between lines.
	 *
	 * @param string[] $lines
	 *
	 * @return array[] Two parallel lists: [ words, stripped words ], where a
	 *   word may carry one trailing whitespace character that the stripped
	 *   form omits
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				$words[] = "\n";
				$stripped[] = "\n";
			}
			$m = [];
			// NOTE(review): "(?!< \n)" parses as a negative look-AHEAD for "<\n";
			// a look-behind "(?<! \n)" may have been intended - confirm before changing.
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $stripped_word ) {
					$stripped[] = $stripped_word;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Render the old side: copied words plain, removed words wrapped in <del>.
	 *
	 * @return string[] HTML-escaped lines
	 */
	public function orig() {
		$orig = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$orig->addWords( $edit->orig );
			} elseif ( $edit->orig ) {
				$orig->addWords( $edit->orig, 'del' );
			}
		}

		return $orig->getLines();
	}

	/**
	 * Render the new side: copied words plain, added words wrapped in <ins>.
	 *
	 * @return string[] HTML-escaped lines
	 */
	public function closing() {
		$closing = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			if ( $edit->type == 'copy' ) {
				$closing->addWords( $edit->closing );
			} elseif ( $edit->closing ) {
				$closing->addWords( $edit->closing, 'ins' );
			}
		}

		return $closing->getLines();
	}

}