summaryrefslogtreecommitdiff
path: root/www/wiki/includes/utils
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/includes/utils
first commit
Diffstat (limited to 'www/wiki/includes/utils')
-rw-r--r--www/wiki/includes/utils/AutoloadGenerator.php508
-rw-r--r--www/wiki/includes/utils/AvroValidator.php181
-rw-r--r--www/wiki/includes/utils/BatchRowIterator.php296
-rw-r--r--www/wiki/includes/utils/BatchRowUpdate.php128
-rw-r--r--www/wiki/includes/utils/BatchRowWriter.php75
-rw-r--r--www/wiki/includes/utils/ExecutableFinder.php115
-rw-r--r--www/wiki/includes/utils/FileContentsHasher.php114
-rw-r--r--www/wiki/includes/utils/MWCryptHKDF.php103
-rw-r--r--www/wiki/includes/utils/MWCryptRand.php79
-rw-r--r--www/wiki/includes/utils/MWFileProps.php145
-rw-r--r--www/wiki/includes/utils/MWRestrictions.php147
-rw-r--r--www/wiki/includes/utils/README9
-rw-r--r--www/wiki/includes/utils/RowUpdateGenerator.php39
-rw-r--r--www/wiki/includes/utils/UIDGenerator.php629
-rw-r--r--www/wiki/includes/utils/ZipDirectoryReader.php717
-rw-r--r--www/wiki/includes/utils/ZipDirectoryReaderError.php38
16 files changed, 3323 insertions, 0 deletions
diff --git a/www/wiki/includes/utils/AutoloadGenerator.php b/www/wiki/includes/utils/AutoloadGenerator.php
new file mode 100644
index 00000000..0e2ef85d
--- /dev/null
+++ b/www/wiki/includes/utils/AutoloadGenerator.php
@@ -0,0 +1,508 @@
+<?php
+
+/**
+ * Accepts a list of files and directories to search for
+ * php files and generates $wgAutoloadLocalClasses or $wgAutoloadClasses
+ * lines for all detected classes. These lines are written out
+ * to an autoload.php file in the project's provided basedir.
+ *
+ * Usage:
+ *
+ * $gen = new AutoloadGenerator( __DIR__ );
+ * $gen->readDir( __DIR__ . '/includes' );
+ * $gen->readFile( __DIR__ . '/foo.php' );
+ * $gen->getAutoload();
+ */
+class AutoloadGenerator {
+ const FILETYPE_JSON = 'json';
+ const FILETYPE_PHP = 'php';
+
+ /**
+ * @var string Root path of the project being scanned for classes
+ */
+ protected $basepath;
+
+ /**
+ * @var ClassCollector Helper class extracts class names from php files
+ */
+ protected $collector;
+
+ /**
+ * @var array Map of file shortpath to list of FQCN detected within file
+ */
+ protected $classes = [];
+
+ /**
+ * @var string The global variable to write output to
+ */
+ protected $variableName = 'wgAutoloadClasses';
+
+ /**
+ * @var array Map of FQCN to relative path(from self::$basepath)
+ */
+ protected $overrides = [];
+
+ /**
+ * Directories that should be excluded
+ *
+ * @var string[]
+ */
+ protected $excludePaths = [];
+
+ /**
+ * @param string $basepath Root path of the project being scanned for classes
+ * @param array|string $flags
+ *
+ * local - If this flag is set $wgAutoloadLocalClasses will be built instead
+ * of $wgAutoloadClasses
+ */
+ public function __construct( $basepath, $flags = [] ) {
+ if ( !is_array( $flags ) ) {
+ $flags = [ $flags ];
+ }
+ $this->basepath = self::normalizePathSeparator( realpath( $basepath ) );
+ $this->collector = new ClassCollector;
+ if ( in_array( 'local', $flags ) ) {
+ $this->variableName = 'wgAutoloadLocalClasses';
+ }
+ }
+
+ /**
+ * Directories that should be excluded
+ *
+ * @since 1.31
+ * @param string[] $paths
+ */
+ public function setExcludePaths( array $paths ) {
+ foreach ( $paths as $path ) {
+ $this->excludePaths[] = self::normalizePathSeparator( $path );
+ }
+ }
+
+ /**
+ * Whether the file should be excluded
+ *
+ * @param string $path File path
+ * @return bool
+ */
+ private function shouldExclude( $path ) {
+ foreach ( $this->excludePaths as $dir ) {
+ if ( strpos( $path, $dir ) === 0 ) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Force a class to be autoloaded from a specific path, regardless of where
+ * or if it was detected.
+ *
+ * @param string $fqcn FQCN to force the location of
+ * @param string $inputPath Full path to the file containing the class
+ * @throws Exception
+ */
+ public function forceClassPath( $fqcn, $inputPath ) {
+ $path = self::normalizePathSeparator( realpath( $inputPath ) );
+ if ( !$path ) {
+ throw new \Exception( "Invalid path: $inputPath" );
+ }
+ $len = strlen( $this->basepath );
+ if ( substr( $path, 0, $len ) !== $this->basepath ) {
+ throw new \Exception( "Path is not within basepath: $inputPath" );
+ }
+ $shortpath = substr( $path, $len );
+ $this->overrides[$fqcn] = $shortpath;
+ }
+
+ /**
+ * @param string $inputPath Path to a php file to find classes within
+ * @throws Exception
+ */
+ public function readFile( $inputPath ) {
+ // NOTE: do NOT expand $inputPath using realpath(). It is perfectly
+ // reasonable for LocalSettings.php and similar files to be symlinks
+ // to files that are outside of $this->basepath.
+ $inputPath = self::normalizePathSeparator( $inputPath );
+ $len = strlen( $this->basepath );
+ if ( substr( $inputPath, 0, $len ) !== $this->basepath ) {
+ throw new \Exception( "Path is not within basepath: $inputPath" );
+ }
+ if ( $this->shouldExclude( $inputPath ) ) {
+ return;
+ }
+ $result = $this->collector->getClasses(
+ file_get_contents( $inputPath )
+ );
+ if ( $result ) {
+ $shortpath = substr( $inputPath, $len );
+ $this->classes[$shortpath] = $result;
+ }
+ }
+
+ /**
+ * @param string $dir Path to a directory to recursively search
+ * for php files with either .php or .inc extensions
+ */
+ public function readDir( $dir ) {
+ $it = new RecursiveDirectoryIterator(
+ self::normalizePathSeparator( realpath( $dir ) ) );
+ $it = new RecursiveIteratorIterator( $it );
+
+ foreach ( $it as $path => $file ) {
+ $ext = pathinfo( $path, PATHINFO_EXTENSION );
+ // some older files in mw use .inc
+ if ( $ext === 'php' || $ext === 'inc' ) {
+ $this->readFile( $path );
+ }
+ }
+ }
+
+ /**
+ * Builds the updated contents of a JSON registration file, with its
+ * AutoloadClasses field regenerated from the collected classes and overrides.
+ *
+ * @param string $filename Filename of JSON
+ * extension/skin registration file
+ * @return string Updated Json of the file given as the $filename parameter
+ */
+ protected function generateJsonAutoload( $filename ) {
+ $key = 'AutoloadClasses';
+ $json = FormatJson::decode( file_get_contents( $filename ), true );
+ // Drop the old map and start from an empty one, so ksort() below always gets an array.
+ unset( $json[$key] );
+ $json[$key] = [];
+ // Invert path => classes into class => path; substr() drops the leading '/'.
+ foreach ( $this->classes as $path => $contained ) {
+ foreach ( $contained as $fqcn ) {
+ $json[$key][$fqcn] = substr( $path, 1 );
+ }
+ }
+ // $this->overrides is keyed by class name (see forceClassPath()), with the path as value.
+ foreach ( $this->overrides as $fqcn => $path ) {
+ $json[$key][$fqcn] = substr( $path, 1 );
+ }
+
+ // Sorting the list of autoload classes.
+ ksort( $json[$key] );
+
+ // Return the whole JSON file
+ return FormatJson::encode( $json, "\t", FormatJson::ALL_OK ) . "\n";
+ }
+
+ /**
+ * Generates a PHP file setting up autoload information.
+ *
+ * @param string $commandName Command name to include in comment
+ * @param string $filename of PHP file to put autoload information in.
+ * @return string
+ */
+ protected function generatePHPAutoload( $commandName, $filename ) {
+ // No existing JSON file found; update/generate PHP file
+ $content = [];
+
+ // We need to generate a line each rather than exporting the
+ // full array so __DIR__ can be prepended to all the paths
+ $format = "%s => __DIR__ . %s,";
+ foreach ( $this->classes as $path => $contained ) {
+ $exportedPath = var_export( $path, true );
+ foreach ( $contained as $fqcn ) {
+ $content[$fqcn] = sprintf(
+ $format,
+ var_export( $fqcn, true ),
+ $exportedPath
+ );
+ }
+ }
+
+ foreach ( $this->overrides as $fqcn => $path ) {
+ $content[$fqcn] = sprintf(
+ $format,
+ var_export( $fqcn, true ),
+ var_export( $path, true )
+ );
+ }
+
+ // sort for stable output
+ ksort( $content );
+
+ // extensions using this generator are appending to the existing
+ // autoload.
+ if ( $this->variableName === 'wgAutoloadClasses' ) {
+ $op = '+=';
+ } else {
+ $op = '=';
+ }
+
+ $output = implode( "\n\t", $content );
+ return <<<EOD
+<?php
+// This file is generated by $commandName, do not adjust manually
+// phpcs:disable Generic.Files.LineLength
+global \${$this->variableName};
+
+\${$this->variableName} {$op} [
+ {$output}
+];
+
+EOD;
+ }
+
+ /**
+ * Returns all known classes as a string, which can be used to put into a target
+ * file (e.g. extension.json, skin.json or autoload.php)
+ *
+ * @param string $commandName Value used in file comment to direct
+ * developers towards the appropriate way to update the autoload.
+ * @return string
+ */
+ public function getAutoload( $commandName = 'AutoloadGenerator' ) {
+ // We need to check whether an extension.json or skin.json exists or not, and
+ // in case it doesn't, update the autoload.php file.
+
+ $fileinfo = $this->getTargetFileinfo();
+
+ if ( $fileinfo['type'] === self::FILETYPE_JSON ) {
+ return $this->generateJsonAutoload( $fileinfo['filename'] );
+ } else {
+ return $this->generatePHPAutoload( $commandName, $fileinfo['filename'] );
+ }
+ }
+
+ /**
+ * Returns the filename of the extension.json or skin.json, if there's any, or
+ * otherwise the path to the autoload.php file in an array as the "filename"
+ * key and with the type (AutoloadGenerator::FILETYPE_JSON or AutoloadGenerator::FILETYPE_PHP)
+ * of the file as the "type" key.
+ *
+ * @return array
+ */
+ public function getTargetFileinfo() {
+ $fileinfo = [
+ 'filename' => $this->basepath . '/autoload.php',
+ 'type' => self::FILETYPE_PHP
+ ];
+ if ( file_exists( $this->basepath . '/extension.json' ) ) {
+ $fileinfo = [
+ 'filename' => $this->basepath . '/extension.json',
+ 'type' => self::FILETYPE_JSON
+ ];
+ } elseif ( file_exists( $this->basepath . '/skin.json' ) ) {
+ $fileinfo = [
+ 'filename' => $this->basepath . '/skin.json',
+ 'type' => self::FILETYPE_JSON
+ ];
+ }
+
+ return $fileinfo;
+ }
+
+ /**
+ * Ensure that Unix-style path separators ("/") are used in the path.
+ *
+ * @param string $path
+ * @return string
+ */
+ protected static function normalizePathSeparator( $path ) {
+ return str_replace( '\\', '/', $path );
+ }
+
+ /**
+ * Initialize the source files and directories which are used for the MediaWiki default
+ * autoloader in {mw-base-dir}/autoload.php including:
+ * * includes/
+ * * languages/
+ * * maintenance/
+ * * mw-config/
+ * * /*.php
+ */
+ public function initMediaWikiDefault() {
+ foreach ( [ 'includes', 'languages', 'maintenance', 'mw-config' ] as $dir ) {
+ $this->readDir( $this->basepath . '/' . $dir );
+ }
+ foreach ( glob( $this->basepath . '/*.php' ) as $file ) {
+ $this->readFile( $file );
+ }
+ }
+}
+
+/**
+ * Reads PHP code and returns the FQCN of every class defined within it.
+ *
+ * The code is tokenized with token_get_all() and fed through a small state
+ * machine: a "start" token (namespace, class, interface, trait, class_alias,
+ * "::" or "new") opens an expect sequence, and the tokens that follow are
+ * consumed until the sequence is resolved.
+ */
+class ClassCollector {
+
+ /**
+ * @var string Current namespace
+ */
+ protected $namespace = '';
+
+ /**
+ * @var array List of FQCN detected in this pass
+ */
+ protected $classes;
+
+ /**
+ * @var array Token from token_get_all() that started an expect sequence
+ */
+ protected $startToken;
+
+ /**
+ * @var array List of tokens that are members of the current expect sequence
+ */
+ protected $tokens;
+
+ /**
+ * @var array Class alias with target/name fields
+ */
+ protected $alias;
+
+ /**
+ * Resets all per-pass state and scans $code for class-like definitions.
+ *
+ * @param string $code PHP code (including <?php) to detect class names from
+ * @return array List of FQCN detected within the tokens
+ */
+ public function getClasses( $code ) {
+ $this->namespace = '';
+ $this->classes = [];
+ $this->startToken = null;
+ $this->alias = null;
+ $this->tokens = [];
+
+ foreach ( token_get_all( $code ) as $token ) {
+ if ( $this->startToken === null ) {
+ $this->tryBeginExpect( $token );
+ } else {
+ $this->tryEndExpect( $token );
+ }
+ }
+
+ return $this->classes;
+ }
+
+ /**
+ * Determine if $token begins the next expect sequence.
+ *
+ * @param array|string $token Token as returned by token_get_all()
+ */
+ protected function tryBeginExpect( $token ) {
+ if ( is_string( $token ) ) {
+ return;
+ }
+ // Note: When changing class name discovery logic,
+ // AutoLoaderTest.php may also need to be updated.
+ switch ( $token[0] ) {
+ case T_NAMESPACE:
+ case T_CLASS:
+ case T_INTERFACE:
+ case T_TRAIT:
+ case T_DOUBLE_COLON:
+ case T_NEW:
+ $this->startToken = $token;
+ break;
+ case T_STRING:
+ if ( $token[1] === 'class_alias' ) {
+ $this->startToken = $token;
+ $this->alias = [];
+ }
+ }
+ }
+
+ /**
+ * Accepts the next token in an expect sequence
+ *
+ * @param array|string $token Token as returned by token_get_all()
+ */
+ protected function tryEndExpect( $token ) {
+ switch ( $this->startToken[0] ) {
+ case T_DOUBLE_COLON:
+ // Skip over T_CLASS after T_DOUBLE_COLON because this is something like
+ // "self::class" which accesses the class name. It doesn't define a new class.
+ $this->startToken = null;
+ break;
+ case T_NEW:
+ // Skip over T_CLASS after T_NEW because "new class { ... }" is an
+ // anonymous class, which has no name to autoload. Whitespace between
+ // the two keywords is stepped over; the first other token is dropped.
+ if ( !is_array( $token ) || $token[0] !== T_WHITESPACE ) {
+ $this->startToken = null;
+ }
+ break;
+ case T_NAMESPACE:
+ if ( $token === ';' || $token === '{' ) {
+ $this->namespace = $this->implodeTokens() . '\\';
+ } else {
+ $this->tokens[] = $token;
+ }
+ break;
+
+ case T_STRING:
+ if ( $this->alias !== null ) {
+ // Flow 1 - Two string literals:
+ // - T_STRING class_alias
+ // - '('
+ // - T_CONSTANT_ENCAPSED_STRING 'TargetClass'
+ // - ','
+ // - T_WHITESPACE
+ // - T_CONSTANT_ENCAPSED_STRING 'AliasName'
+ // - ')'
+ // Flow 2 - Use of ::class syntax for first parameter
+ // - T_STRING class_alias
+ // - '('
+ // - T_STRING TargetClass
+ // - T_DOUBLE_COLON ::
+ // - T_CLASS class
+ // - ','
+ // - T_WHITESPACE
+ // - T_CONSTANT_ENCAPSED_STRING 'AliasName'
+ // - ')'
+ if ( $token === '(' ) {
+ // Start of a function call to class_alias()
+ $this->alias = [ 'target' => false, 'name' => false ];
+ } elseif ( $token === ',' ) {
+ // Record that we're past the first parameter. The isset() guards
+ // against a ',' seen before '(' has initialised the fields.
+ if ( isset( $this->alias['target'] ) && $this->alias['target'] === false ) {
+ $this->alias['target'] = true;
+ }
+ } elseif ( is_array( $token ) && $token[0] === T_CONSTANT_ENCAPSED_STRING ) {
+ if ( isset( $this->alias['target'] ) && $this->alias['target'] === true ) {
+ // We already saw a first argument, this must be the second.
+ // Strip quotes from the string literal.
+ $this->alias['name'] = substr( $token[1], 1, -1 );
+ }
+ } elseif ( $token === ')' ) {
+ // End of function call. Only record the alias when its name was
+ // given as a string literal; otherwise 'name' is unset or still
+ // false and must not end up in the class list.
+ if ( isset( $this->alias['name'] ) && is_string( $this->alias['name'] ) ) {
+ $this->classes[] = $this->alias['name'];
+ }
+ $this->alias = null;
+ $this->startToken = null;
+ } elseif ( !is_array( $token ) || (
+ $token[0] !== T_STRING &&
+ $token[0] !== T_DOUBLE_COLON &&
+ $token[0] !== T_CLASS &&
+ $token[0] !== T_WHITESPACE
+ ) ) {
+ // Ignore this call to class_alias() - compat/Timestamp.php
+ $this->alias = null;
+ $this->startToken = null;
+ }
+ }
+ break;
+
+ case T_CLASS:
+ case T_INTERFACE:
+ case T_TRAIT:
+ // Collect tokens until the name (first T_STRING) shows up;
+ // implodeTokens() then ends the sequence.
+ $this->tokens[] = $token;
+ if ( is_array( $token ) && $token[0] === T_STRING ) {
+ $this->classes[] = $this->namespace . $this->implodeTokens();
+ }
+ }
+ }
+
+ /**
+ * Returns the string representation of the tokens within the
+ * current expect sequence and resets the sequence.
+ *
+ * @return string
+ */
+ protected function implodeTokens() {
+ $content = [];
+ foreach ( $this->tokens as $token ) {
+ $content[] = is_string( $token ) ? $token : $token[1];
+ }
+
+ $this->tokens = [];
+ $this->startToken = null;
+
+ return trim( implode( '', $content ), " \n\t" );
+ }
+}
diff --git a/www/wiki/includes/utils/AvroValidator.php b/www/wiki/includes/utils/AvroValidator.php
new file mode 100644
index 00000000..153b3135
--- /dev/null
+++ b/www/wiki/includes/utils/AvroValidator.php
@@ -0,0 +1,181 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Generate error strings for data that doesn't match the specified
+ * Avro schema. This is very similar to AvroSchema::is_valid_datum(),
+ * but returns error messages instead of a boolean.
+ *
+ * @since 1.26
+ * @author Erik Bernhardson <ebernhardson@wikimedia.org>
+ * @copyright © 2015 Erik Bernhardson and Wikimedia Foundation.
+ */
+class AvroValidator {
+ /**
+ * Recursively checks $datum against $schema. Container schemas (array,
+ * map, record) collect the errors of their members; scalar schemas
+ * return a single message.
+ *
+ * @param AvroSchema $schema The rules to conform to.
+ * @param mixed $datum The value to validate against $schema.
+ * @return string|string[] An error or list of errors in the
+ * provided $datum. When no errors exist the empty array is
+ * returned.
+ */
+ public static function getErrors( AvroSchema $schema, $datum ) {
+ switch ( $schema->type ) {
+ case AvroSchema::NULL_TYPE:
+ if ( !is_null( $datum ) ) {
+ return self::wrongType( 'null', $datum );
+ }
+ return [];
+ case AvroSchema::BOOLEAN_TYPE:
+ if ( !is_bool( $datum ) ) {
+ return self::wrongType( 'boolean', $datum );
+ }
+ return [];
+ case AvroSchema::STRING_TYPE:
+ case AvroSchema::BYTES_TYPE:
+ if ( !is_string( $datum ) ) {
+ return self::wrongType( 'string', $datum );
+ }
+ return [];
+ case AvroSchema::INT_TYPE:
+ if ( !is_int( $datum ) ) {
+ return self::wrongType( 'integer', $datum );
+ }
+ if ( AvroSchema::INT_MIN_VALUE > $datum
+ || $datum > AvroSchema::INT_MAX_VALUE
+ ) {
+ return self::outOfRange(
+ AvroSchema::INT_MIN_VALUE,
+ AvroSchema::INT_MAX_VALUE,
+ $datum
+ );
+ }
+ return [];
+ case AvroSchema::LONG_TYPE:
+ if ( !is_int( $datum ) ) {
+ return self::wrongType( 'integer', $datum );
+ }
+ if ( AvroSchema::LONG_MIN_VALUE > $datum
+ || $datum > AvroSchema::LONG_MAX_VALUE
+ ) {
+ return self::outOfRange(
+ AvroSchema::LONG_MIN_VALUE,
+ AvroSchema::LONG_MAX_VALUE,
+ $datum
+ );
+ }
+ return [];
+ case AvroSchema::FLOAT_TYPE:
+ case AvroSchema::DOUBLE_TYPE:
+ if ( !is_float( $datum ) && !is_int( $datum ) ) {
+ return self::wrongType( 'float or integer', $datum );
+ }
+ return [];
+ case AvroSchema::ARRAY_SCHEMA:
+ if ( !is_array( $datum ) ) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = [];
+ foreach ( $datum as $d ) {
+ $result = self::getErrors( $schema->items(), $d );
+ if ( $result ) {
+ $errors[] = $result;
+ }
+ }
+ return $errors;
+ case AvroSchema::MAP_SCHEMA:
+ if ( !is_array( $datum ) ) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = [];
+ foreach ( $datum as $k => $v ) {
+ if ( !is_string( $k ) ) {
+ $errors[] = self::wrongType( 'string key', $k );
+ }
+ $result = self::getErrors( $schema->values(), $v );
+ if ( $result ) {
+ $errors[$k] = $result;
+ }
+ }
+ return $errors;
+ case AvroSchema::UNION_SCHEMA:
+ $errors = [];
+ // A separate loop variable keeps the $schema parameter intact.
+ foreach ( $schema->schemas() as $memberSchema ) {
+ $result = self::getErrors( $memberSchema, $datum );
+ if ( !$result ) {
+ // The datum satisfies one member of the union; that is enough.
+ return [];
+ }
+ $errors[] = $result;
+ }
+ if ( $errors ) {
+ return [ "Expected any one of these to be true", $errors ];
+ }
+ return "No schemas provided to union";
+ case AvroSchema::ENUM_SCHEMA:
+ // Read the symbol list through the symbols() accessor both times,
+ // rather than through a same-named property.
+ $symbolList = $schema->symbols();
+ if ( !in_array( $datum, $symbolList ) ) {
+ $symbols = implode( ', ', $symbolList );
+ return "Expected one of $symbols but recieved $datum";
+ }
+ return [];
+ case AvroSchema::FIXED_SCHEMA:
+ if ( !is_string( $datum ) ) {
+ return self::wrongType( 'string', $datum );
+ }
+ $len = strlen( $datum );
+ if ( $len !== $schema->size() ) {
+ return "Expected string of length {$schema->size()}, "
+ . "but recieved one of length $len";
+ }
+ return [];
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ case AvroSchema::REQUEST_SCHEMA:
+ if ( !is_array( $datum ) ) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = [];
+ foreach ( $schema->fields() as $field ) {
+ $name = $field->name();
+ if ( !array_key_exists( $name, $datum ) ) {
+ $errors[$name] = 'Missing expected field';
+ continue;
+ }
+ $result = self::getErrors( $field->type(), $datum[$name] );
+ if ( $result ) {
+ $errors[$name] = $result;
+ }
+ }
+ return $errors;
+ default:
+ return "Unknown avro schema type: {$schema->type}";
+ }
+ }
+
+ /**
+ * Names the type of a value for use in error messages.
+ *
+ * @param mixed $datum
+ * @return string The class name for objects, the gettype() name otherwise
+ */
+ public static function typeOf( $datum ) {
+ return is_object( $datum ) ? get_class( $datum ) : gettype( $datum );
+ }
+
+ /**
+ * Builds the error message for a value of the wrong type.
+ *
+ * @param string $expected Description of the expected type
+ * @param mixed $datum The offending value; only its type is reported
+ * @return string
+ */
+ public static function wrongType( $expected, $datum ) {
+ return "Expected $expected, but recieved " . self::typeOf( $datum );
+ }
+
+ /**
+ * Builds the error message for a numeric value outside [$min, $max].
+ *
+ * @param int $min Lowest accepted value
+ * @param int $max Highest accepted value
+ * @param int $datum The offending value
+ * @return string
+ */
+ public static function outOfRange( $min, $max, $datum ) {
+ return "Expected value between $min and $max, but recieved $datum";
+ }
+}
diff --git a/www/wiki/includes/utils/BatchRowIterator.php b/www/wiki/includes/utils/BatchRowIterator.php
new file mode 100644
index 00000000..60720c87
--- /dev/null
+++ b/www/wiki/includes/utils/BatchRowIterator.php
@@ -0,0 +1,296 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Allows iterating a large number of rows in batches transparently.
+ * By default when iterated over returns the full query result as an
+ * array of rows. Can be wrapped in RecursiveIteratorIterator to
+ * collapse those arrays into a single stream of rows queried in batches.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+class BatchRowIterator implements RecursiveIterator {
+
+ /**
+ * @var IDatabase $db The database to read from
+ */
+ protected $db;
+
+ /**
+ * @var string|array $table The name or names of the table to read from
+ */
+ protected $table;
+
+ /**
+ * @var array $primaryKey The name of the primary key(s)
+ */
+ protected $primaryKey;
+
+ /**
+ * @var int $batchSize The number of rows to fetch per iteration
+ */
+ protected $batchSize;
+
+ /**
+ * @var array $conditions Array of strings containing SQL conditions
+ * to add to the query
+ */
+ protected $conditions = [];
+
+ /**
+ * @var array $joinConditions
+ */
+ protected $joinConditions = [];
+
+ /**
+ * @var array $fetchColumns List of column names to select from the
+ * table suitable for use with IDatabase::select()
+ */
+ protected $fetchColumns;
+
+ /**
+ * @var string $orderBy SQL Order by condition generated from $this->primaryKey
+ */
+ protected $orderBy;
+
+ /**
+ * @var array $current The current iterator value
+ */
+ private $current = [];
+
+ /**
+ * @var int key 0-indexed number of pages fetched since self::reset()
+ */
+ private $key;
+
+ /**
+ * @var array Additional query options
+ */
+ protected $options = [];
+
+ /**
+ * @param IDatabase $db The database to read from
+ * @param string|array $table The name or names of the table to read from
+ * @param string|array $primaryKey The name or names of the primary key columns
+ * @param int $batchSize The number of rows to fetch per iteration
+ * @throws InvalidArgumentException
+ */
+ public function __construct( IDatabase $db, $table, $primaryKey, $batchSize ) {
+ if ( $batchSize < 1 ) {
+ throw new InvalidArgumentException( 'Batch size must be at least 1 row.' );
+ }
+ $this->db = $db;
+ $this->table = $table;
+ $this->primaryKey = (array)$primaryKey;
+ $this->fetchColumns = $this->primaryKey;
+ $this->orderBy = implode( ' ASC,', $this->primaryKey ) . ' ASC';
+ $this->batchSize = $batchSize;
+ }
+
+ /**
+ * @param array $conditions Query conditions suitable for use with
+ * IDatabase::select
+ */
+ public function addConditions( array $conditions ) {
+ $this->conditions = array_merge( $this->conditions, $conditions );
+ }
+
+ /**
+ * @param array $options Query options suitable for use with
+ * IDatabase::select
+ */
+ public function addOptions( array $options ) {
+ $this->options = array_merge( $this->options, $options );
+ }
+
+ /**
+ * @param array $conditions Query join conditions suitable for use
+ * with IDatabase::select
+ */
+ public function addJoinConditions( array $conditions ) {
+ $this->joinConditions = array_merge( $this->joinConditions, $conditions );
+ }
+
+ /**
+ * @param array $columns List of column names to select from the
+ * table suitable for use with IDatabase::select()
+ */
+ public function setFetchColumns( array $columns ) {
+ // If it's not the all column selector merge in the primary keys we need
+ if ( count( $columns ) === 1 && reset( $columns ) === '*' ) {
+ $this->fetchColumns = $columns;
+ } else {
+ $this->fetchColumns = array_unique( array_merge(
+ $this->primaryKey,
+ $columns
+ ) );
+ }
+ }
+
+ /**
+ * Extracts the primary key(s) from a database row.
+ *
+ * @param stdClass $row An individual database row from this iterator
+ * @return array Map of primary key column to value within the row
+ */
+ public function extractPrimaryKeys( $row ) {
+ $pk = [];
+ foreach ( $this->primaryKey as $alias => $column ) {
+ $name = is_numeric( $alias ) ? $column : $alias;
+ $pk[$name] = $row->{$name};
+ }
+ return $pk;
+ }
+
+ /**
+ * @return array The most recently fetched set of rows from the database
+ */
+ public function current() {
+ return $this->current;
+ }
+
+ /**
+ * @return int 0-indexed count of the page number fetched
+ */
+ public function key() {
+ return $this->key;
+ }
+
+ /**
+ * Reset the iterator to the beginning of the table.
+ */
+ public function rewind() {
+ $this->key = -1; // self::next() will turn this into 0
+ $this->current = [];
+ $this->next();
+ }
+
+ /**
+ * @return bool True when the iterator is in a valid state
+ */
+ public function valid() {
+ return (bool)$this->current;
+ }
+
+ /**
+ * @return bool True when this result set has rows
+ */
+ public function hasChildren() {
+ return $this->current && count( $this->current );
+ }
+
+ /**
+ * @return RecursiveIterator
+ */
+ public function getChildren() {
+ return new NotRecursiveIterator( new ArrayIterator( $this->current ) );
+ }
+
+ /**
+ * Fetch the next set of rows from the database.
+ */
+ public function next() {
+ $res = $this->db->select(
+ $this->table,
+ $this->fetchColumns,
+ $this->buildConditions(),
+ __METHOD__,
+ [
+ 'LIMIT' => $this->batchSize,
+ 'ORDER BY' => $this->orderBy,
+ ] + $this->options,
+ $this->joinConditions
+ );
+
+ // The iterator is converted to an array because in addition to
+ // returning it in self::current() we need to use the end value
+ // in self::buildConditions()
+ $this->current = iterator_to_array( $res );
+ $this->key++;
+ }
+
+ /**
+ * Uses the primary key list and the maximal result row from the
+ * previous iteration to build an SQL condition sufficient for
+ * selecting the next page of results. All except the final key use
+ * `=` conditions while the final key uses a `>` condition
+ *
+ * Example output:
+ * [ '( foo = 42 AND bar > 7 ) OR ( foo > 42 )' ]
+ *
+ * @return array The SQL conditions necessary to select the next set
+ * of rows in the batched query
+ */
+ protected function buildConditions() {
+ if ( !$this->current ) {
+ return $this->conditions;
+ }
+
+ $maxRow = end( $this->current );
+ $maximumValues = [];
+ foreach ( $this->primaryKey as $alias => $column ) {
+ $name = is_numeric( $alias ) ? $column : $alias;
+ $maximumValues[$column] = $this->db->addQuotes( $maxRow->{$name} );
+ }
+
+ $pkConditions = [];
+ // For example: If we have 3 primary keys
+ // first run through will generate
+ // col1 = 4 AND col2 = 7 AND col3 > 1
+ // second run through will generate
+ // col1 = 4 AND col2 > 7
+ // and the final run through will generate
+ // col1 > 4
+ while ( $maximumValues ) {
+ $pkConditions[] = $this->buildGreaterThanCondition( $maximumValues );
+ array_pop( $maximumValues );
+ }
+
+ $conditions = $this->conditions;
+ $conditions[] = sprintf( '( %s )', implode( ' ) OR ( ', $pkConditions ) );
+
+ return $conditions;
+ }
+
+ /**
+ * Given an array of column names and their maximum value generate
+ * an SQL condition where all keys except the last match $quotedMaximumValues
+ * exactly and the last column is greater than the matching value in
+ * $quotedMaximumValues
+ *
+ * @param array $quotedMaximumValues The maximum values quoted with
+ * $this->db->addQuotes()
+ * @return string An SQL condition that will select rows where all
+ * columns match the maximum value exactly except the last column
+ * which must be greater than the provided maximum value
+ */
+ protected function buildGreaterThanCondition( array $quotedMaximumValues ) {
+ $keys = array_keys( $quotedMaximumValues );
+ $lastColumn = end( $keys );
+ $lastValue = array_pop( $quotedMaximumValues );
+ $conditions = [];
+ foreach ( $quotedMaximumValues as $column => $value ) {
+ $conditions[] = "$column = $value";
+ }
+ $conditions[] = "$lastColumn > $lastValue";
+
+ return implode( ' AND ', $conditions );
+ }
+}
diff --git a/www/wiki/includes/utils/BatchRowUpdate.php b/www/wiki/includes/utils/BatchRowUpdate.php
new file mode 100644
index 00000000..f42b5a07
--- /dev/null
+++ b/www/wiki/includes/utils/BatchRowUpdate.php
@@ -0,0 +1,128 @@
+<?php
+/*
+ * Ties together the batch update components to provide a composable
+ * method of batch updating rows in a database. To use create a class
+ * implementing the RowUpdateGenerator interface and configure the
+ * BatchRowIterator and BatchRowWriter for access to the correct table.
+ * The components will handle reading, writing, and waiting for replica DBs
+ * while the generator implementation handles generating update arrays
+ * for singular rows.
+ *
+ * Instantiate:
+ * $updater = new BatchRowUpdate(
+ * new BatchRowIterator( $dbr, 'some_table', 'primary_key_column', 500 ),
+ * new BatchRowWriter( $dbw, 'some_table', 'clusterName' ),
+ * new MyImplementationOfRowUpdateGenerator
+ * );
+ *
+ * Run:
+ * $updater->execute();
+ *
+ * An example maintenance script utilizing the BatchRowUpdate can be
+ * located in the Echo extension file maintenance/updateSchema.php
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+class BatchRowUpdate {
+ /**
+ * @var BatchRowIterator $reader Iterator that returns an array of
+ * database rows
+ */
+ protected $reader;
+
+ /**
+ * @var BatchRowWriter $writer Writer capable of pushing row updates
+ * to the database
+ */
+ protected $writer;
+
+ /**
+ * @var RowUpdateGenerator $generator Generates single row updates
+ * based on the rows content
+ */
+ protected $generator;
+
+ /**
+ * @var callable $output Output callback
+ */
+ protected $output;
+
+ /**
+ * @param BatchRowIterator $reader Iterator that returns an
+ * array of database rows
+ * @param BatchRowWriter $writer Writer capable of pushing
+ * row updates to the database
+ * @param RowUpdateGenerator $generator Generates single row updates
+ * based on the rows content
+ */
+ public function __construct(
+ BatchRowIterator $reader, BatchRowWriter $writer, RowUpdateGenerator $generator
+ ) {
+ $this->reader = $reader;
+ $this->writer = $writer;
+ $this->generator = $generator;
+ $this->output = function () {
+ }; // nop
+ }
+
+ /**
+ * Runs the batch update process
+ */
+ public function execute() {
+ foreach ( $this->reader as $rows ) {
+ $updates = [];
+ foreach ( $rows as $row ) {
+ $update = $this->generator->update( $row );
+ if ( $update ) {
+ $updates[] = [
+ 'primaryKey' => $this->reader->extractPrimaryKeys( $row ),
+ 'changes' => $update,
+ ];
+ }
+ }
+
+ if ( $updates ) {
+ $this->output( "Processing " . count( $updates ) . " rows\n" );
+ $this->writer->write( $updates );
+ }
+ }
+
+ $this->output( "Completed\n" );
+ }
+
+ /**
+ * Accepts a callable which will receive a single parameter
+ * containing string status updates
+ *
+ * @param callable $output A callback taking a single string
+ * parameter to output
+ */
+ public function setOutput( callable $output ) {
+ $this->output = $output;
+ }
+
+ /**
+ * Write out a status update
+ *
+ * @param string $text The value to print
+ */
+ protected function output( $text ) {
+ call_user_func( $this->output, $text );
+ }
+}
diff --git a/www/wiki/includes/utils/BatchRowWriter.php b/www/wiki/includes/utils/BatchRowWriter.php
new file mode 100644
index 00000000..c146e964
--- /dev/null
+++ b/www/wiki/includes/utils/BatchRowWriter.php
@@ -0,0 +1,75 @@
+<?php
+/**
+ * Updates database rows by primary key in batches.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use MediaWiki\MediaWikiServices;
+use Wikimedia\Rdbms\IDatabase;
+
+class BatchRowWriter {
+	/**
+	 * @var IDatabase $db The database to write to
+	 */
+	protected $db;
+
+	/**
+	 * @var string $table The name of the table to update
+	 */
+	protected $table;
+
+	/**
+	 * @var string|bool $clusterName Cluster name for LBFactory, false if none given
+	 */
+	protected $clusterName;
+
+	/**
+	 * @param IDatabase $db The database to write to
+	 * @param string $table The name of the table to update
+	 * @param string|bool $clusterName A cluster name valid for use with LBFactory
+	 */
+	public function __construct( IDatabase $db, $table, $clusterName = false ) {
+		$this->db = $db;
+		$this->table = $table;
+		$this->clusterName = $clusterName; // stored only; write() in this class never reads it
+	}
+
+	/**
+	 * @param array $updates Array of arrays each containing two keys, 'primaryKey'
+	 *  and 'changes'. 'primaryKey' must contain a map of column names to values
+	 *  sufficient to uniquely identify the row. 'changes' must contain a map of
+	 *  column names to update values to apply to the row.
+	 */
+	public function write( array $updates ) {
+		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+		$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
+
+		foreach ( $updates as $update ) { // one update() call per entry
+			$this->db->update(
+				$this->table,
+				$update['changes'],
+				$update['primaryKey'],
+				__METHOD__
+			);
+		}
+
+		$lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
+	}
+}
diff --git a/www/wiki/includes/utils/ExecutableFinder.php b/www/wiki/includes/utils/ExecutableFinder.php
new file mode 100644
index 00000000..78b3f8e2
--- /dev/null
+++ b/www/wiki/includes/utils/ExecutableFinder.php
@@ -0,0 +1,115 @@
+<?php
+/**
+ * Copyright (C) 2017 Kunal Mehta <legoktm@member.fsf.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+use MediaWiki\Shell\Shell;
+
+/**
+ * Utility class to find executables in likely places
+ *
+ * @since 1.31
+ */
+class ExecutableFinder {
+
+	/**
+	 * Get an array of likely places we can find executables. Check a bunch
+	 * of known Unix-like defaults, as well as the PATH environment variable
+	 * (which should maybe make it work for Windows?). Empty PATH entries (or an
+	 * unset PATH) are skipped instead of yielding an empty directory name.
+	 * @return array
+	 */
+	protected static function getPossibleBinPaths() {
+		return array_unique( array_merge(
+			[ '/usr/bin', '/bin', '/usr/local/bin', '/opt/csw/bin',
+			'/usr/gnu/bin', '/usr/sfw/bin', '/sw/bin', '/opt/local/bin' ],
+			array_filter( explode( PATH_SEPARATOR, (string)getenv( 'PATH' ) ), 'strlen' )
+		) );
+	}
+
+ /**
+ * Search a path for any of the given executable names. Returns the
+ * executable name if found. Also checks the version string returned
+ * by each executable.
+ *
+ * Used only by environment checks.
+ *
+ * @param string $path Path to search
+ * @param string $name Executable name to look for
+ * @param array|bool $versionInfo False or array with two members:
+ * 0 => Parameter to pass to binary for version check (e.g. --version)
+ * 1 => String to compare the output with
+ *
+ * If $versionInfo is not false, only executables with a version
+ * matching $versionInfo[1] will be returned.
+ * @return bool|string
+ */
+	protected static function findExecutable( $path, $name, $versionInfo = false ) {
+		$candidate = $path . DIRECTORY_SEPARATOR . $name;
+
+		// Warnings raised by is_executable() are deliberately suppressed for this probe.
+		Wikimedia\suppressWarnings();
+		$isExecutable = is_executable( $candidate );
+		Wikimedia\restoreWarnings();
+
+		if ( !$isExecutable ) {
+			return false;
+		}
+		if ( !$versionInfo ) {
+			// No version constraint: any executable with this name will do.
+			return $candidate;
+		}
+
+		// Run the binary with the version parameter and look for the expected marker.
+		$result = Shell::command( $candidate, $versionInfo[0] )
+			->includeStderr()->execute();
+		return strstr( $result->getStdout(), $versionInfo[1] ) !== false ? $candidate : false;
+	}
+
+ /**
+ * Same as locateExecutable(), but checks in getPossibleBinPaths() by default
+ * @see locateExecutable()
+ * @param string|string[] $names Array of possible names.
+ * @param array|bool $versionInfo Default: false or array with two members:
+ * 0 => Parameter to run for version check, e.g. '--version'
+ * 1 => String to compare the output with
+ *
+ * If $versionInfo is not false, only executables with a version
+ * matching $versionInfo[1] will be returned.
+ * @return bool|string
+ */
+	public static function findInDefaultPaths( $names, $versionInfo = false ) {
+		if ( Shell::isDisabled() ) {
+			// If we can't shell out, there's no point looking for executables
+			return false;
+		}
+
+		$paths = self::getPossibleBinPaths();
+		foreach ( (array)$names as $name ) { // a single string is treated as a one-element list
+			foreach ( $paths as $path ) {
+				$exe = self::findExecutable( $path, $name, $versionInfo );
+				if ( $exe !== false ) {
+					return $exe; // first match wins: earlier names, then earlier paths
+				}
+			}
+		}
+
+		return false;
+	}
+
+}
diff --git a/www/wiki/includes/utils/FileContentsHasher.php b/www/wiki/includes/utils/FileContentsHasher.php
new file mode 100644
index 00000000..e390f217
--- /dev/null
+++ b/www/wiki/includes/utils/FileContentsHasher.php
@@ -0,0 +1,114 @@
+<?php
+/**
+ * Generate hash digests of file contents to help with cache invalidation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+class FileContentsHasher {
+
+ /** @var BagOStuff */
+ protected $cache;
+
+ /** @var FileContentsHasher */
+ private static $instance;
+
+	public function __construct() {
+		$this->cache = ObjectCache::getLocalServerInstance( 'hash' ); // NOTE(review): 'hash' looks like a fallback cache type — confirm
+	}
+
+ /**
+ * Get the singleton instance of this class.
+ *
+ * @return FileContentsHasher
+ */
+	public static function singleton() {
+		if ( self::$instance ) {
+			return self::$instance;
+		}
+		self::$instance = new self; // first call: create and remember the shared instance
+		return self::$instance;
+	}
+
+ /**
+ * Get a hash of a file's contents, either by retrieving a previously-
+ * computed hash from the cache, or by computing a hash from the file.
+ *
+ * @private
+ * @param string $filePath Full path to the file.
+ * @param string $algo Name of selected hashing algorithm.
+ * @return string|bool Hash of file contents, or false if the file could not be read.
+ */
+	public function getFileContentsHashInternal( $filePath, $algo = 'md4' ) {
+		$mtime = filemtime( $filePath );
+		if ( $mtime === false ) {
+			return false;
+		}
+		// The mtime is part of the cache key, so a modified file gets a fresh entry.
+		$cacheKey = $this->cache->makeGlobalKey( __CLASS__, $filePath, $mtime, $algo );
+		$hash = $this->cache->get( $cacheKey );
+
+		if ( $hash ) {
+			return $hash;
+		}
+		// Cache miss: read the whole file and hash its contents.
+		$contents = file_get_contents( $filePath );
+		if ( $contents === false ) {
+			return false;
+		}
+
+		$hash = hash( $algo, $contents );
+		$this->cache->set( $cacheKey, $hash, 60 * 60 * 24 ); // 24h
+
+		return $hash;
+	}
+
+	/**
+	 * Get a hash of the combined contents of one or more files, either by
+	 * retrieving a previously-computed hash from the cache, or by computing
+	 * a hash from the files. PHP warnings are suppressed while hashing.
+	 *
+	 * @param string|string[] $filePaths One or more file paths.
+	 * @param string $algo Name of selected hashing algorithm.
+	 * @return string|bool Hash of files' contents, or false if no file could be read.
+	 */
+	public static function getFileContentsHash( $filePaths, $algo = 'md4' ) {
+		$instance = self::singleton();
+		$filePaths = (array)$filePaths;
+
+		Wikimedia\suppressWarnings();
+		try {
+			if ( count( $filePaths ) === 1 ) {
+				// reset() also copes with a single element whose key is not 0.
+				return $instance->getFileContentsHashInternal( reset( $filePaths ), $algo );
+			}
+
+			// Sort so the combined hash does not depend on the order the paths were given.
+			sort( $filePaths );
+			$hashes = array_map( function ( $filePath ) use ( $instance, $algo ) {
+				return $instance->getFileContentsHashInternal( $filePath, $algo ) ?: '';
+			}, $filePaths );
+		} finally {
+			// Always re-enable warnings, even if hashing throws.
+			Wikimedia\restoreWarnings();
+		}
+
+		// An empty string here means none of the files could be hashed.
+		$hashes = implode( '', $hashes );
+		return $hashes ? hash( $algo, $hashes ) : false;
+	}
+}
diff --git a/www/wiki/includes/utils/MWCryptHKDF.php b/www/wiki/includes/utils/MWCryptHKDF.php
new file mode 100644
index 00000000..1c8d4861
--- /dev/null
+++ b/www/wiki/includes/utils/MWCryptHKDF.php
@@ -0,0 +1,103 @@
+<?php
+/**
+ * Extract-and-Expand Key Derivation Function (HKDF). A cryptographically
+ * secure key expansion function based on RFC 5869.
+ *
+ * This relies on the secrecy of $wgSecretKey (by default), or $wgHKDFSecret.
+ * By default, sha256 is used as the underlying hashing algorithm, but any other
+ * algorithm can be used. Finding the secret key from the output would require
+ * an attacker to discover the input key (the PRK) to the hmac that generated
+ * the output, and discover the particular data, hmac'ed with an evolving key
+ * (salt), to produce the PRK. Even with md5, no publicly known attacks make
+ * this currently feasible.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Chris Steipp
+ * @file
+ */
+
+use MediaWiki\MediaWikiServices;
+
+class MWCryptHKDF {
+
+	/**
+	 * Return a singleton instance, based on the global configs.
+	 * @return CryptHKDF
+	 */
+	protected static function singleton() {
+		return MediaWikiServices::getInstance()->getCryptHKDF();
+	}
+
+	/**
+	 * RFC5869 defines HKDF in 2 steps, extraction and expansion.
+	 * From http://eprint.iacr.org/2010/264.pdf:
+	 *
+	 * The scheme HKDF is specified as:
+	 * 	HKDF(XTS, SKM, CTXinfo, L) = K(1) || K(2) || ... || K(t)
+	 * where the values K(i) are defined as follows:
+	 * 	PRK = HMAC(XTS, SKM)
+	 * 	K(1) = HMAC(PRK, CTXinfo || 0);
+	 * 	K(i+1) = HMAC(PRK, K(i) || CTXinfo || i), 1 <= i < t;
+	 * where t = [L/k] and the value K(t) is truncated to its first d = L mod k bits;
+	 * the counter i is non-wrapping and of a given fixed size, e.g., a single byte.
+	 * Note that the length of the HMAC output is the same as its key length and therefore
+	 * the scheme is well defined.
+	 *
+	 * XTS is the "extractor salt"
+	 * SKM is the "secret keying material"
+	 *
+	 * N.B. http://eprint.iacr.org/2010/264.pdf seems to differ from RFC 5869 in that the test
+	 * vectors from RFC 5869 only work if K(0) = '' and K(1) = HMAC(PRK, K(0) || CTXinfo || 1)
+	 *
+	 * @param string $hash The hashing function to use (e.g., sha256)
+	 * @param string $ikm The input keying material
+	 * @param string $salt The salt to add to the ikm, to get the prk
+	 * @param string $info Optional context (change the output without affecting
+	 *  the randomness properties of the output)
+	 * @param int $L Number of bytes to return
+	 * @return string Cryptographically secure pseudorandom binary string
+	 */
+	public static function HKDF( $hash, $ikm, $salt, $info, $L ) {
+		return CryptHKDF::HKDF( $hash, $ikm, $salt, $info, $L ); // static delegate; singleton() is not used
+	}
+
+	/**
+	 * Generate cryptographically random data and return it in raw binary form.
+	 *
+	 * @param int $bytes The number of bytes of random data to generate
+	 * @param string $context String to mix into HMAC context
+	 * @return string Binary string of length $bytes
+	 */
+	public static function generate( $bytes, $context ) {
+		return self::singleton()->generate( $bytes, $context );
+	}
+
+	/**
+	 * Generate cryptographically random data and return it in hexadecimal string format.
+	 * Generates ceil( $chars / 2 ) bytes, hex-encodes them and truncates to $chars characters.
+	 *
+	 * @param int $chars The number of hex chars of random data to generate
+	 * @param string $context String to mix into HMAC context
+	 * @return string Random hex characters, $chars long
+	 */
+	public static function generateHex( $chars, $context = '' ) {
+		$bytes = ceil( $chars / 2 ); // two hex chars per byte, rounded up
+		$hex = bin2hex( self::singleton()->generate( $bytes, $context ) );
+		return substr( $hex, 0, $chars ); // drops the surplus char when $chars is odd
+	}
+
+}
diff --git a/www/wiki/includes/utils/MWCryptRand.php b/www/wiki/includes/utils/MWCryptRand.php
new file mode 100644
index 00000000..58189580
--- /dev/null
+++ b/www/wiki/includes/utils/MWCryptRand.php
@@ -0,0 +1,79 @@
+<?php
+/**
+ * A cryptographic random generator class used for generating secret keys
+ *
+ * This is based in part on Drupal code as well as what we used in our own code
+ * prior to introduction of this class.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Daniel Friesen
+ * @file
+ */
+
+use MediaWiki\MediaWikiServices;
+
+class MWCryptRand {
+	/**
+	 * @return CryptRand The CryptRand service obtained from MediaWikiServices
+	 */
+	protected static function singleton() {
+		return MediaWikiServices::getInstance()->getCryptRand();
+	}
+
+	/**
+	 * Return a boolean indicating whether or not the source used for cryptographic
+	 * random bytes generation in the previously run generate* call
+	 * was cryptographically strong.
+	 *
+	 * @return bool Returns true if the source was strong, false if not.
+	 */
+	public static function wasStrong() {
+		return self::singleton()->wasStrong();
+	}
+
+	/**
+	 * Generate a run of (ideally) cryptographically random data and return
+	 * it in raw binary form.
+	 * You can use MWCryptRand::wasStrong() if you wish to know if the source used
+	 * was cryptographically strong.
+	 *
+	 * @param int $bytes The number of bytes of random data to generate
+	 * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+	 *                          strong sources of entropy even if reading from them may steal
+	 *                          more entropy from the system than optimal.
+	 * @return string Raw binary random data
+	 */
+	public static function generate( $bytes, $forceStrong = false ) {
+		return self::singleton()->generate( $bytes, $forceStrong ); // plain delegation to the service
+	}
+
+	/**
+	 * Generate a run of (ideally) cryptographically random data and return
+	 * it in hexadecimal string format.
+	 * You can use MWCryptRand::wasStrong() if you wish to know if the source used
+	 * was cryptographically strong.
+	 *
+	 * @param int $chars The number of hex chars of random data to generate
+	 * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+	 *                          strong sources of entropy even if reading from them may steal
+	 *                          more entropy from the system than optimal.
+	 * @return string Hexadecimal random data
+	 */
+	public static function generateHex( $chars, $forceStrong = false ) {
+		return self::singleton()->generateHex( $chars, $forceStrong ); // plain delegation to the service
+	}
+}
diff --git a/www/wiki/includes/utils/MWFileProps.php b/www/wiki/includes/utils/MWFileProps.php
new file mode 100644
index 00000000..9d05c6ab
--- /dev/null
+++ b/www/wiki/includes/utils/MWFileProps.php
@@ -0,0 +1,145 @@
+<?php
+/**
+ * MimeMagic helper functions for detecting and dealing with MIME types.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * MimeMagic helper wrapper
+ *
+ * @since 1.28
+ */
+class MWFileProps {
+	/** @var MimeAnalyzer */
+ private $magic;
+
+	/**
+	 * @param MimeAnalyzer $magic Analyzer used to guess and refine MIME and media types
+	 */
+	public function __construct( MimeAnalyzer $magic ) {
+		$this->magic = $magic;
+	}
+
+ /**
+ * Get an associative array containing information about
+ * a file with the given storage path.
+ *
+ * Resulting array fields include:
+ * - fileExists
+ * - size (filesize in bytes)
+ * - mime (as major/minor)
+ * - media_type (value to be used with the MEDIATYPE_xxx constants)
+ * - metadata (handler specific)
+ * - sha1 (in base 36)
+ * - width
+ * - height
+ * - bits (bitrate)
+ * - file-mime
+ * - major_mime
+ * - minor_mime
+ *
+ * @param string $path Filesystem path to a file
+ * @param string|bool $ext The file extension, or true to extract it from the filename.
+ * Set it to false to ignore the extension.
+ * @return array
+ * @since 1.28
+ */
+	public function getPropsFromPath( $path, $ext ) {
+		$fsFile = new FSFile( $path );
+
+		$info = $this->newPlaceholderProps();
+		$info['fileExists'] = $fsFile->exists();
+		if ( $info['fileExists'] ) { // otherwise the placeholders (fileExists = false) are returned
+			$info['size'] = $fsFile->getSize(); // bytes
+			$info['sha1'] = $fsFile->getSha1Base36();
+
+			# MIME type according to file contents
+			$info['file-mime'] = $this->magic->guessMimeType( $path, false );
+			# Logical MIME type, refined using the file extension
+			$ext = ( $ext === true ) ? FileBackend::extensionFromPath( $path ) : $ext;
+			$info['mime'] = $this->magic->improveTypeFromExtension( $info['file-mime'], $ext );
+
+			list( $info['major_mime'], $info['minor_mime'] ) = File::splitMime( $info['mime'] );
+			$info['media_type'] = $this->magic->getMediaType( $path, $info['mime'] );
+
+			# Height, width and metadata, only when a media handler exists for the MIME type
+			$handler = MediaHandler::getHandler( $info['mime'] );
+			if ( $handler ) {
+				$info['metadata'] = $handler->getMetadata( $fsFile, $path );
+				/** @noinspection PhpMethodParametersCountMismatchInspection */
+				$gis = $handler->getImageSize( $fsFile, $path, $info['metadata'] );
+				if ( is_array( $gis ) ) {
+					$info = $this->extractImageSizeInfo( $gis ) + $info; // size values override placeholders
+				}
+			}
+		}
+
+		return $info;
+	}
+
+	/**
+	 * Pick width, height and bits out of an image size array
+	 *
+	 * @param array $gis Size array: index 0 is the width, 1 the height, 'bits' is optional
+	 * @return array Map with the keys 'width', 'height' and 'bits' (0 when not provided)
+	 */
+	private function extractImageSizeInfo( array $gis ) {
+		# NOTE: $gis[2] contains a code for the image type. This is no longer used.
+		# 'bits' is optional in the input and falls back to 0.
+		$bits = 0;
+		if ( isset( $gis['bits'] ) ) {
+			$bits = $gis['bits'];
+		}
+		return [
+			'width' => $gis[0],
+			'height' => $gis[1],
+			'bits' => $bits,
+		];
+	}
+
+ /**
+ * Empty place holder props for non-existing files
+ *
+ * Resulting array fields include:
+ * - fileExists
+ * - size (filesize in bytes)
+ * - mime (as major/minor)
+ * - media_type (value to be used with the MEDIATYPE_xxx constants)
+ * - metadata (handler specific)
+ * - sha1 (in base 36)
+ * - width
+ * - height
+ * - bits (bitrate)
+ * - file-mime
+ * - major_mime
+ * - minor_mime
+ *
+ * @return array
+ * @since 1.28
+ */
+	public function newPlaceholderProps() {
+		return FSFile::placeholderProps() + [ // on duplicate keys the FSFile values win
+			'metadata' => '',
+			'width' => 0,
+			'height' => 0,
+			'bits' => 0,
+			'media_type' => MEDIATYPE_UNKNOWN
+		];
+	}
+}
diff --git a/www/wiki/includes/utils/MWRestrictions.php b/www/wiki/includes/utils/MWRestrictions.php
new file mode 100644
index 00000000..caf88a15
--- /dev/null
+++ b/www/wiki/includes/utils/MWRestrictions.php
@@ -0,0 +1,147 @@
+<?php
+/**
+ * A class to check request restrictions expressed as a JSON object
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ * A class to check request restrictions expressed as a JSON object
+ */
+class MWRestrictions {
+	/** @var string[] Allowed IP addresses or ranges; the default covers all of IPv4 and IPv6 */
+	private $ipAddresses = [ '0.0.0.0/0', '::/0' ];
+
+	/**
+	 * @param array|null $restrictions Restrictions to load, or null to keep the defaults
+	 * @throws InvalidArgumentException
+	 */
+	protected function __construct( array $restrictions = null ) {
+		if ( $restrictions !== null ) {
+			$this->loadFromArray( $restrictions );
+		}
+	}
+
+	/**
+	 * @return MWRestrictions Instance using the default IP list
+	 */
+	public static function newDefault() {
+		return new self();
+	}
+
+	/**
+	 * @param array $restrictions
+	 * @return MWRestrictions
+	 * @throws InvalidArgumentException
+	 */
+	public static function newFromArray( array $restrictions ) {
+		return new self( $restrictions );
+	}
+
+	/**
+	 * @param string $json JSON representation of the restrictions
+	 * @return MWRestrictions
+	 * @throws InvalidArgumentException
+	 */
+	public static function newFromJson( $json ) {
+		$restrictions = FormatJson::decode( $json, true );
+		if ( !is_array( $restrictions ) ) {
+			throw new InvalidArgumentException( 'Invalid restrictions JSON' );
+		}
+		return new self( $restrictions );
+	}
+
+	private function loadFromArray( array $restrictions ) { // validates, then replaces $this->ipAddresses
+		static $validKeys = [ 'IPAddresses' ]; // keys that may appear
+		static $neededKeys = [ 'IPAddresses' ]; // keys that must appear
+
+		$keys = array_keys( $restrictions );
+		$invalidKeys = array_diff( $keys, $validKeys );
+		if ( $invalidKeys ) {
+			throw new InvalidArgumentException(
+				'Array contains invalid keys: ' . implode( ', ', $invalidKeys )
+			);
+		}
+		$missingKeys = array_diff( $neededKeys, $keys );
+		if ( $missingKeys ) {
+			throw new InvalidArgumentException(
+				'Array is missing required keys: ' . implode( ', ', $missingKeys )
+			);
+		}
+
+		if ( !is_array( $restrictions['IPAddresses'] ) ) {
+			throw new InvalidArgumentException( 'IPAddresses is not an array' );
+		}
+		foreach ( $restrictions['IPAddresses'] as $ip ) {
+			if ( !\IP::isIPAddress( $ip ) ) {
+				throw new InvalidArgumentException( "Invalid IP address: $ip" );
+			}
+		}
+		$this->ipAddresses = $restrictions['IPAddresses']; // assigned only after every check passed
+	}
+
+	/**
+	 * Return the restrictions as an array
+	 * @return array
+	 */
+	public function toArray() {
+		return [
+			'IPAddresses' => $this->ipAddresses,
+		];
+	}
+
+	/**
+	 * Return the restrictions as a JSON string
+	 * @param bool|string $pretty Pretty-print the JSON output, see FormatJson::encode
+	 * @return string
+	 */
+	public function toJson( $pretty = false ) {
+		return FormatJson::encode( $this->toArray(), $pretty, FormatJson::ALL_OK );
+	}
+
+	public function __toString() {
+		return $this->toJson(); // toJson() with its default, non-pretty output
+	}
+
+	/**
+	 * Test against the passed WebRequest
+	 * @param WebRequest $request
+	 * @return Status
+	 */
+	public function check( WebRequest $request ) {
+		$ok = [
+			'ip' => $this->checkIP( $request->getIP() ),
+		];
+		$status = Status::newGood();
+		$status->setResult( $ok === array_filter( $ok ), $ok ); // first argument is true only if every entry of $ok is truthy
+		return $status;
+	}
+
+	/**
+	 * Test an IP address
+	 * @param string $ip
+	 * @return bool True if $ip falls within at least one of the configured ranges
+	 */
+	public function checkIP( $ip ) {
+		foreach ( $this->ipAddresses as $range ) {
+			if ( \IP::isInRange( $ip, $range ) ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+}
diff --git a/www/wiki/includes/utils/README b/www/wiki/includes/utils/README
new file mode 100644
index 00000000..b5b8ec88
--- /dev/null
+++ b/www/wiki/includes/utils/README
@@ -0,0 +1,9 @@
+The classes in this directory are general utilities for use by any part of
+MediaWiki. They do not favour any particular user interface and are not
+constrained to serve any particular feature. This is similar to includes/libs,
+except that some dependency on the MediaWiki framework (such as the use of
+MWException, Status or wfDebug()) disqualifies them from use outside of
+MediaWiki without modification.
+
+Utilities should not use global configuration variables, rather they should rely
+on the caller to configure their behaviour.
diff --git a/www/wiki/includes/utils/RowUpdateGenerator.php b/www/wiki/includes/utils/RowUpdateGenerator.php
new file mode 100644
index 00000000..342dffd6
--- /dev/null
+++ b/www/wiki/includes/utils/RowUpdateGenerator.php
@@ -0,0 +1,39 @@
+<?php
+/**
+ * Interface for generating updates to single rows in the database.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+interface RowUpdateGenerator {
+	/**
+	 * Given a database row, generates an array mapping the names of the columns
+	 * that need to change to their updated values.
+	 *
+	 * Sample Response:
+	 *   return [
+	 *       'some_col' => 'new value',
+	 *       'other_col' => 99,
+	 *   ];
+	 *
+	 * @param stdClass $row A row from the database
+	 * @return array Map of column names to their updated values within the
+	 *  database row. When no update is required returns an empty array.
+	 */
+	public function update( $row );
+}
diff --git a/www/wiki/includes/utils/UIDGenerator.php b/www/wiki/includes/utils/UIDGenerator.php
new file mode 100644
index 00000000..4d5c3af8
--- /dev/null
+++ b/www/wiki/includes/utils/UIDGenerator.php
@@ -0,0 +1,629 @@
+<?php
+/**
+ * This file deals with UID generation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use Wikimedia\Assert\Assert;
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Class for getting statistically unique IDs
+ *
+ * @since 1.21
+ */
+class UIDGenerator {
+ /** @var UIDGenerator|null Lazily created shared instance, see singleton() */
+ protected static $instance = null;
+
+ /** @var string Local path of the file that caches the node ID */
+ protected $nodeIdFile;
+ /** @var string Node ID in binary (32 bits), derived from sha1() of the node ID */
+ protected $nodeId32;
+ /** @var string Node ID in binary (48 bits) */
+ protected $nodeId48;
+
+ /** @var string Local lock file path used by newTimestampedUID88() */
+ protected $lockFile88;
+ /** @var string Local lock file path used by newTimestampedUID128() */
+ protected $lockFile128;
+ /** @var string Local lock file path used by newUUIDv1() */
+ protected $lockFileUUID;
+
+ /**
+ * Cache of opened lock file handles (resource, or null when opening failed).
+ * getTimeAndDelay() keys entries by lock file property name (e.g. 'lockFile88'),
+ * while getSequentialPerNodeIDs() keys entries by file path.
+ * @var array
+ */
+ protected $fileHandles = [];
+
+ /** Flag: get randomness from fast and insecure sources */
+ const QUICK_RAND = 1;
+ /** Flag: use an APC like in-memory counter if available */
+ const QUICK_VOLATILE = 2;
+
+ /**
+ * Resolve (or create and cache) the node ID and compute the lock file paths.
+ *
+ * The node ID is read from a cache file under wfTempDir(). If that file does not
+ * hold exactly 12 hex digits, a MAC address is looked up with a shell command
+ * ("getmac" on Windows, "/sbin/ifconfig" elsewhere); if that also fails, 12 random
+ * hex digits are generated with the multicast bit set. The resulting value is
+ * written back to the cache file.
+ */
+ protected function __construct() {
+ $this->nodeIdFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
+ $nodeId = '';
+ if ( is_file( $this->nodeIdFile ) ) {
+ $nodeId = file_get_contents( $this->nodeIdFile );
+ }
+ // Try to get some ID that uniquely identifies this machine (RFC 4122)...
+ if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+ // Warnings from the shell calls below are suppressed; failure is handled by the fallback
+ Wikimedia\suppressWarnings();
+ if ( wfIsWindows() ) {
+ // https://technet.microsoft.com/en-us/library/bb490913.aspx
+ $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
+ // Only the first CSV line is used; its first field is taken as the MAC address
+ $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
+ $info = str_getcsv( $line );
+ $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
+ } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
+ // See https://linux.die.net/man/8/ifconfig
+ $m = [];
+ // First whitespace-delimited, colon-separated MAC in the output (lower-case hex only)
+ preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
+ wfShellExec( '/sbin/ifconfig -a' ), $m );
+ $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
+ }
+ Wikimedia\restoreWarnings();
+ // No usable MAC address found: fall back to a random node ID
+ if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+ $nodeId = MWCryptRand::generateHex( 12, true );
+ $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
+ }
+ file_put_contents( $this->nodeIdFile, $nodeId ); // cache
+ }
+ // 32 bit form is taken from the first 8 hex digits of sha1( node ID ); 48 bit form is the node ID itself
+ $this->nodeId32 = Wikimedia\base_convert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
+ $this->nodeId48 = Wikimedia\base_convert( $nodeId, 16, 2, 48 );
+ // If different processes run as different users, they may have different temp dirs.
+ // This is dealt with by initializing the clock sequence number and counters randomly.
+ $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
+ $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
+ $this->lockFileUUID = wfTempDir() . '/mw-' . __CLASS__ . '-UUID-128';
+ }
+
+ /**
+  * Get the shared generator, creating it on first use.
+  *
+  * @todo: move to MW-specific factory class and inject temp dir
+  * @return UIDGenerator
+  */
+ protected static function singleton() {
+ 	if ( self::$instance !== null ) {
+ 		return self::$instance;
+ 	}
+
+ 	self::$instance = new self();
+
+ 	return self::$instance;
+ }
+
+ /**
+  * Get a statistically unique 88-bit unsigned integer ID string.
+  * The bits of the UID are prefixed with the time (down to the millisecond).
+  *
+  * These IDs are suitable as values for the shard key of distributed data.
+  * If a column uses these as values, it should be declared UNIQUE to handle collisions.
+  * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+  * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
+  *
+  * UID generation is serialized on each server (as the node ID is for the whole machine).
+  *
+  * @param int $base Specifies a base other than 10 (2 to 36)
+  * @return string Number
+  * @throws RuntimeException
+  */
+ public static function newTimestampedUID88( $base = 10 ) {
+ 	Assert::parameterType( 'integer', $base, '$base' );
+ 	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
+ 	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
+
+ 	$generator = self::singleton();
+ 	$timeInfo = $generator->getTimeAndDelay( 'lockFile88', 1, 1024, 1024 );
+ 	// Fold counter + offset back into the counter range used for this ID type
+ 	$timeInfo['offsetCounter'] %= 1024;
+ 	$bits = $generator->getTimestampedID88( $timeInfo );
+
+ 	return Wikimedia\base_convert( $bits, 2, $base );
+ }
+
+ /**
+  * Assemble the 88-bit ID: timestamp bits, then a 10 bit counter, then the 32 bit node ID.
+  *
+  * @param array $info The result of UIDGenerator::getTimeAndDelay() or
+  *  a plain (UIDGenerator::millitime(), counter, clock sequence) array.
+  * @return string 88 bits
+  * @throws RuntimeException
+  */
+ protected function getTimestampedID88( array $info ) {
+ 	// Accept both the keyed getTimeAndDelay() result and the plain list form
+ 	$keyed = isset( $info['time'] );
+ 	$time = $keyed ? $info['time'] : $info[0];
+ 	$counter = $keyed ? $info['offsetCounter'] : $info[1];
+
+ 	$segments = [
+ 		// The 46 LSBs of "milliseconds since epoch"
+ 		$this->millisecondsSinceEpochBinary( $time ),
+ 		// A 10 bit counter, for 56 bits so far
+ 		str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ),
+ 		// The 32 bit node ID, for 88 bits in total
+ 		$this->nodeId32,
+ 	];
+ 	$bits = implode( '', $segments );
+
+ 	// Any segment wider than expected shows up as a wrong total length
+ 	if ( strlen( $bits ) !== 88 ) {
+ 		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
+ 	}
+
+ 	return $bits;
+ }
+
+ /**
+  * Get a statistically unique 128-bit unsigned integer ID string.
+  * The bits of the UID are prefixed with the time (down to the millisecond).
+  *
+  * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
+  * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+  * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
+  *
+  * UID generation is serialized on each server (as the node ID is for the whole machine).
+  *
+  * @param int $base Specifies a base other than 10 (2 to 36)
+  * @return string Number
+  * @throws RuntimeException
+  */
+ public static function newTimestampedUID128( $base = 10 ) {
+ 	Assert::parameterType( 'integer', $base, '$base' );
+ 	Assert::parameter( $base <= 36, '$base', 'must be <= 36' );
+ 	Assert::parameter( $base >= 2, '$base', 'must be >= 2' );
+
+ 	$generator = self::singleton();
+ 	$timeInfo = $generator->getTimeAndDelay( 'lockFile128', 16384, 1048576, 1048576 );
+ 	// Fold counter + offset back into the counter range used for this ID type
+ 	$timeInfo['offsetCounter'] %= 1048576;
+ 	$bits = $generator->getTimestampedID128( $timeInfo );
+
+ 	return Wikimedia\base_convert( $bits, 2, $base );
+ }
+
+ /**
+  * Assemble the 128-bit ID: timestamp bits, a 20 bit counter, a 14 bit clock
+  * sequence number and the 48 bit node ID, in that order.
+  *
+  * @param array $info The result of UIDGenerator::getTimeAndDelay() or
+  *  a plain (UIDGenerator::millitime(), counter, clock sequence) array.
+  * @return string 128 bits
+  * @throws RuntimeException
+  */
+ protected function getTimestampedID128( array $info ) {
+ 	// Accept both the keyed getTimeAndDelay() result and the plain list form
+ 	$keyed = isset( $info['time'] );
+ 	$time = $keyed ? $info['time'] : $info[0];
+ 	$counter = $keyed ? $info['offsetCounter'] : $info[1];
+ 	$clkSeq = $keyed ? $info['clkSeq'] : $info[2];
+
+ 	$segments = [
+ 		// The 46 LSBs of "milliseconds since epoch"
+ 		$this->millisecondsSinceEpochBinary( $time ),
+ 		// A 20 bit counter, for 66 bits so far
+ 		str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ),
+ 		// A 14 bit clock sequence number, for 80 bits so far
+ 		str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ),
+ 		// The 48 bit node ID, for 128 bits in total
+ 		$this->nodeId48,
+ 	];
+ 	$bits = implode( '', $segments );
+
+ 	// Any segment wider than expected shows up as a wrong total length
+ 	if ( strlen( $bits ) !== 128 ) {
+ 		throw new RuntimeException( "Detected overflow for millisecond timestamp." );
+ 	}
+
+ 	return $bits;
+ }
+
+ /**
+ * Return an RFC4122 compliant v1 UUID
+ *
+ * @return string UUID in hyphenated hex form, as built by getUUIDv1()
+ * @throws RuntimeException
+ * @since 1.27
+ */
+ public static function newUUIDv1() {
+ $gen = self::singleton();
+ // There can be up to 10000 intervals for the same millisecond timestamp.
+ // [0,4999] counter + [0,5000] offset is in [0,9999] for the offset counter.
+ // Add this onto the timestamp to allow making up to 5000 IDs per millisecond.
+ return $gen->getUUIDv1( $gen->getTimeAndDelay( 'lockFileUUID', 16384, 5000, 5001 ) );
+ }
+
+ /**
+  * Return an RFC4122 compliant v1 UUID with the hyphens stripped
+  *
+  * @return string 32 hex characters with no hyphens
+  * @throws RuntimeException
+  * @since 1.27
+  */
+ public static function newRawUUIDv1() {
+ 	$uuid = self::newUUIDv1();
+
+ 	return str_replace( '-', '', $uuid );
+ }
+
+ /**
+ * Build a v1 UUID from a timestamp, clock sequence and this node's 48 bit ID.
+ *
+ * @param array $info Result of UIDGenerator::getTimeAndDelay()
+ * @return string 32 hex digits in hyphenated 8-4-4-4-12 form
+ * @throws RuntimeException If the assembled bit string is not 128 bits long
+ */
+ protected function getUUIDv1( array $info ) {
+ $clkSeq_bin = Wikimedia\base_convert( $info['clkSeq'], 10, 2, 14 );
+ // 60 bit timestamp; the offset counter is added on as extra 100ns intervals
+ $time_bin = $this->intervalsSinceGregorianBinary( $info['time'], $info['offsetCounter'] );
+ // Take the 32 bits of "time low"
+ $id_bin = substr( $time_bin, 28, 32 );
+ // Add 16 bits of "time mid" resulting in 48 bits total
+ $id_bin .= substr( $time_bin, 12, 16 );
+ // Add 4 bit version resulting in 52 bits total
+ $id_bin .= '0001';
+ // Add 12 bits of "time high" resulting in 64 bits total
+ $id_bin .= substr( $time_bin, 0, 12 );
+ // Add 2 bits of "variant" resulting in 66 bits total
+ $id_bin .= '10';
+ // Add 6 bits of "clock seq high" resulting in 72 bits total
+ $id_bin .= substr( $clkSeq_bin, 0, 6 );
+ // Add 8 bits of "clock seq low" resulting in 80 bits total
+ $id_bin .= substr( $clkSeq_bin, 6, 8 );
+ // Add the 48 bit node ID resulting in 128 bits total
+ $id_bin .= $this->nodeId48;
+ // Sanity check the total length before converting to hex
+ if ( strlen( $id_bin ) !== 128 ) {
+ throw new RuntimeException( "Detected overflow for millisecond timestamp." );
+ }
+ // Convert to a 32 char hex string, then insert the dashes
+ $hex = Wikimedia\base_convert( $id_bin, 2, 16, 32 );
+ return sprintf( '%s-%s-%s-%s-%s',
+ // "time_low" (32 bits)
+ substr( $hex, 0, 8 ),
+ // "time_mid" (16 bits)
+ substr( $hex, 8, 4 ),
+ // "time_hi_and_version" (16 bits)
+ substr( $hex, 12, 4 ),
+ // "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
+ substr( $hex, 16, 4 ),
+ // "node" (48 bits)
+ substr( $hex, 20, 12 )
+ );
+ }
+
+ /**
+  * Return an RFC4122 compliant v4 UUID
+  *
+  * 31 random hex digits are consumed; the version digit is fixed to "4" and the
+  * top two bits of the variant digit are forced to binary 10.
+  *
+  * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
+  * @return string
+  * @throws RuntimeException
+  */
+ public static function newUUIDv4( $flags = 0 ) {
+ 	if ( $flags & self::QUICK_RAND ) {
+ 		$rand = wfRandomString( 31 );
+ 	} else {
+ 		$rand = MWCryptRand::generateHex( 31 );
+ 	}
+
+ 	// Variant is binary 10x: keep the two low random bits, force the high bits to 10
+ 	$variantDigit = dechex( 0x8 | ( hexdec( $rand[15] ) & 0x3 ) );
+
+ 	$groups = [
+ 		// "time_low" (32 bits)
+ 		substr( $rand, 0, 8 ),
+ 		// "time_mid" (16 bits)
+ 		substr( $rand, 8, 4 ),
+ 		// "time_hi_and_version" (16 bits)
+ 		'4' . substr( $rand, 12, 3 ),
+ 		// "clk_seq_hi_res" (8 bits) and "clk_seq_low" (8 bits)
+ 		$variantDigit . $rand[16] . substr( $rand, 17, 2 ),
+ 		// "node" (48 bits)
+ 		substr( $rand, 19, 12 ),
+ 	];
+
+ 	return implode( '-', $groups );
+ }
+
+ /**
+  * Return an RFC4122 compliant v4 UUID with the hyphens stripped
+  *
+  * @param int $flags Bitfield (supports UIDGenerator::QUICK_RAND)
+  * @return string 32 hex characters with no hyphens
+  * @throws RuntimeException
+  */
+ public static function newRawUUIDv4( $flags = 0 ) {
+ 	$uuid = self::newUUIDv4( $flags );
+
+ 	return str_replace( '-', '', $uuid );
+ }
+
+ /**
+  * Return an ID that is sequential *only* for this node and bucket
+  *
+  * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
+  * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
+  *
+  * @param string $bucket Arbitrary bucket name (should be ASCII)
+  * @param int $bits Bit size (<=48) of resulting numbers before wrap-around
+  * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+  * @return float Integer value as float
+  * @since 1.23
+  */
+ public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
+ 	// Ask for a batch of exactly one ID and hand back its only element
+ 	$ids = self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags );
+
+ 	return current( $ids );
+ }
+
+ /**
+  * Return IDs that are sequential *only* for this node and bucket
+  *
+  * Static entry point that forwards to getSequentialPerNodeIDs() on the shared instance.
+  *
+  * @see UIDGenerator::newSequentialPerNodeID()
+  * @param string $bucket Arbitrary bucket name (should be ASCII)
+  * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
+  * @param int $count Number of IDs to return
+  * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+  * @return array Ordered list of float integer values
+  * @since 1.23
+  */
+ public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
+ 	return self::singleton()->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
+ }
+
+ /**
+  * Return IDs that are sequential *only* for this node and bucket
+  *
+  * The persistent counter lives either in the local server object cache (when
+  * QUICK_VOLATILE is requested and the process is not in CLI mode) or in a
+  * per-bucket file under wfTempDir() that is updated under an exclusive flock().
+  *
+  * @see UIDGenerator::newSequentialPerNodeID()
+  * @param string $bucket Arbitrary bucket name (should be ASCII)
+  * @param int $bits Bit size (16 to 48) of resulting numbers before wrap-around
+  * @param int $count Number of IDs to return
+  * @param int $flags (supports UIDGenerator::QUICK_VOLATILE)
+  * @return array Ordered list of float integer values; empty if $count <= 0
+  * @throws RuntimeException If $bits is out of range or the counter cannot be updated
+  */
+ protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
+ 	if ( $count <= 0 ) {
+ 		return []; // nothing to do
+ 	} elseif ( $bits < 16 || $bits > 48 ) {
+ 		throw new RuntimeException( "Requested bit size ($bits) is out of range." );
+ 	}
+
+ 	$counter = null; // post-increment persistent counter value
+
+ 	// Use APC/etc if requested, available, and not in CLI mode;
+ 	// counter values would not survive across script instances in CLI mode.
+ 	$cache = null;
+ 	if ( ( $flags & self::QUICK_VOLATILE ) && !wfIsCLI() ) {
+ 		$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
+ 	}
+ 	if ( $cache ) {
+ 		$counter = $cache->incrWithInit( $bucket, $cache::TTL_INDEFINITE, $count, $count );
+ 		if ( $counter === false ) {
+ 			throw new RuntimeException( 'Unable to set value to ' . get_class( $cache ) );
+ 		}
+ 	}
+
+ 	// Note: use of fmod() avoids "division by zero" on 32 bit machines
+ 	if ( $counter === null ) {
+ 		$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
+ 		// Get the UID lock file handle
+ 		if ( isset( $this->fileHandles[$path] ) ) {
+ 			$handle = $this->fileHandles[$path];
+ 		} else {
+ 			$handle = fopen( $path, 'cb+' );
+ 			$this->fileHandles[$path] = $handle ?: null; // cache
+ 		}
+ 		// Acquire the UID lock file
+ 		if ( $handle === false ) {
+ 			throw new RuntimeException( "Could not open '{$path}'." );
+ 		} elseif ( !flock( $handle, LOCK_EX ) ) {
+ 			fclose( $handle );
+ 			// Forget the closed handle so that neither a later call nor
+ 			// __destruct() tries to use or close it again.
+ 			unset( $this->fileHandles[$path] );
+ 			throw new RuntimeException( "Could not acquire '{$path}'." );
+ 		}
+ 		// Fetch the counter value and increment it...
+ 		rewind( $handle );
+ 		// fgets() gives false on a new/empty file; cast explicitly so that
+ 		// floor() always receives a float (a bare '' is rejected by PHP 8).
+ 		$stored = (float)trim( (string)fgets( $handle ) );
+ 		$counter = floor( $stored ) + $count; // fetch as float
+ 		// Write back the new counter value
+ 		ftruncate( $handle, 0 );
+ 		rewind( $handle );
+ 		// Wrap around as needed. '%.0f' keeps every digit; an implicit float to
+ 		// string cast switches to exponent notation for values near 2^48.
+ 		fwrite( $handle, sprintf( '%.0f', fmod( $counter, pow( 2, 48 ) ) ) );
+ 		fflush( $handle );
+ 		// Release the UID lock file
+ 		flock( $handle, LOCK_UN );
+ 	}
+
+ 	$ids = [];
+ 	$divisor = pow( 2, $bits );
+ 	$currentId = floor( $counter - $count ); // pre-increment counter value
+ 	for ( $i = 0; $i < $count; ++$i ) {
+ 		$ids[] = fmod( ++$currentId, $divisor );
+ 	}
+
+ 	return $ids;
+ }
+
+ /**
+  * Get a (time,counter,clock sequence) where (time,counter) is higher
+  * than any previous (time,counter) value for the given clock sequence.
+  * This is useful for making UIDs sequential on a per-node basis.
+  *
+  * The previous state is kept in the lock file as a single line of the form
+  * "<clk seq> <sec> <msec> <counter> <offset>", which is read and rewritten
+  * while an exclusive flock() is held.
+  *
+  * @param string $lockFile Name of the property that holds the lock file path
+  *  (e.g. 'lockFile88'); also used as the key into the file handle cache
+  * @param int $clockSeqSize The number of possible clock sequence values
+  * @param int $counterSize The number of possible counter values
+  * @param int $offsetSize The number of possible offset values
+  * @return array Map with the keys 'time' (result of UIDGenerator::millitime()),
+  *  'counter', 'clkSeq', 'offset' and 'offsetCounter' (counter + offset)
+  * @throws RuntimeException
+  */
+ protected function getTimeAndDelay( $lockFile, $clockSeqSize, $counterSize, $offsetSize ) {
+ 	// Get the UID lock file handle
+ 	if ( isset( $this->fileHandles[$lockFile] ) ) {
+ 		$handle = $this->fileHandles[$lockFile];
+ 	} else {
+ 		$handle = fopen( $this->$lockFile, 'cb+' );
+ 		$this->fileHandles[$lockFile] = $handle ?: null; // cache
+ 	}
+ 	// Acquire the UID lock file
+ 	if ( $handle === false ) {
+ 		throw new RuntimeException( "Could not open '{$this->$lockFile}'." );
+ 	} elseif ( !flock( $handle, LOCK_EX ) ) {
+ 		fclose( $handle );
+ 		// Forget the closed handle so that neither a later call nor
+ 		// __destruct() tries to use or close it again.
+ 		unset( $this->fileHandles[$lockFile] );
+ 		throw new RuntimeException( "Could not acquire '{$this->$lockFile}'." );
+ 	}
+ 	// Get the current timestamp, clock sequence number, last time, and counter
+ 	rewind( $handle );
+ 	$data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
+ 	$clockChanged = false; // clock set back significantly?
+ 	if ( count( $data ) == 5 ) { // last UID info already initialized
+ 		$clkSeq = (int)$data[0] % $clockSeqSize;
+ 		$prevTime = [ (int)$data[1], (int)$data[2] ];
+ 		// Random counter offset. It was drawn from [0, $offsetSize - 1], so it is
+ 		// reduced with $offsetSize; reducing with $counterSize would silently
+ 		// change a stored offset whenever the two sizes differ.
+ 		$offset = (int)$data[4] % $offsetSize;
+ 		$counter = 0; // counter for UIDs with the same timestamp
+ 		// Delay until the clock reaches the time of the last ID.
+ 		// This detects any microtime() drift among processes.
+ 		$time = $this->timeWaitUntil( $prevTime );
+ 		if ( !$time ) { // too long to delay?
+ 			$clockChanged = true; // bump clock sequence number
+ 			$time = self::millitime();
+ 		} elseif ( $time == $prevTime ) {
+ 			// Bump the counter if there are timestamp collisions
+ 			$counter = (int)$data[3] % $counterSize;
+ 			if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
+ 				flock( $handle, LOCK_UN ); // abort
+ 				throw new RuntimeException( "Counter overflow for timestamp value." );
+ 			}
+ 		}
+ 	} else { // last UID info not initialized
+ 		$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
+ 		$counter = 0;
+ 		$offset = mt_rand( 0, $offsetSize - 1 );
+ 		$time = self::millitime();
+ 	}
+ 	// microtime() and gettimeofday() can drift from time() at least on Windows.
+ 	// The drift is immediate for processes running while the system clock changes.
+ 	// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
+ 	if ( abs( time() - $time[0] ) >= 2 ) {
+ 		// We don't want processes using too high or low timestamps to avoid duplicate
+ 		// UIDs and clock sequence number churn. This process should just be restarted.
+ 		flock( $handle, LOCK_UN ); // abort
+ 		throw new RuntimeException( "Process clock is outdated or drifted." );
+ 	}
+ 	// If microtime() is synced and a clock change was detected, then the clock went back
+ 	if ( $clockChanged ) {
+ 		// Bump the clock sequence number and also randomize the counter offset,
+ 		// which is useful for UIDs that do not include the clock sequence number.
+ 		$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
+ 		$offset = mt_rand( 0, $offsetSize - 1 );
+ 		trigger_error( "Clock was set back; sequence number incremented." );
+ 	}
+ 	// Update the (clock sequence number, timestamp, counter)
+ 	ftruncate( $handle, 0 );
+ 	rewind( $handle );
+ 	fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
+ 	fflush( $handle );
+ 	// Release the UID lock file
+ 	flock( $handle, LOCK_UN );
+
+ 	return [
+ 		'time' => $time,
+ 		'counter' => $counter,
+ 		'clkSeq' => $clkSeq,
+ 		'offset' => $offset,
+ 		'offsetCounter' => $counter + $offset
+ 	];
+ }
+
+ /**
+ * Wait till the current timestamp reaches $time and return the current
+ * timestamp. This returns false if it would have to wait more than 10ms.
+ *
+ * The wait is a busy loop that polls millitime() without sleeping.
+ *
+ * @param array $time Result of UIDGenerator::millitime()
+ * @return array|bool UIDGenerator::millitime() result or false
+ */
+ protected function timeWaitUntil( array $time ) {
+ do {
+ $ct = self::millitime();
+ // Both values are (sec, msec) arrays, which PHP compares element by element:
+ // https://secure.php.net/manual/en/language.operators.comparison.php
+ if ( $ct >= $time ) {
+ return $ct; // current timestamp has reached or passed $time
+ }
+ // Keep polling only while the remaining gap is at most 10 milliseconds
+ } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
+
+ return false;
+ }
+
+ /**
+  * Render a millitime() value as a fixed-width binary string of milliseconds.
+  *
+  * @param array $time Result of UIDGenerator::millitime()
+  * @return string 46 LSBs of "milliseconds since epoch" in binary (rolls over in 4201)
+  * @throws RuntimeException
+  */
+ protected function millisecondsSinceEpochBinary( array $time ) {
+ 	list( $seconds, $millis ) = $time;
+ 	$stamp = 1000 * $seconds + $millis;
+
+ 	if ( $stamp > pow( 2, 52 ) ) {
+ 		throw new RuntimeException( __METHOD__ .
+ 			': sorry, this function doesn\'t work after the year 144680' );
+ 	}
+
+ 	$binary = Wikimedia\base_convert( $stamp, 10, 2, 46 );
+
+ 	// Keep only the 46 least significant bits
+ 	return substr( $binary, -46 );
+ }
+
+ /**
+ * Convert a millitime() value to a 60 bit binary count of 100ns intervals.
+ *
+ * Native integers are used when PHP has 64 bit integers; otherwise the gmp
+ * extension is used, then bcmath.
+ *
+ * @param array $time Result of UIDGenerator::millitime()
+ * @param int $delta Number of intervals to add on to the timestamp
+ * @return string 60 bits of "100ns intervals since 15 October 1582" (rolls over in 3400)
+ * @throws RuntimeException If integers are 32 bit and neither gmp nor bcmath is loaded
+ */
+ protected function intervalsSinceGregorianBinary( array $time, $delta = 0 ) {
+ list( $sec, $msec ) = $time;
+ // Added to the UNIX-epoch based count to rebase it onto 15 October 1582 (in 100ns intervals)
+ $offset = '122192928000000000';
+ if ( PHP_INT_SIZE >= 8 ) { // 64 bit integers
+ // ms * 10000 gives 100ns intervals
+ $ts = ( 1000 * $sec + $msec ) * 10000 + (int)$offset + $delta;
+ $id_bin = str_pad( decbin( $ts % pow( 2, 60 ) ), 60, '0', STR_PAD_LEFT );
+ } elseif ( extension_loaded( 'gmp' ) ) {
+ $ts = gmp_add( gmp_mul( (string)$sec, '1000' ), (string)$msec ); // ms
+ $ts = gmp_add( gmp_mul( $ts, '10000' ), $offset ); // 100ns intervals
+ $ts = gmp_add( $ts, (string)$delta );
+ $ts = gmp_mod( $ts, gmp_pow( '2', '60' ) ); // wrap around
+ $id_bin = str_pad( gmp_strval( $ts, 2 ), 60, '0', STR_PAD_LEFT );
+ } elseif ( extension_loaded( 'bcmath' ) ) {
+ $ts = bcadd( bcmul( $sec, 1000 ), $msec ); // ms
+ $ts = bcadd( bcmul( $ts, 10000 ), $offset ); // 100ns intervals
+ $ts = bcadd( $ts, $delta );
+ $ts = bcmod( $ts, bcpow( 2, 60 ) ); // wrap around
+ $id_bin = Wikimedia\base_convert( $ts, 10, 2, 60 );
+ } else {
+ throw new RuntimeException( 'bcmath or gmp extension required for 32 bit machines.' );
+ }
+ return $id_bin;
+ }
+
+ /**
+  * Split the current microtime() into whole seconds and whole milliseconds.
+  *
+  * @return array (current time in seconds, milliseconds since then)
+  */
+ protected static function millitime() {
+ 	// microtime() yields "<fraction of a second> <seconds>"
+ 	$parts = explode( ' ', microtime() );
+ 	$fraction = $parts[0];
+ 	$seconds = $parts[1];
+
+ 	return [ (int)$seconds, (int)( $fraction * 1000 ) ];
+ }
+
+ /**
+  * Delete all cache files that have been created.
+  *
+  * This is a cleanup method primarily meant to be used from unit tests to
+  * avoid polluting the local filesystem. If used outside of a unit test
+  * environment it should be used with caution as it may destroy state saved
+  * in the files.
+  *
+  * Every cached handle is closed, its file is removed, and the cached node ID
+  * file is removed as well.
+  *
+  * @see unitTestTearDown
+  * @since 1.23
+  */
+ protected function deleteCacheFiles() {
+ 	// Bug: 44850
+ 	// getTimeAndDelay() caches handles under the lock file *property name*,
+ 	// whereas getSequentialPerNodeIDs() caches them under the file path.
+ 	$lockFileProps = [ 'lockFile88', 'lockFile128', 'lockFileUUID' ];
+ 	foreach ( $this->fileHandles as $key => $handle ) {
+ 		if ( $handle !== null ) {
+ 			fclose( $handle );
+ 		}
+ 		// Resolve property-name keys to the real path so those files get removed too
+ 		$path = in_array( $key, $lockFileProps, true ) ? $this->$key : $key;
+ 		if ( is_file( $path ) ) {
+ 			unlink( $path );
+ 		}
+ 		unset( $this->fileHandles[$key] );
+ 	}
+ 	if ( is_file( $this->nodeIdFile ) ) {
+ 		unlink( $this->nodeIdFile );
+ 	}
+ }
+
+ /**
+  * Cleanup resources when tearing down after a unit test.
+  *
+  * This is a cleanup method primarily meant to be used from unit tests to
+  * avoid polluting the local filesystem. If used outside of a unit test
+  * environment it should be used with caution as it may destroy state saved
+  * in the files.
+  *
+  * @see deleteCacheFiles
+  * @since 1.23
+  */
+ public static function unitTestTearDown() {
+ 	// Bug: 44850
+ 	self::singleton()->deleteCacheFiles();
+ }
+
+ /**
+  * Close every file handle that is still cached.
+  */
+ function __destruct() {
+ 	foreach ( $this->fileHandles as $handle ) {
+ 		// Entries are null when the file could not be opened
+ 		if ( $handle ) {
+ 			fclose( $handle );
+ 		}
+ 	}
+ }
+}
diff --git a/www/wiki/includes/utils/ZipDirectoryReader.php b/www/wiki/includes/utils/ZipDirectoryReader.php
new file mode 100644
index 00000000..f0ace2cc
--- /dev/null
+++ b/www/wiki/includes/utils/ZipDirectoryReader.php
@@ -0,0 +1,717 @@
+<?php
+/**
+ * ZIP file directories reader, for the purposes of upload verification.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A class for reading ZIP file directories, for the purposes of upload
+ * verification.
+ *
+ * Only a functional interface is provided: ZipDirectoryReader::read(). No access is
+ * given to object instances.
+ */
+class ZipDirectoryReader {
+ /**
+ * Read a ZIP file and call a function for each file discovered in it.
+ *
+ * Because this class is aimed at verification, an error is raised on
+ * suspicious or ambiguous input, instead of emulating some standard
+ * behavior.
+ *
+ * @param string $fileName The archive file name
+ * @param callable $callback The callback function. It will be called for each file
+ * with a single associative array each time, with members:
+ *
+ * - name: The file name. Directories conventionally have a trailing
+ * slash.
+ *
+ * - mtime: The file modification time, in MediaWiki 14-char format
+ *
+ * - size: The uncompressed file size
+ *
+ * @param array $options An associative array of read options, with the option
+ * name in the key. This may currently contain:
+ *
+ * - zip64: If this is set to true, then we will emulate a
+ * library with ZIP64 support, like OpenJDK 7. If it is set to
+ * false, then we will emulate a library with no knowledge of
+ * ZIP64.
+ *
+ * NOTE: The ZIP64 code is untested and probably doesn't work. It
+ * turned out to be easier to just reject ZIP64 archive uploads,
+ * since they are likely to be very rare. Confirming safety of a
+ * ZIP64 file is fairly complex. What do you do with a file that is
+ * ambiguous and broken when read with a non-ZIP64 reader, but valid
+ * when read with a ZIP64 reader? This situation is normal for a
+ * valid ZIP64 file, and working out what non-ZIP64 readers will make
+ * of such a file is not trivial.
+ *
+ * @return Status A Status object. The following fatal errors are defined:
+ *
+ * - zip-file-open-error: The file could not be opened.
+ *
+ * - zip-wrong-format: The file does not appear to be a ZIP file.
+ *
+ * - zip-bad: There was something wrong or ambiguous about the file
+ * data.
+ *
+ * - zip-unsupported: The ZIP file uses features which
+ * ZipDirectoryReader does not support.
+ *
+ * The default messages for those fatal errors are written in a way that
+ * makes sense for upload verification.
+ *
+ * If a fatal error is returned, more information about the error will be
+ * available in the debug log.
+ *
+ * Note that the callback function may be called any number of times before
+ * a fatal error is returned. If this occurs, the data sent to the callback
+ * function should be discarded.
+ */
+ public static function read( $fileName, $callback, $options = [] ) {
+ 	// Instances are never exposed: build one, run it, and return its Status
+ 	return ( new self( $fileName, $callback, $options ) )->execute();
+ }
+
+ /** @var string The file name */
+ protected $fileName;
+
+ /** @var resource|bool The opened file resource, as returned by fopen() in execute() */
+ protected $file;
+
+ /** The cached length of the file, or null if it has not been loaded yet. */
+ protected $fileLength;
+
+ /** A segmented cache of the file contents */
+ protected $buffer;
+
+ /** @var callable The file data callback */
+ protected $callback;
+
+ /** The ZIP64 mode; false unless overridden by the 'zip64' option */
+ protected $zip64 = false;
+
+ /** Stored headers, filled in by the corresponding read*() methods */
+ protected $eocdr, $eocdr64, $eocdr64Locator;
+
+ /** @var array Reset to an empty array at the start of execute() */
+ protected $data;
+
+ /** The "extra field" ID for ZIP64 central directory entries */
+ const ZIP64_EXTRA_HEADER = 0x0001;
+
+ /** The segment size for the file contents cache */
+ const SEGSIZE = 16384;
+
+ /** The index of the "general field" bit for UTF-8 file names */
+ const GENERAL_UTF8 = 11;
+
+ /** The index of the "general field" bit for central directory encryption */
+ const GENERAL_CD_ENCRYPTED = 13;
+
+ /**
+ * Protected constructor; instances are created through ZipDirectoryReader::read().
+ *
+ * @param string $fileName The archive file name
+ * @param callable $callback The file data callback
+ * @param array $options Read options; only 'zip64' is looked at here
+ */
+ protected function __construct( $fileName, $callback, $options ) {
+ $this->fileName = $fileName;
+ $this->callback = $callback;
+
+ // Leave the default (false) in place when the option is absent
+ if ( isset( $options['zip64'] ) ) {
+ $this->zip64 = $options['zip64'];
+ }
+ }
+
+ /**
+  * Read the directory according to settings in $this.
+  *
+  * Opens the file, reads the end of central directory record, then reads the
+  * central directory in ZIP64 or legacy mode. A ZipDirectoryReaderError raised
+  * along the way is turned into a fatal Status carrying its error code. The
+  * file handle is closed on every exit path.
+  *
+  * @return Status
+  */
+ function execute() {
+ 	$this->file = fopen( $this->fileName, 'r' );
+ 	$this->data = [];
+ 	if ( !$this->file ) {
+ 		return Status::newFatal( 'zip-file-open-error' );
+ 	}
+
+ 	$status = Status::newGood();
+ 	try {
+ 		$this->readEndOfCentralDirectoryRecord();
+ 		if ( $this->zip64 ) {
+ 			list( $offset, $size ) = $this->findZip64CentralDirectory();
+ 			$this->readCentralDirectory( $offset, $size );
+ 		} else {
+ 			if ( $this->eocdr['CD size'] == 0xffffffff
+ 				|| $this->eocdr['CD offset'] == 0xffffffff
+ 				|| $this->eocdr['CD entries total'] == 0xffff
+ 			) {
+ 				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
+ 					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
+ 					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
+ 			}
+
+ 			list( $offset, $size ) = $this->findOldCentralDirectory();
+ 			$this->readCentralDirectory( $offset, $size );
+ 		}
+ 	} catch ( ZipDirectoryReaderError $e ) {
+ 		$status->fatal( $e->getErrorCode() );
+ 	} finally {
+ 		// Also runs when some other exception propagates out of the try block,
+ 		// so the handle is never left open.
+ 		fclose( $this->file );
+ 	}
+
+ 	return $status;
+ }
+
+ /**
+ * Throw an error, and log a debug message.
+ *
+ * This method never returns normally: it always throws.
+ *
+ * @param mixed $code Error code carried by the exception (e.g. 'zip-bad')
+ * @param string $debugMessage Detail that is only written to the debug log
+ * @throws ZipDirectoryReaderError
+ */
+ function error( $code, $debugMessage ) {
+ wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" );
+ throw new ZipDirectoryReaderError( $code );
+ }
+
+ /**
+ * Read the header which is at the end of the central directory,
+ * unimaginatively called the "end of central directory record" by the ZIP
+ * spec.
+ *
+ * The result is stored in $this->eocdr, along with the derived 'EOCDR size',
+ * 'file comment' and 'position' entries.
+ *
+ * @throws ZipDirectoryReaderError Via error(), when the file is empty, has no
+ * EOCDR signature, has trailing bytes after the comment, or spans several disks
+ */
+ function readEndOfCentralDirectoryRecord() {
+ // Field name => field size (presumably in bytes; see getStructSize() and unpack())
+ $info = [
+ 'signature' => 4,
+ 'disk' => 2,
+ 'CD start disk' => 2,
+ 'CD entries this disk' => 2,
+ 'CD entries total' => 2,
+ 'CD size' => 4,
+ 'CD offset' => 4,
+ 'file comment length' => 2,
+ ];
+ $structSize = $this->getStructSize( $info );
+ // Only the tail of the file is searched: the fixed struct plus up to 65536 more bytes
+ $startPos = $this->getFileLength() - 65536 - $structSize;
+ if ( $startPos < 0 ) {
+ $startPos = 0;
+ }
+
+ if ( $this->getFileLength() === 0 ) {
+ $this->error( 'zip-wrong-format', "The file is empty." );
+ }
+
+ $block = $this->getBlock( $startPos );
+ // strrpos() picks the last occurrence of the signature in the block
+ $sigPos = strrpos( $block, "PK\x05\x06" );
+ if ( $sigPos === false ) {
+ $this->error( 'zip-wrong-format',
+ "zip file lacks EOCDR signature. It probably isn't a zip file." );
+ }
+
+ $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
+ $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
+
+ // The record plus its comment must account for every byte from the signature to the end of the block
+ if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
+ $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
+ }
+ if ( $this->eocdr['disk'] !== 0
+ || $this->eocdr['CD start disk'] !== 0
+ ) {
+ $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
+ }
+ // Read the comment itself, which directly follows the fixed-size part
+ $this->eocdr += $this->unpack(
+ $block,
+ [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
+ $sigPos + $structSize );
+ // Position of the record, computed as block start plus signature offset within the block
+ $this->eocdr['position'] = $startPos + $sigPos;
+ }
+
+ /**
+ * Read the header called the "ZIP64 end of central directory locator". An
+ * error will be raised if it does not exist.
+ *
+ * The locator is read from the bytes immediately preceding the EOCDR, so
+ * $this->eocdr must already be populated. The result is stored in
+ * $this->eocdr64Locator.
+ *
+ * @throws ZipDirectoryReaderError Via error(), when the signature does not match
+ */
+ function readZip64EndOfCentralDirectoryLocator() {
+ $info = [
+ 'signature' => [ 'string', 4 ],
+ 'eocdr64 start disk' => 4,
+ 'eocdr64 offset' => 8,
+ 'number of disks' => 4,
+ ];
+ $structSize = $this->getStructSize( $info );
+
+ // The locator sits directly in front of the EOCDR at the end of the file
+ $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
+ $block = $this->getBlock( $start, $structSize );
+ $this->eocdr64Locator = $data = $this->unpack( $block, $info );
+
+ if ( $data['signature'] !== "PK\x06\x07" ) {
+ // Note: Java will allow this and continue to read the
+ // EOCDR64, so we have to reject the upload, we can't
+ // just use the EOCDR header instead.
+ $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
+ }
+ }
+
+ /**
+  * Read the header called the "ZIP64 end of central directory record" from
+  * the offset given by the previously read locator, and store it in
+  * $this->eocdr64. It may replace the regular "end of central directory
+  * record" in ZIP64 files.
+  */
+ function readZip64EndOfCentralDirectoryRecord() {
+ 	$locator = $this->eocdr64Locator;
+ 	$locatorSpansDisks = $locator['eocdr64 start disk'] != 0
+ 		|| $locator['number of disks'] != 0;
+ 	if ( $locatorSpansDisks ) {
+ 		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
+ 	}
+
+ 	// Field name => width in bytes, or [ 'string', length ].
+ 	$layout = [
+ 		'signature' => [ 'string', 4 ],
+ 		'EOCDR64 size' => 8,
+ 		'version made by' => 2,
+ 		'version needed' => 2,
+ 		'disk' => 4,
+ 		'CD start disk' => 4,
+ 		'CD entries this disk' => 8,
+ 		'CD entries total' => 8,
+ 		'CD size' => 8,
+ 		'CD offset' => 8
+ 	];
+ 	$recordSize = $this->getStructSize( $layout );
+ 	$raw = $this->getBlock( $locator['eocdr64 offset'], $recordSize );
+ 	$record = $this->unpack( $raw, $layout );
+ 	$this->eocdr64 = $record;
+
+ 	if ( $record['signature'] !== "PK\x06\x06" ) {
+ 		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
+ 	}
+
+ 	$recordSpansDisks = $record['disk'] !== 0 || $record['CD start disk'] !== 0;
+ 	if ( $recordSpansDisks ) {
+ 		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
+ 	}
+ }
+
+ /**
+  * Find the location of the central directory, as would be seen by a
+  * non-ZIP64 reader, i.e. using only the fields of the regular end of
+  * central directory record.
+  *
+  * @return array Two-element list: offset and size of the central directory.
+  */
+ function findOldCentralDirectory() {
+ 	$cdOffset = $this->eocdr['CD offset'];
+ 	$cdSize = $this->eocdr['CD size'];
+
+ 	// Some readers locate the directory from the EOCDR position rather than
+ 	// from the offset field, so insist that both methods give the same answer.
+ 	if ( $cdOffset + $cdSize != $this->eocdr['position'] ) {
+ 		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+ 			'of central directory record' );
+ 	}
+
+ 	return [ $cdOffset, $cdSize ];
+ }
+
+ /**
+  * Find the location of the central directory, as would be seen by a
+  * ZIP64-compliant reader. The ZIP64 headers are consulted only when one of
+  * the regular EOCDR fields holds its all-ones marker value.
+  *
+  * @return array Two-element list: offset and size of the central directory.
+  */
+ function findZip64CentralDirectory() {
+ 	// The spec is ambiguous about the exact rules of precedence between the
+ 	// ZIP64 headers and the original headers. Here we follow zip_util.c
+ 	// from OpenJDK 7.
+ 	$cdSize = $this->eocdr['CD size'];
+ 	$cdOffset = $this->eocdr['CD offset'];
+ 	$entryCount = $this->eocdr['CD entries total'];
+ 	$cdEnd = $this->eocdr['position'];
+
+ 	$hasMarker = $cdSize == 0xffffffff
+ 		|| $cdOffset == 0xffffffff
+ 		|| $entryCount == 0xffff;
+ 	if ( $hasMarker ) {
+ 		$this->readZip64EndOfCentralDirectoryLocator();
+
+ 		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
+ 			$this->readZip64EndOfCentralDirectoryRecord();
+ 			if ( isset( $this->eocdr64['CD offset'] ) ) {
+ 				// The ZIP64 record takes over; the directory must now end
+ 				// where the ZIP64 record begins.
+ 				$cdSize = $this->eocdr64['CD size'];
+ 				$cdOffset = $this->eocdr64['CD offset'];
+ 				$cdEnd = $this->eocdr64Locator['eocdr64 offset'];
+ 			}
+ 		}
+ 	}
+
+ 	// Some readers locate the directory from the record position rather than
+ 	// from the offset field, so insist that both methods give the same answer.
+ 	if ( $cdOffset + $cdSize != $cdEnd ) {
+ 		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+ 			'of central directory record' );
+ 	}
+
+ 	return [ $cdOffset, $cdSize ];
+ }
+
+ /**
+ * Read the central directory at the given location and pass a summary of
+ * each entry to $this->callback.
+ *
+ * The callback receives one array per entry with the keys:
+ * - 'name': the file name, converted from CP437 to UTF-8 unless the entry
+ * has the GENERAL_UTF8 bit set, in which case it is passed through as is
+ * - 'mtime': a 14-digit YYYYMMDDHHMMSS string built from the entry's
+ * MS-DOS date and time fields
+ * - 'size': the entry's uncompressed size
+ *
+ * @param int $offset Position of the central directory within the file
+ * @param int $size Length of the central directory in bytes
+ */
+ function readCentralDirectory( $offset, $size ) {
+ $block = $this->getBlock( $offset, $size );
+
+ // Fixed-size part of a directory entry: field name => width in bytes,
+ // or [ 'string', length ] for raw byte strings.
+ $fixedInfo = [
+ 'signature' => [ 'string', 4 ],
+ 'version made by' => 2,
+ 'version needed' => 2,
+ 'general bits' => 2,
+ 'compression method' => 2,
+ 'mod time' => 2,
+ 'mod date' => 2,
+ 'crc-32' => 4,
+ 'compressed size' => 4,
+ 'uncompressed size' => 4,
+ 'name length' => 2,
+ 'extra field length' => 2,
+ 'comment length' => 2,
+ 'disk number start' => 2,
+ 'internal attrs' => 2,
+ 'external attrs' => 4,
+ 'local header offset' => 4,
+ ];
+ $fixedSize = $this->getStructSize( $fixedInfo );
+
+ // Walk the block entry by entry; $pos is relative to the start of $block.
+ $pos = 0;
+ while ( $pos < $size ) {
+ $data = $this->unpack( $block, $fixedInfo, $pos );
+ $pos += $fixedSize;
+
+ if ( $data['signature'] !== "PK\x01\x02" ) {
+ $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
+ }
+
+ // The variable-length fields follow the fixed part, with the lengths
+ // that were just read from it.
+ $variableInfo = [
+ 'name' => [ 'string', $data['name length'] ],
+ 'extra field' => [ 'string', $data['extra field length'] ],
+ 'comment' => [ 'string', $data['comment length'] ],
+ ];
+ $data += $this->unpack( $block, $variableInfo, $pos );
+ $pos += $this->getStructSize( $variableInfo );
+
+ // In ZIP64 mode, an all-ones 32-bit field means the value is looked up
+ // in the ZIP64 extra field instead.
+ if ( $this->zip64 && (
+ $data['compressed size'] == 0xffffffff
+ || $data['uncompressed size'] == 0xffffffff
+ || $data['local header offset'] == 0xffffffff )
+ ) {
+ $zip64Data = $this->unpackZip64Extra( $data['extra field'] );
+ if ( $zip64Data ) {
+ // Array union keeps the left operand's values, so the ZIP64
+ // values override the 32-bit fields of the same name.
+ $data = $zip64Data + $data;
+ }
+ }
+
+ if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
+ $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
+ }
+
+ // Convert the timestamp into MediaWiki format
+ // For the format, please see the MS-DOS 2.0 Programmer's Reference,
+ // pages 3-5 and 3-6.
+ $time = $data['mod time'];
+ $date = $data['mod date'];
+
+ // Date: bits 9 and up hold the year relative to 1980, bits 5-8 the
+ // month, bits 0-4 the day.
+ $year = 1980 + ( $date >> 9 );
+ $month = ( $date >> 5 ) & 15;
+ $day = $date & 31;
+ // Time: bits 11-15 hold the hour, bits 5-10 the minute, bits 0-4 the
+ // second divided by two.
+ $hour = ( $time >> 11 ) & 31;
+ $minute = ( $time >> 5 ) & 63;
+ $second = ( $time & 31 ) * 2;
+ $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
+ $year, $month, $day, $hour, $minute, $second );
+
+ // Convert the character set in the file name
+ if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
+ $name = $data['name'];
+ } else {
+ $name = iconv( 'CP437', 'UTF-8', $data['name'] );
+ }
+
+ // Compile a data array for the user, with a sensible format
+ $userData = [
+ 'name' => $name,
+ 'mtime' => $timestamp,
+ 'size' => $data['uncompressed size'],
+ ];
+ call_user_func( $this->callback, $userData );
+ }
+ }
+
+ /**
+  * Scan "extra field" data for the ZIP64 sub-field and decode it.
+  *
+  * The extra field is treated as a sequence of sub-fields, each made of a
+  * 2-byte id, a 2-byte size and that many bytes of data. The first sub-field
+  * whose id equals self::ZIP64_EXTRA_HEADER is unpacked and returned.
+  *
+  * @param string $extraField
+  * @return array|bool Associative array of ZIP64 values, or false if the
+  *   extra field holds no ZIP64 sub-field.
+  */
+ function unpackZip64Extra( $extraField ) {
+ 	$headerLayout = [
+ 		'id' => 2,
+ 		'size' => 2,
+ 	];
+ 	$headerSize = $this->getStructSize( $headerLayout );
+
+ 	$zip64Layout = [
+ 		'uncompressed size' => 8,
+ 		'compressed size' => 8,
+ 		'local header offset' => 8,
+ 		'disk number start' => 4,
+ 	];
+
+ 	$totalLength = strlen( $extraField );
+ 	$cursor = 0;
+ 	while ( $cursor < $totalLength ) {
+ 		$header = $this->unpack( $extraField, $headerLayout, $cursor );
+ 		$cursor += $headerSize;
+
+ 		$payload = $this->unpack(
+ 			$extraField,
+ 			[ 'data' => [ 'string', $header['size'] ] ],
+ 			$cursor
+ 		);
+ 		$cursor += $header['size'];
+
+ 		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
+ 			return $this->unpack( $payload['data'], $zip64Layout );
+ 		}
+ 	}
+
+ 	return false;
+ }
+
+ /**
+  * Get the length of the file, as reported by fstat() on $this->file.
+  * The value is looked up once and then kept in $this->fileLength.
+  * @return int
+  */
+ function getFileLength() {
+ 	if ( $this->fileLength !== null ) {
+ 		return $this->fileLength;
+ 	}
+
+ 	$fileInfo = fstat( $this->file );
+ 	$this->fileLength = $fileInfo['size'];
+
+ 	return $this->fileLength;
+ }
+
+ /**
+  * Get the file contents from a given offset. If there are not enough bytes
+  * in the file to satisfy the request, an error is raised via $this->error().
+  *
+  * The data is assembled from the cached segments returned by getSegment().
+  * Only the segments that overlap the requested range are read.
+  *
+  * @param int $start The byte offset of the start of the block. Must lie
+  *   inside the file.
+  * @param int|null $length The number of bytes to return. If omitted, the
+  *   remainder of the file will be returned.
+  *
+  * @return string
+  */
+ function getBlock( $start, $length = null ) {
+ 	$fileLength = $this->getFileLength();
+ 	// A negative start would otherwise only be caught later by a failing seek.
+ 	if ( $start < 0 || $start >= $fileLength ) {
+ 		$this->error( 'zip-bad', "getBlock() requested position $start, " .
+ 			"file length is $fileLength" );
+ 	}
+ 	if ( $length === null ) {
+ 		$length = $fileLength - $start;
+ 	}
+ 	$end = $start + $length;
+ 	if ( $end > $fileLength ) {
+ 		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
+ 			"file length is $fileLength" );
+ 	}
+
+ 	// A valid zero-length request (e.g. an empty central directory) needs no
+ 	// file access at all.
+ 	if ( $length == 0 ) {
+ 		return '';
+ 	}
+
+ 	// $endSeg is an exclusive bound: the index of the first segment that lies
+ 	// entirely at or after $end. Looping up to and including it would read
+ 	// and cache a segment from which no byte is ever used.
+ 	$startSeg = floor( $start / self::SEGSIZE );
+ 	$endSeg = ceil( $end / self::SEGSIZE );
+
+ 	$block = '';
+ 	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
+ 		$block .= $this->getSegment( $segIndex );
+ 	}
+
+ 	// Trim the leading bytes of the first segment and anything past $end.
+ 	$block = substr( $block,
+ 		$start - $startSeg * self::SEGSIZE,
+ 		$length );
+
+ 	if ( strlen( $block ) < $length ) {
+ 		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
+ 	}
+
+ 	return $block;
+ }
+
+ /**
+  * Get the section of the file that starts at $segIndex * self::SEGSIZE and
+  * is self::SEGSIZE bytes long. Results are cached in $this->buffer. This is
+  * a helper function for getBlock().
+  *
+  * If the file does not hold enough bytes to fill the segment, the return
+  * value is shorter than self::SEGSIZE. A segment that starts at or beyond
+  * the end of the file yields an empty string.
+  *
+  * @param int $segIndex
+  *
+  * @return string
+  */
+ function getSegment( $segIndex ) {
+ 	// Serve repeated requests from the cache.
+ 	if ( isset( $this->buffer[$segIndex] ) ) {
+ 		return $this->buffer[$segIndex];
+ 	}
+
+ 	$bytePos = $segIndex * self::SEGSIZE;
+ 	if ( $bytePos >= $this->getFileLength() ) {
+ 		$this->buffer[$segIndex] = '';
+
+ 		return '';
+ 	}
+
+ 	// fseek() returns 0 on success, so any truthy result is a failure.
+ 	$seekFailed = fseek( $this->file, $bytePos );
+ 	if ( $seekFailed ) {
+ 		$this->error( 'zip-bad', "seek to $bytePos failed" );
+ 	}
+
+ 	$contents = fread( $this->file, self::SEGSIZE );
+ 	if ( $contents === false ) {
+ 		$this->error( 'zip-bad', "read from $bytePos failed" );
+ 	}
+ 	$this->buffer[$segIndex] = $contents;
+
+ 	return $this->buffer[$segIndex];
+ }
+
+ /**
+  * Get the size of a structure in bytes. See unpack() for the format of $struct.
+  * @param array $struct Map of field name to field type
+  * @return int Sum of the byte widths of all fields
+  */
+ function getStructSize( $struct ) {
+ 	$total = 0;
+ 	foreach ( $struct as $fieldType ) {
+ 		// Array types carry their byte length as the second element; plain
+ 		// values are the byte length themselves.
+ 		$total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
+ 	}
+
+ 	return $total;
+ }
+
+ /**
+ * Unpack a binary structure. This is like the built-in unpack() function
+ * except nicer.
+ *
+ * An error is raised via $this->error() if the structure would extend past
+ * the end of $string, or if an integer field exceeds 2^52.
+ *
+ * @param string $string The binary data input
+ *
+ * @param array $struct An associative array giving structure members and their
+ * types. The key is the field name. The value may be either an
+ * integer, in which case the field is a little-endian unsigned integer
+ * encoded in the given number of bytes, or an array, in which case the
+ * first element of the array is the type name, and the subsequent
+ * elements are type-dependent parameters. Only one such type is defined:
+ * - "string": The second array element gives the length of string.
+ * Not null terminated.
+ *
+ * @param int $offset The offset into the string at which to start unpacking.
+ *
+ * @throws MWException If $struct names a type other than "string"
+ * @return array Unpacked associative array. Note that large integers in the input
+ * may be represented as floating point numbers in the return value, so
+ * the use of weak comparison is advised.
+ */
+ function unpack( $string, $struct, $offset = 0 ) {
+ // Check the bounds once for the whole structure, so the per-field reads
+ // below never index past the end of $string.
+ $size = $this->getStructSize( $struct );
+ if ( $offset + $size > strlen( $string ) ) {
+ $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
+ }
+
+ $data = [];
+ // $pos tracks the read position as fields are consumed in order.
+ $pos = $offset;
+ foreach ( $struct as $key => $type ) {
+ if ( is_array( $type ) ) {
+ list( $typeName, $fieldSize ) = $type;
+ switch ( $typeName ) {
+ case 'string':
+ // Raw bytes, copied verbatim
+ $data[$key] = substr( $string, $pos, $fieldSize );
+ $pos += $fieldSize;
+ break;
+ default:
+ throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
+ }
+ } else {
+ // Unsigned little-endian integer
+ $length = intval( $type );
+
+ // Calculate the value. Use an algorithm which automatically
+ // upgrades the value to floating point if necessary.
+ // Bytes are folded in from the most significant (last) to the
+ // least significant (first).
+ $value = 0;
+ for ( $i = $length - 1; $i >= 0; $i-- ) {
+ $value *= 256;
+ $value += ord( $string[$pos + $i] );
+ }
+
+ // Throw an exception if there was loss of precision
+ if ( $value > pow( 2, 52 ) ) {
+ $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
+ 'This could happen if we tried to unpack a 64-bit structure ' .
+ 'at an invalid location.' );
+ }
+ $data[$key] = $value;
+ $pos += $length;
+ }
+ }
+
+ return $data;
+ }
+
+ /**
+  * Tell whether a single bit of an integer value is set.
+  *
+  * @param int $value
+  * @param int $bitIndex The index of the bit, where 0 is the LSB.
+  * @return bool True if the bit at $bitIndex is 1, false if it is 0.
+  */
+ function testBit( $value, $bitIndex ) {
+ 	$shifted = $value >> $bitIndex;
+
+ 	return ( $shifted & 1 ) === 1;
+ }
+
+ /**
+ * Debugging helper function which dumps a string in hexdump -C format.
+ *
+ * Output goes directly to standard output, one line per 16 input bytes:
+ * the offset of the line, the bytes in hexadecimal, then the same bytes as
+ * characters between "|" marks, with "." in place of non-printable ones.
+ *
+ * @param string $s
+ */
+ function hexDump( $s ) {
+ $n = strlen( $s );
+ for ( $i = 0; $i < $n; $i += 16 ) {
+ // Offset of the first byte on this line
+ printf( "%08X ", $i );
+ // Hexadecimal column
+ for ( $j = 0; $j < 16; $j++ ) {
+ print " ";
+ // Extra separator between the two groups of eight bytes
+ if ( $j == 8 ) {
+ print " ";
+ }
+ if ( $i + $j >= $n ) {
+ // Past the end of the input: pad instead of printing a byte
+ print " ";
+ } else {
+ printf( "%02X", ord( $s[$i + $j] ) );
+ }
+ }
+
+ // Character column
+ print " |";
+ for ( $j = 0; $j < 16; $j++ ) {
+ if ( $i + $j >= $n ) {
+ print " ";
+ } elseif ( ctype_print( $s[$i + $j] ) ) {
+ print $s[$i + $j];
+ } else {
+ print '.';
+ }
+ }
+ print "|\n";
+ }
+ }
+}
diff --git a/www/wiki/includes/utils/ZipDirectoryReaderError.php b/www/wiki/includes/utils/ZipDirectoryReaderError.php
new file mode 100644
index 00000000..592036e3
--- /dev/null
+++ b/www/wiki/includes/utils/ZipDirectoryReaderError.php
@@ -0,0 +1,38 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Internal exception class that carries an error code in addition to the
+ * usual exception message. Will be caught by private code.
+ */
+class ZipDirectoryReaderError extends Exception {
+	/** @var mixed The error code given to the constructor */
+	protected $errorCode;
+
+	/**
+	 * @param mixed $code Error code; it is also embedded in the exception message.
+	 */
+	function __construct( $code ) {
+		parent::__construct( "ZipDirectoryReader error: $code" );
+		$this->errorCode = $code;
+	}
+
+	/**
+	 * Get the error code this exception was created with.
+	 *
+	 * @return mixed
+	 */
+	function getErrorCode() {
+		return $this->errorCode;
+	}
+}