summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Translate/webservices
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/webservices
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/webservices')
-rw-r--r--www/wiki/extensions/Translate/webservices/ApertiumWebService.php81
-rw-r--r--www/wiki/extensions/Translate/webservices/CaighdeanWebService.php93
-rw-r--r--www/wiki/extensions/Translate/webservices/CxserverWebService.php82
-rw-r--r--www/wiki/extensions/Translate/webservices/MicrosoftWebService.php142
-rw-r--r--www/wiki/extensions/Translate/webservices/QueryAggregator.php89
-rw-r--r--www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php17
-rw-r--r--www/wiki/extensions/Translate/webservices/RESTBaseWebService.php80
-rw-r--r--www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php61
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationQuery.php105
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php65
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationWebService.php352
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php18
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php18
-rw-r--r--www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php20
-rw-r--r--www/wiki/extensions/Translate/webservices/YandexWebService.php99
15 files changed, 1322 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/webservices/ApertiumWebService.php b/www/wiki/extensions/Translate/webservices/ApertiumWebService.php
new file mode 100644
index 00000000..d333621b
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/ApertiumWebService.php
@@ -0,0 +1,81 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Implements support for the Apertium translator API.
+ *
+ * Configuration keys read by this class: `pairs` (URL that lists the
+ * supported language pairs), `url` (translation endpoint), `key` and `timeout`.
+ *
+ * @see http://wiki.apertium.org/wiki/Apertium_web_service
+ * @ingroup TranslationWebService
+ * @since 2013-01-01
+ */
+class ApertiumWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'mt'.
+	 */
+	public function getType() {
+		return 'mt';
+	}
+
+	/**
+	 * Convert a language code to the form used in requests: the BCP 47 form
+	 * with hyphens replaced by underscores.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		return str_replace( '-', '_', LanguageCode::bcp47( $code ) );
+	}
+
+	/**
+	 * Fetch the supported language pairs from the configured `pairs` URL.
+	 *
+	 * @return array $pairs[source language][target language] = true
+	 * @throws TranslationWebServiceException If the reply is not JSON or has no
+	 *  iterable `responseData` member.
+	 */
+	protected function doPairs() {
+		$json = Http::get(
+			$this->config['pairs'],
+			[ 'timeout' => $this->config['timeout'] ],
+			__METHOD__
+		);
+		$response = FormatJson::decode( $json );
+		$list = is_object( $response ) ? ( $response->responseData ?? null ) : null;
+
+		// A reply without a usable responseData list is as malformed as one that
+		// is not JSON at all; iterating over it would only raise PHP warnings.
+		if ( !is_array( $list ) && !is_object( $list ) ) {
+			$error = 'Malformed reply from remote server: ' . (string)$json;
+			throw new TranslationWebServiceException( $error );
+		}
+
+		$pairs = [];
+		foreach ( $list as $pair ) {
+			// Skip entries that do not name both languages.
+			if ( !isset( $pair->sourceLanguage, $pair->targetLanguage ) ) {
+				continue;
+			}
+			$pairs[$pair->sourceLanguage][$pair->targetLanguage] = true;
+		}
+
+		return $pairs;
+	}
+
+	/**
+	 * Build the GET query for translating one text.
+	 *
+	 * @param string $text Text to translate.
+	 * @param string $from Source language code, already mapped with mapCode().
+	 * @param string $to Target language code, already mapped with mapCode().
+	 * @return TranslationQuery
+	 * @throws TranslationWebServiceConfigurationException If `key` is not configured.
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		// NOTE(review): the key is required here but is not added to the request
+		// parameters below; confirm whether that is intended.
+		if ( !isset( $this->config['key'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'API key is not set' );
+		}
+
+		$text = trim( $text );
+		$text = $this->wrapUntranslatable( $text );
+
+		$params = [
+			'q' => $text,
+			'langpair' => "$from|$to",
+			'x-application' => 'MediaWiki Translate extension ' . TRANSLATE_VERSION,
+		];
+
+		return TranslationQuery::factory( $this->config['url'] )
+			->timeout( $this->config['timeout'] )
+			->queryParameters( $params );
+	}
+
+	/**
+	 * Extract the translated text from the service reply.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return string
+	 * @throws TranslationWebServiceException If the body is not a JSON object,
+	 *  reports a status other than 200, or carries no translated text.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$body = $reply->getBody();
+		$response = FormatJson::decode( $body );
+		if ( !is_object( $response ) ) {
+			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
+		}
+
+		// A missing status is treated like any other non-200 status.
+		if ( ( $response->responseStatus ?? null ) !== 200 ) {
+			throw new TranslationWebServiceException(
+				(string)( $response->responseDetails ?? 'Unexpected reply from remote server' )
+			);
+		}
+
+		if ( !isset( $response->responseData->translatedText ) ) {
+			throw new TranslationWebServiceException( 'Unexpected reply from remote server' );
+		}
+
+		$text = Sanitizer::decodeCharReferences( $response->responseData->translatedText );
+		$text = $this->unwrapUntranslatable( $text );
+
+		return trim( $text );
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php b/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php
new file mode 100644
index 00000000..cb472d93
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php
@@ -0,0 +1,93 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Implements support for the Caighdean translator API.
+ * @see https://github.com/kscanne/caighdean/blob/master/API.md
+ * @ingroup TranslationWebService
+ * @since 2017.04
+ */
+class CaighdeanWebService extends TranslationWebService {
+ /**
+ * @return string Always 'mt'.
+ */
+ public function getType() {
+ return 'mt';
+ }
+
+ /**
+ * Language codes are passed to the service unchanged.
+ *
+ * @param string $code
+ * @return string
+ */
+ public function mapCode( $code ) {
+ return $code;
+ }
+
+ /**
+ * The supported pairs are hard-coded; no request is made.
+ *
+ * @return array $pairs[source language][target language] = true
+ */
+ protected function doPairs() {
+ $pairs = [
+ 'gd' => [ 'ga' => true ],
+ 'gv' => [ 'ga' => true ],
+ ];
+
+ return $pairs;
+ }
+
+ /**
+ * Build the POST query for translating one text.
+ *
+ * The trimmed source text is attached to the query as processing
+ * instructions so that parseResponse() can read it back.
+ *
+ * @param string $text Text to translate.
+ * @param string $from Source language code.
+ * @param string $to Target language code; not sent to the service.
+ * @return TranslationQuery
+ * @throws TranslationWebServiceConfigurationException If `url` is not configured.
+ * @throws TranslationWebServiceInvalidInputException If the text is empty after
+ *  trimming or the encoded payload is longer than 16000 bytes.
+ */
+ protected function getQuery( $text, $from, $to ) {
+ if ( !isset( $this->config['url'] ) ) {
+ throw new TranslationWebServiceConfigurationException( '`url` not set in configuration' );
+ }
+
+ $text = trim( $text );
+ if ( $text === '' ) {
+ throw new TranslationWebServiceInvalidInputException( 'Input is empty' );
+ }
+
+ $data = wfArrayToCgi( [
+ 'foinse' => $from,
+ 'teacs' => $text,
+ ] );
+
+ // Maximum payload is 16 KiB. Based on testing 16000 bytes is safe by leaving 224
+ // bytes for other things.
+ if ( strlen( $data ) > 16000 ) {
+ throw new TranslationWebServiceInvalidInputException( 'Input is over 16000 bytes long' );
+ }
+
+ return TranslationQuery::factory( $this->config['url'] )
+ ->timeout( $this->config['timeout'] )
+ ->postWithData( $data )
+ ->attachProcessingInstructions( $text );
+ }
+
+ /**
+ * Rebuild the translated text from the token pairs in the reply.
+ *
+ * The reply is iterated as a list of [ source token, target token ] pairs.
+ * Target tokens are concatenated, and the whitespace that followed each
+ * source token in the original text is reused as the separator when the
+ * source token can be located there.
+ *
+ * @param TranslationQueryResponse $reply
+ * @return string
+ * @throws TranslationWebServiceException If the body does not decode to an array.
+ */
+ protected function parseResponse( TranslationQueryResponse $reply ) {
+ $body = $reply->getBody();
+ $response = FormatJson::decode( $body );
+ if ( !is_array( $response ) ) {
+ throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
+ }
+
+ $text = '';
+ // The source text that getQuery() attached to the query.
+ $originalText = $reply->getQuery()->getProcessingInstructions();
+ foreach ( $response as list( $sourceToken, $targetToken ) ) {
+ // Fallback separator for tokens that cannot be located in the source text.
+ $separator = ' ';
+ $pos = strpos( $originalText, $sourceToken );
+ // Try to keep the effects local. If we fail to match at token, we could accidentally
+ // scan very far ahead in the text, find a false match and not find matches for all
+ // of the tokens in the between.
+ if ( $pos !== false && $pos < 50 ) {
+ // Remove the portion of text we have processed. $pos should be zero, unless
+ // we failed to match something earlier.
+ $originalText = substr( $originalText, $pos + strlen( $sourceToken ) );
+ if ( preg_match( '/^\s+/', $originalText, $match ) ) {
+ // Reuse the exact whitespace run from the source and consume it.
+ $separator = $match[ 0 ];
+ $originalText = substr( $originalText, strlen( $separator ) );
+ } else {
+ // No whitespace followed the token in the source, so add none.
+ $separator = '';
+ }
+ }
+
+ $text .= $targetToken . $separator;
+ }
+
+ return $text;
+ }
+}
diff --git a/www/wiki/extensions/Translate/webservices/CxserverWebService.php b/www/wiki/extensions/Translate/webservices/CxserverWebService.php
new file mode 100644
index 00000000..6ed7189d
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/CxserverWebService.php
@@ -0,0 +1,82 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Implements support for the cxserver API.
+ *
+ * Configuration keys read by this class: `host` and `timeout`.
+ *
+ * @ingroup TranslationWebService
+ * @since 2015.02
+ */
+class CxserverWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'mt'.
+	 */
+	public function getType() {
+		return 'mt';
+	}
+
+	/**
+	 * Language codes are passed to the service unchanged.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		return $code;
+	}
+
+	/**
+	 * Fetch the language pairs listed under `Apertium` at `<host>/v1/list/mt`.
+	 *
+	 * @return array $pairs[source language][target language] = true
+	 * @throws TranslationWebServiceConfigurationException If `host` is not configured.
+	 * @throws TranslationWebServiceException If the reply does not decode to an array.
+	 */
+	protected function doPairs() {
+		if ( !isset( $this->config['host'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'Cxserver host not set' );
+		}
+
+		$url = $this->config['host'] . '/v1/list/mt';
+		// The timeout must be passed under the 'timeout' key; a positional
+		// value is not recognised as an option and the default would be used.
+		$json = Http::get(
+			$url,
+			[ 'timeout' => $this->config['timeout'] ],
+			__METHOD__
+		);
+		$response = FormatJson::decode( $json, true );
+
+		if ( !is_array( $response ) ) {
+			$exception = 'Malformed reply from remote server: ' . (string)$json;
+			throw new TranslationWebServiceException( $exception );
+		}
+
+		$pairs = [];
+		// A reply without an Apertium section simply yields no pairs.
+		foreach ( $response['Apertium'] ?? [] as $source => $targets ) {
+			foreach ( (array)$targets as $target ) {
+				$pairs[$source][$target] = true;
+			}
+		}
+
+		return $pairs;
+	}
+
+	/**
+	 * Build the POST query for translating one text.
+	 *
+	 * @param string $text Text to translate.
+	 * @param string $from Source language code.
+	 * @param string $to Target language code.
+	 * @return TranslationQuery
+	 * @throws TranslationWebServiceConfigurationException If `host` is not configured.
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		if ( !isset( $this->config['host'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'Cxserver host not set' );
+		}
+
+		$text = trim( $text );
+		$text = $this->wrapUntranslatable( $text );
+		$url = $this->config['host'] . "/v1/mt/$from/$to/Apertium";
+
+		return TranslationQuery::factory( $url )
+			->timeout( $this->config['timeout'] )
+			->postWithData( wfArrayToCgi( [ 'html' => $text ] ) );
+	}
+
+	/**
+	 * Extract the translated text from the `contents` member of the reply.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return string
+	 * @throws TranslationWebServiceException If the body is not a JSON object
+	 *  or has no string `contents` member.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$body = $reply->getBody();
+		$response = FormatJson::decode( $body );
+		if ( !is_object( $response ) ) {
+			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
+		}
+
+		if ( !isset( $response->contents ) || !is_string( $response->contents ) ) {
+			throw new TranslationWebServiceException( 'Unexpected reply from remote server' );
+		}
+
+		$text = $response->contents;
+		// Strip a single <div> that wraps the whole reply, when present.
+		if ( preg_match( '~^<div>(.*)</div>$~', $text ) ) {
+			$text = preg_replace( '~^<div>(.*)</div>$~', '\1', $text );
+		}
+		$text = $this->unwrapUntranslatable( $text );
+
+		return trim( $text );
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php
new file mode 100644
index 00000000..221944d5
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php
@@ -0,0 +1,142 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @author Ulrich Strauss
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Machine translation backend for the Microsoft translation API, version 3.
+ * @see https://docs.microsoft.com/fi-fi/azure/cognitive-services/Translator/reference/v3-0-reference
+ * @ingroup TranslationWebService
+ * @since 2013-01-01
+ */
+class MicrosoftWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'mt'.
+	 */
+	public function getType() {
+		return 'mt';
+	}
+
+	/**
+	 * Translate the few codes that have a dedicated mapping; every other code
+	 * is returned unchanged.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		$overrides = [
+			'tl' => 'fil',
+			'zh-hant' => 'zh-Hant',
+			'zh-hans' => 'zh-Hans',
+			'sr-ec' => 'sr-Cyrl',
+			'sr-el' => 'sr-Latn',
+			'pt-br' => 'pt',
+		];
+
+		if ( isset( $overrides[$code] ) ) {
+			return $overrides[$code];
+		}
+
+		return $code;
+	}
+
+	/**
+	 * Ask the `/languages` endpoint for the translation languages and pair
+	 * every one of them with every one of them.
+	 *
+	 * @return array $pairs[source language][target language] = true
+	 * @throws TranslationWebServiceConfigurationException If `key` is not configured.
+	 * @throws TranslationWebServiceException If the request fails or the reply
+	 *  has no `translation` member.
+	 */
+	protected function doPairs() {
+		if ( !isset( $this->config['key'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'key is not set' );
+		}
+
+		$request = MWHttpRequest::factory(
+			$this->config['url'] . '/languages?api-version=3.0',
+			[
+				'method' => 'GET',
+				'timeout' => $this->config['timeout'],
+			]
+		);
+		$request->setHeader( 'Ocp-Apim-Subscription-Key', $this->config['key'] );
+
+		$status = $request->execute();
+		if ( !$status->isOK() ) {
+			// Most likely a timeout or other general error
+			throw new TranslationWebServiceException(
+				'Http::get failed:' . serialize( $request->getContent() ) . serialize( $status )
+			);
+		}
+
+		$json = $request->getContent();
+		$decoded = json_decode( $json, true );
+		if ( !isset( $decoded[ 'translation' ] ) ) {
+			throw new TranslationWebServiceException(
+				'Unable to fetch list of available languages: ' . $json
+			);
+		}
+
+		// Cartesian product: assume any listed language translates to any other.
+		$codes = array_keys( $decoded[ 'translation' ] );
+		$everyTarget = array_fill_keys( $codes, true );
+
+		return array_fill_keys( $codes, $everyTarget );
+	}
+
+	/**
+	 * Build the POST query for translating one text.
+	 *
+	 * @param string $text Text to translate.
+	 * @param string $from Source language code.
+	 * @param string $to Target language code.
+	 * @return TranslationQuery
+	 * @throws TranslationWebServiceConfigurationException If `key` is not configured.
+	 * @throws TranslationWebServiceInvalidInputException If the JSON request body
+	 *  is longer than 5000 bytes.
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		if ( !isset( $this->config['key'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'key is not set' );
+		}
+
+		$wrapped = $this->wrapUntranslatable( trim( $text ) );
+		$payload = json_encode( [ [ 'Text' => $wrapped ] ] );
+		if ( strlen( $payload ) > 5000 ) {
+			throw new TranslationWebServiceInvalidInputException( 'Source text too long' );
+		}
+
+		$query = TranslationQuery::factory( $this->config['url'] . '/translate' );
+		$query->timeout( $this->config['timeout'] );
+		$query->queryParameters( [
+			'api-version' => '3.0',
+			'from' => $from,
+			'to' => $to,
+			'textType' => 'html',
+		] );
+		$query->queryHeaders( [
+			'Ocp-Apim-Subscription-Key' => $this->config['key'],
+			'Content-Type' => 'application/json',
+		] );
+
+		return $query->postWithData( $payload );
+	}
+
+	/**
+	 * Read the first translation of the first result from the reply.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return string
+	 * @throws TranslationWebServiceException If the reply lacks that entry.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$body = $reply->getBody();
+		$decoded = json_decode( $body, true );
+
+		if ( !isset( $decoded[ 0 ][ 'translations' ][ 0 ][ 'text' ] ) ) {
+			throw new TranslationWebServiceException(
+				'Unable to parse translation response: ' . $body
+			);
+		}
+
+		return $this->unwrapUntranslatable( $decoded[ 0 ][ 'translations' ][ 0 ][ 'text' ] );
+	}
+
+	/**
+	 * Override from parent: wraps the protected parts in a notranslate span.
+	 *
+	 * @param string $text
+	 * @return string
+	 */
+	protected function wrapUntranslatable( $text ) {
+		return preg_replace(
+			'~%[^% ]+%|\$\d|{VAR:[^}]+}|{?{(PLURAL|GRAMMAR|GENDER):[^|]+\||%(\d\$)?[sd]~',
+			'<span class="notranslate">\0</span>',
+			$text
+		);
+	}
+
+	/**
+	 * Override from parent: removes the notranslate spans again, together with
+	 * any whitespace directly inside them.
+	 *
+	 * @param string $text
+	 * @return string
+	 */
+	protected function unwrapUntranslatable( $text ) {
+		return preg_replace(
+			'~<span class="notranslate">\s*(.*?)\s*</span>~',
+			'\1',
+			$text
+		);
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/QueryAggregator.php b/www/wiki/extensions/Translate/webservices/QueryAggregator.php
new file mode 100644
index 00000000..6cc6465b
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/QueryAggregator.php
@@ -0,0 +1,89 @@
+<?php
+/**
+ * Web service utility class.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Collects web service queries and executes them together in one
+ * MultiHttpClient batch to save time.
+ *
+ * @ingroup TranslationWebService
+ * @since 2015.02
+ */
+class QueryAggregator {
+	/** @var TranslationQuery[] Registered queries, indexed by query id. */
+	protected $queries = [];
+	/** @var array[] Raw results of the batch, indexed by query id. */
+	protected $responses = [];
+	/** @var int Largest timeout among the registered queries. */
+	protected $timeout = 0;
+	/** @var bool Whether run() has completed. */
+	protected $hasRun = false;
+
+	/**
+	 * Register a query to be run.
+	 * @param TranslationQuery $query
+	 * @return mixed Query id that can be used to fetch results.
+	 */
+	public function addQuery( TranslationQuery $query ) {
+		$this->queries[] = $query;
+		// The batch as a whole uses the longest timeout any query asked for.
+		$this->timeout = max( $query->getTimeout(), $this->timeout );
+
+		return count( $this->queries ) - 1;
+	}
+
+	/**
+	 * Returns a response for a query.
+	 * @param mixed $id Query id.
+	 * @return TranslationQueryResponse
+	 * @throws RuntimeException if called before run() has been called.
+	 */
+	public function getResponse( $id ) {
+		if ( $this->hasRun ) {
+			return TranslationQueryResponse::newFromMultiHttp(
+				$this->responses[$id],
+				$this->queries[$id]
+			);
+		}
+
+		throw new RuntimeException( 'Tried to get response before queries ran' );
+	}
+
+	/**
+	 * Runs all the queries.
+	 */
+	public function run() {
+		global $wgSitename;
+
+		$version = TRANSLATE_VERSION;
+		$clientOptions = [
+			'reqTimeout' => $this->timeout,
+			'connTimeout' => 3,
+			'userAgent' => "MediaWiki Translate extension $version for $wgSitename"
+		];
+
+		$client = new MultiHttpClient( $clientOptions );
+		$batch = $this->getMultiHttpQueries( $this->queries );
+		foreach ( $client->runMulti( $batch ) as $id => $result ) {
+			$this->responses[$id] = $result;
+		}
+
+		$this->hasRun = true;
+	}
+
+	/**
+	 * Formats queries for format used by MultiHttpClient class.
+	 * @param TranslationQuery[] $queries
+	 * @return array[]
+	 */
+	protected function getMultiHttpQueries( $queries ) {
+		$requests = [];
+		foreach ( $queries as $id => $query ) {
+			$requests[$id] = [
+				'url' => $query->getUrl(),
+				'method' => $query->getMethod(),
+				'query' => $query->getQueryParameters(),
+				'body' => $query->getBody(),
+				'headers' => $query->getHeaders(),
+			];
+		}
+
+		return $requests;
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php b/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php
new file mode 100644
index 00000000..c5c0e9a5
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php
@@ -0,0 +1,17 @@
+<?php
+/**
+ * Web service utility interface.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Interface for classes that want to use QueryAggregator.
+ * @since 2015.12
+ */
+interface QueryAggregatorAware {
+ /**
+ * Hand over the aggregator the implementing object should use.
+ *
+ * @param QueryAggregator $aggregator
+ */
+ public function setQueryAggregator( QueryAggregator $aggregator );
+
+ /**
+ * NOTE(review): presumably registers the implementer's queries with the
+ * aggregator passed to setQueryAggregator(); confirm against implementers.
+ */
+ public function populateQueries();
+}
diff --git a/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php b/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php
new file mode 100644
index 00000000..2ff80c43
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php
@@ -0,0 +1,80 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Implements support for cxserver proxied through RESTBase.
+ *
+ * Configuration keys read by this class: `host` and `timeout`.
+ *
+ * @ingroup TranslationWebService
+ * @since 2017.10
+ */
+class RESTBaseWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'mt'.
+	 */
+	public function getType() {
+		return 'mt';
+	}
+
+	/**
+	 * Language codes are passed to the service unchanged.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		return $code;
+	}
+
+	/**
+	 * Fetch the language pairs listed under `Apertium` at
+	 * `<host>/rest_v1/transform/list/tool/mt/`.
+	 *
+	 * @return array $pairs[source language][target language] = true
+	 * @throws TranslationWebServiceConfigurationException If `host` is not configured.
+	 * @throws TranslationWebServiceException If the reply does not decode to an array.
+	 */
+	protected function doPairs() {
+		if ( !isset( $this->config['host'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'RESTBase host not set' );
+		}
+
+		$url = $this->config['host'] . '/rest_v1/transform/list/tool/mt/';
+		// The timeout must be passed under the 'timeout' key; a positional
+		// value is not recognised as an option and the default would be used.
+		$json = Http::get(
+			$url,
+			[ 'timeout' => $this->config['timeout'] ],
+			__METHOD__
+		);
+		$response = FormatJson::decode( $json, true );
+
+		if ( !is_array( $response ) ) {
+			$exception = 'Malformed reply from remote server: ' . $url . ' ' . (string)$json;
+			throw new TranslationWebServiceException( $exception );
+		}
+
+		$pairs = [];
+		// A reply without an Apertium section simply yields no pairs.
+		foreach ( $response['Apertium'] ?? [] as $source => $targets ) {
+			foreach ( (array)$targets as $target ) {
+				$pairs[$source][$target] = true;
+			}
+		}
+
+		return $pairs;
+	}
+
+	/**
+	 * Build the POST query for translating one text.
+	 *
+	 * @param string $text Text to translate.
+	 * @param string $from Source language code.
+	 * @param string $to Target language code.
+	 * @return TranslationQuery
+	 * @throws TranslationWebServiceConfigurationException If `host` is not configured.
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		if ( !isset( $this->config['host'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'RESTBase host not set' );
+		}
+
+		$text = trim( $text );
+		$text = $this->wrapUntranslatable( $text );
+		$url = $this->config['host'] . "/rest_v1/transform/html/from/$from/to/$to/Apertium";
+
+		return TranslationQuery::factory( $url )
+			->timeout( $this->config['timeout'] )
+			->postWithData( wfArrayToCgi( [ 'html' => $text ] ) );
+	}
+
+	/**
+	 * Extract the translated text from the `contents` member of the reply.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return string
+	 * @throws TranslationWebServiceException If the body is not a JSON object
+	 *  or has no string `contents` member.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$body = $reply->getBody();
+
+		$response = FormatJson::decode( $body );
+		if ( !is_object( $response ) ) {
+			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
+		}
+
+		if ( !isset( $response->contents ) || !is_string( $response->contents ) ) {
+			throw new TranslationWebServiceException( 'Unexpected reply from remote server' );
+		}
+
+		$text = $this->unwrapUntranslatable( $response->contents );
+
+		return trim( $text );
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php b/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php
new file mode 100644
index 00000000..c54d319a
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2010-2013 Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Queries a ttmserver through the MediaWiki API (`action=ttmserver`).
+ * @see https://www.mediawiki.org/wiki/Help:Extension:Translate/Translation_memories
+ * @ingroup TranslationWebService
+ * @since 2013-01-01
+ */
+class RemoteTTMServerWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'ttmserver'.
+	 */
+	public function getType() {
+		return 'ttmserver';
+	}
+
+	/**
+	 * Unused by this service; the code is returned as given.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		return $code;
+	}
+
+	/**
+	 * Unused by this service.
+	 *
+	 * @return null
+	 */
+	protected function doPairs() {
+		return null;
+	}
+
+	/**
+	 * Build the API query for one text.
+	 *
+	 * @param string $text Source text.
+	 * @param string $from Source language code.
+	 * @param string $to Target language code.
+	 * @return TranslationQuery
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		$apiParams = [
+			'format' => 'json',
+			'action' => 'ttmserver',
+			'sourcelanguage' => $from,
+			'targetlanguage' => $to,
+			'text' => $text
+		];
+
+		// The service name is only sent when one is configured.
+		if ( isset( $this->config['service'] ) ) {
+			$apiParams['service'] = $this->config['service'];
+		}
+
+		$query = TranslationQuery::factory( $this->config['url'] );
+		$query->timeout( $this->config['timeout'] );
+
+		return $query->queryParameters( $apiParams );
+	}
+
+	/**
+	 * Return the `ttmserver` member of the decoded API reply.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return mixed
+	 * @throws TranslationWebServiceException If the body does not decode to an
+	 *  array or has no `ttmserver` member.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$raw = $reply->getBody();
+		$decoded = FormatJson::decode( $raw, true );
+
+		if ( !is_array( $decoded ) ) {
+			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $raw ) );
+		}
+
+		if ( isset( $decoded['ttmserver'] ) ) {
+			return $decoded['ttmserver'];
+		}
+
+		throw new TranslationWebServiceException( 'Unexpected reply from remote server' );
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationQuery.php b/www/wiki/extensions/Translate/webservices/TranslationQuery.php
new file mode 100644
index 00000000..3e0fbf31
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationQuery.php
@@ -0,0 +1,105 @@
+<?php
+/**
+ * Contains code related to web services support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Mutable object that represents an HTTP(S) query.
+ * NB: Too lazy to make TranslationQueryFactory to make this class immutable.
+ * @since 2015.02
+ */
+class TranslationQuery {
+ /** @var string Target URL, set by factory(). */
+ protected $url;
+ /** @var int Timeout as given to timeout(); 0 until one is set. */
+ protected $timeout = 0;
+ /** @var string HTTP method; 'GET' unless postWithData() was called. */
+ protected $method = 'GET';
+ /** @var array Query parameters. */
+ protected $params = [];
+ /** @var string|null Request body, set by postWithData(). */
+ protected $body;
+ /** @var array Request headers. */
+ protected $headers = [];
+
+ /**
+ * @var mixed Arbitrary data that is returned with TranslationQueryResponse
+ */
+ protected $instructions;
+
+ /**
+ * Create a query for the given URL.
+ *
+ * @param string $url
+ * @return self
+ */
+ // URL is mandatory, so using it here
+ public static function factory( $url ) {
+ $obj = new self();
+ $obj->url = $url;
+ return $obj;
+ }
+
+ /**
+ * Make this a POST request with given data.
+ *
+ * @param string $data
+ * @return $this
+ */
+ public function postWithData( $data ) {
+ $this->method = 'POST';
+ $this->body = $data;
+ return $this;
+ }
+
+ /**
+ * Replace the query parameters.
+ *
+ * @param array $params
+ * @return $this
+ */
+ public function queryParameters( array $params ) {
+ $this->params = $params;
+ return $this;
+ }
+
+ /**
+ * Replace the request headers.
+ *
+ * @param array $headers
+ * @return $this
+ */
+ public function queryHeaders( array $headers ) {
+ $this->headers = $headers;
+ return $this;
+ }
+
+ /**
+ * Set the timeout for this query.
+ *
+ * @param int $timeout
+ * @return $this
+ */
+ public function timeout( $timeout ) {
+ $this->timeout = $timeout;
+ return $this;
+ }
+
+ /**
+ * Attach arbitrary data that is necessary to process the results.
+ * @param mixed $data
+ * @return $this
+ * @since 2017.04
+ */
+ public function attachProcessingInstructions( $data ) {
+ $this->instructions = $data;
+ return $this;
+ }
+
+ /**
+ * @return int
+ */
+ public function getTimeout() {
+ return $this->timeout;
+ }
+
+ /**
+ * @return string
+ */
+ public function getUrl() {
+ return $this->url;
+ }
+
+ /**
+ * @return string 'GET' or 'POST'
+ */
+ public function getMethod() {
+ return $this->method;
+ }
+
+ /**
+ * @return array
+ */
+ public function getQueryParameters() {
+ return $this->params;
+ }
+
+ /**
+ * @return string|null Null unless postWithData() was called.
+ */
+ public function getBody() {
+ return $this->body;
+ }
+
+ /**
+ * @return array
+ */
+ public function getHeaders() {
+ return $this->headers;
+ }
+
+ /**
+ * Get previously attached result processing instructions.
+ * @return mixed
+ * @since 2017.04
+ */
+ public function getProcessingInstructions() {
+ return $this->instructions;
+ }
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php b/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php
new file mode 100644
index 00000000..a8f9f6dd
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php
@@ -0,0 +1,65 @@
+<?php
+/**
+ * Contains code related to web services support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Value object that represents an HTTP(S) query response.
+ * @since 2015.02
+ */
+class TranslationQueryResponse {
+ /** @var int Status code, cast to int from the response data. */
+ protected $code;
+ /** @var mixed The `reason` entry of the response data. */
+ protected $reason;
+ /** @var mixed The `headers` entry of the response data. */
+ protected $headers;
+ /** @var mixed The `body` entry of the response data. */
+ protected $body;
+ /** @var mixed The `error` entry of the response data. */
+ protected $error;
+
+ /**
+ * @var TranslationQuery
+ */
+ protected $query;
+
+ /**
+ * Instances are created with newFromMultiHttp().
+ */
+ protected function __construct() {
+ }
+
+ /**
+ * Build a response from one result entry and the query that produced it.
+ *
+ * @param array $data Result entry; its `response` member must provide the
+ *  keys `code`, `reason`, `headers`, `body` and `error`.
+ * @param TranslationQuery $query
+ * @return self
+ */
+ public static function newFromMultiHttp( array $data, TranslationQuery $query ) {
+ $response = $data['response'];
+ $obj = new self();
+ $obj->code = (int)$response['code'];
+ $obj->reason = $response['reason'];
+ $obj->headers = $response['headers'];
+ $obj->body = $response['body'];
+ $obj->error = $response['error'];
+ $obj->query = $query;
+ return $obj;
+ }
+
+ /**
+ * @return int
+ */
+ public function getStatusCode() {
+ return $this->code;
+ }
+
+ /**
+ * Get the error when the status code is 0, otherwise the reason.
+ *
+ * @return mixed
+ */
+ public function getStatusMessage() {
+ if ( $this->code === 0 ) {
+ return $this->error;
+ } else {
+ return $this->reason;
+ }
+ }
+
+ /**
+ * @return mixed The response body.
+ */
+ public function getBody() {
+ return $this->body;
+ }
+
+ /**
+ * Get the TranslationQuery that was made for this request.
+ * @return TranslationQuery
+ * @since 2017.04
+ */
+ public function getQuery() {
+ return $this->query;
+ }
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebService.php b/www/wiki/extensions/Translate/webservices/TranslationWebService.php
new file mode 100644
index 00000000..a72be868
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebService.php
@@ -0,0 +1,352 @@
+<?php
+/**
+ * Contains code related to web service support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+use MediaWiki\Logger\LoggerFactory;
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Multipurpose class:
+ * - 1) Interface for web services.
+ * - 2) Source text picking logic.
+ * - 3) Factory class.
+ * - 4) Service failure tracking and suspending.
+ * @since 2013-01-01
+ * @defgroup TranslationWebService Translation Web Services
+ */
+abstract class TranslationWebService implements LoggerAwareInterface {
+ /* Public api */
+
+	/**
+	 * Get a webservice handler.
+	 *
+	 * @see $wgTranslateTranslationServices
+	 * @param string $name Name of the service.
+	 * @param array $config Service configuration. `timeout` defaults to 3 when unset.
+	 * @return TranslationWebService|null Null when `type` is missing or does not
+	 *  name a known handler.
+	 */
+	public static function factory( $name, $config ) {
+		$handlers = [
+			'microsoft' => 'MicrosoftWebService',
+			'apertium' => 'ApertiumWebService',
+			'yandex' => 'YandexWebService',
+			'remote-ttmserver' => 'RemoteTTMServerWebService',
+			'cxserver' => 'CxserverWebService',
+			'restbase' => 'RESTBaseWebService',
+			'caighdean' => 'CaighdeanWebService',
+		];
+
+		if ( !isset( $config['timeout'] ) ) {
+			$config['timeout'] = 3;
+		}
+
+		// Alter local ttmserver instance to appear as remote
+		// to take advantage of the query aggregator. But only
+		// if they are public.
+		if (
+			isset( $config['class'] ) &&
+			$config['class'] === 'ElasticSearchTTMServer' &&
+			isset( $config['public'] ) &&
+			$config['public'] === true
+		) {
+			$config['type'] = 'remote-ttmserver';
+			$config['service'] = $name;
+			$config['url'] = wfExpandUrl( wfScript( 'api' ), PROTO_CANONICAL );
+		}
+
+		// Read the type defensively: a configuration without one (or with a
+		// non-string one) is treated as an unknown service, not a PHP error.
+		$type = $config['type'] ?? null;
+		if ( !is_string( $type ) || !isset( $handlers[$type] ) ) {
+			return null;
+		}
+
+		$class = $handlers[$type];
+		$obj = new $class( $name, $config );
+		$obj->setLogger( LoggerFactory::getInstance( 'translationservices' ) );
+
+		return $obj;
+	}
+
+ /**
+ * Gets the name of this service, for example to display it for the user.
+ *
+ * @return string Plain text name for this service.
+ * @since 2014.02
+ */
+ public function getName() {
+ return $this->service;
+ }
+
+ /**
+ * Get queries for this service. Queries from multiple services can be
+ * collected and run asynchronously with QueryAggregator.
+ *
+ * @param string $text Source text
+ * @param string $from Source language
+ * @param string $to Target language
+ * @return TranslationQuery[]
+ * @since 2015.12
+ * @throws TranslationWebServiceConfigurationException
+ */
+ public function getQueries( $text, $from, $to ) {
+ $from = $this->mapCode( $from );
+ $to = $this->mapCode( $to );
+
+ try {
+ return [ $this->getQuery( $text, $from, $to ) ];
+ } catch ( TranslationWebServiceException $e ) {
+ $this->reportTranslationServiceFailure( $e->getMessage() );
+ return [];
+ } catch ( TranslationWebServiceInvalidInputException $e ) {
+ // Not much we can do about this, just ignore.
+ return [];
+ }
+ }
+
+ /**
+ * Get the web service specific response returned by QueryAggregator.
+ *
+ * @param TranslationQueryResponse $response
+ * @return mixed|null Returns null on error.
+ * @since 2015.12
+ */
+ public function getResultData( TranslationQueryResponse $response ) {
+ if ( $response->getStatusCode() !== 200 ) {
+ $this->reportTranslationServiceFailure(
+ 'STATUS: ' . $response->getStatusMessage() . "\n" .
+ 'BODY: ' . $response->getBody()
+ );
+ return null;
+ }
+
+ try {
+ return $this->parseResponse( $response );
+ } catch ( TranslationWebServiceException $e ) {
+ $this->reportTranslationServiceFailure( $e->getMessage() );
+ return null;
+ }
+ }
+
+ /**
+ * Returns the type of this web service.
+ * @see TranslationAid::getTypes
+ * @return string
+ */
+ abstract public function getType();
+
+ /* Service api */
+
+ /**
+ * Map a MediaWiki (almost standard) language code to the code used by the
+ * translation service.
+ *
+ * @param string $code MediaWiki language code.
+ * @return string Translation service language code.
+ */
+ abstract protected function mapCode( $code );
+
+ /**
+ * Get the list of supported language pairs for the web service. The codes
+ * should be the ones used by the service. Caching is handled by the public
+ * getSupportedLanguagePairs.
+ *
+ * @return array $list[source language][target language] = true
+ * @throws TranslationWebServiceException
+ * @throws TranslationWebServiceConfigurationException
+ */
+ abstract protected function doPairs();
+
+ /**
+ * Get the query. See getQueries for the public method.
+ *
+ * @param string $text Text to translate.
+ * @param string $from Language code of the text, as used by the service.
+ * @param string $to Language code of the translation, as used by the service.
+ * @return TranslationQuery
+ * @since 2015.02
+ * @throws TranslationWebServiceException
+ * @throws TranslationWebServiceConfigurationException
+ * @throws TranslationWebServiceInvalidInputException
+ */
+ abstract protected function getQuery( $text, $from, $to );
+
+ /**
+ * Get the response. See getResultData for the public method.
+ *
+ * @param TranslationQueryResponse $response
+ * @return string
+ * @since 2015.02
+ * @throws TranslationWebServiceException
+ */
+ abstract protected function parseResponse( TranslationQueryResponse $response );
+
+ /* Default implementation */
+
+ /**
+ * @var string Name of this webservice.
+ */
+ protected $service;
+
+ /**
+ * @var array
+ */
+ protected $config;
+
+ /**
+ * @param string $service Name of the webservice
+ * @param array $config
+ */
+ protected function __construct( $service, $config ) {
+ $this->service = $service;
+ $this->config = $config;
+ }
+
+ /**
+ * Test whether given language pair is supported by the service.
+ *
+ * @param string $from Source language
+ * @param string $to Target language
+ * @return bool
+ * @since 2015.12
+ * @throws TranslationWebServiceConfigurationException
+ */
+ public function isSupportedLanguagePair( $from, $to ) {
+ $pairs = $this->getSupportedLanguagePairs();
+ $from = $this->mapCode( $from );
+ $to = $this->mapCode( $to );
+
+ return isset( $pairs[$from][$to] );
+ }
+
+	/**
+	 * Get the supported language pairs, using the cached copy when there is one.
+	 *
+	 * A fresh list from doPairs() is cached for a day. If doPairs() throws, the
+	 * failure is reported and an empty list is returned without being cached.
+	 *
+	 * @see self::doPairs
+	 * @return array $list[source language][target language] = true
+	 */
+	protected function getSupportedLanguagePairs() {
+		$cache = wfGetCache( CACHE_ANYTHING );
+		$key = wfMemcKey( 'translate-tmsug-pairs-' . $this->service );
+
+		$pairs = $cache->get( $key );
+		if ( is_array( $pairs ) ) {
+			return $pairs;
+		}
+
+		try {
+			$pairs = $this->doPairs();
+		} catch ( Exception $e ) {
+			$this->reportTranslationServiceFailure( $e->getMessage() );
+			return [];
+		}
+
+		// Services without pair support may return a non-array (for example
+		// null); normalise it so that callers always receive an array and the
+		// cached value is recognised on the next call.
+		if ( !is_array( $pairs ) ) {
+			$pairs = [];
+		}
+
+		// Cache the result for a day
+		$cache->set( $key, $pairs, 60 * 60 * 24 );
+
+		return $pairs;
+	}
+
+ /**
+ * Some mangling that tries to keep some parts of the message unmangled
+ * by the translation service. Most of them support either class=notranslate
+ * or translate=no.
+ * @param string $text
+ * @return string
+ */
+ protected function wrapUntranslatable( $text ) {
+ $text = str_replace( "\n", '!N!', $text );
+ $pattern = '~%[^% ]+%|\$\d|{VAR:[^}]+}|{?{(PLURAL|GRAMMAR|GENDER):[^|]+\||%(\d\$)?[sd]~';
+ $wrap = '<span class="notranslate" translate="no">\0</span>';
+ return preg_replace( $pattern, $wrap, $text );
+ }
+
+ /**
+ * Undo the hopefully untouched mangling done by wrapUntranslatable.
+ * @param string $text
+ * @return string
+ */
+ protected function unwrapUntranslatable( $text ) {
+ $text = str_replace( '!N!', "\n", $text );
+ $pattern = '~<span class="notranslate" translate="no">(.*?)</span>~';
+ return preg_replace( $pattern, '\1', $text );
+ }
+
+ /* Failure handling and suspending */
+
+	/**
+	 * @var LoggerInterface Logger used for failure reporting; set with setLogger().
+	 */
+	protected $logger;
+
+	/**
+	 * Set the logger used by the failure tracking methods.
+	 *
+	 * @param LoggerInterface $logger
+	 */
+	public function setLogger( LoggerInterface $logger ) {
+		$this->logger = $logger;
+	}
+
+ /**
+ * @var int How many failures during failure period need to happen to
+ * consider the service being temporarily off-line.
+ */
+ protected $serviceFailureCount = 5;
+
+ /**
+ * @var int How long after the last detected failure we clear the status and
+ * try again.
+ */
+ protected $serviceFailurePeriod = 900;
+
+ /**
+ * Checks whether the service has exceeded failure count.
+ *
+ * The cached value is a string of the form "count|timestamp". The outcome
+ * depends on how long ago that timestamp is:
+ * - more than two failure periods: the record is deleted and false is returned;
+ * - more than one failure period: false is returned and the record is kept;
+ * - otherwise: true if the count has reached the failure limit.
+ *
+ * @return bool
+ */
+ public function checkTranslationServiceFailure() {
+ $service = $this->service;
+ $key = wfMemcKey( "translate-service-$service" );
+ $value = wfGetCache( CACHE_ANYTHING )->get( $key );
+ // No string record in the cache: nothing to check against.
+ if ( !is_string( $value ) ) {
+ return false;
+ }
+ list( $count, $failed ) = explode( '|', $value, 2 );
+
+ if ( $failed + ( 2 * $this->serviceFailurePeriod ) < wfTimestamp() ) {
+ // Only log a restoration if the service had actually reached the limit.
+ if ( $count >= $this->serviceFailureCount ) {
+ $this->logger->warning( "Translation service $service (was) restored" );
+ }
+ wfGetCache( CACHE_ANYTHING )->delete( $key );
+
+ return false;
+ } elseif ( $failed + $this->serviceFailurePeriod < wfTimestamp() ) {
+ /* We are in suspicious mode and one failure is enough to update
+ * failed timestamp. If the service works however, let's use it.
+ * Previous failures are forgotten after another failure period
+ * has passed */
+ return false;
+ }
+
+ // Check the failure count against the limit
+ return $count >= $this->serviceFailureCount;
+ }
+
+	/**
+	 * Log a failure of this service and bump its cached failure counter.
+	 *
+	 * The counter is stored together with the current timestamp as
+	 * "count|timestamp" and kept for five failure periods.
+	 *
+	 * @param string $msg Description of the problem, used in the log entry.
+	 */
+	protected function reportTranslationServiceFailure( $msg ) {
+		$service = $this->service;
+		$this->logger->warning( "Translation service $service problem: $msg" );
+
+		$key = wfMemcKey( "translate-service-$service" );
+		$cache = wfGetCache( CACHE_ANYTHING );
+
+		// Continue from the stored count, or start from zero without a record.
+		$stored = $cache->get( $key );
+		$count = is_string( $stored ) ? explode( '|', $stored, 2 )[0] : 0;
+		$count++;
+
+		$failed = wfTimestamp();
+		$cache->set( $key, "$count|$failed", $this->serviceFailurePeriod * 5 );
+
+		if ( $count === $this->serviceFailureCount ) {
+			$this->logger->error( "Translation service $service suspended" );
+		} elseif ( $count > $this->serviceFailureCount ) {
+			$this->logger->warning( "Translation service $service still suspended" );
+		}
+	}
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php
new file mode 100644
index 00000000..509224a4
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php
@@ -0,0 +1,18 @@
+<?php
+/**
+ * Contains code related to web service support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Used to signal a configuration mistake in an external web service, for
+ * example a missing API key. This is in contrast to
+ * TranslationWebServiceException, which signals a failure in the web service
+ * itself.
+ * @since 2017.04
+ * @ingroup TranslationWebService
+ */
+class TranslationWebServiceConfigurationException extends Exception {
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php
new file mode 100644
index 00000000..fa4c1240
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php
@@ -0,0 +1,18 @@
+<?php
+/**
+ * Contains code related to web service support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2010-2013 Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Used to signal a failure in an external web service, such as a malformed
+ * reply or an error reported by the service. If the web service has too many
+ * failures in a short period, it is suspended to avoid wasting time.
+ *
+ * NOTE(review): this class extends MWException, whereas
+ * TranslationWebServiceConfigurationException and
+ * TranslationWebServiceInvalidInputException extend plain Exception; confirm
+ * the difference is intentional.
+ * @since 2013-01-01
+ * @ingroup TranslationWebService
+ */
+class TranslationWebServiceException extends MWException {
+}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php
new file mode 100644
index 00000000..e8ef9d08
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php
@@ -0,0 +1,20 @@
+<?php
+/**
+ * Contains code related to web service support.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Used to signal that the requested input is rejected and cannot be used with
+ * an external web service. This is in contrast to a failure in the web service
+ * itself, which is not in our control. The most common case for this is input
+ * that is too long.
+ * @since 2017.04
+ * @ingroup TranslationWebService
+ */
+class TranslationWebServiceInvalidInputException extends Exception {
+}
diff --git a/www/wiki/extensions/Translate/webservices/YandexWebService.php b/www/wiki/extensions/Translate/webservices/YandexWebService.php
new file mode 100644
index 00000000..fbb16844
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/YandexWebService.php
@@ -0,0 +1,99 @@
+<?php
+/**
+ * Contains a class for querying external translation service.
+ *
+ * @file
+ * @author Niklas Laxström
+ * @license GPL-2.0-or-later
+ */
+
+/**
+ * Implements support for Yandex translation api v1.
+ * @see https://tech.yandex.com/translate/
+ * @ingroup TranslationWebService
+ * @since 2013-01-01
+ */
+class YandexWebService extends TranslationWebService {
+	/**
+	 * @return string Always 'mt'.
+	 */
+	public function getType() {
+		return 'mt';
+	}
+
+	/**
+	 * Maps a language code to the code sent to the service.
+	 * Only 'be-tarask' is rewritten (to 'be'); every other code is returned
+	 * unchanged.
+	 *
+	 * @param string $code
+	 * @return string
+	 */
+	protected function mapCode( $code ) {
+		if ( $code === 'be-tarask' ) {
+			$code = 'be';
+		}
+		return $code;
+	}
+
+	/**
+	 * Fetches the list of language pairs from the configured 'pairs' URL.
+	 *
+	 * @return array Nested array in the form [ source => [ target => true ] ].
+	 * @throws TranslationWebServiceConfigurationException If no API key is configured.
+	 * @throws TranslationWebServiceException If the reply is not a JSON object
+	 *  carrying a list of pairs.
+	 */
+	protected function doPairs() {
+		if ( !isset( $this->config['key'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'API key is not set' );
+		}
+
+		$params = [
+			'key' => $this->config['key'],
+		];
+
+		$url = $this->config['pairs'] . '?' . wfArrayToCgi( $params );
+		$json = Http::get(
+			$url,
+			[ 'timeout' => $this->config['timeout'] ],
+			__METHOD__
+		);
+		$response = FormatJson::decode( $json );
+
+		// A decoded object without a list in `dirs` cannot be iterated. Treat it
+		// like any other malformed reply instead of emitting warnings and
+		// silently returning an empty pair list.
+		if (
+			!is_object( $response ) ||
+			!isset( $response->dirs ) ||
+			!is_array( $response->dirs )
+		) {
+			$exception = 'Malformed reply from remote server: ' . (string)$json;
+			throw new TranslationWebServiceException( $exception );
+		}
+
+		$pairs = [];
+		foreach ( $response->dirs as $pair ) {
+			$parts = is_string( $pair ) ? explode( '-', $pair ) : [];
+			if ( count( $parts ) < 2 ) {
+				// Not in "source-target" form; nothing usable to record
+				continue;
+			}
+
+			list( $source, $target ) = $parts;
+			$pairs[$source][$target] = true;
+		}
+
+		return $pairs;
+	}
+
+	/**
+	 * Builds the POST query that asks the service to translate the text.
+	 *
+	 * @param string $text Text to translate.
+	 * @param string $from Source language code.
+	 * @param string $to Target language code.
+	 * @return TranslationQuery
+	 * @throws TranslationWebServiceConfigurationException If no API key is configured.
+	 * @throws TranslationWebServiceInvalidInputException If the text is too long.
+	 */
+	protected function getQuery( $text, $from, $to ) {
+		if ( !isset( $this->config['key'] ) ) {
+			throw new TranslationWebServiceConfigurationException( 'API key is not set' );
+		}
+
+		# https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/
+		// NOTE(review): strlen() counts bytes, so multi-byte text is rejected
+		// before it reaches 10000 characters; confirm which unit the service
+		// limit is expressed in before switching to a character count.
+		if ( strlen( $text ) > 10000 ) {
+			throw new TranslationWebServiceInvalidInputException( 'Source text too long' );
+		}
+
+		$text = trim( $text );
+		$text = $this->wrapUntranslatable( $text );
+
+		return TranslationQuery::factory( $this->config['url'] )
+			->timeout( $this->config['timeout'] )
+			->postWithData(
+				[
+					'key' => $this->config['key'],
+					'text' => $text,
+					'lang' => "$from-$to",
+					'format' => 'html',
+				]
+			);
+	}
+
+	/**
+	 * Extracts the translated text from the reply of the service.
+	 *
+	 * @param TranslationQueryResponse $reply
+	 * @return string Translation with character references decoded, the
+	 *  untranslatable wrapping removed and surrounding whitespace trimmed.
+	 * @throws TranslationWebServiceException If the reply is not a JSON object,
+	 *  does not report code 200 or does not contain a translation.
+	 */
+	protected function parseResponse( TranslationQueryResponse $reply ) {
+		$body = $reply->getBody();
+		$response = FormatJson::decode( $body );
+		if ( !is_object( $response ) ) {
+			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
+		}
+
+		if ( !isset( $response->code ) || $response->code !== 200 ) {
+			// Fall back to the raw body when the reply carries no usable message,
+			// so the exception never ends up with an empty description.
+			$message = isset( $response->message ) && is_string( $response->message )
+				? $response->message
+				: 'Unexpected reply: ' . serialize( $body );
+			throw new TranslationWebServiceException( $message );
+		}
+
+		if ( !isset( $response->text[0] ) || !is_string( $response->text[0] ) ) {
+			throw new TranslationWebServiceException(
+				'Missing translation in reply: ' . serialize( $body )
+			);
+		}
+
+		$text = Sanitizer::decodeCharReferences( $response->text[0] );
+		$text = $this->unwrapUntranslatable( $text );
+
+		return trim( $text );
+	}
+}