diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/Translate/webservices |
first commit
Diffstat (limited to 'www/wiki/extensions/Translate/webservices')
15 files changed, 1322 insertions, 0 deletions
diff --git a/www/wiki/extensions/Translate/webservices/ApertiumWebService.php b/www/wiki/extensions/Translate/webservices/ApertiumWebService.php new file mode 100644 index 00000000..d333621b --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/ApertiumWebService.php @@ -0,0 +1,81 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Implements support for the Apertium translator API. + * @see http://wiki.apertium.org/wiki/Apertium_web_service + * @ingroup TranslationWebService + * @since 2013-01-01 + */ +class ApertiumWebService extends TranslationWebService { + public function getType() { + return 'mt'; + } + + protected function mapCode( $code ) { + return str_replace( '-', '_', LanguageCode::bcp47( $code ) ); + } + + protected function doPairs() { + $pairs = []; + $json = Http::get( + $this->config['pairs'], + [ 'timeout' => $this->config['timeout'] ], + __METHOD__ + ); + $response = FormatJson::decode( $json ); + + if ( !is_object( $response ) ) { + $error = 'Malformed reply from remote server: ' . (string)$json; + throw new TranslationWebServiceException( $error ); + } + + foreach ( $response->responseData as $pair ) { + $source = $pair->sourceLanguage; + $target = $pair->targetLanguage; + $pairs[$source][$target] = true; + } + + return $pairs; + } + + protected function getQuery( $text, $from, $to ) { + if ( !isset( $this->config['key'] ) ) { + throw new TranslationWebServiceConfigurationException( 'API key is not set' ); + } + + $text = trim( $text ); + $text = $this->wrapUntranslatable( $text ); + + $params = [ + 'q' => $text, + 'langpair' => "$from|$to", + 'x-application' => 'MediaWiki Translate extension ' . 
TRANSLATE_VERSION, + ]; + + return TranslationQuery::factory( $this->config['url'] ) + ->timeout( $this->config['timeout'] ) + ->queryParameters( $params ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + $response = FormatJson::decode( $body ); + if ( !is_object( $response ) ) { + throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) ); + } elseif ( $response->responseStatus !== 200 ) { + throw new TranslationWebServiceException( $response->responseDetails ); + } + + $text = Sanitizer::decodeCharReferences( $response->responseData->translatedText ); + $text = $this->unwrapUntranslatable( $text ); + + return trim( $text ); + } +} diff --git a/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php b/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php new file mode 100644 index 00000000..cb472d93 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/CaighdeanWebService.php @@ -0,0 +1,93 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Implements support Caighdean translator api. 
+ * @see https://github.com/kscanne/caighdean/blob/master/API.md + * @ingroup TranslationWebService + * @since 2017.04 + */ +class CaighdeanWebService extends TranslationWebService { + public function getType() { + return 'mt'; + } + + public function mapCode( $code ) { + return $code; + } + + protected function doPairs() { + $pairs = [ + 'gd' => [ 'ga' => true ], + 'gv' => [ 'ga' => true ], + ]; + + return $pairs; + } + + protected function getQuery( $text, $from, $to ) { + if ( !isset( $this->config['url'] ) ) { + throw new TranslationWebServiceConfigurationException( '`url` not set in configuration' ); + } + + $text = trim( $text ); + if ( $text === '' ) { + throw new TranslationWebServiceInvalidInputException( 'Input is empty' ); + } + + $data = wfArrayToCgi( [ + 'foinse' => $from, + 'teacs' => $text, + ] ); + + // Maximum payload is 16 KiB. Based on testing, 16000 bytes is safe by leaving 224 + // bytes for other things. + if ( strlen( $data ) > 16000 ) { + throw new TranslationWebServiceInvalidInputException( 'Input is over 16000 bytes long' ); + } + + return TranslationQuery::factory( $this->config['url'] ) + ->timeout( $this->config['timeout'] ) + ->postWithData( $data ) + ->attachProcessingInstructions( $text ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + $response = FormatJson::decode( $body ); + if ( !is_array( $response ) ) { + throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) ); + } + + $text = ''; + $originalText = $reply->getQuery()->getProcessingInstructions(); + foreach ( $response as list( $sourceToken, $targetToken ) ) { + $separator = ' '; + $pos = strpos( $originalText, $sourceToken ); + // Try to keep the effects local. If we fail to match at token, we could accidentally + // scan very far ahead in the text, find a false match and not find matches for all + // of the tokens in between. 
+ if ( $pos !== false && $pos < 50 ) { + // Remove the portion of text we have processed. $pos should be zero, unless + // we failed to match something earlier. + $originalText = substr( $originalText, $pos + strlen( $sourceToken ) ); + if ( preg_match( '/^\s+/', $originalText, $match ) ) { + $separator = $match[ 0 ]; + $originalText = substr( $originalText, strlen( $separator ) ); + } else { + $separator = ''; + } + } + + $text .= $targetToken . $separator; + } + + return $text; + } +} diff --git a/www/wiki/extensions/Translate/webservices/CxserverWebService.php b/www/wiki/extensions/Translate/webservices/CxserverWebService.php new file mode 100644 index 00000000..6ed7189d --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/CxserverWebService.php @@ -0,0 +1,82 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Implements support for cxserver api + * @ingroup TranslationWebService + * @since 2015.02 + */ +class CxserverWebService extends TranslationWebService { + public function getType() { + return 'mt'; + } + + protected function mapCode( $code ) { + return $code; + } + + protected function doPairs() { + if ( !isset( $this->config['host'] ) ) { + throw new TranslationWebServiceConfigurationException( 'Cxserver host not set' ); + } + + $pairs = []; + + $url = $this->config['host'] . '/v1/list/mt'; + $json = Http::get( + $url, + [ 'timeout' => $this->config['timeout'] ], // option must be keyed by 'timeout' to take effect + __METHOD__ + ); + $response = FormatJson::decode( $json, true ); + + if ( !is_array( $response ) ) { + $exception = 'Malformed reply from remote server: ' . 
(string)$json; + throw new TranslationWebServiceException( $exception ); + } + + foreach ( $response['Apertium'] as $source => $targets ) { + foreach ( $targets as $target ) { + $pairs[$source][$target] = true; + } + } + + return $pairs; + } + + protected function getQuery( $text, $from, $to ) { + if ( !isset( $this->config['host'] ) ) { + throw new TranslationWebServiceConfigurationException( 'Cxserver host not set' ); + } + + $text = trim( $text ); + $text = $this->wrapUntranslatable( $text ); + $url = $this->config['host'] . "/v1/mt/$from/$to/Apertium"; + + return TranslationQuery::factory( $url ) + ->timeout( $this->config['timeout'] ) + ->postWithData( wfArrayToCgi( [ 'html' => $text ] ) ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + $response = FormatJson::decode( $body ); + if ( !is_object( $response ) ) { + throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) ); + } + + $text = $response->contents; + if ( preg_match( '~^<div>(.*)</div>$~', $text ) ) { + $text = preg_replace( '~^<div>(.*)</div>$~', '\1', $text ); + } + $text = $this->unwrapUntranslatable( $text ); + + return trim( $text ); + } +} diff --git a/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php new file mode 100644 index 00000000..221944d5 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php @@ -0,0 +1,142 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @author Ulrich Strauss + * @license GPL-2.0-or-later + */ + +/** + * Implements support for Microsoft translation api v3. 
+ * @see https://docs.microsoft.com/fi-fi/azure/cognitive-services/Translator/reference/v3-0-reference + * @ingroup TranslationWebService + * @since 2013-01-01 + */ +class MicrosoftWebService extends TranslationWebService { + public function getType() { + return 'mt'; + } + + protected function mapCode( $code ) { + $map = [ + 'tl' => 'fil', + 'zh-hant' => 'zh-Hant', + 'zh-hans' => 'zh-Hans', + 'sr-ec' => 'sr-Cyrl', + 'sr-el' => 'sr-Latn', + 'pt-br' => 'pt', + ]; + + return $map[$code] ?? $code; + } + + protected function doPairs() { + if ( !isset( $this->config['key'] ) ) { + throw new TranslationWebServiceConfigurationException( 'key is not set' ); + } + + $key = $this->config['key']; + + $options = []; + $options['method'] = 'GET'; + $options['timeout'] = $this->config['timeout']; + + $url = $this->config['url'] . '/languages?api-version=3.0'; + + $req = MWHttpRequest::factory( $url, $options ); + $req->setHeader( 'Ocp-Apim-Subscription-Key', $key ); + + $status = $req->execute(); + if ( !$status->isOK() ) { + $error = $req->getContent(); + // Most likely a timeout or other general error + throw new TranslationWebServiceException( + 'Http::get failed:' . serialize( $error ) . serialize( $status ) + ); + } + + $json = $req->getContent(); + $response = json_decode( $json, true ); + if ( !isset( $response[ 'translation' ] ) ) { + throw new TranslationWebServiceException( + 'Unable to fetch list of available languages: ' . 
$json + ); + } + + $languages = array_keys( $response[ 'translation' ] ); + + // Let's make a cartesian product, assuming we can translate from any language to any language + $pairs = []; + foreach ( $languages as $from ) { + foreach ( $languages as $to ) { + $pairs[$from][$to] = true; + } + } + + return $pairs; + } + + protected function getQuery( $text, $from, $to ) { + if ( !isset( $this->config['key'] ) ) { + throw new TranslationWebServiceConfigurationException( 'key is not set' ); + } + + $key = $this->config['key']; + $text = trim( $text ); + $text = $this->wrapUntranslatable( $text ); + + $url = $this->config['url'] . '/translate'; + $params = [ + 'api-version' => '3.0', + 'from' => $from, + 'to' => $to, + 'textType' => 'html', + ]; + $headers = [ + 'Ocp-Apim-Subscription-Key' => $key, + 'Content-Type' => 'application/json', + ]; + $body = json_encode( [ [ 'Text' => $text ] ] ); + + if ( strlen( $body ) > 5000 ) { + throw new TranslationWebServiceInvalidInputException( 'Source text too long' ); + } + + return TranslationQuery::factory( $url ) + ->timeout( $this->config['timeout'] ) + ->queryParameters( $params ) + ->queryHeaders( $headers ) + ->postWithData( $body ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + + $response = json_decode( $body, true ); + if ( !isset( $response[ 0 ][ 'translations' ][ 0 ][ 'text' ] ) ) { + throw new TranslationWebServiceException( + 'Unable to parse translation response: ' . 
$body + ); + } + + $text = $response[ 0 ][ 'translations' ][ 0 ][ 'text' ]; + $text = $this->unwrapUntranslatable( $text ); + + return $text; + } + + /// Override from parent + protected function wrapUntranslatable( $text ) { + $pattern = '~%[^% ]+%|\$\d|{VAR:[^}]+}|{?{(PLURAL|GRAMMAR|GENDER):[^|]+\||%(\d\$)?[sd]~'; + $wrap = '<span class="notranslate">\0</span>'; + return preg_replace( $pattern, $wrap, $text ); + } + + /// Override from parent + protected function unwrapUntranslatable( $text ) { + $pattern = '~<span class="notranslate">\s*(.*?)\s*</span>~'; + return preg_replace( $pattern, '\1', $text ); + } +} diff --git a/www/wiki/extensions/Translate/webservices/QueryAggregator.php b/www/wiki/extensions/Translate/webservices/QueryAggregator.php new file mode 100644 index 00000000..6cc6465b --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/QueryAggregator.php @@ -0,0 +1,89 @@ +<?php +/** + * Web service utility class. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Runs multiple web service queries asynchronously to save time. + * + * @ingroup TranslationWebService + * @since 2015.02 + */ +class QueryAggregator { + protected $queries = []; + protected $responses = []; + protected $timeout = 0; + protected $hasRun = false; + + /** + * Register a query to be run. + * @param TranslationQuery $query + * @return mixed Query id that can be used to fetch results. + */ + public function addQuery( TranslationQuery $query ) { + $this->queries[] = $query; + + $this->timeout = max( $query->getTimeout(), $this->timeout ); + return count( $this->queries ) - 1; + } + + /** + * Returns a response for a query. + * @param mixed $id Query id. + * @return TranslationQueryResponse + * @throws RuntimeException if called before run() has been called. 
+ */ + public function getResponse( $id ) { + if ( !$this->hasRun ) { + throw new RuntimeException( 'Tried to get response before queries ran' ); + } + + return TranslationQueryResponse::newFromMultiHttp( + $this->responses[$id], + $this->queries[$id] + ); + } + + /** + * Runs all the queries. + */ + public function run() { + global $wgSitename; + + $version = TRANSLATE_VERSION; + + $http = new MultiHttpClient( [ + 'reqTimeout' => $this->timeout, + 'connTimeout' => 3, + 'userAgent' => "MediaWiki Translate extension $version for $wgSitename" + ] ); + $responses = $http->runMulti( $this->getMultiHttpQueries( $this->queries ) ); + foreach ( $responses as $index => $response ) { + $this->responses[$index] = $response; + } + $this->hasRun = true; + } + + /** + * Formats queries for format used by MultiHttpClient class. + * @param TranslationQuery[] $queries + * @return array[] + */ + protected function getMultiHttpQueries( $queries ) { + $converter = function ( TranslationQuery $q ) { + return [ + 'url' => $q->getUrl(), + 'method' => $q->getMethod(), + 'query' => $q->getQueryParameters(), + 'body' => $q->getBody(), + 'headers' => $q->getHeaders(), + ]; + }; + + return array_map( $converter, $queries ); + } +} diff --git a/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php b/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php new file mode 100644 index 00000000..c5c0e9a5 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/QueryAggregatorAware.php @@ -0,0 +1,17 @@ +<?php +/** + * Web service utility interface. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Interface for classes that want to use QueryAggregator. 
+ * @since 2015.12 + */ +interface QueryAggregatorAware { + public function setQueryAggregator( QueryAggregator $aggregator ); + public function populateQueries(); +} diff --git a/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php b/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php new file mode 100644 index 00000000..2ff80c43 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/RESTBaseWebService.php @@ -0,0 +1,80 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Implements support for cxserver proxied through RESTBase + * @ingroup TranslationWebService + * @since 2017.10 + */ +class RESTBaseWebService extends TranslationWebService { + public function getType() { + return 'mt'; + } + + protected function mapCode( $code ) { + return $code; + } + + protected function doPairs() { + if ( !isset( $this->config['host'] ) ) { + throw new TranslationWebServiceConfigurationException( 'RESTBase host not set' ); + } + + $pairs = []; + + $url = $this->config['host'] . '/rest_v1/transform/list/tool/mt/'; + $json = Http::get( + $url, + [ 'timeout' => $this->config['timeout'] ], // option must be keyed by 'timeout' to take effect + __METHOD__ + ); + $response = FormatJson::decode( $json, true ); + + if ( !is_array( $response ) ) { + $exception = 'Malformed reply from remote server: ' . $url . ' ' . (string)$json; + throw new TranslationWebServiceException( $exception ); + } + + foreach ( $response['Apertium'] as $source => $targets ) { + foreach ( $targets as $target ) { + $pairs[$source][$target] = true; + } + } + + return $pairs; + } + + protected function getQuery( $text, $from, $to ) { + if ( !isset( $this->config['host'] ) ) { + throw new TranslationWebServiceConfigurationException( 'RESTBase host not set' ); + } + + $text = trim( $text ); + $text = $this->wrapUntranslatable( $text ); + $url = $this->config['host'] . 
"/rest_v1/transform/html/from/$from/to/$to/Apertium"; + + return TranslationQuery::factory( $url ) + ->timeout( $this->config['timeout'] ) + ->postWithData( wfArrayToCgi( [ 'html' => $text ] ) ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + + $response = FormatJson::decode( $body ); + if ( !is_object( $response ) ) { + throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) ); + } + + $text = $response->contents; + $text = $this->unwrapUntranslatable( $text ); + + return trim( $text ); + } +} diff --git a/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php b/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php new file mode 100644 index 00000000..c54d319a --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/RemoteTTMServerWebService.php @@ -0,0 +1,61 @@ +<?php +/** + * Contains a class for querying external translation service. + * + * @file + * @author Niklas Laxström + * @copyright Copyright © 2010-2013 Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Implements support for ttmserver via MediaWiki API. 
+ * @see https://www.mediawiki.org/wiki/Help:Extension:Translate/Translation_memories + * @ingroup TranslationWebService + * @since 2013-01-01 + */ +class RemoteTTMServerWebService extends TranslationWebService { + public function getType() { + return 'ttmserver'; + } + + protected function mapCode( $code ) { + return $code; // Unused + } + + protected function doPairs() { + return null; // Unused + } + + protected function getQuery( $text, $from, $to ) { + $params = [ + 'format' => 'json', + 'action' => 'ttmserver', + 'sourcelanguage' => $from, + 'targetlanguage' => $to, + 'text' => $text + ]; + + if ( isset( $this->config['service'] ) ) { + $params['service'] = $this->config['service']; + } + + return TranslationQuery::factory( $this->config['url'] ) + ->timeout( $this->config['timeout'] ) + ->queryParameters( $params ); + } + + protected function parseResponse( TranslationQueryResponse $reply ) { + $body = $reply->getBody(); + $parsed = FormatJson::decode( $body, true ); + if ( !is_array( $parsed ) ) { + throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) ); + } + + if ( !isset( $parsed['ttmserver'] ) ) { + throw new TranslationWebServiceException( 'Unexpected reply from remote server' ); + } + + return $parsed['ttmserver']; + } +} diff --git a/www/wiki/extensions/Translate/webservices/TranslationQuery.php b/www/wiki/extensions/Translate/webservices/TranslationQuery.php new file mode 100644 index 00000000..3e0fbf31 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/TranslationQuery.php @@ -0,0 +1,105 @@ +<?php +/** + * Contains code related to web services support. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Mutable objects that represents a HTTP(S) query. + * NB: Too lazy to make TranslationQueryFactory to make this class immutable. 
+ * @since 2015.02 + */ +class TranslationQuery { + protected $url; + protected $timeout = 0; + protected $method = 'GET'; + protected $params = []; + protected $body; + protected $headers = []; + + /** + * @var mixed Arbitrary data that is returned with TranslationQueryResponse + */ + protected $instructions; + + // URL is mandatory, so using it here + public static function factory( $url ) { + $obj = new self(); + $obj->url = $url; + return $obj; + } + + /** + * Make this a POST request with given data. + * + * @param string $data + * @return $this + */ + public function postWithData( $data ) { + $this->method = 'POST'; + $this->body = $data; + return $this; + } + + public function queryParameters( array $params ) { + $this->params = $params; + return $this; + } + + public function queryHeaders( array $headers ) { + $this->headers = $headers; + return $this; + } + + public function timeout( $timeout ) { + $this->timeout = $timeout; + return $this; + } + + /** + * Attach arbitrary data that is necessary to process the results. + * @param mixed $data + * @return self + * @since 2017.04 + */ + public function attachProcessingInstructions( $data ) { + $this->instructions = $data; + return $this; + } + + public function getTimeout() { + return $this->timeout; + } + + public function getUrl() { + return $this->url; + } + + public function getMethod() { + return $this->method; + } + + public function getQueryParameters() { + return $this->params; + } + + public function getBody() { + return $this->body; + } + + public function getHeaders() { + return $this->headers; + } + + /** + * Get previously attached result processing instructions. 
+ * @return mixed + * @since 2017.04 + */ + public function getProcessingInstructions() { + return $this->instructions; + } +} diff --git a/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php b/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php new file mode 100644 index 00000000..a8f9f6dd --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/TranslationQueryResponse.php @@ -0,0 +1,65 @@ +<?php +/** + * Contains code related to web services support. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +/** + * Value object that represents a HTTP(S) query response. + * @since 2015.02 + */ +class TranslationQueryResponse { + protected $code; + protected $reason; + protected $headers; + protected $body; + protected $error; + + /** + * @var TranslationQuery + */ + protected $query; + + protected function __construct() { + } + + public static function newFromMultiHttp( array $data, TranslationQuery $query ) { + $response = $data['response']; + $obj = new self(); + $obj->code = (int)$response['code']; + $obj->reason = $response['reason']; + $obj->headers = $response['headers']; + $obj->body = $response['body']; + $obj->error = $response['error']; + $obj->query = $query; + return $obj; + } + + public function getStatusCode() { + return $this->code; + } + + public function getStatusMessage() { + if ( $this->code === 0 ) { + return $this->error; + } else { + return $this->reason; + } + } + + public function getBody() { + return $this->body; + } + + /** + * Get the TranslationQuery that was made for this request. 
+ * @return TranslationQuery + * @since 2017.04 + */ + public function getQuery() { + return $this->query; + } +} diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebService.php b/www/wiki/extensions/Translate/webservices/TranslationWebService.php new file mode 100644 index 00000000..a72be868 --- /dev/null +++ b/www/wiki/extensions/Translate/webservices/TranslationWebService.php @@ -0,0 +1,352 @@ +<?php +/** + * Contains code related to web service support. + * + * @file + * @author Niklas Laxström + * @license GPL-2.0-or-later + */ + +use MediaWiki\Logger\LoggerFactory; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; + +/** + * Multipurpose class: + * - 1) Interface for web services. + * - 2) Source text picking logic. + * - 3) Factory class. + * - 4) Service failure tracking and suspending. + * @since 2013-01-01 + * @defgroup TranslationWebService Translation Web Services + */ +abstract class TranslationWebService implements LoggerAwareInterface { + /* Public api */ + + /** + * Get a webservice handler. + * + * @see $wgTranslateTranslationServices + * @param string $name Name of the service. + * @param array $config + * @return TranslationWebService|null + */ + public static function factory( $name, $config ) { + $handlers = [ + 'microsoft' => 'MicrosoftWebService', + 'apertium' => 'ApertiumWebService', + 'yandex' => 'YandexWebService', + 'remote-ttmserver' => 'RemoteTTMServerWebService', + 'cxserver' => 'CxserverWebService', + 'restbase' => 'RESTBaseWebService', + 'caighdean' => 'CaighdeanWebService', + ]; + + if ( !isset( $config['timeout'] ) ) { + $config['timeout'] = 3; + } + + // Alter local ttmserver instance to appear as remote + // to take advantage of the query aggregator. But only + // if they are public. 
+ if ( + isset( $config['class'] ) && + $config['class'] === 'ElasticSearchTTMServer' && + isset( $config['public'] ) && + $config['public'] === true + ) { + $config['type'] = 'remote-ttmserver'; + $config['service'] = $name; + $config['url'] = wfExpandUrl( wfScript( 'api' ), PROTO_CANONICAL ); + } + + if ( isset( $handlers[$config['type']] ) ) { + $class = $handlers[$config['type']]; + + $obj = new $class( $name, $config ); + $obj->setLogger( LoggerFactory::getInstance( 'translationservices' ) ); + return $obj; + } + + return null; + } + + /** + * Gets the name of this service, for example to display it for the user. + * + * @return string Plain text name for this service. + * @since 2014.02 + */ + public function getName() { + return $this->service; + } + + /** + * Get queries for this service. Queries from multiple services can be + * collected and run asynchronously with QueryAggregator. + * + * @param string $text Source text + * @param string $from Source language + * @param string $to Target language + * @return TranslationQuery[] + * @since 2015.12 + * @throws TranslationWebServiceConfigurationException + */ + public function getQueries( $text, $from, $to ) { + $from = $this->mapCode( $from ); + $to = $this->mapCode( $to ); + + try { + return [ $this->getQuery( $text, $from, $to ) ]; + } catch ( TranslationWebServiceException $e ) { + $this->reportTranslationServiceFailure( $e->getMessage() ); + return []; + } catch ( TranslationWebServiceInvalidInputException $e ) { + // Not much we can do about this, just ignore. + return []; + } + } + + /** + * Get the web service specific response returned by QueryAggregator. + * + * @param TranslationQueryResponse $response + * @return mixed|null Returns null on error. + * @since 2015.12 + */ + public function getResultData( TranslationQueryResponse $response ) { + if ( $response->getStatusCode() !== 200 ) { + $this->reportTranslationServiceFailure( + 'STATUS: ' . $response->getStatusMessage() . "\n" . + 'BODY: ' . 
$response->getBody() + ); + return null; + } + + try { + return $this->parseResponse( $response ); + } catch ( TranslationWebServiceException $e ) { + $this->reportTranslationServiceFailure( $e->getMessage() ); + return null; + } + } + + /** + * Returns the type of this web service. + * @see TranslationAid::getTypes + * @return string + */ + abstract public function getType(); + + /* Service api */ + + /** + * Map a MediaWiki (almost standard) language code to the code used by the + * translation service. + * + * @param string $code MediaWiki language code. + * @return string Translation service language code. + */ + abstract protected function mapCode( $code ); + + /** + * Get the list of supported language pairs for the web service. The codes + * should be the ones used by the service. Caching is handled by the public + * getSupportedLanguagePairs. + * + * @return array $list[source language][target language] = true + * @throws TranslationWebServiceException + * @throws TranslationWebServiceConfigurationException + */ + abstract protected function doPairs(); + + /** + * Get the query. See getQueries for the public method. + * + * @param string $text Text to translate. + * @param string $from Language code of the text, as used by the service. + * @param string $to Language code of the translation, as used by the service. + * @return TranslationQuery + * @since 2015.02 + * @throws TranslationWebServiceException + * @throws TranslationWebServiceConfigurationException + * @throws TranslationWebServiceInvalidInputException + */ + abstract protected function getQuery( $text, $from, $to ); + + /** + * Get the response. See getResultData for the public method. + * + * @param TranslationQueryResponse $response + * @return string + * @since 2015.02 + * @throws TranslationWebServiceException + */ + abstract protected function parseResponse( TranslationQueryResponse $response ); + + /* Default implementation */ + + /** + * @var string Name of this webservice. 
+ */ + protected $service; + + /** + * @var array + */ + protected $config; + + /** + * @param string $service Name of the webservice + * @param array $config + */ + protected function __construct( $service, $config ) { + $this->service = $service; + $this->config = $config; + } + + /** + * Test whether given language pair is supported by the service. + * + * @param string $from Source language + * @param string $to Target language + * @return bool + * @since 2015.12 + * @throws TranslationWebServiceConfigurationException + */ + public function isSupportedLanguagePair( $from, $to ) { + $pairs = $this->getSupportedLanguagePairs(); + $from = $this->mapCode( $from ); + $to = $this->mapCode( $to ); + + return isset( $pairs[$from][$to] ); + } + + /** + * @see self::doPairs + * @return array + * @throws TranslationWebServiceConfigurationException + */ + protected function getSupportedLanguagePairs() { + $key = wfMemcKey( 'translate-tmsug-pairs-' . $this->service ); + $pairs = wfGetCache( CACHE_ANYTHING )->get( $key ); + if ( !is_array( $pairs ) ) { + try { + $pairs = $this->doPairs(); + } catch ( Exception $e ) { + $this->reportTranslationServiceFailure( $e->getMessage() ); + return []; + } + // Cache the result for a day + wfGetCache( CACHE_ANYTHING )->set( $key, $pairs, 60 * 60 * 24 ); + } + + return $pairs; + } + + /** + * Some mangling that tries to keep some parts of the message unmangled + * by the translation service. Most of them support either class=notranslate + * or translate=no. + * @param string $text + * @return string + */ + protected function wrapUntranslatable( $text ) { + $text = str_replace( "\n", '!N!', $text ); + $pattern = '~%[^% ]+%|\$\d|{VAR:[^}]+}|{?{(PLURAL|GRAMMAR|GENDER):[^|]+\||%(\d\$)?[sd]~'; + $wrap = '<span class="notranslate" translate="no">\0</span>'; + return preg_replace( $pattern, $wrap, $text ); + } + + /** + * Undo the hopefully untouched mangling done by wrapUntranslatable. 
* Wrappers whose content spans a line break are removed as well.
* @param string $text
* @return string
*/
protected function unwrapUntranslatable( $text ) {
	$text = str_replace( '!N!', "\n", $text );
	// The "s" modifier lets "." match the line breaks restored above. Without
	// it a wrapper placed around text such as "%!N!60%" was never removed.
	$pattern = '~<span class="notranslate" translate="no">(.*?)</span>~s';
	return preg_replace( $pattern, '\1', $text );
}

/* Failure handling and suspending */

/**
 * Sets the logger used by the failure tracking methods below.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}

/**
 * @var int How many failures during failure period need to happen to
 * consider the service being temporarily off-line.
 */
protected $serviceFailureCount = 5;

/**
 * @var int How long after the last detected failure we clear the status and
 * try again. Compared against wfTimestamp() values and also used, times
 * five, as the lifetime of the cached failure record.
 */
protected $serviceFailurePeriod = 900;

/**
 * Checks whether the service has exceeded failure count.
 *
 * The record is deleted once the last failure is more than two failure
 * periods old. Between one and two periods the service is given another
 * chance even if the count is above the limit.
 *
 * @return bool True when the service should currently not be used
 */
public function checkTranslationServiceFailure() {
	$service = $this->service;
	$key = wfMemcKey( "translate-service-$service" );
	$cache = wfGetCache( CACHE_ANYTHING );
	$value = $cache->get( $key );
	if ( !is_string( $value ) ) {
		// No failure on record
		return false;
	}

	// Stored as "<failure count>|<time of last failure>". Padding makes a
	// damaged entry look like a very old failure instead of raising a notice.
	list( $count, $failed ) = array_pad( explode( '|', $value, 2 ), 2, 0 );
	$count = (int)$count;
	$failed = (int)$failed;
	$now = (int)wfTimestamp();

	if ( $failed + ( 2 * $this->serviceFailurePeriod ) < $now ) {
		if ( $count >= $this->serviceFailureCount ) {
			$this->logger->warning( "Translation service $service (was) restored" );
		}
		$cache->delete( $key );

		return false;
	} elseif ( $failed + $this->serviceFailurePeriod < $now ) {
		/* We are in suspicious mode and one failure is enough to update
		 * failed timestamp. If the service works however, let's use it.
		 * Previous failures are forgotten after another failure period
		 * has passed */
		return false;
	}

	// Check the failure count against the limit
	return $count >= $this->serviceFailureCount;
}

/**
 * Increases the failure count for this service and logs the problem.
 *
 * An error is logged when the count reaches the limit exactly, a warning
 * for every failure after that.
 *
 * @param string $msg Description of the failure, used in the log entry
 */
protected function reportTranslationServiceFailure( $msg ) {
	$service = $this->service;
	$this->logger->warning( "Translation service $service problem: $msg" );

	$key = wfMemcKey( "translate-service-$service" );
	$cache = wfGetCache( CACHE_ANYTHING );
	$value = $cache->get( $key );

	$count = 0;
	if ( is_string( $value ) ) {
		// Cast so that a damaged entry cannot turn the counter into a string
		$count = (int)explode( '|', $value, 2 )[0];
	}

	$count++;
	$failed = wfTimestamp();
	$cache->set(
		$key,
		"$count|$failed",
		$this->serviceFailurePeriod * 5
	);

	if ( $count === $this->serviceFailureCount ) {
		$this->logger->error( "Translation service $service suspended" );
	} elseif ( $count > $this->serviceFailureCount ) {
		$this->logger->warning( "Translation service $service still suspended" );
	}
}
// The brace below closes the class whose opening line is above this excerpt.
}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php
new file mode 100644
index 00000000..509224a4
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceConfigurationException.php
@@ -0,0 +1,18 @@
<?php
/**
 * Contains code related to web service support.
 *
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Used to signal a configuration mistake in an external web service. This is in
 * contrast to TranslationWebServiceException that signals a failure in the web
 * service itself.
* @since 2017.04
* @ingroup TranslationWebService
*/
class TranslationWebServiceConfigurationException extends Exception {}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php
new file mode 100644
index 00000000..fa4c1240
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceException.php
@@ -0,0 +1,18 @@
<?php
/**
 * Contains code related to web service support.
 *
 * @file
 * @author Niklas Laxström
 * @copyright Copyright © 2010-2013 Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Thrown when an external web service fails. A service that fails too many
 * times in a short period is suspended, so that no more time is wasted on it.
 * @since 2013-01-01
 * @ingroup TranslationWebService
 */
class TranslationWebServiceException extends MWException {}
diff --git a/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php b/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php
new file mode 100644
index 00000000..e8ef9d08
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/TranslationWebServiceInvalidInputException.php
@@ -0,0 +1,20 @@
<?php
/**
 * Contains code related to web service support.
 *
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Used to signal that the requested input is rejected and cannot be used with
 * an external web service. This is in contrast to a failure in the web service
 * itself that is not in our control. The most common case for this is input
 * that is too long.
* @since 2017.04
* @ingroup TranslationWebService
*/
class TranslationWebServiceInvalidInputException extends Exception {
}
diff --git a/www/wiki/extensions/Translate/webservices/YandexWebService.php b/www/wiki/extensions/Translate/webservices/YandexWebService.php
new file mode 100644
index 00000000..fbb16844
--- /dev/null
+++ b/www/wiki/extensions/Translate/webservices/YandexWebService.php
@@ -0,0 +1,99 @@
<?php
/**
 * Contains a class for querying external translation service.
 *
 * @file
 * @author Niklas Laxström
 * @license GPL-2.0-or-later
 */

/**
 * Implements support for Yandex translation api v1.
 * @see https://tech.yandex.com/translate/
 * @ingroup TranslationWebService
 * @since 2013-01-01
 */
class YandexWebService extends TranslationWebService {
	/**
	 * @return string Always 'mt'
	 */
	public function getType() {
		return 'mt';
	}

	/**
	 * Maps a language code to the one sent to the service. Only 'be-tarask'
	 * is changed (to 'be'); any other code is returned as given.
	 *
	 * @param string $code
	 * @return string
	 */
	protected function mapCode( $code ) {
		if ( $code === 'be-tarask' ) {
			$code = 'be';
		}
		return $code;
	}

	/**
	 * Fetches the list of translation directions from the URL configured as
	 * 'pairs' and turns entries such as "ru-en" into $pairs['ru']['en'] = true.
	 *
	 * @return array
	 * @throws TranslationWebServiceConfigurationException When no API key is set
	 * @throws TranslationWebServiceException When the reply is not a JSON object
	 *  or does not contain the list of directions
	 */
	protected function doPairs() {
		if ( !isset( $this->config['key'] ) ) {
			throw new TranslationWebServiceConfigurationException( 'API key is not set' );
		}

		$params = [
			'key' => $this->config['key'],
		];

		$url = $this->config['pairs'] . '?' . wfArrayToCgi( $params );
		$json = Http::get(
			$url,
			[ 'timeout' => $this->config['timeout'] ],
			__METHOD__
		);
		$response = FormatJson::decode( $json );

		if ( !is_object( $response ) ) {
			$error = 'Malformed reply from remote server: ' . (string)$json;
			throw new TranslationWebServiceException( $error );
		}

		// A reply can be a JSON object and still lack the list (presumably
		// error replies look like that - TODO confirm). Iterating a missing
		// property raised notices and produced an empty result that looked
		// like a valid answer to the caller.
		if ( !isset( $response->dirs ) || !is_array( $response->dirs ) ) {
			$error = 'Malformed reply from remote server: ' . (string)$json;
			throw new TranslationWebServiceException( $error );
		}

		$pairs = [];
		foreach ( $response->dirs as $pair ) {
			$parts = is_string( $pair ) ? explode( '-', $pair ) : [];
			if ( count( $parts ) < 2 ) {
				// Not of the form "source-target", nothing to record
				continue;
			}
			list( $source, $target ) = $parts;
			$pairs[$source][$target] = true;
		}

		return $pairs;
	}

	/**
	 * Builds the POST query that asks the service to translate $text.
	 *
	 * The text is trimmed and passed through wrapUntranslatable() before it
	 * is sent with format 'html'.
	 *
	 * @param string $text
	 * @param string $from Source language code
	 * @param string $to Target language code
	 * @return TranslationQuery
	 * @throws TranslationWebServiceConfigurationException When no API key is set
	 * @throws TranslationWebServiceInvalidInputException When $text is too long
	 */
	protected function getQuery( $text, $from, $to ) {
		if ( !isset( $this->config['key'] ) ) {
			throw new TranslationWebServiceConfigurationException( 'API key is not set' );
		}

		# https://tech.yandex.com/translate/doc/dg/reference/translate-docpage/
		# NOTE(review): strlen() counts bytes and runs before the wrapping below
		# adds markup - confirm which of the two the service limit refers to.
		if ( strlen( $text ) > 10000 ) {
			throw new TranslationWebServiceInvalidInputException( 'Source text too long' );
		}

		$text = trim( $text );
		$text = $this->wrapUntranslatable( $text );

		return TranslationQuery::factory( $this->config['url'] )
			->timeout( $this->config['timeout'] )
			->postWithData(
				[
					'key' => $this->config['key'],
					'text' => $text,
					'lang' => "$from-$to",
					'format' => 'html',
				]
			);
	}

	/**
	 * Extracts the translated text from a reply of the service.
	 *
	 * @param TranslationQueryResponse $reply
	 * @return string Translation with character references decoded and the
	 *  notranslate wrappers removed
	 * @throws TranslationWebServiceException When the reply is not a JSON
	 *  object, reports a code other than 200 or carries no translation
	 */
	protected function parseResponse( TranslationQueryResponse $reply ) {
		$body = $reply->getBody();
		$response = FormatJson::decode( $body );
		if ( !is_object( $response ) ) {
			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
		}

		// Read the fields defensively: a reply that lacks them should end up
		// as a service failure, not as PHP notices.
		$code = isset( $response->code ) ? $response->code : null;
		if ( $code !== 200 ) {
			$message = isset( $response->message )
				? $response->message
				: 'Invalid json: ' . serialize( $body );
			throw new TranslationWebServiceException( $message );
		}

		if ( !isset( $response->text[0] ) ) {
			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
		}

		$text = Sanitizer::decodeCharReferences( $response->text[0] );
		$text = $this->unwrapUntranslatable( $text );

		return trim( $text );
	}
}
|