diff options
Diffstat (limited to 'www/wiki/extensions/Translate/webservices/MicrosoftWebService.php')
-rw-r--r-- | www/wiki/extensions/Translate/webservices/MicrosoftWebService.php | 114 |
1 files changed, 76 insertions, 38 deletions
diff --git a/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php index 0efac0d3..221944d5 100644 --- a/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php +++ b/www/wiki/extensions/Translate/webservices/MicrosoftWebService.php @@ -4,12 +4,13 @@ * * @file * @author Niklas Laxström - * @license GPL-2.0+ + * @author Ulrich Strauss + * @license GPL-2.0-or-later */ /** - * Implements support for Microsoft translation api v2. - * @see http://msdn.microsoft.com/en-us/library/ff512421.aspx + * Implements support for Microsoft translation api v3. + * @see https://docs.microsoft.com/fi-fi/azure/cognitive-services/Translator/reference/v3-0-reference * @ingroup TranslationWebService * @since 2013-01-01 */ @@ -19,50 +20,55 @@ class MicrosoftWebService extends TranslationWebService { } protected function mapCode( $code ) { - $map = array( - 'zh-hant' => 'zh-CHT', - 'zh-hans' => 'zh-CHS', - ); - - return isset( $map[$code] ) ? $map[$code] : $code; + $map = [ + 'tl' => 'fil', + 'zh-hant' => 'zh-Hant', + 'zh-hans' => 'zh-Hans', + 'sr-ec' => 'sr-Cyrl', + 'sr-el' => 'sr-Latn', + 'pt-br' => 'pt', + ]; + + return $map[$code] ?? $code; } protected function doPairs() { if ( !isset( $this->config['key'] ) ) { - throw new TranslationWebServiceException( 'API key is not set' ); + throw new TranslationWebServiceConfigurationException( 'key is not set' ); } - $options = array(); - $options['method'] = 'GET'; - $options['timeout'] = $this->config['timeout']; + $key = $this->config['key']; - $params = array( - 'appId' => $this->config['key'], - ); + $options = []; + $options['method'] = 'GET'; + $options['timeout'] = $this->config['timeout']; - $url = 'http://api.microsofttranslator.com/V2/Http.svc/GetLanguagesForTranslate?'; - $url .= wfArrayToCgi( $params ); + $url = $this->config['url'] . '/languages?api-version=3.0'; $req = MWHttpRequest::factory( $url, $options ); - $status = $req->execute(); + $req->setHeader( 'Ocp-Apim-Subscription-Key', $key ); + $status = $req->execute(); if ( !$status->isOK() ) { $error = $req->getContent(); // Most likely a timeout or other general error - $exception = 'Http request failed:' . serialize( $error ) . serialize( $status ); - throw new TranslationWebServiceException( $exception ); + throw new TranslationWebServiceException( + 'Http::get failed:' . serialize( $error ) . serialize( $status ) + ); } - $xml = simplexml_load_string( $req->getContent() ); - - $languages = array(); - foreach ( $xml->string as $language ) { - $languages[] = (string)$language; + $json = $req->getContent(); + $response = json_decode( $json, true ); + if ( !isset( $response[ 'translation' ] ) ) { + throw new TranslationWebServiceException( + 'Unable to fetch list of available languages: ' . $json + ); } - // Let's make a cartesian product, assuming we can translate from any - // language to any language - $pairs = array(); + $languages = array_keys( $response[ 'translation' ] ); + + // Let's make a cartesian product, assuming we can translate from any language to any language + $pairs = []; foreach ( $languages as $from ) { foreach ( $languages as $to ) { $pairs[$from][$to] = true; @@ -74,31 +80,63 @@ class MicrosoftWebService extends TranslationWebService { protected function getQuery( $text, $from, $to ) { if ( !isset( $this->config['key'] ) ) { - throw new TranslationWebServiceException( 'API key is not set' ); + throw new TranslationWebServiceConfigurationException( 'key is not set' ); } + $key = $this->config['key']; $text = trim( $text ); $text = $this->wrapUntranslatable( $text ); - $params = array( - 'text' => $text, + $url = $this->config['url'] . '/translate'; + $params = [ + 'api-version' => '3.0', 'from' => $from, 'to' => $to, - 'appId' => $this->config['key'], - ); + 'textType' => 'html', + ]; + $headers = [ + 'Ocp-Apim-Subscription-Key' => $key, + 'Content-Type' => 'application/json', + ]; + $body = json_encode( [ [ 'Text' => $text ] ] ); + + if ( strlen( $body ) > 5000 ) { + throw new TranslationWebServiceInvalidInputException( 'Source text too long' ); + } - return TranslationQuery::factory( $this->config['url'] ) + return TranslationQuery::factory( $url ) ->timeout( $this->config['timeout'] ) - ->queryParamaters( $params ); + ->queryParameters( $params ) + ->queryHeaders( $headers ) + ->postWithData( $body ); } protected function parseResponse( TranslationQueryResponse $reply ) { $body = $reply->getBody(); - $text = preg_replace( '~<string.*>(.*)</string>~', '\\1', $body ); - $text = Sanitizer::decodeCharReferences( $text ); + $response = json_decode( $body, true ); + if ( !isset( $response[ 0 ][ 'translations' ][ 0 ][ 'text' ] ) ) { + throw new TranslationWebServiceException( + 'Unable to parse translation response: ' . $body + ); + } + + $text = $response[ 0 ][ 'translations' ][ 0 ][ 'text' ]; $text = $this->unwrapUntranslatable( $text ); return $text; } + + /// Override from parent + protected function wrapUntranslatable( $text ) { + $pattern = '~%[^% ]+%|\$\d|{VAR:[^}]+}|{?{(PLURAL|GRAMMAR|GENDER):[^|]+\||%(\d\$)?[sd]~'; + $wrap = '<span class="notranslate">\0</span>'; + return preg_replace( $pattern, $wrap, $text ); + } + + /// Override from parent + protected function unwrapUntranslatable( $text ) { + $pattern = '~<span class="notranslate">\s*(.*?)\s*</span>~'; + return preg_replace( $pattern, '\1', $text ); + } } |