diff options
author | Yaco <franco@reevo.org> | 2021-10-19 20:30:39 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2021-10-19 20:30:39 -0300 |
commit | 8c201ace3699b4928daf41eb7b4cdcb4565c6f3b (patch) | |
tree | fbd98f026864e9c1919d3ee740b6799ca0c651e2 /www/wiki/extensions/Scribunto/includes/engines/LuaCommon/lualib/ustring/make-tables.php | |
parent | e3880a1c86acaa3bbd05786ad2f5c586e6511a58 (diff) |
adds Scribunto
Diffstat (limited to 'www/wiki/extensions/Scribunto/includes/engines/LuaCommon/lualib/ustring/make-tables.php')
-rwxr-xr-x | www/wiki/extensions/Scribunto/includes/engines/LuaCommon/lualib/ustring/make-tables.php | 154 |
1 files changed, 154 insertions, 0 deletions
#!/usr/bin/php
<?php
/**
 * Generates the Lua lookup tables that live next to this script:
 *
 *  - lower.lua / upper.lua: maps from a UTF-8 character to its lowercase /
 *    uppercase form, for every character whose case mapping differs from
 *    itself according to mb_strtolower() / mb_strtoupper().
 *  - charsets.lua: one table per pattern class letter (keyed by the byte
 *    value of the letter) telling which codepoints belong to that class.
 *
 * Must be run from the command line; the output files are overwritten.
 */

if ( PHP_SAPI !== 'cli' && PHP_SAPI !== 'phpdbg' ) {
	die( "This script may only be executed from the command line.\n" );
}

// Report a fatal error on stderr and stop with a non-zero exit status, so
// that calling scripts can detect the failure (die( "string" ) exits with 0).
$fail = function ( $message ) {
	file_put_contents( 'php://stderr', $message );
	exit( 1 );
};

// Map of codepoint => UTF-8 encoded character, for every Unicode scalar value.
$chars = [];
for ( $i = 0; $i <= 0x10ffff; $i++ ) {
	if ( $i < 0xd800 || $i > 0xdfff ) { // Skip UTF-16 surrogates
		$chars[$i] = mb_convert_encoding( pack( 'N', $i ), 'UTF-8', 'UTF-32BE' );
	}
}

### Uppercase and Lowercase mappings
echo "Creating upper and lower tables...\n";
$L = fopen( __DIR__ . '/lower.lua', 'w' );
if ( !$L ) {
	$fail( "Failed to open lower.lua\n" );
}
$U = fopen( __DIR__ . '/upper.lua', 'w' );
if ( !$U ) {
	$fail( "Failed to open upper.lua\n" );
}
fprintf( $L, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $L, "return {\n" );
fprintf( $U, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $U, "return {\n" );
foreach ( $chars as $i => $c ) {
	$l = mb_strtolower( $c, 'UTF-8' );
	$u = mb_strtoupper( $c, 'UTF-8' );
	// Only characters that actually change are written to the tables.
	if ( $c !== $l ) {
		fprintf( $L, "\t[\"%s\"] = \"%s\",\n", $c, $l );
	}
	if ( $c !== $u ) {
		fprintf( $U, "\t[\"%s\"] = \"%s\",\n", $c, $u );
	}
}
fprintf( $L, "}\n" );
fprintf( $U, "}\n" );
fclose( $L );
fclose( $U );

### Pattern code mappings
echo "Creating charsets table...\n";
$fh = fopen( __DIR__ . '/charsets.lua', 'w' );
if ( !$fh ) {
	$fail( "Failed to open charsets.lua\n" );
}

// Each entry is [ regex, includes ]:
//  - regex: PCRE fragment matching a single character of the class, or null
//    when the class is fully described by the classes named in "includes".
//  - includes: string of other class letters (or null). Characters matched by
//    those classes are left out of this class's own table and looked up in
//    the other tables at runtime instead.
$pats = [
	// These should match the expressions in UstringLibrary::patternToRegex()
	'a' => [ '\p{L}', 'lu' ],
	'c' => [ '\p{Cc}', null ],
	'd' => [ '\p{Nd}', null ],
	'l' => [ '\p{Ll}', null ],
	'p' => [ '\p{P}', null ],
	's' => [ '\p{Xps}', null ],
	'u' => [ '\p{Lu}', null ],
	'w' => [ null, 'da' ], # '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
	// ASCII hex digits plus the fullwidth forms U+FF10-FF19, U+FF21-FF26 and
	// U+FF41-FF46. The fullwidth characters are written as escapes so that
	// text normalisation cannot fold them into their ASCII equivalents.
	'x' => [ '[0-9A-Fa-f\x{FF10}-\x{FF19}\x{FF21}-\x{FF26}\x{FF41}-\x{FF46}]', null ],
	'z' => [ '\0', null ],
];

// Class letter => list of Lua range-test expressions collected by addRange().
$ranges = [];

/**
 * Record that codepoints $start (inclusive) to $end (exclusive) belong to
 * pattern class $k.
 *
 * Runs of 10 or more codepoints that do not start in printable ASCII
 * (0x20-0x7e) are stored in the global $ranges as a Lua comparison on "c";
 * every other run is written to the global $fh immediately, one table entry
 * per codepoint.
 *
 * @param string $k Pattern class letter (key of $pats)
 * @param int $start First codepoint of the run
 * @param int $end Codepoint one past the end of the run
 */
// @codingStandardsIgnoreLine MediaWiki.NamingConventions.PrefixedGlobalFunctions
function addRange( $k, $start, $end ) {
	// @codingStandardsIgnoreLine MediaWiki.NamingConventions.ValidGlobalName
	global $fh, $ranges;
	// Speed/memory tradeoff
	if ( !( $start >= 0x20 && $start < 0x7f ) && $end - $start >= 10 ) {
		$ranges[$k][] = sprintf( "c >= 0x%06x and c < 0x%06x", $start, $end );
	} else {
		for ( $i = $start; $i < $end; $i++ ) {
			fprintf( $fh, "\t\t[0x%06x] = 1,\n", $i );
		}
	}
}

fprintf( $fh, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $fh, "local pats = {\n" );
foreach ( $pats as $k => $pp ) {
	$ranges[$k] = [];
	$re = $pp[0];
	if ( !$re ) {
		// Class is defined purely in terms of other classes.
		fprintf( $fh, "\t[0x%02x] = {},\n", ord( $k ) );
		continue;
	}

	// Characters matching $re2 are covered by the included classes and are
	// therefore excluded here. 'fail' never matches a single character.
	$re2 = 'fail';
	if ( $pp[1] ) {
		$re2 = [];
		foreach ( str_split( $pp[1] ) as $p ) {
			$re2[] = $pats[$p][0];
		}
		$re2 = implode( '|', $re2 );
	}

	// Build the patterns once per class rather than once per character.
	$matchRe = '/^' . $re . '$/u';
	$excludeRe = '/^' . $re2 . '$/u';
	// preg_match() returns false when a pattern fails to compile; without
	// this check that would be treated as "no match" and silently produce
	// an incomplete table.
	if ( preg_match( $matchRe, '' ) === false || preg_match( $excludeRe, '' ) === false ) {
		$fail( "Invalid regular expression for pattern class '$k'\n" );
	}

	fprintf( $fh, "\t[0x%02x] = {\n", ord( $k ) );
	// Start of the current run of consecutive matching characters, if any.
	$rstart = null;
	foreach ( $chars as $i => $c ) {
		if ( preg_match( $matchRe, $c ) && !preg_match( $excludeRe, $c ) ) {
			if ( $rstart === null ) {
				$rstart = $i;
			}
		} else {
			if ( $rstart !== null ) {
				addRange( $k, $rstart, $i );
				$rstart = null;
			}
		}
	}
	if ( $rstart !== null ) {
		// Run extends to the last codepoint.
		addRange( $k, $rstart, 0x110000 );
	}
	fprintf( $fh, "\t},\n" );
}
// Empty tables for the uppercase class letters; they get a negating
// metatable further down.
foreach ( $pats as $k => $pp ) {
	$kk = strtoupper( $k );
	fprintf( $fh, "\t[0x%02x] = {},\n", ord( $kk ) );
}
fprintf( $fh, "}\n" );
// Metatables for the lowercase class letters: fall back to the included
// classes and to the range tests collected by addRange().
foreach ( $pats as $k => $pp ) {
	$body = '';
	$check = [];
	if ( $pp[1] ) {
		foreach ( str_split( $pp[1] ) as $p ) {
			$check[] = sprintf( "pats[0x%02x][k]", ord( $p ) );
		}
	}
	if ( $ranges[$k] ) {
		$body = "\tlocal c = tonumber( k ) or 0/0;\n";
		$check = array_merge( $check, $ranges[$k] );
	}
	if ( $check ) {
		$body .= "\treturn " . implode( " or\n\t\t", $check );
		fprintf( $fh, "setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
			ord( $k ), $body );
	}
}
// Metatables for the uppercase class letters: the negation of the lowercase
// class of the same letter.
foreach ( $pats as $k => $pp ) {
	fprintf( $fh, "setmetatable( pats[0x%02x], { ", ord( strtoupper( $k ) ) );
	fprintf( $fh, "__index = function ( t, k ) return k and not pats[0x%02x][k] end", ord( $k ) );
	fprintf( $fh, " } )\n" );
}
fprintf( $fh, "\n-- For speed, cache printable ASCII characters in main tables\n" );
fprintf( $fh, "for k, t in pairs( pats ) do\n" );
fprintf( $fh, "\tif k >= 0x61 then\n" );
fprintf( $fh, "\t\tfor i = 0x20, 0x7e do\n" );
fprintf( $fh, "\t\t\tt[i] = t[i] or false\n" );
fprintf( $fh, "\t\tend\n" );
fprintf( $fh, "\tend\n" );
fprintf( $fh, "end\n" );
fprintf( $fh, "\nreturn pats\n" );
fclose( $fh );