summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/Scribunto/includes/engines/LuaStandalone/MWServer.lua
blob: a8227bed2a0ec8e52934f7ed266644294e698b65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
-- Class table for the standalone Lua server. It is assigned as a global (not
-- a local), so it is reachable by name from other chunks.
MWServer = {}

--- Create a new MWServer object
--
-- @param interpreterId Identifier of this interpreter: a number, or a value
--   convertible to one with tonumber(). Raises an error otherwise.
-- @param intSize Must be 4 or 8 (after tonumber()); selects the limits below
--   which numbers and table keys are serialized as integers. Presumably the
--   size in bytes of the PHP host's integers -- confirm against the caller.
-- @return The new server object, already initialised with init()
function MWServer:new( interpreterId, intSize )
	interpreterId = tonumber( interpreterId )
	if not interpreterId then
		error( "bad argument #1 to 'MWServer:new' (must be a number or convertible to a number)", 2 )
	end
	intSize = tonumber( intSize )
	if intSize ~= 4 and intSize ~= 8 then
		error( "bad argument #2 to 'MWServer:new' (must be 4 or 8)", 2 )
	end

	-- Must be local: a bare assignment would create (and overwrite) a global
	-- named "obj" every time a server is constructed.
	local obj = {
		interpreterId = interpreterId,
		nextChunkId = 1,
		chunks = {},
		xchunks = {},
		protectedFunctions = {},
		protectedEnvironments = {},
		baseEnv = {}
	}
	if intSize == 4 then
		obj.intMax = 2147483648
		obj.intKeyMax = 2147483648
	else
		-- Lua can't represent most larger integers, so they may as well be sent to PHP as floats.
		obj.intMax = 9007199254740992
		obj.intKeyMax = 9223372036854775807
	end
	-- Method lookups on the instance fall through to the class table
	setmetatable( obj, self )
	self.__index = self

	obj:init()

	return obj
end

--- Initialise a new MWServer object
-- Builds the base environment and records which functions and environments
-- are protected. The protection tables are handed to the environment
-- functions by newEnvironment(), so entries added here afterwards are
-- visible through the same table references.
function MWServer:init()
	self.baseEnv = self:newEnvironment()

	-- Mark every function-valued field of a table as protected
	local function protectFunctionsIn( t )
		for _, value in pairs( t ) do
			if type( value ) == 'function' then
				self.protectedFunctions[value] = true
			end
		end
	end

	-- Functions stored directly on the instance
	protectFunctionsIn( self )

	-- The methods themselves live on the class table reached through the
	-- metatable's __index. pairs( self ) does not follow __index, so without
	-- this second pass none of the server's own methods would be protected.
	local mt = getmetatable( self )
	local class = mt and mt.__index
	if type( class ) == 'table' and class ~= self then
		protectFunctionsIn( class )
	end

	self.protectedEnvironments[_G] = true
end

--- Run the server: listen for messages from PHP and handle them.
-- No request is sent first; dispatch() is entered in listen-only mode. The
-- debug line below is only reached if dispatch() returns.
function MWServer:execute()
	-- Omitting the argument is the same as passing nil: nothing is sent
	self:dispatch()
	self:debug( 'MWServer:execute: returning' )
end

--- Pack a vararg list (or a multiple-return-value) into a count and a table.
-- The count comes from select( '#', ... ), so nil values are counted too.
--
-- @param ... Any number of values
-- @return The number of values passed
-- @return A table holding the values at indexes 1..count
function MWServer:listToCountAndTable( ... )
	local count = select( '#', ... )
	local values = { ... }
	return count, values
end

--- Call a PHP function
-- Sends a "call" message through dispatch() and waits for the matching
-- reply. A "return" reply yields its values; an "error" reply is re-raised
-- as a Lua error. Any other reply is treated as an internal error.
--
-- @param id The function ID, specified by a registerLibrary message
-- @param nargs Count of function arguments
-- @param args The function arguments
-- @return The return values from the PHP function (any number of values)
function MWServer:call( id, nargs, args )
	local request = {
		op = 'call',
		id = id,
		nargs = nargs,
		args = args
	}
	local result = self:dispatch( request )
	local op = result.op

	if op == 'error' then
		-- Raise an error in the actual user code that called the function
		-- The level is 3 since our immediate caller is a closure
		error( result.value, 3 )
	end
	if op == 'return' then
		-- Explicit bounds so that nil values are passed back intact
		return unpack( result.values, 1, result.nvalues )
	end
	self:internalError( 'MWServer:call: unexpected result op' )
end

--- Handle a "call" message from PHP. Call the relevant function.
--
-- The function is looked up in self.chunks by message.id and run under
-- xpcall() with message.args[1..message.nargs] as its arguments.
--
-- @param message The message from PHP (fields read: id, args, nargs)
-- @return A response message to send back to PHP: op = 'return' with
--   nvalues/values on success, or op = 'error' with a value and, when
--   available, a structured trace
function MWServer:handleCall( message )
	-- Unknown chunk ID: report it to PHP instead of raising an error
	if not self.chunks[message.id] then
		return {
			op = 'error',
			value = 'function id ' .. message.id .. ' does not exist'
		}
	end

	-- n counts everything xpcall() returned, including its leading status
	-- value; result holds those values.
	-- NOTE(review): both closures return the result of a call directly, and
	-- attachTrace() captures the trace by stack level, so changing the shape
	-- of these closures may change the captured trace -- confirm before editing.
	local n, result = self:listToCountAndTable( xpcall(
		function ()
			return self.chunks[message.id]( unpack( message.args, 1, message.nargs ) )
		end,
		function ( err )
			return MWServer:attachTrace( err )
		end
	) )

	if result[1] then
		-- table.remove( result, 1 ) renumbers from 2 to #result. But #result
		-- is not necessarily "right" if result contains nils.
		result = { unpack( result, 2, n ) }
		return {
			op = 'return',
			-- The status value is not one of the function's return values
			nvalues = n - 1,
			values = result
		}
	else
		-- result[2] is whatever the error handler returned. attachTrace()
		-- yields a table with "value" and "trace" fields; if either is missing
		-- the object is passed through unchanged as the error value.
		if result[2].value and result[2].trace then
			return {
				op = 'error',
				value = result[2].value,
				trace = result[2].trace,
			}
		else
			return {
				op = 'error',
				value = result[2]
			}
		end
	end
end

--- Error handler used with xpcall() by handleCall().
-- Wraps the raised error together with a structured backtrace.
--
-- @param err The error object that was raised
-- @return A table with the fields "value" (the original error object) and
--   "trace" (the result of getStructuredTrace( 2 ))
function MWServer:attachTrace( err )
	local wrapped = {}
	wrapped.value = err
	wrapped.trace = self:getStructuredTrace( 2 )
	return wrapped
end

--- Handle a "loadString" message from PHP.
-- Compile message.text as a chunk named message.chunkName, bind it to the
-- base environment and register it so PHP can refer to it by ID. Text that
-- contains the binary chunk marker is refused without being compiled.
--
-- @param message The message from PHP (fields read: text, chunkName)
-- @return A response message to send back to PHP: op = 'return' carrying the
--   new chunk ID, or op = 'error' carrying the reason
function MWServer:handleLoadString( message )
	local source = message.text

	-- Plain (non-pattern) search for the binary chunk marker
	if string.find( source, '\27Lua', 1, true ) then
		return {
			op = 'error',
			value = 'cannot load code with a Lua binary chunk marker escape sequence in it'
		}
	end

	local chunk, errorMsg = loadstring( source, message.chunkName )
	if not chunk then
		-- Compilation failed; pass the compiler's message on to PHP
		return {
			op = 'error',
			value = errorMsg
		}
	end

	setfenv( chunk, self.baseEnv )
	local chunkId = self:addChunk( chunk )
	return {
		op = 'return',
		nvalues = 1,
		values = { chunkId }
	}
end

--- Register a function value and hand out the ID that refers to it.
-- The function is stored in both directions: chunks maps ID to function and
-- xchunks maps function to ID. Registered chunks can be referred to in
-- messages from PHP.
--
-- @param chunk The function value
-- @return The chunk ID
function MWServer:addChunk( chunk )
	local chunkId = self.nextChunkId
	-- Reserve the ID before storing, so the next registration gets a new one
	self.nextChunkId = chunkId + 1

	self.chunks[chunkId] = chunk
	self.xchunks[chunk] = chunkId
	return chunkId
end

--- Handle a "cleanupChunks" message from PHP.
-- Drop every registered chunk whose ID has no true entry in message.ids,
-- from both the chunks and xchunks tables.
--
-- @param message The message from PHP (field read: ids, a table keyed by
--   chunk ID)
-- @return A response message to send back to PHP, carrying no values
function MWServer:handleCleanupChunks( message )
	-- Clearing existing entries while traversing with pairs() is permitted
	for chunkId, func in pairs( self.chunks ) do
		local stillReferenced = message.ids[chunkId]
		if not stillReferenced then
			self.chunks[chunkId] = nil
			self.xchunks[func] = nil
		end
	end

	local response = {
		op = 'return',
		nvalues = 0,
		values = {}
	}
	return response
end

--- Handle a "registerLibrary" message from PHP.
-- Add the relevant functions to the base environment: for each entry of
-- message.functions, a Lua wrapper that forwards its arguments to the PHP
-- function with that ID is stored in baseEnv[message.name] under the entry's
-- name. The library table is created if it does not exist yet.
--
-- @param message The message from PHP (fields read: name, functions)
-- @return The response message, carrying no values
function MWServer:handleRegisterLibrary( message )
	local library = self.baseEnv[message.name]
	if not library then
		library = {}
		self.baseEnv[message.name] = library
	end

	for name, id in pairs( message.functions ) do
		-- Keep this a closure that directly returns self:call(): call() raises
		-- PHP errors at level 3 on the assumption that its immediate caller is
		-- such a closure.
		local wrapper = function( ... )
			return self:call( id, self:listToCountAndTable( ... ) )
		end
		library[name] = wrapper
		-- Protect the function against setfenv()
		self.protectedFunctions[wrapper] = true
	end

	return {
		op = 'return',
		nvalues = 0,
		values = {}
	}
end

--- Handle a "wrapPhpFunction" message from PHP.
-- Create an anonymous Lua function which, when called, forwards its
-- arguments to the PHP function identified by message.id.
--
-- @param message The message from PHP (field read: id)
-- @return The response message, carrying the new function as its one value
function MWServer:handleWrapPhpFunction( message )
	-- Captured now, so the wrapper does not depend on the message table later
	local phpFunctionId = message.id

	local function wrapper( ... )
		return self:call( phpFunctionId, self:listToCountAndTable( ... ) )
	end
	-- Protect the function against setfenv()
	self.protectedFunctions[wrapper] = true

	local response = {
		op = 'return',
		nvalues = 1,
		values = { wrapper }
	}
	return response
end

--- Handle a "getStatus" message from PHP
-- Reads /proc/self/stat and reports the process ID, the sum of fields 14-17
-- and field 23 of that file (see proc(5) for the field meanings). If the
-- file cannot be opened or does not have the expected layout, a response
-- with no values is returned instead.
--
-- @param message The request message (not used)
-- @return The response message: one table with pid, time and vsize, or no
--   values when the status is unavailable
function MWServer:handleGetStatus( message )
	local nullRet = {
		op = 'return',
		nvalues = 0,
		values = {}
	}
	local file = io.open( '/proc/self/stat' )
	if not file then
		return nullRet
	end
	local s = file:read( '*a' )
	file:close()
	if not s then
		return nullRet
	end

	-- Field 2 is the process name in parentheses and may itself contain
	-- spaces, so it cannot be split on whitespace. The greedy ".*" runs up to
	-- the last ") ", after which every remaining field is a plain token.
	local pid, rest = string.match( s, '^(%d+) %(.*%) (.*)$' )
	if not pid then
		return nullRet
	end

	-- t[1] is the pid and t[2] a placeholder for the name, so that t[n]
	-- lines up with field number n of the stat file.
	local t = { pid, '' }
	for token in string.gmatch( rest, '%S+' ) do
		t[#t + 1] = token
	end
	-- Field 23 is read below, so at least 23 fields are required
	if #t < 23 then
		return nullRet
	end

	local utime, stime = tonumber( t[14] ), tonumber( t[15] )
	local cutime, cstime = tonumber( t[16] ), tonumber( t[17] )
	local vsize = tonumber( t[23] )
	-- A non-numeric field would otherwise raise an arithmetic error
	if not ( utime and stime and cutime and cstime and vsize ) then
		return nullRet
	end

	return {
		op = 'return',
		nvalues = 1,
		values = {{
			pid = tonumber( pid ),
			time = utime + stime + cutime + cstime,
			vsize = vsize,
		}}
	}
end

--- The main request/response loop
--
-- Optionally send a request to PHP, then read messages until the reply
-- arrives. A message whose op is "return" or "error" is the reply and is
-- returned. Any other message is a request from PHP (possibly re-entrant):
-- it is passed to the matching handler and the handler's result is sent back
-- as a reply, after which the loop continues.
--
-- When called without a request, the loop only listens. "quit" and
-- "testquit" messages end the process with exit status 0 and 42
-- respectively; an unknown op is an internal error.
--
-- @param msgToPhp The message to send to PHP. Optional.
-- @return The matching response message
function MWServer:dispatch( msgToPhp )
	if msgToPhp then
		self:sendMessage( msgToPhp, 'call' )
	end

	while true do
		local incoming = self:receiveMessage()
		local op = incoming.op

		if op == 'return' or op == 'error' then
			return incoming
		end

		-- Handlers are invoked with explicit method calls, one per op
		local reply
		if op == 'call' then
			reply = self:handleCall( incoming )
		elseif op == 'loadString' then
			reply = self:handleLoadString( incoming )
		elseif op == 'registerLibrary' then
			reply = self:handleRegisterLibrary( incoming )
		elseif op == 'wrapPhpFunction' then
			reply = self:handleWrapPhpFunction( incoming )
		elseif op == 'cleanupChunks' then
			reply = self:handleCleanupChunks( incoming )
		elseif op == 'getStatus' then
			reply = self:handleGetStatus( incoming )
		elseif op == 'quit' then
			self:debug( 'MWServer:dispatch: quit message received' )
			os.exit(0)
		elseif op == 'testquit' then
			self:debug( 'MWServer:dispatch: testquit message received' )
			os.exit(42)
		else
			self:internalError( "Invalid message operation" )
		end

		-- Only the handler branches produce a reply; the others exit
		if reply then
			self:sendMessage( reply, 'reply' )
		end
	end
end

--- Write a line to the debug output stream (stderr).
-- Strings are written as they are; any other value is first converted with
-- serialize(). A newline is appended in both cases. According to the
-- original note, the PHP host discards stderr by default but can be
-- configured to send it to a file.
--
-- @param s The message
function MWServer:debug( s )
	local text = s
	if type( s ) ~= 'string' then
		text = self:serialize( s )
	end
	io.stderr:write( text .. '\n' )
end

--- Raise an internal error
-- Write the message with a stack traceback to stderr, then end the process
-- with exit status 1. Because the process exits, this cannot be intercepted
-- with pcall().
--
-- Use this for protocol errors, and for any error raised where a dispatch()
-- call sits between the error and a possible pcall() handler: unwinding
-- dispatch() with a regular error() would violate the protocol and could
-- lead to a deadlock.
--
-- @param msg The error message
function MWServer:internalError( msg )
	local report = debug.traceback( msg )
	io.stderr:write( report .. '\n' )
	os.exit( 1 )
end

--- Raise an I/O error
-- Helper for failures reported by the io and file modules, which may supply
-- a descriptive message as a second return value. The result is passed to
-- internalError().
--
-- @param header A short description of the failed operation
-- @param info The message from the I/O function; used only if it is a string
function MWServer:ioError( header, info )
	local text = header
	if type( info ) == 'string' then
		text = header .. ': ' .. info
	end
	self:internalError( text )
end

--- Send a message to PHP
-- The message is encoded with encodeMessage(), written to stdout and
-- flushed. A message without an "op" field is an internal error, as is a
-- failed write.
--
-- @param msg The message table
-- @param direction 'call' or 'reply'
function MWServer:sendMessage( msg, direction )
	if not msg.op then
		self:internalError( "MWServer:sendMessage: invalid message" )
	end
	self:debug('TX ==> ' .. msg.op)

	local encMsg
	if direction ~= 'reply' then
		-- Outgoing call: a serialization error propagates to our caller
		encMsg = self:encodeMessage( msg )
	else
		-- Reply to PHP: a serialization error is caught here and sent to PHP
		-- as an error message instead
		local ok, encodedOrError = pcall( self.encodeMessage, self, msg )
		if ok then
			encMsg = encodedOrError
		else
			self:debug('Serialization failed: ' .. encodedOrError)
			self:debug('TX ==> error')
			encMsg = self:encodeMessage( { op = 'error', value = encodedOrError } )
		end
	end

	local success, errorMsg = io.stdout:write( encMsg )
	if not success then
		self:ioError( 'Write error', errorMsg )
	end
	io.stdout:flush()
end

--- Wait for a message from PHP and then decode and return it as a table
--
-- A message is a 16-byte header followed by a body whose length the header
-- gives. If stdin is at end of file when the header is read, the process
-- exits with status 0. Any other read failure, a short read, or a decoded
-- message without a string "op" field is reported through internalError(),
-- which does not return.
--
-- @return The received message
function MWServer:receiveMessage()
	-- Read the header
	local header, headerError = io.stdin:read( 16 )
	if header == nil and headerError == nil then
		-- End of file on stdin, exit gracefully
		os.exit(0)
	end
	if not header or #header ~= 16 then
		self:ioError( 'Read error', headerError )
	end
	local length = self:decodeHeader( header )

	-- Read the body; a short read is as much of a failure as no read
	local body, bodyError = io.stdin:read( length )
	if not body or #body ~= length then
		self:ioError( 'Read error', bodyError )
	end

	-- Unserialize it. Kept local so that the message does not leak into (and
	-- overwrite) a global named "msg".
	local msg = self:unserialize( body )

	-- A malformed message is a protocol error. Without this check the
	-- concatenation below would raise a regular, catchable error from inside
	-- the dispatch loop.
	if type( msg ) ~= 'table' or type( msg.op ) ~= 'string' then
		self:internalError( 'MWServer:receiveMessage: invalid message' )
	end

	self:debug('RX <== ' .. msg.op)
	if msg.op == 'error' then
		self:debug( 'Error: ' .. tostring( msg.value ) )
	end
	return msg
end

--- Encode a message for sending to PHP
-- The result is a 16-character header followed by the serialized message.
-- The header holds two 8-digit hexadecimal numbers: the length of the
-- serialized body, and a check value equal to twice that length minus one.
--
-- @param message The message table
-- @return The encoded message string
function MWServer:encodeMessage( message )
	local body = self:serialize( message )
	local bodyLength = #body
	local header = string.format( '%08x%08x', bodyLength, bodyLength * 2 - 1 )
	return header .. body
end

-- Faster to create the table once than for each call to MWServer:serialize()
-- Maps each character matched by the final gsub() in MWServer:serialize()
-- (carriage return, newline, backslash) to its two-character escaped form.
local serialize_replacements = {
	['\r'] = '\\r',
	['\n'] = '\\n',
	['\\'] = '\\\\',
}

--- Convert a value to a string suitable for passing to PHP's unserialize().
-- Note that the following replacements must be performed before calling
-- unserialize:
--   "\\r" => "\r"
--   "\\n" => "\n"
--   "\\\\" => "\\"
--
-- Raises an error for values that cannot be represented: tables that have
-- already been visited during this call, table keys that are neither
-- numbers nor strings, colliding table keys, threads and userdata.
--
-- Because the result of the final gsub() call is returned directly, the
-- number of substitutions is returned as a second value.
--
-- @param var The value.
-- @return The serialized string
function MWServer:serialize( var )
	-- Tables already visited during this call. Entries are never removed, so
	-- NOTE(review): a table reachable by two different paths also triggers
	-- the "circular reference" error, even if no cycle exists.
	local done = {}

	-- True if var is a number with no fractional part in the range
	-- [-max, max). Non-numbers (including nil) yield false.
	local function isInteger( var, max )
		return type(var) == 'number'
			and math.floor( var ) == var
			and var >= -max
			and var < max
	end

	-- Encode one value. "level" is passed along on recursion but is not
	-- otherwise read.
	local function recursiveEncode( var, level )
		local t = type( var )
		if t == 'nil' then
			return 'N;'
		elseif t == 'number' then
			if isInteger( var, self.intMax ) then
				return 'i:' .. string.format( '%d', var ) .. ';'
			elseif var < math.huge and var > -math.huge then
				-- Finite non-integer (or out-of-range integer): sent as a float
				return 'd:' .. string.format( '%.17g', var ) .. ';'
			elseif var == math.huge then
				return 'd:INF;'
			elseif var == -math.huge then
				return 'd:-INF;'
			else
				-- Fails every comparison above, i.e. not-a-number
				return 'd:NAN;'
			end
		elseif t == 'string' then
			-- The length is that of the unescaped string; escaping happens
			-- once, on the complete output, at the end of serialize()
			return 's:' .. string.len( var ) .. ':"' .. var .. '";'
		elseif t == 'boolean' then
			if var then
				return 'b:1;'
			else
				return 'b:0;'
			end
		elseif t == 'table' then
			if done[var] then
				error("Cannot pass circular reference to PHP")
			end
			done[var] = true
			-- buf[1] is a placeholder for the array header, which needs the
			-- element count and is therefore filled in after the loop
			local buf = { '' }
			local numElements = 0
			-- Normalised key strings already emitted for this table
			local seen = {}
			for key, value in pairs(var) do
				local k = key
				local t = type( k )

				-- Convert integers in range to look like standard integers.
				-- Use tostring() for the rest. Reject all other non-strings.
				if isInteger( k, self.intKeyMax ) then
					k = string.format( '%d', k )
				elseif t == 'number' then
					k = tostring( k );
				elseif t ~= 'string' then
					error("Cannot use " .. t .. " as an array key when passing data from Lua to PHP");
				end

				-- Zend PHP doesn't really care whether integer keys are serialized
				-- as ints or strings, it converts them correctly on unserialize.
				-- But HHVM does depend on it, so keep doing it for now.
				local n = nil
				-- k is a string at this point; only canonical decimal integers
				-- (no leading zeros, no "-0") are considered
				if k == '0' or k:match( '^-?[1-9]%d*$' ) then
					n = tonumber( k )
					if n == -9223372036854775808 and k ~= '-9223372036854775808' then
						-- Bad edge rounding
						n = nil
					end
				end
				if isInteger( n, self.intKeyMax ) then
					buf[#buf + 1] = 'i:' .. k .. ';'
				else
					buf[#buf + 1] = recursiveEncode( k, level + 1 )
				end

				-- Detect collisions, e.g. { [0] = 'foo', ["0"] = 'bar' }
				if seen[k] then
					error( 'Collision for array key ' .. k .. ' when passing data from Lua to PHP' );
				end
				seen[k] = true

				buf[#buf + 1] = recursiveEncode( value, level + 1 )
				numElements = numElements + 1
			end
			buf[1] = 'a:' .. numElements .. ':{'
			buf[#buf + 1] = '}'
			return table.concat(buf)
		elseif t == 'function' then
			-- Reuse the ID of an already-registered function; otherwise
			-- register it now, which adds it to the tracked chunks
			local id
			if self.xchunks[var] then
				id = self.xchunks[var]
			else
				id = self:addChunk(var)
			end
			-- Sent as an object record carrying this interpreter's ID and the
			-- chunk ID
			return 'O:42:"Scribunto_LuaStandaloneInterpreterFunction":2:{s:13:"interpreterId";i:' ..
				self.interpreterId .. ';s:2:"id";i:' .. id .. ';}'
		elseif t == 'thread' then
			error("Cannot pass thread to PHP")
		elseif t == 'userdata' then
			error("Cannot pass userdata to PHP")
		else
			error("Cannot pass unrecognised type to PHP")
		end
	end

	-- Escape CR, LF and backslash in the complete output
	return recursiveEncode( var, 0 ):gsub( '[\r\n\\]', serialize_replacements )
end

--- Convert a Lua expression string to its corresponding value.
-- The text is compiled as "return <text>" and run in an environment whose
-- only global is "chunks", so references of the form chunks[id] evaluate to
-- the corresponding function values. Text that does not compile is an
-- internal error.
--
-- @param text The Lua expression, as received from PHP
-- @return The value of the expression
function MWServer:unserialize( text )
	local loader = loadstring( 'return ' .. text )
	if not loader then
		self:internalError( "MWServer:unserialize: invalid chunk" )
	end

	-- Don't waste JIT cache space by storing every message in it
	if jit then
		jit.off( loader )
	end

	local env = { chunks = self.chunks }
	setfenv( loader, env )
	return loader()
end

--- Decode a message header.
-- The header consists of two 8-character hexadecimal fields: the body length
-- and a check value, which must equal twice the length minus one. A header
-- that is not hexadecimal, or whose check value does not match, is an
-- internal error.
--
-- @param header The header string
-- @return The body length
function MWServer:decodeHeader( header )
	local lengthHex = string.sub( header, 1, 8 )
	local checkHex = string.sub( header, 9, 16 )

	local hexOnly = '^%x+$'
	if not ( string.match( lengthHex, hexOnly ) and string.match( checkHex, hexOnly ) ) then
		self:internalError( "Error decoding message header: " .. lengthHex .. '/' .. checkHex )
	end

	local length = tonumber( lengthHex, 16 )
	local check = tonumber( checkHex, 16 )
	if check ~= length * 2 - 1 then
		self:internalError( "Error decoding message header" )
	end
	return length
end

--- Get a traceback similar to the one from debug.traceback(), but as a table
-- rather than formatted as a string
--
-- Each entry holds the short_src, what, currentline, name, namewhat and
-- linedefined fields reported by debug.getinfo() for one stack level. Source
-- names ending in "/MWServer.lua" or "/mw_main.lua" are shortened to the
-- bare file name.
--
-- @param level The level to start at: 1 for the function that called
--   getStructuredTrace()
-- @return A table with the backtrace information, innermost level first
function MWServer:getStructuredTrace( level )
	local copiedFields = { 'short_src', 'what', 'currentline', 'name', 'namewhat', 'linedefined' }
	local trace = {}

	-- Skip this function's own stack level
	local depth = level + 1
	local rawInfo = debug.getinfo( depth, 'nSl' )
	while rawInfo ~= nil do
		local info = {}
		for _, key in ipairs( copiedFields ) do
			info[key] = rawInfo[key]
		end

		local source = rawInfo.short_src
		if string.match( source, '/MWServer.lua$' ) then
			info.short_src = 'MWServer.lua'
		end
		if string.match( source, '/mw_main.lua$' ) then
			info.short_src = 'mw_main.lua'
		end

		trace[#trace + 1] = info
		depth = depth + 1
		rawInfo = debug.getinfo( depth, 'nSl' )
	end
	return trace
end

--- Create a table to be used as a restricted environment, based on the current
-- global environment.
--
-- The environment receives clones of a fixed list of globals, the string
-- library (shared, not cloned), a tostring() that goes through
-- MWServer:tostring(), protected setfenv()/getfenv() replacements, and
-- reduced "debug" and "os" tables. Note that string.dump is removed from the
-- shared string table itself.
--
-- @return The environment table
function MWServer:newEnvironment()
	local allowedGlobals = {
		-- base
		"assert",
		"error",
		"getmetatable",
		"ipairs",
		"next",
		"pairs",
		"pcall",
		"rawequal",
		"rawget",
		"rawset",
		"select",
		"setmetatable",
		"tonumber",
		"type",
		"unpack",
		"xpcall",
		"_VERSION",
		-- libs
		"table",
		"math"
	}

	local env = {}
	for _, globalName in ipairs( allowedGlobals ) do
		env[globalName] = mw.clone( _G[globalName] )
	end

	-- Cloning 'string' doesn't work right, because strings still use the old
	-- 'string' as the metatable. So just copy it.
	env.string = string

	-- Inside the environment, _G refers to the environment itself
	env._G = env
	env.tostring = function( val )
		return self:tostring( val )
	end
	env.string.dump = nil

	local protectedSetfenv, protectedGetfenv = mw.makeProtectedEnvFuncs(
		self.protectedEnvironments, self.protectedFunctions )
	env.setfenv = protectedSetfenv
	env.getfenv = protectedGetfenv

	env.debug = { traceback = debug.traceback }
	env.os = {
		date = os.date,
		difftime = os.difftime,
		time = os.time,
		clock = os.clock
	}
	return env
end

--- An implementation of tostring() which does not expose pointers.
-- If the value's metatable (as reported by getmetatable()) is a table with a
-- __tostring field, that function produces the result. Otherwise numbers,
-- strings, booleans and nil are converted with the standard tostring(), and
-- every other value is represented by its type name only.
--
-- @param val The value to convert
-- @return The string representation
function MWServer:tostring(val)
	local mt = getmetatable( val )
	-- getmetatable() returns the __metatable field when one is set, and that
	-- can be any value. Indexing e.g. a number or a boolean would raise an
	-- error, so only look for __tostring in an actual table.
	if type( mt ) == 'table' and mt.__tostring then
		return mt.__tostring(val)
	end
	local typeName = type(val)
	local nonPointerTypes = {number = true, string = true, boolean = true, ['nil'] = true}
	if nonPointerTypes[typeName] then
		return tostring(val)
	else
		return typeName
	end
end

-- Hand the class table back to whoever loaded this chunk
return MWServer