1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
<?php
namespace SMW;
/**
* @license GNU GPL v2+
* @since 2.1
*
* @author mwjames
*/
class Encoder {

	/**
	 * Escapes characters that are problematic in SMW parameters and MediaWiki
	 * titles using the SMW-escape scheme.
	 *
	 * SMW-escape is like URL encoding but uses `-` instead of `%` as the marker
	 * to prevent double encoding by later MediaWiki actions. The following
	 * values are replaced:
	 *
	 * - `-`: the escape marker itself, therefore it has to be escaped as well
	 * - `[ ] < > &lt; &gt; '' |`: problematic in MW titles
	 * - `&` (and `&amp;`): sometimes problematic in MW titles ([[&]] is OK,
	 *   [[&test]] is OK, [[&test;]] is not OK)
	 * - ` `: equivalent to `_` in MW titles, but not equivalent in certain
	 *   parameter values
	 * - "\n": real line breaks are not possible in [[...]]
	 * - `#`: has a special meaning in URLs and triggers additional MW escapes
	 * - `%`: must be escaped to prevent any impact of double decoding when
	 *   `-` is replaced by `%` before url-decoding
	 * - `?`: if not escaped, strange effects were observed on some sites
	 *   (printouts and other parameters ignored without obvious cause)
	 * - `/ $ \ ; _`: replaced by their hexadecimal code as well
	 *
	 * Escaping is always safe to do, it only makes URLs less readable.
	 *
	 * Note: `:` is not replaced by this method, although self::unescape
	 * translates `-3A` back into `:`.
	 *
	 * @see SMWInfolink::encodeParameters
	 *
	 * @since 2.2
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	public static function escape( $string ) {

		// str_replace() applies the pairs one after another on the result of
		// the previous pair, so the order is significant:
		// - `-` has to come first, otherwise the `-` of every generated code
		//   would be escaped again
		// - the entity forms have to come before `&` and `;`, otherwise they
		//   would be split into `-26...-3B` before they can be matched
		$replacements = [
			'-' => '-2D',
			'#' => '-23',
			"\n" => '-0A',
			' ' => '-20',
			'/' => '-2F',
			'[' => '-5B',
			']' => '-5D',
			'<' => '-3C',
			'>' => '-3E',
			'&lt;' => '-3C',
			'&gt;' => '-3E',
			'&amp;' => '-26',
			'\'\'' => '-27-27',
			'|' => '-7C',
			'&' => '-26',
			'%' => '-25',
			'?' => '-3F',
			'$' => '-24',
			"\\" => '-5C',
			';' => '-3B',
			'_' => '-5F',
		];

		return str_replace(
			array_keys( $replacements ),
			array_values( $replacements ),
			$string
		);
	}

	/**
	 * Reverse of self::escape
	 *
	 * Translates SMW-escape codes (`-XX`) back into the characters they stand
	 * for. In addition to the codes produced by self::escape, `-3A` is
	 * translated into `:`.
	 *
	 * @since 2.5
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	public static function unescape( $string ) {

		// Keyed by the plain character so that no key is an integer-like
		// string (a key such as '-20' would silently become int -20).
		// `-` has to be restored last: doing it earlier could create new
		// `-XX` sequences that a later pair would then wrongly translate.
		$replacements = [
			' ' => '-20',
			'#' => '-23',
			"\n" => '-0A',
			'/' => '-2F',
			'[' => '-5B',
			']' => '-5D',
			'<' => '-3C',
			'>' => '-3E',
			'&' => '-26',
			'\'\'' => '-27-27',
			'|' => '-7C',
			'%' => '-25',
			'?' => '-3F',
			'$' => '-24',
			"\\" => '-5C',
			';' => '-3B',
			':' => '-3A',
			'_' => '-5F',
			'-' => '-2D',
		];

		return str_replace(
			array_values( $replacements ),
			array_keys( $replacements ),
			$string
		);
	}

	/**
	 * Percent-encodes a string by passing it to rawurlencode().
	 *
	 * @see SMWInfolink::encodeParameters
	 *
	 * @since 2.2
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	public static function encode( $string ) {
		return rawurlencode( $string );
	}

	/**
	 * Decodes a string that uses SMW's own url encoding strategy (`-` instead
	 * of `%`) and sanitizes the result for output.
	 *
	 * `<` and `>` are returned as HTML entities (`&lt;`, `&gt;`), quotes are
	 * left untouched, `&` is returned as a plain ampersand, and leading and
	 * trailing whitespace is removed.
	 *
	 * @since 2.1
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	public static function decode( $string ) {

		// Apply decoding for SMW's own url encoding strategy (see SMWInfolink):
		// treat every `-` as `%`, url-decode, and turn any `%` that is left
		// over back into `-`
		$string = str_replace( '%', '-', rawurldecode( str_replace( '-', '%', $string ) ) );

		// Resolve `-2D` and `-3A` sequences that are still present after the
		// first pass
		$string = str_replace( [ '-2D', '-3A' ], [ '-', ':' ], $string );

		// Sanitize remaining string content
		$string = trim( htmlspecialchars( $string, ENT_NOQUOTES ) );

		// htmlspecialchars() encodes every `&` as `&amp;`; restore the plain
		// ampersand and turn non-breaking space entities into a simple space
		$string = str_replace( [ '&#160;', '&amp;' ], [ ' ', '&' ], $string );
		$string = str_replace( '&nbsp;', ' ', $string );

		return $string;
	}

}
|