tokenizer = $tokenizer; }

	/**
	 * Sets an option, forwarding it to the wrapped tokenizer when there is one.
	 *
	 * When $name is self::REGEX_EXEMPTION the value is additionally remembered
	 * and later stripped from the delimiter list that tokenize() splits on.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function setOption( $name, $value ) {

		if ( $this->tokenizer !== null ) {
			$this->tokenizer->setOption( $name, $value );
		}

		if ( $name !== self::REGEX_EXEMPTION ) {
			return;
		}

		$this->patternExemption = $value;
	}

	/**
	 * Reports whether tokens are words: the answer of the wrapped tokenizer
	 * when one is set, otherwise true.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function isWordTokenizer() {

		if ( $this->tokenizer === null ) {
			return true;
		}

		return $this->tokenizer->isWordTokenizer();
	}

	/**
	 * Splits a string on runs of punctuation, symbol and whitespace characters.
	 *
	 * If a wrapped tokenizer is set, the string is first tokenized by it and
	 * the resulting tokens are re-joined with single spaces before splitting.
	 * Characters configured through the REGEX_EXEMPTION option are removed
	 * from the delimiter list and therefore no longer split the string.
	 *
	 * @since 0.1
	 *
	 * @param string $string
	 *
	 * @return array non-empty tokens; an empty array when nothing could be split
	 */
	public function tokenize( $string ) {

		if ( $this->tokenizer !== null ) {
			$tokens = $this->tokenizer->tokenize( $string );

			// tokenize() is documented as `array|false`; a wrapped tokenizer may
			// therefore hand back `false`, which implode() rejects on PHP 8.
			if ( !is_array( $tokens ) ) {
				return array();
			}

			$string = implode( " ", $tokens );
		}

		// Full-width and CJK punctuation/symbols. None of these characters is
		// special inside a PCRE character class, so no escaping is required.
		// NOTE(review): the full-width forms (U+FF01..U+FF5E) were restored from
		// a width-folded copy of this list in which a bare `'` ended the string
		// literal and bare `-` / `]` broke the character class; confirm the
		// exact set against the upstream file.
		$wideDelimiters = '_-・,、;:!?.。…◆★◇□■()【】《》〈〉;:“”"〃'`[]{}「」@*\/&#%`^+<=>|~≪≫─$"';

		// Narrow delimiters, already escaped for use between `[` and `]` with
		// `/` as the regex delimiter.
		$narrowDelimiters = '_\-・,、;:!?.。()[\]{}「」@*\/&#%`^+<=>|~«»$"\s';

		$pattern = str_replace(
			$this->patternExemption,
			'',
			$wideDelimiters . $narrowDelimiters
		);

		// -1 means "no limit"; passing null for $limit is deprecated as of PHP 8.1.
		$result = preg_split( '/[' . $pattern . ']+/u', $string, -1, PREG_SPLIT_NO_EMPTY );

		// preg_split() yields false on failure (e.g. malformed UTF-8 with /u).
		if ( $result === false ) {
			return array();
		}

		return $result;
	}

}