diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/DataTransfer/includes |
first commit
Diffstat (limited to 'www/wiki/extensions/DataTransfer/includes')
9 files changed, 903 insertions, 0 deletions
// Source file: www/wiki/extensions/DataTransfer/includes/DT_Hooks.php

/**
 * Static functions called by various outside hooks.
 *
 * @author Yaron Koren
 * @ingroup DataTransfer
 */
class DTHooks {

	/**
	 * Add links to the 'AdminLinks' special page, defined by the Admin Links
	 * extension.
	 *
	 * Links to ViewXML, ImportXML and ImportCSV are always added to the
	 * 'main' row of the import/export section; ImportSpreadsheet is added
	 * only when the PHPExcel class is available.
	 *
	 * @param object $admin_links_tree Tree object offering getSection()
	 * @return bool Always true, so that hook processing continues
	 */
	public static function addToAdminLinks( $admin_links_tree ) {
		$import_export_section = $admin_links_tree->getSection(
			wfMessage( 'adminlinks_importexport' )->text()
		);
		// The section (or its 'main' row) may be missing; in that case
		// there is nowhere to put the links, so leave the tree untouched
		// instead of calling a method on a non-object.
		if ( !is_object( $import_export_section ) ) {
			return true;
		}
		$main_row = $import_export_section->getRow( 'main' );
		if ( !is_object( $main_row ) ) {
			return true;
		}

		$special_pages = array( 'ViewXML', 'ImportXML', 'ImportCSV' );
		if ( class_exists( 'PHPExcel' ) ) {
			$special_pages[] = 'ImportSpreadsheet';
		}
		foreach ( $special_pages as $special_page ) {
			$main_row->addItem( ALItem::newFromSpecialPage( $special_page ) );
		}
		return true;
	}

}
// Source file: www/wiki/extensions/DataTransfer/includes/DT_ImportJob.php

/**
 * Background job to import a page into the wiki, for use by Data Transfer
 *
 * @author Yaron Koren
 */
class DTImportJob extends Job {

	/**
	 * All arguments are handed to the parent Job constructor, with the
	 * job type fixed to 'dtImport'.
	 *
	 * @param mixed $title Title of the page the job applies to
	 * @param mixed $params Job parameters; run() reads the keys
	 *  'for_pages_that_exist', 'user_id', 'text' and 'edit_summary'
	 * @param int $id
	 */
	public function __construct( $title, $params = '', $id = 0 ) {
		parent::__construct( 'dtImport', $title, $params, $id );
	}

	/**
	 * Run a dtImport job
	 *
	 * Depending on the 'for_pages_that_exist' parameter, an existing page
	 * is skipped ('skip'), appended to ('append'), merged template by
	 * template ('merge'), or overwritten (any other value).
	 *
	 * @return boolean success
	 */
	public function run() {
		if ( is_null( $this->title ) ) {
			$this->error = "dtImport: Invalid title";
			return false;
		}

		$wikiPage = WikiPage::factory( $this->title );
		if ( !$wikiPage ) {
			$this->error = 'dtImport: Wiki page not found "' . $this->title->getPrefixedDBkey() . '"';
			return false;
		}

		$for_pages_that_exist = $this->params['for_pages_that_exist'];
		if ( $for_pages_that_exist == 'skip' && $this->title->exists() ) {
			return true;
		}

		// Change global $wgUser variable to the one specified by
		// the job only for the extent of this import.
		global $wgUser;
		$actual_user = $wgUser;
		$wgUser = User::newFromId( $this->params['user_id'] );
		try {
			$text = $this->params['text'];
			if ( $this->title->exists() ) {
				if ( $for_pages_that_exist == 'append' ) {
					$existingText = ContentHandler::getContentText( $wikiPage->getContent() );
					$text = $existingText . "\n" . $text;
				} elseif ( $for_pages_that_exist == 'merge' ) {
					$existingPageStructure = DTPageStructure::newFromTitle( $this->title );
					$newPageStructure = new DTPageStructure;
					$newPageStructure->parsePageContents( $text );
					$existingPageStructure->mergeInPageStructure( $newPageStructure );
					$text = $existingPageStructure->toWikitext();
				}
				// otherwise, $for_pages_that_exist == 'overwrite'
			}
			$edit_summary = $this->params['edit_summary'];
			$new_content = new WikitextContent( $text );
			// It's strange that doEditContent() doesn't
			// automatically attach the 'bot' flag when the user
			// is a bot...
			$flags = $wgUser->isAllowed( 'bot' ) ? EDIT_FORCE_BOT : 0;
			$wikiPage->doEditContent( $new_content, $edit_summary, $flags );
		} finally {
			// Put the original user back even when the edit throws, so
			// that the rest of the process does not keep running as the
			// importing user.
			$wgUser = $actual_user;
		}
		return true;
	}
}

// Source file: www/wiki/extensions/DataTransfer/includes/DT_Page.php

/**
 * Class holding the data of a page to be imported
 *
 * @author Yaron Koren
 */
class DTPage {
	// Name of the page, as given to setName()
	public $mName;
	// Map of template name => ( field name => value )
	public $mTemplates;
	// Text placed after all template calls by createText()
	public $mFreeText;

	public function __construct() {
		$this->mTemplates = array();
	}

	public function setName( $name ) {
		$this->mName = $name;
	}

	public function getName() {
		return $this->mName;
	}

	/**
	 * Sets one field of one template call, creating the template entry
	 * on first use. A later value for the same field replaces the
	 * earlier one.
	 */
	public function addTemplateField( $template_name, $field_name, $value ) {
		if ( !array_key_exists( $template_name, $this->mTemplates ) ) {
			$this->mTemplates[$template_name] = array();
		}
		$this->mTemplates[$template_name][$field_name] = $value;
	}

	public function setFreeText( $free_text ) {
		$this->mFreeText = $free_text;
	}

	/**
	 * Builds the wikitext of the page: one template call per stored
	 * template (empty field values are left out), followed by the free
	 * text.
	 *
	 * @return string
	 */
	public function createText() {
		$text = "";
		foreach ( $this->mTemplates as $template_name => $fields ) {
			$fieldsAdded = false;
			$text .= '{{' . $template_name;
			foreach ( $fields as $field_name => $val ) {
				// Compare as strings so that a numeric 0 counts as
				// a real value on every PHP version; only genuinely
				// empty values are skipped.
				if ( (string)$val !== '' ) {
					$text .= "\n|$field_name=$val";
					$fieldsAdded = true;
				}
			}
			if ( $fieldsAdded ) {
				$text .= "\n";
			}
			$text .= '}}' . "\n";
		}
		$text .= $this->mFreeText;
		return $text;
	}
}

// Source file: www/wiki/extensions/DataTransfer/includes/DT_PageComponent.php

/**
 * Class that represents a single "component" of a page - either a template
 * or a piece of free text.
 *
 * @author Yaron Koren
 * @author DataTransfer
 */
class DTPageComponent {
	public $mIsTemplate = false;
	public $mTemplateName;
	// Next index handed out by addUnnamedField(); shared by all
	// instances and reset to 1 by newTemplate()
	public static $mUnnamedFieldCounter;
	// Field name => value; a value may also be an array of
	// DTPageComponent objects (see toWikitext() and toXML())
	public $mFields;
	public $mFreeText;
	// Next ID handed out by newFreeText(); shared by all instances
	public static $mFreeTextIDCounter = 1;
	public $mFreeTextID;

	/**
	 * Creates a template component with no fields, and restarts the
	 * numbering of unnamed fields at 1.
	 *
	 * @param string $templateName Surrounding whitespace is trimmed
	 * @return DTPageComponent
	 */
	public static function newTemplate( $templateName ) {
		$dtPageComponent = new DTPageComponent();
		$dtPageComponent->mTemplateName = trim( $templateName );
		$dtPageComponent->mIsTemplate = true;
		$dtPageComponent->mFields = array();
		self::$mUnnamedFieldCounter = 1;
		return $dtPageComponent;
	}

	/**
	 * Creates a free-text component and gives it the next free-text ID.
	 *
	 * @param string $freeText
	 * @return DTPageComponent
	 */
	public static function newFreeText( $freeText ) {
		$dtPageComponent = new DTPageComponent();
		$dtPageComponent->mIsTemplate = false;
		$dtPageComponent->mFreeText = $freeText;
		$dtPageComponent->mFreeTextID = self::$mFreeTextIDCounter++;
		return $dtPageComponent;
	}

	public function addNamedField( $fieldName, $fieldValue ) {
		$this->mFields[trim( $fieldName )] = trim( $fieldValue );
	}

	public function addUnnamedField( $fieldValue ) {
		$fieldName = self::$mUnnamedFieldCounter++;
		$this->mFields[$fieldName] = trim( $fieldValue );
	}

	/**
	 * Turns one field value into wikitext. A value that holds an array
	 * of sub-components is serialised component by component, one per
	 * line, rather than being cast to the literal string "Array".
	 *
	 * @param string|array $fieldValue
	 * @return string
	 */
	private static function fieldValueToWikitext( $fieldValue ) {
		if ( !is_array( $fieldValue ) ) {
			return $fieldValue;
		}
		$parts = array();
		foreach ( $fieldValue as $subComponent ) {
			$parts[] = $subComponent->toWikitext();
		}
		return implode( "\n", $parts );
	}

	/**
	 * Returns the wikitext for this component: the template call with
	 * all of its fields, or the free text as it is.
	 *
	 * @return string
	 */
	public function toWikitext() {
		if ( !$this->mIsTemplate ) {
			return $this->mFreeText;
		}

		$wikitext = '{{' . $this->mTemplateName;
		foreach ( $this->mFields as $fieldName => $fieldValue ) {
			$valueText = self::fieldValueToWikitext( $fieldValue );
			// Only true integer keys are positional parameters. A name
			// such as "1.5" or "01" stays a string key and has to keep
			// its "name=" part, otherwise the name would be lost.
			if ( is_int( $fieldName ) ) {
				$wikitext .= '|' . $valueText;
			} else {
				$wikitext .= "\n|$fieldName=$valueText";
			}
		}
		$wikitext .= "\n}}";
		return $wikitext;
	}

	/**
	 * Returns the XML for this component.
	 *
	 * @param bool $isSimplified If true, template and field names are
	 *  used as the tag names; otherwise generic tags with a name
	 *  attribute are used
	 * @return string
	 */
	public function toXML( $isSimplified ) {
		global $wgDataTransferViewXMLParseFields;
		global $wgDataTransferViewXMLParseFreeText;
		global $wgParser, $wgTitle;

		if ( $this->mIsTemplate ) {
			global $wgContLang;
			$namespace_labels = $wgContLang->getNamespaces();
			$template_label = $namespace_labels[NS_TEMPLATE];
			$field_str = str_replace( ' ', '_', wfMessage( 'dt_xml_field' )->inContentLanguage()->text() );
			$name_str = str_replace( ' ', '_', wfMessage( 'dt_xml_name' )->inContentLanguage()->text() );

			$bodyXML = '';
			foreach ( $this->mFields as $fieldName => $fieldValue ) {
				// If this field itself holds template calls,
				// get the XML for those calls.
				if ( is_array( $fieldValue ) ) {
					$fieldValueXML = '';
					foreach ( $fieldValue as $subComponent ) {
						$fieldValueXML .= $subComponent->toXML( $isSimplified );
					}
				} elseif ( $wgDataTransferViewXMLParseFields ) {
					// Avoid table of contents and "edit" links
					$fieldValue = $wgParser->parse( "__NOTOC__ __NOEDITSECTION__\n" . $fieldValue, $wgTitle, new ParserOptions() )->getText();
				}

				if ( $isSimplified ) {
					if ( is_numeric( $fieldName ) ) {
						// add "Field" to the beginning of the tag name, since
						// XML tags that are simply numbers aren't allowed
						$fieldTag = $field_str . '_' . $fieldName;
					} else {
						$fieldTag = str_replace( ' ', '_', trim( $fieldName ) );
					}
					$attrs = null;
				} else {
					$fieldTag = $field_str;
					$attrs = array( $name_str => $fieldName );
				}
				if ( is_array( $fieldValue ) ) {
					// Already XML, so it must not be escaped again
					$bodyXML .= Xml::tags( $fieldTag, $attrs, $fieldValueXML );
				} else {
					$bodyXML .= Xml::element( $fieldTag, $attrs, $fieldValue );
				}
			}

			if ( $isSimplified ) {
				$templateName = str_replace( ' ', '_', $this->mTemplateName );
				return Xml::tags( $templateName, null, $bodyXML );
			} else {
				return Xml::tags( $template_label, array( $name_str => $this->mTemplateName ), $bodyXML );
			}
		} else {
			$free_text_str = str_replace( ' ', '_', wfMessage( 'dt_xml_freetext' )->inContentLanguage()->text() );
			if ( $wgDataTransferViewXMLParseFreeText ) {
				$freeText = $this->mFreeText;
				// Undo the escaping that happened before: curly
				// brackets that were turned into numeric character
				// references become real brackets again.
				$freeText = str_replace( array( '&#123;', '&#125;' ), array( '{', '}' ), $freeText );
				// Get rid of table of contents.
				$mw = MagicWord::get( 'toc' );
				if ( $mw->match( $freeText ) ) {
					$freeText = $mw->replace( '', $freeText );
				}
				// Avoid "edit" links.
				$freeText = $wgParser->parse( "__NOTOC__ __NOEDITSECTION__\n" . $freeText, $wgTitle, new ParserOptions() )->getText();
			} else {
				$freeText = $this->mFreeText;
			}
			return Xml::element( $free_text_str, array( 'id' => $this->mFreeTextID ), $freeText );
		}
	}
}
// Source file: www/wiki/extensions/DataTransfer/includes/DT_PageStructure.php

/**
 * Class that holds the structure of a single wiki page. It is used for both
 * turning wikitext into XML, and vice versa.
 *
 * @author Yaron Koren
 * @ingroup DataTransfer
 */
class DTPageStructure {
	public $mPageTitle;
	// List of DTPageComponent objects, in page order
	public $mComponents = array();

	/**
	 * Appends a component to the page.
	 *
	 * NOTE(review): this also resets the shared free-text ID counter to 1
	 * on every call, which looks like it gives every free-text component
	 * created afterwards the ID 1 - confirm whether that is intended.
	 */
	public function addComponent( $dtPageComponent ) {
		$this->mComponents[] = $dtPageComponent;
		DTPageComponent::$mFreeTextIDCounter = 1;
	}

	/**
	 * Reads the current text of a page and parses it into components.
	 *
	 * @param mixed $pageTitle Title of the page, handed to WikiPage::factory()
	 * @return DTPageStructure
	 */
	public static function newFromTitle( $pageTitle ) {
		$pageStructure = new DTPageStructure();
		$pageStructure->mPageTitle = $pageTitle;

		$wiki_page = WikiPage::factory( $pageTitle );
		$page_contents = ContentHandler::getContentText( $wiki_page->getContent() );

		$pageStructure->parsePageContents( $page_contents );

		// Now, go through the field values and see if any of them
		// hold template calls - if any of them do, parse the value
		// as if it's the full contents of a page, and add the
		// resulting "components" to that field.
		foreach ( $pageStructure->mComponents as $pageComponent ) {
			if ( !$pageComponent->mIsTemplate ) {
				continue;
			}
			foreach ( $pageComponent->mFields as $fieldName => $fieldValue ) {
				if ( strpos( $fieldValue, '{{' ) === false ) {
					continue;
				}
				$dummyPageStructure = new DTPageStructure();
				$dummyPageStructure->parsePageContents( $fieldValue );
				$pageComponent->mFields[$fieldName] = $dummyPageStructure->mComponents;
			}
		}
		return $pageStructure;
	}

	/**
	 * Parses the contents of a wiki page, turning template calls into
	 * an array of DTPageComponent objects.
	 *
	 * Constructs that use curly brackets but are not template calls are
	 * first neutralised by replacing their brackets with the numeric
	 * character references &#123; and &#125;, so that the character
	 * scanner below treats them as ordinary text.
	 *
	 * @param string $page_contents Wikitext to parse
	 */
	public function parsePageContents( $page_contents ) {
		// escape out variables like "{{PAGENAME}}"
		$page_contents = str_replace( '{{PAGENAME}}', '&#123;&#123;PAGENAME&#125;&#125;', $page_contents );
		// escape out parser functions
		$page_contents = preg_replace( '/{{(#.+)}}/', '&#123;&#123;$1&#125;&#125;', $page_contents );
		// escape out transclusions, and calls like "DEFAULTSORT"
		$page_contents = preg_replace( '/{{(.*:.+)}}/', '&#123;&#123;$1&#125;&#125;', $page_contents );
		// escape out variable names
		$page_contents = str_replace( '{{{', '&#123;&#123;&#123;', $page_contents );
		$page_contents = str_replace( '}}}', '&#125;&#125;&#125;', $page_contents );
		// escape out tables
		$page_contents = str_replace( '{|', '&#123;|', $page_contents );
		$page_contents = str_replace( '|}', '|&#125;', $page_contents );

		// traverse the page contents, one character at a time
		$uncompleted_curly_brackets = 0;
		$free_text = "";
		$template_name = "";
		$field_name = "";
		$field_value = "";
		$field_has_name = false;
		$creating_template_name = false;
		$creating_field_name = false;
		$curTemplate = null;
		$length = strlen( $page_contents );
		$lastIndex = $length - 1;
		for ( $i = 0; $i < $length; $i++ ) {
			$c = $page_contents[$i];
			if ( $uncompleted_curly_brackets == 0 ) {
				// Outside of any brackets: collect free text until a
				// bracket opens or the text ends.
				if ( $c == "{" || $i == $lastIndex ) {
					if ( $i == $lastIndex ) {
						$free_text .= $c;
					}
					$uncompleted_curly_brackets++;
					$free_text = trim( $free_text );
					if ( $free_text != "" ) {
						$freeTextComponent = DTPageComponent::newFreeText( $free_text );
						$this->addComponent( $freeTextComponent );
						$free_text = "";
					}
				} else {
					$free_text .= $c;
				}
			} elseif ( $uncompleted_curly_brackets == 1 ) {
				if ( $c == "{" ) {
					$uncompleted_curly_brackets++;
					$creating_template_name = true;
				} elseif ( $c == "}" ) {
					$uncompleted_curly_brackets--;
					// Two closing brackets in a row end a template call
					if ( $page_contents[$i - 1] == '}' && $curTemplate !== null ) {
						$this->addComponent( $curTemplate );
					}
					$template_name = "";
				}
			} elseif ( $uncompleted_curly_brackets == 2 ) {
				if ( $c == "}" ) {
					$uncompleted_curly_brackets--;
				}
				if ( $c == "{" ) {
					$uncompleted_curly_brackets++;
					$field_value .= $c;
				} elseif ( $creating_template_name ) {
					if ( $c == "|" || $c == "}" ) {
						$curTemplate = DTPageComponent::newTemplate( $template_name );
						$creating_template_name = false;
						$creating_field_name = true;
					} else {
						$template_name .= $c;
					}
				} elseif ( $c == "|" || $c == "}" ) {
					// End of a field
					if ( $field_has_name ) {
						$curTemplate->addNamedField( $field_name, $field_value );
						$field_value = "";
						$field_has_name = false;
					} else {
						// "field_name" is actually the value
						$curTemplate->addUnnamedField( $field_name );
					}
					$creating_field_name = true;
					$field_name = "";
				} elseif ( $c == "=" ) {
					// handle case of = in value
					if ( !$creating_field_name ) {
						$field_value .= $c;
					} else {
						$creating_field_name = false;
						$field_has_name = true;
					}
				} elseif ( $creating_field_name ) {
					$field_name .= $c;
				} else {
					$field_value .= $c;
				}
			} else { // greater than 2
				// Inside a nested call: keep everything, brackets
				// included, as part of the current field value.
				if ( $c == "}" ) {
					$uncompleted_curly_brackets--;
				} elseif ( $c == "{" ) {
					$uncompleted_curly_brackets++;
				}
				$field_value .= $c;
			}
		}
	}

	/**
	 * Helper function for mergeInPageStructure().
	 *
	 * @return array Names of the templates called exactly once in this page
	 */
	private function getSingleInstanceTemplates() {
		$instancesPerTemplate = array();
		foreach ( $this->mComponents as $pageComponent ) {
			if ( !$pageComponent->mIsTemplate ) {
				continue;
			}
			$templateName = $pageComponent->mTemplateName;
			if ( array_key_exists( $templateName, $instancesPerTemplate ) ) {
				$instancesPerTemplate[$templateName]++;
			} else {
				$instancesPerTemplate[$templateName] = 1;
			}
		}

		$singleInstanceTemplates = array();
		foreach ( $instancesPerTemplate as $templateName => $instances ) {
			if ( $instances == 1 ) {
				$singleInstanceTemplates[] = $templateName;
			}
		}
		return $singleInstanceTemplates;
	}

	/**
	 * @return int|null Index of the first template component with the
	 *  given name, or null if there is none
	 */
	private function getIndexOfTemplateName( $templateName ) {
		foreach ( $this->mComponents as $i => $pageComponent ) {
			// Free-text components have no template name; skip them so
			// that they can never match an empty name.
			if ( $pageComponent->mIsTemplate && $pageComponent->mTemplateName == $templateName ) {
				return $i;
			}
		}
		return null;
	}

	/**
	 * Used when doing a "merge" in an XML or CSV import.
	 *
	 * @param DTPageStructure $secondPageStructure Page whose components
	 *  are merged into this one
	 */
	public function mergeInPageStructure( $secondPageStructure ) {
		// If there are any templates that have one instance in both
		// pages, replace values for their fields with values from
		// the second page.
		$singleInstanceTemplatesHere = $this->getSingleInstanceTemplates();
		$singleInstanceTemplatesThere = $secondPageStructure->getSingleInstanceTemplates();
		$singleInstanceTemplatesInBoth = array_intersect( $singleInstanceTemplatesHere, $singleInstanceTemplatesThere );
		foreach ( $secondPageStructure->mComponents as $pageComponent ) {
			$indexOfThisTemplate = null;
			if ( $pageComponent->mIsTemplate &&
				in_array( $pageComponent->mTemplateName, $singleInstanceTemplatesInBoth )
			) {
				$indexOfThisTemplate = $this->getIndexOfTemplateName( $pageComponent->mTemplateName );
			}
			if ( $indexOfThisTemplate === null ) {
				// Everything else is simply added at the end
				$this->mComponents[] = $pageComponent;
				continue;
			}
			foreach ( $pageComponent->mFields as $fieldName => $fieldValue ) {
				$this->mComponents[$indexOfThisTemplate]->mFields[$fieldName] = $fieldValue;
			}
		}
	}

	/**
	 * @return string Wikitext of all components, one per line, with
	 *  surrounding whitespace trimmed
	 */
	public function toWikitext() {
		$wikitext = '';
		foreach ( $this->mComponents as $pageComponent ) {
			$wikitext .= $pageComponent->toWikitext() . "\n";
		}
		return trim( $wikitext );
	}

	/**
	 * @param bool $isSimplified If true, the page ID and title become
	 *  child elements; otherwise they become attributes of the page tag
	 * @return string
	 */
	public function toXML( $isSimplified ) {
		$page_str = str_replace( ' ', '_', wfMessage( 'dt_xml_page' )->inContentLanguage()->text() );
		$id_str = str_replace( ' ', '_', wfMessage( 'dt_xml_id' )->inContentLanguage()->text() );
		$title_str = str_replace( ' ', '_', wfMessage( 'dt_xml_title' )->inContentLanguage()->text() );

		$bodyXML = '';
		foreach ( $this->mComponents as $pageComponent ) {
			$bodyXML .= $pageComponent->toXML( $isSimplified );
		}
		$articleID = $this->mPageTitle->getArticleID();
		$pageName = $this->mPageTitle->getText();
		if ( $isSimplified ) {
			// Xml::element() escapes its contents; a page name holding
			// "&" would otherwise produce XML that is not well-formed.
			return Xml::tags(
				$page_str,
				null,
				Xml::element( $id_str, null, $articleID ) .
				Xml::element( $title_str, null, $pageName ) .
				$bodyXML
			);
		} else {
			return Xml::tags( $page_str, array( $id_str => $articleID, $title_str => $pageName ), $bodyXML );
		}
	}

}

// Source file: www/wiki/extensions/DataTransfer/includes/DT_Utils.php

/**
 * Utility functions for the Data Transfer extension.
 *
 * @author Yaron Koren
 */
class DTUtils {

	/**
	 * @return string HTML paragraph holding the "importing" message
	 */
	static function printImportingMessage() {
		return "\t" . Xml::element( 'p', null, wfMessage( 'dt_import_importing' )->text() ) . "\n";
	}

	/**
	 * @param string $fileType Passed as the parameter of the
	 *  'dt_import_selectfile' message
	 * @return string HTML for the file upload field, ending in a rule
	 */
	static function printFileSelector( $fileType ) {
		$text = "\n\t" . Xml::element( 'p', null, wfMessage( 'dt_import_selectfile', $fileType )->text() ) . "\n";
		$text .= "\t" . '<p><input type="file" name="file_name" size="25" /></p>' . "\n";
		$text .= "\t" . '<hr style="margin: 10px 0 10px 0" />' . "\n";
		return $text;
	}

	/**
	 * @return string HTML for the radio buttons that choose what happens
	 *  to pages that already exist; "overwrite" is pre-selected
	 */
	static function printExistingPagesHandling() {
		$text = "\t" . Xml::element( 'p', null, wfMessage( 'dt_import_forexisting' )->text() ) . "\n";

		// Each entry: radio value, message key of its label, and the
		// whitespace that follows the line break after the label.
		$options = array(
			array( 'overwrite', 'dt_import_overwriteexisting', "\n" ),
			array( 'merge', 'dt_import_mergeintoexisting', "\n\t" ),
			array( 'skip', 'dt_import_skipexisting', "\n" ),
			array( 'append', 'dt_import_appendtoexisting', "\n\t" ),
		);
		$existingPagesText = "\n";
		foreach ( $options as $option ) {
			list( $value, $messageKey, $trailer ) = $option;
			$attrs = array(
				'type' => 'radio',
				'name' => 'pagesThatExist',
				'value' => $value,
			);
			if ( $value === 'overwrite' ) {
				$attrs['checked'] = 'checked';
			}
			// The label goes into raw HTML (Xml::tags() below does not
			// escape), so the message has to be HTML-escaped here.
			$existingPagesText .= "\t" . Xml::element( 'input', $attrs ) . "\n" .
				"\t" . wfMessage( $messageKey )->escaped() . "<br />" . $trailer;
		}

		$text .= "\t" . Xml::tags( 'p', null, $existingPagesText ) . "\n";
		$text .= "\t" . '<hr style="margin: 10px 0 10px 0" />' . "\n";
		return $text;
	}

	/**
	 * @param string $fileType Passed as the parameter of the
	 *  'dt_import_editsummary' message, which is the default summary
	 * @return string HTML for the edit summary input and its description
	 */
	static function printImportSummaryInput( $fileType ) {
		$importSummaryText = "\t" . Xml::element( 'input',
			array(
				'type' => 'text',
				'id' => 'wpSummary', // ID is necessary for CSS formatting
				'class' => 'mw-summary',
				'name' => 'import_summary',
				'value' => wfMessage( 'dt_import_editsummary', $fileType )->inContentLanguage()->text()
			)
		) . "\n";
		// The description is placed into raw HTML, so escape it.
		return "\t" . Xml::tags( 'p', null,
			wfMessage( 'dt_import_summarydesc' )->escaped() . "\n" .
			$importSummaryText ) . "\n";
	}

	/**
	 * @return string HTML paragraph holding the submit button
	 */
	static function printSubmitButton() {
		$formSubmitText = Xml::element( 'input',
			array(
				'type' => 'submit',
				'name' => 'import_file',
				'value' => wfMessage( 'import-interwiki-submit' )->text()
			)
		);
		return "\t" . Xml::tags( 'p', null, $formSubmitText ) . "\n";
	}
}

// Source file: www/wiki/extensions/DataTransfer/includes/DT_WikiPage.php

/**
 * Class for representing a wiki page.
 *
 * @author Yaron Koren
 * @ingroup DataTransfer
 */
class DTWikiPage {
	private $mPageName = null;
	// Templates (DTWikiTemplate objects) and pieces of free text
	// (strings), in the order in which they were added
	private $mElements = array();

	public function __construct( $name ) {
		$this->mPageName = $name;
	}

	public function getName() {
		return $this->mPageName;
	}

	public function addTemplate( $template ) {
		$this->mElements[] = $template;
	}

	public function addFreeText( $free_text ) {
		$this->mElements[] = $free_text;
	}

	/**
	 * @return string Wikitext of the page: the text of every element,
	 *  concatenated in the order in which the elements were added
	 */
	public function createText() {
		$text = "";
		foreach ( $this->mElements as $elem ) {
			$text .= ( $elem instanceof DTWikiTemplate ) ? $elem->createText() : $elem;
		}
		return $text;
	}
}
// Source file: www/wiki/extensions/DataTransfer/includes/DT_WikiTemplate.php

/**
 * Class for representing a template call within a wiki page.
 *
 * @author Yaron Koren
 * @ingroup DataTransfer
 */
class DTWikiTemplate {
	private $mName = null;
	// Field name => value, in the order in which fields were added
	private $mFields = array();

	public function __construct( $name ) {
		$this->mName = $name;
	}

	/**
	 * Sets a field of the template call; a later value for the same
	 * name replaces the earlier one.
	 */
	public function addField( $name, $value ) {
		$this->mFields[$name] = $value;
	}

	/**
	 * Returns the wikitext of the template call, followed by a newline.
	 *
	 * Fields whose name is a plain integer are written as positional
	 * parameters ("|value"); every other field is written on a line of
	 * its own as "|name=value".
	 *
	 * @return string
	 */
	public function createText() {
		$multi_line_template = false;
		$text = '{{' . $this->mName;
		foreach ( $this->mFields as $field_name => $field_val ) {
			// PHP turns array keys such as "1" into integers, so
			// is_int() picks out exactly the positional parameters.
			// Names that merely look numeric ("1.5", "01", "1e3") are
			// named parameters and must keep their "name=" part.
			if ( is_int( $field_name ) ) {
				$text .= "|$field_val";
			} else {
				$text .= "\n|$field_name=$field_val";
				$multi_line_template = true;
			}
		}
		if ( $multi_line_template ) {
			$text .= "\n";
		}
		$text .= '}}' . "\n";
		return $text;
	}
}
// Source file: www/wiki/extensions/DataTransfer/includes/DT_XMLParser.php

/**
 * Class for parsing XML representing wiki pages and their template calls
 *
 * The parser is a state machine: each element handler checks the name of
 * the element that it is given and then installs the pair of handlers that
 * belongs to the next level (pages > page > template > field).
 *
 * @author Yaron Koren
 * @ingroup DataTransfer
 */
class DTXMLParser {
	public $mDebug = false;
	// Object that supplies the XML through readChunk() and atEnd()
	public $mSource = null;
	public $mCurFieldName = null;
	public $mCurFieldValue = '';
	public $mCurTemplate = null;
	public $mCurPage = null; // new DTWikiPage();
	// DTWikiPage objects of all pages that were closed so far
	public $mPages = array();
	// Counters that in_template() resets for every field; nothing in
	// this class reads them. Declared so that they are not created as
	// dynamic properties.
	public $workRevisionCount = 0;
	public $workSuccessCount = 0;
	public $uploadCount = 0;
	public $uploadSuccessCount = 0;

	public function __construct( $source ) {
		$this->mSource = $source;
	}

	/**
	 * Debugging hook; intentionally does nothing.
	 */
	public function debug( $text ) {
		// print "$text. ";
	}

	/**
	 * Prints an error message, HTML-escaped. Despite the name, nothing
	 * is thrown.
	 */
	public function throwXMLerror( $text ) {
		print htmlspecialchars( $text );
	}

	/**
	 * Reads the source chunk by chunk and feeds it to an XML parser;
	 * the parsed pages end up in $this->mPages.
	 */
	public function doParse() {
		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		do {
			$chunk = $this->mSource->readChunk();
			if ( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
				wfDebug( "DTXMLParser::doParse encountered XML parsing error: " .
					xml_error_string( xml_get_error_code( $parser ) ) . "\n" );
				// The parser cannot recover from an error, so there
				// is no point in feeding it further chunks.
				break;
			}
		} while ( $chunk !== false && !$this->mSource->atEnd() );
		xml_parser_free( $parser );
	}

	/**
	 * Handler that ignores whatever it is given.
	 */
	public function donothing( $parser, $x, $y = "" ) {
		# $this->debug( "donothing" );
	}

	/**
	 * Start handler for the root element, which should be the "pages"
	 * element.
	 */
	public function in_start( $parser, $name, $attribs ) {
		// $this->debug( "in_start $name" );
		$pages_str = str_replace( ' ', '_', wfMessage( 'dt_xml_pages' )->inContentLanguage()->text() );
		if ( $name != $pages_str ) {
			// $name comes from the imported file, so it has to be
			// escaped like every other error message of this class.
			$this->throwXMLerror( "Expected '$pages_str', got '$name'" );
		}
		xml_set_element_handler( $parser, "in_pages", "out_pages" );
	}

	/**
	 * Start handler inside the "pages" element: expects a "page"
	 * element with a title attribute and starts a new current page.
	 */
	public function in_pages( $parser, $name, $attribs ) {
		$this->debug( "in_pages $name" );
		$page_str = str_replace( ' ', '_', wfMessage( 'dt_xml_page' )->inContentLanguage()->text() );
		if ( $name != $page_str ) {
			$this->throwXMLerror( "Expected <$page_str>, got <$name>" );
			return;
		}
		$title_str = str_replace( ' ', '_', wfMessage( 'dt_xml_title' )->inContentLanguage()->text() );
		if ( !array_key_exists( $title_str, $attribs ) ) {
			$this->throwXMLerror( "'$title_str' attribute missing for page" );
			return;
		}
		$this->mCurPage = new DTWikiPage( $attribs[$title_str] );
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}

	public function out_pages( $parser, $name ) {
		$this->debug( "out_pages $name" );
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	/**
	 * Start handler inside a "category" element; behaves like
	 * in_pages(). Nothing in this class installs it.
	 */
	public function in_category( $parser, $name, $attribs ) {
		$this->debug( "in_category $name" );
		$page_str = str_replace( ' ', '_', wfMessage( 'dt_xml_page' )->inContentLanguage()->text() );
		if ( $name != $page_str ) {
			$this->throwXMLerror( "Expected <$page_str>, got <$name>" );
			return;
		}
		$title_str = str_replace( ' ', '_', wfMessage( 'dt_xml_title' )->inContentLanguage()->text() );
		if ( !array_key_exists( $title_str, $attribs ) ) {
			$this->throwXMLerror( "'$title_str' attribute missing for page" );
			return;
		}
		$this->mCurPage = new DTWikiPage( $attribs[$title_str] );
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}

	public function out_category( $parser, $name ) {
		$this->debug( "out_category $name" );
		if ( $name != "category" ) {
			$this->throwXMLerror( "Expected </category>, got </$name>" );
			return;
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	/**
	 * Start handler inside a "page" element: expects either a template
	 * element with a name attribute, or a free-text element.
	 */
	public function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		$template_str = str_replace( ' ', '_', wfMessage( 'dt_xml_template' )->inContentLanguage()->text() );
		$name_str = str_replace( ' ', '_', wfMessage( 'dt_xml_name' )->inContentLanguage()->text() );
		$free_text_str = str_replace( ' ', '_', wfMessage( 'dt_xml_freetext' )->inContentLanguage()->text() );
		if ( $name == $template_str ) {
			if ( !array_key_exists( $name_str, $attribs ) ) {
				$this->throwXMLerror( "'$name_str' attribute missing for template" );
				return;
			}
			$this->mCurTemplate = new DTWikiTemplate( $attribs[$name_str] );
			xml_set_element_handler( $parser, "in_template", "out_template" );
		} elseif ( $name == $free_text_str ) {
			xml_set_element_handler( $parser, "in_freetext", "out_freetext" );
			xml_set_character_data_handler( $parser, "freetext_value" );
		} else {
			$this->throwXMLerror( "Expected <$template_str>, got <$name>" );
		}
	}

	/**
	 * End handler for a "page" element: stores the finished page.
	 */
	public function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		$page_str = str_replace( ' ', '_', wfMessage( 'dt_xml_page' )->inContentLanguage()->text() );
		if ( $name != $page_str ) {
			$this->throwXMLerror( "Expected </$page_str>, got </$name>" );
			return;
		}
		$this->mPages[] = $this->mCurPage;
		xml_set_element_handler( $parser, "in_pages", "out_pages" );
	}

	/**
	 * Start handler inside a template element: expects a field element
	 * with a name attribute and starts collecting its value.
	 */
	public function in_template( $parser, $name, $attribs ) {
		$this->debug( "in_template $name" );
		$field_str = str_replace( ' ', '_', wfMessage( 'dt_xml_field' )->inContentLanguage()->text() );
		if ( $name != $field_str ) {
			$this->throwXMLerror( "Expected <$field_str>, got <$name>" );
			return;
		}
		$name_str = str_replace( ' ', '_', wfMessage( 'dt_xml_name' )->inContentLanguage()->text() );
		if ( !array_key_exists( $name_str, $attribs ) ) {
			$this->throwXMLerror( "'$name_str' attribute missing for field" );
			return;
		}
		$this->mCurFieldName = $attribs[$name_str];
		// $this->push( $name );
		$this->workRevisionCount = 0;
		$this->workSuccessCount = 0;
		$this->uploadCount = 0;
		$this->uploadSuccessCount = 0;
		xml_set_element_handler( $parser, "in_field", "out_field" );
		xml_set_character_data_handler( $parser, "field_value" );
	}

	/**
	 * End handler for a template element: adds the finished template
	 * to the current page.
	 */
	public function out_template( $parser, $name ) {
		$this->debug( "out_template $name" );
		$template_str = str_replace( ' ', '_', wfMessage( 'dt_xml_template' )->inContentLanguage()->text() );
		if ( $name != $template_str ) {
			$this->throwXMLerror( "Expected </$template_str>, got </$name>" );
			return;
		}
		$this->mCurPage->addTemplate( $this->mCurTemplate );
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}

	/**
	 * Start handler inside a field element; elements nested in a field
	 * are ignored.
	 */
	public function in_field( $parser, $name, $attribs ) {
		// xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	/**
	 * End handler for a field element: stores the collected value in
	 * the current template and clears the collector.
	 */
	public function out_field( $parser, $name ) {
		$this->debug( "out_field $name" );
		$field_str = str_replace( ' ', '_', wfMessage( 'dt_xml_field' )->inContentLanguage()->text() );
		if ( $name != $field_str ) {
			$this->throwXMLerror( "Expected </$field_str>, got </$name>" );
			return;
		}
		$this->mCurTemplate->addField( $this->mCurFieldName, $this->mCurFieldValue );
		$this->mCurFieldValue = '';
		xml_set_element_handler( $parser, "in_template", "out_template" );
	}

	/**
	 * Character data handler while inside a field; appends, since the
	 * text of one element may arrive in several pieces.
	 */
	public function field_value( $parser, $data ) {
		$this->mCurFieldValue .= $data;
	}

	/**
	 * Start handler inside a free-text element; nested elements are
	 * ignored.
	 */
	public function in_freetext( $parser, $name, $attribs ) {
		// xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	public function out_freetext( $parser, $name ) {
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}

	/**
	 * Character data handler while inside a free-text element; every
	 * piece is added to the current page as free text.
	 */
	public function freetext_value( $parser, $data ) {
		$this->mCurPage->addFreeText( $data );
	}
}