summaryrefslogtreecommitdiff
path: root/www/wiki/extensions/DataTransfer/includes
diff options
context:
space:
mode:
authorYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
committerYaco <franco@reevo.org>2020-06-04 11:01:00 -0300
commitfc7369835258467bf97eb64f184b93691f9a9fd5 (patch)
treedaabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/DataTransfer/includes
first commit
Diffstat (limited to 'www/wiki/extensions/DataTransfer/includes')
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_Hooks.php26
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_ImportJob.php69
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_Page.php56
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_PageComponent.php131
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_PageStructure.php243
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_Utils.php86
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_WikiPage.php40
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_WikiTemplate.php37
-rw-r--r--www/wiki/extensions/DataTransfer/includes/DT_XMLParser.php215
9 files changed, 903 insertions, 0 deletions
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_Hooks.php b/www/wiki/extensions/DataTransfer/includes/DT_Hooks.php
new file mode 100644
index 00000000..95999339
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_Hooks.php
@@ -0,0 +1,26 @@
+<?php
+/**
+ * Static functions called by various outside hooks.
+ *
+ * @author Yaron Koren
+ * @ingroup DataTransfer
+ */
+class DTHooks {
+
+	/**
+	 * Add links to the 'AdminLinks' special page, defined by the Admin Links
+	 * extension.
+	 *
+	 * Links to ViewXML, ImportXML and ImportCSV are always added to the
+	 * 'main' row of the import/export section; the ImportSpreadsheet link
+	 * is added only when the PHPExcel class is available.
+	 *
+	 * @param object $admin_links_tree tree object handed over by the hook;
+	 *   it must provide getSection()
+	 * @return bool always true
+	 */
+	public static function addToAdminLinks( $admin_links_tree ) {
+		$import_export_section = $admin_links_tree->getSection( wfMessage( 'adminlinks_importexport' )->text() );
+		// The section or its row may be absent (for instance if another
+		// handler removed it); leave the tree untouched instead of
+		// calling a method on null.
+		if ( !$import_export_section ) {
+			return true;
+		}
+		$main_row = $import_export_section->getRow( 'main' );
+		if ( !$main_row ) {
+			return true;
+		}
+
+		$special_pages = array( 'ViewXML', 'ImportXML', 'ImportCSV' );
+		if ( class_exists( 'PHPExcel' ) ) {
+			$special_pages[] = 'ImportSpreadsheet';
+		}
+		foreach ( $special_pages as $special_page ) {
+			$main_row->addItem( ALItem::newFromSpecialPage( $special_page ) );
+		}
+		return true;
+	}
+
+} \ No newline at end of file
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_ImportJob.php b/www/wiki/extensions/DataTransfer/includes/DT_ImportJob.php
new file mode 100644
index 00000000..67a31489
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_ImportJob.php
@@ -0,0 +1,69 @@
+<?php
+
+/**
+ * Background job to import a page into the wiki, for use by Data Transfer
+ *
+ * @author Yaron Koren
+ */
+class DTImportJob extends Job {
+
+	/**
+	 * @param object $title title object of the page to import
+	 * @param array|string $params job parameters; run() reads the keys
+	 *   'for_pages_that_exist', 'user_id', 'text' and 'edit_summary'
+	 * @param int $id job ID, passed through to the parent constructor
+	 */
+	function __construct( $title, $params = '', $id = 0 ) {
+		parent::__construct( 'dtImport', $title, $params, $id );
+	}
+
+	/**
+	 * Run a dtImport job.
+	 *
+	 * If the page already exists, the 'for_pages_that_exist' parameter
+	 * decides what happens: 'skip' leaves the page alone, 'append' adds
+	 * the new text after the existing text, 'merge' merges the template
+	 * calls of both texts, and anything else overwrites the page.
+	 *
+	 * @return boolean success; on failure $this->error holds the reason
+	 */
+	function run() {
+		if ( is_null( $this->title ) ) {
+			$this->error = "dtImport: Invalid title";
+			return false;
+		}
+
+		$wikiPage = WikiPage::factory( $this->title );
+		if ( !$wikiPage ) {
+			$this->error = 'dtImport: Wiki page not found "' . $this->title->getPrefixedDBkey() . '"';
+			return false;
+		}
+
+		$for_pages_that_exist = $this->params['for_pages_that_exist'];
+		if ( $for_pages_that_exist == 'skip' && $this->title->exists() ) {
+			return true;
+		}
+
+		// Change global $wgUser variable to the one specified by
+		// the job only for the extent of this import.
+		global $wgUser;
+		$actual_user = $wgUser;
+		$wgUser = User::newFromId( $this->params['user_id'] );
+		try {
+			$text = $this->params['text'];
+			if ( $this->title->exists() ) {
+				if ( $for_pages_that_exist == 'append' ) {
+					$existingText = ContentHandler::getContentText( $wikiPage->getContent() );
+					$text = $existingText . "\n" . $text;
+				} elseif ( $for_pages_that_exist == 'merge' ) {
+					$existingPageStructure = DTPageStructure::newFromTitle( $this->title );
+					$newPageStructure = new DTPageStructure;
+					$newPageStructure->parsePageContents( $text );
+					$existingPageStructure->mergeInPageStructure( $newPageStructure );
+					$text = $existingPageStructure->toWikitext();
+				}
+				// otherwise, $for_pages_that_exist == 'overwrite'
+			}
+			$edit_summary = $this->params['edit_summary'];
+			$new_content = new WikitextContent( $text );
+			// It's strange that doEditContent() doesn't
+			// automatically attach the 'bot' flag when the user
+			// is a bot...
+			$flags = $wgUser->isAllowed( 'bot' ) ? EDIT_FORCE_BOT : 0;
+			$wikiPage->doEditContent( $new_content, $edit_summary, $flags );
+		} catch ( Exception $e ) {
+			// Never leave the impersonated user in the global once
+			// this job is done, even if the edit blew up.
+			$wgUser = $actual_user;
+			throw $e;
+		}
+
+		$wgUser = $actual_user;
+		return true;
+	}
+}
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_Page.php b/www/wiki/extensions/DataTransfer/includes/DT_Page.php
new file mode 100644
index 00000000..eb3c92fc
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_Page.php
@@ -0,0 +1,56 @@
+<?php
+/**
+ * Holds the data of a single page that is about to be imported: its name,
+ * the fields of each of its template calls, and its free text.
+ *
+ * @author Yaron Koren
+ */
+
+class DTPage {
+	public $mName;
+	public $mTemplates;
+	public $mFreeText;
+
+	public function __construct() {
+		$this->mTemplates = array();
+	}
+
+	/**
+	 * @param string $name name of the page
+	 */
+	function setName( $name ) {
+		$this->mName = $name;
+	}
+
+	/**
+	 * @return string|null the name given to setName(), if any
+	 */
+	function getName() {
+		return $this->mName;
+	}
+
+	/**
+	 * Stores one field value for the given template; a later call with the
+	 * same template and field names replaces the earlier value.
+	 */
+	function addTemplateField( $template_name, $field_name, $value ) {
+		// Assigning through both keys creates the per-template array
+		// the first time a template name is seen.
+		$this->mTemplates[$template_name][$field_name] = $value;
+	}
+
+	/**
+	 * @param string $free_text text to place after all template calls
+	 */
+	function setFreeText( $free_text ) {
+		$this->mFreeText = $free_text;
+	}
+
+	/**
+	 * Builds the wikitext of the page: one call per template, each
+	 * non-empty field on its own line, followed by the free text.
+	 *
+	 * @return string
+	 */
+	function createText() {
+		$output = '';
+		foreach ( $this->mTemplates as $templateName => $templateFields ) {
+			$fieldLines = '';
+			foreach ( $templateFields as $fieldName => $fieldValue ) {
+				if ( $fieldValue == '' ) {
+					// blank values are left out of the call
+					continue;
+				}
+				$fieldLines .= "\n|$fieldName=$fieldValue";
+			}
+			if ( $fieldLines !== '' ) {
+				// closing brackets go on their own line
+				$fieldLines .= "\n";
+			}
+			$output .= '{{' . $templateName . $fieldLines . '}}' . "\n";
+		}
+		return $output . $this->mFreeText;
+	}
+}
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_PageComponent.php b/www/wiki/extensions/DataTransfer/includes/DT_PageComponent.php
new file mode 100644
index 00000000..24c91719
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_PageComponent.php
@@ -0,0 +1,131 @@
+<?php
+/**
+ * Class that represents a single "component" of a page - either a template
+ * or a piece of free text.
+ *
+ * @author Yaron Koren
+ * @author DataTransfer
+ */
+class DTPageComponent {
+	var $mIsTemplate = false;
+	var $mTemplateName;
+	// Number given to the next unnamed field; shared by all instances and
+	// reset to 1 by newTemplate().
+	static $mUnnamedFieldCounter;
+	// Field name => value. A value is either a string or an array of
+	// DTPageComponent objects (nested calls).
+	var $mFields;
+	var $mFreeText;
+	// ID given to the next free-text component; shared by all instances.
+	static $mFreeTextIDCounter = 1;
+	var $mFreeTextID;
+
+	/**
+	 * Creates a template component with no fields.
+	 *
+	 * @param string $templateName template name; surrounding whitespace
+	 *   is trimmed
+	 * @return DTPageComponent
+	 */
+	public static function newTemplate( $templateName ) {
+		$dtPageComponent = new DTPageComponent();
+		$dtPageComponent->mTemplateName = trim( $templateName );
+		$dtPageComponent->mIsTemplate = true;
+		$dtPageComponent->mFields = array();
+		self::$mUnnamedFieldCounter = 1;
+		return $dtPageComponent;
+	}
+
+	/**
+	 * Creates a free-text component, giving it the next free-text ID.
+	 *
+	 * @param string $freeText
+	 * @return DTPageComponent
+	 */
+	public static function newFreeText( $freeText ) {
+		$dtPageComponent = new DTPageComponent();
+		$dtPageComponent->mIsTemplate = false;
+		$dtPageComponent->mFreeText = $freeText;
+		$dtPageComponent->mFreeTextID = self::$mFreeTextIDCounter++;
+		return $dtPageComponent;
+	}
+
+	/**
+	 * Sets a named field; both name and value are trimmed.
+	 */
+	public function addNamedField( $fieldName, $fieldValue ) {
+		$this->mFields[trim( $fieldName )] = trim( $fieldValue );
+	}
+
+	/**
+	 * Sets an unnamed field, keyed by the next value of the shared
+	 * unnamed-field counter; the value is trimmed.
+	 */
+	public function addUnnamedField( $fieldValue ) {
+		$fieldName = self::$mUnnamedFieldCounter++;
+		$this->mFields[$fieldName] = trim( $fieldValue );
+	}
+
+	/**
+	 * Returns the wikitext for this component: the free text as-is, or
+	 * a template call with unnamed fields inline and each named field on
+	 * its own line.
+	 *
+	 * @return string
+	 */
+	public function toWikitext() {
+		if ( !$this->mIsTemplate ) {
+			return $this->mFreeText;
+		}
+
+		$wikitext = '{{' . $this->mTemplateName;
+		foreach ( $this->mFields as $fieldName => $fieldValue ) {
+			// A field that holds nested components has to be turned
+			// back into wikitext first; concatenating the array
+			// directly would write the literal string "Array".
+			if ( is_array( $fieldValue ) ) {
+				$fieldValue = self::componentsToWikitext( $fieldValue );
+			}
+			if ( is_numeric( $fieldName ) ) {
+				$wikitext .= '|' . $fieldValue;
+			} else {
+				$wikitext .= "\n|$fieldName=$fieldValue";
+			}
+		}
+		$wikitext .= "\n}}";
+		return $wikitext;
+	}
+
+	/**
+	 * Joins the wikitext of a list of components with single spaces.
+	 *
+	 * @param array $components DTPageComponent objects
+	 * @return string
+	 */
+	private static function componentsToWikitext( array $components ) {
+		$parts = array();
+		foreach ( $components as $component ) {
+			$parts[] = $component->toWikitext();
+		}
+		return implode( ' ', $parts );
+	}
+
+	/**
+	 * Returns the XML for this component.
+	 *
+	 * @param bool $isSimplified if true, template and field names are
+	 *   used as the tag names; otherwise generic tags carrying a "name"
+	 *   attribute are used
+	 * @return string
+	 */
+	public function toXML( $isSimplified ) {
+		global $wgDataTransferViewXMLParseFields;
+		global $wgDataTransferViewXMLParseFreeText;
+		global $wgParser, $wgTitle;
+
+		if ( $this->mIsTemplate ) {
+			global $wgContLang;
+			$namespace_labels = $wgContLang->getNamespaces();
+			$template_label = $namespace_labels[NS_TEMPLATE];
+			$field_str = str_replace( ' ', '_', wfMessage( 'dt_xml_field' )->inContentLanguage()->text() );
+			$name_str = str_replace( ' ', '_', wfMessage( 'dt_xml_name' )->inContentLanguage()->text() );
+
+			$bodyXML = '';
+			foreach ( $this->mFields as $fieldName => $fieldValue ) {
+				// If this field itself holds template calls,
+				// get the XML for those calls.
+				if ( is_array( $fieldValue ) ) {
+					$fieldValueXML = '';
+					foreach ( $fieldValue as $subComponent ) {
+						$fieldValueXML .= $subComponent->toXML( $isSimplified );
+					}
+				} elseif ( $wgDataTransferViewXMLParseFields ) {
+					// Avoid table of contents and "edit" links
+					$fieldValue = $wgParser->parse( "__NOTOC__ __NOEDITSECTION__\n" . $fieldValue, $wgTitle, new ParserOptions() )->getText();
+				}
+
+				if ( $isSimplified ) {
+					if ( is_numeric( $fieldName ) ) {
+						// add "Field" to the beginning of the field name, since
+						// XML tags that are simply numbers aren't allowed
+						$fieldTag = $field_str . '_' . $fieldName;
+					} else {
+						$fieldTag = str_replace( ' ', '_', trim( $fieldName ) );
+					}
+					$attrs = null;
+				} else {
+					$fieldTag = $field_str;
+					$attrs = array( $name_str => $fieldName );
+				}
+				if ( is_array( $fieldValue ) ) {
+					// nested XML is already markup, so it must not be escaped
+					$bodyXML .= Xml::tags( $fieldTag, $attrs, $fieldValueXML );
+				} else {
+					$bodyXML .= Xml::element( $fieldTag, $attrs, $fieldValue );
+				}
+			}
+
+			if ( $isSimplified ) {
+				$templateName = str_replace( ' ', '_', $this->mTemplateName );
+				return Xml::tags( $templateName, null, $bodyXML );
+			} else {
+				return Xml::tags( $template_label, array( $name_str => $this->mTemplateName ), $bodyXML );
+			}
+		} else {
+			$free_text_str = str_replace( ' ', '_', wfMessage( 'dt_xml_freetext' )->inContentLanguage()->text() );
+			if ( $wgDataTransferViewXMLParseFreeText ) {
+				$freeText = $this->mFreeText;
+				// Undo the escaping that happened before.
+				$freeText = str_replace( array( '&#123;', '&#125;' ), array( '{', '}' ), $freeText );
+				// Get rid of table of contents.
+				$mw = MagicWord::get( 'toc' );
+				if ( $mw->match( $freeText ) ) {
+					$freeText = $mw->replace( '', $freeText );
+				}
+				// Avoid "edit" links.
+				$freeText = $wgParser->parse( "__NOTOC__ __NOEDITSECTION__\n" . $freeText, $wgTitle, new ParserOptions() )->getText();
+			} else {
+				$freeText = $this->mFreeText;
+			}
+			return Xml::element( $free_text_str, array( 'id' => $this->mFreeTextID ), $freeText );
+		}
+	}
+} \ No newline at end of file
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_PageStructure.php b/www/wiki/extensions/DataTransfer/includes/DT_PageStructure.php
new file mode 100644
index 00000000..253cd99e
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_PageStructure.php
@@ -0,0 +1,243 @@
+<?php
+
+/**
+ * Class that holds the structure of a single wiki page. It is used for both
+ * turning wikitext into XML, and vice versa.
+ *
+ * @author Yaron Koren
+ * @ingroup DataTransfer
+ */
+class DTPageStructure {
+ var $mPageTitle;
+ var $mComponents = array();
+
+	/**
+	 * Appends a component to this page structure and sets the shared
+	 * free-text ID counter of DTPageComponent back to 1.
+	 *
+	 * @param DTPageComponent $dtPageComponent
+	 */
+	function addComponent( $dtPageComponent ) {
+		DTPageComponent::$mFreeTextIDCounter = 1;
+		$this->mComponents[] = $dtPageComponent;
+	}
+
+	/**
+	 * Builds the page structure for an existing wiki page by parsing its
+	 * current text.
+	 *
+	 * @param object $pageTitle title object of the page to read
+	 * @return DTPageStructure
+	 */
+	public static function newFromTitle( $pageTitle ) {
+		$structure = new DTPageStructure();
+		$structure->mPageTitle = $pageTitle;
+
+		$content = WikiPage::factory( $pageTitle )->getContent();
+		$structure->parsePageContents( ContentHandler::getContentText( $content ) );
+
+		// Any field value that itself contains a template call gets
+		// parsed as though it were a whole page, and the field is then
+		// set to the resulting list of components.
+		foreach ( $structure->mComponents as $component ) {
+			if ( !$component->mIsTemplate ) {
+				continue;
+			}
+			foreach ( $component->mFields as $name => $value ) {
+				if ( strpos( $value, '{{' ) === false ) {
+					continue;
+				}
+				$nestedStructure = new DTPageStructure();
+				$nestedStructure->parsePageContents( $value );
+				$component->mFields[$name] = $nestedStructure->mComponents;
+			}
+		}
+		return $structure;
+	}
+
+ /**
+ * Parses the contents of a wiki page, turning template calls into
+ * an array of DTPageComponent objects.
+ *
+ * Before scanning, the curly brackets of several constructs are replaced
+ * with the HTML entities &#123; / &#125; so that they are not treated as
+ * template calls: {{PAGENAME}}, parser functions ({{#...}}), calls that
+ * contain a colon, triple-bracket variables, and table markup.
+ *
+ * The text is then traversed one character at a time, tracking the
+ * number of unclosed curly brackets. Text outside of any brackets is
+ * trimmed and, if non-empty, added as a free-text component; each
+ * template call becomes a template component whose fields are set with
+ * addNamedField() / addUnnamedField(). Characters at a bracket depth
+ * above 2 are copied into the current field value unchanged.
+ *
+ * Nothing is returned; components are appended via addComponent().
+ *
+ * @param string $page_contents wikitext to parse
+ */
+ public function parsePageContents( $page_contents ) {
+ // escape out variables like "{{PAGENAME}}"
+ $page_contents = str_replace( '{{PAGENAME}}', '&#123;&#123;PAGENAME&#125;&#125;', $page_contents );
+ // escape out parser functions
+ $page_contents = preg_replace( '/{{(#.+)}}/', '&#123;&#123;$1&#125;&#125;', $page_contents );
+ // escape out transclusions, and calls like "DEFAULTSORT"
+ $page_contents = preg_replace( '/{{(.*:.+)}}/', '&#123;&#123;$1&#125;&#125;', $page_contents );
+ // escape out variable names
+ $page_contents = str_replace( '{{{', '&#123;&#123;&#123;', $page_contents );
+ $page_contents = str_replace( '}}}', '&#125;&#125;&#125;', $page_contents );
+ // escape out tables
+ $page_contents = str_replace( '{|', '&#123;|', $page_contents );
+ $page_contents = str_replace( '|}', '|&#125;', $page_contents );
+
+ // traverse the page contents, one character at a time
+ $uncompleted_curly_brackets = 0; // number of "{" opened but not yet closed
+ $free_text = "";
+ $template_name = "";
+ $field_name = "";
+ $field_value = "";
+ $field_has_name = false;
+ for ( $i = 0; $i < strlen( $page_contents ); $i++ ) {
+ $c = $page_contents[$i];
+ if ( $uncompleted_curly_brackets == 0 ) {
+ if ( $c == "{" || $i == strlen( $page_contents ) - 1 ) {
+ if ( $i == strlen( $page_contents ) - 1 )
+ $free_text .= $c;
+ $uncompleted_curly_brackets++;
+ $free_text = trim( $free_text );
+ if ( $free_text != "" ) {
+ $freeTextComponent = DTPageComponent::newFreeText( $free_text );
+ $this->addComponent( $freeTextComponent );
+ $free_text = "";
+ }
+ } elseif ( $c == "{" ) {
+ // do nothing - NOTE(review): looks unreachable, since the condition above already matches "{"
+ } else {
+ $free_text .= $c;
+ }
+ } elseif ( $uncompleted_curly_brackets == 1 ) {
+ if ( $c == "{" ) {
+ $uncompleted_curly_brackets++;
+ $creating_template_name = true;
+ } elseif ( $c == "}" ) {
+ $uncompleted_curly_brackets--;
+ // is this needed?
+ // if ($field_name != "") {
+ // $field_name = "";
+ // }
+ if ( $page_contents[$i - 1] == '}' ) {
+ $this->addComponent( $curTemplate );
+ }
+ $template_name = "";
+ }
+ } elseif ( $uncompleted_curly_brackets == 2 ) {
+ if ( $c == "}" ) {
+ $uncompleted_curly_brackets--;
+ }
+ if ( $c == "{" ) {
+ $uncompleted_curly_brackets++;
+ $field_value .= $c;
+ } else {
+ if ( $creating_template_name ) {
+ if ( $c == "|" || $c == "}" ) {
+ $curTemplate = DTPageComponent::newTemplate( $template_name );
+ $template_name = str_replace( ' ', '_', trim( $template_name ) );
+ $template_name = str_replace( '&', '&amp;', $template_name );
+ $creating_template_name = false;
+ $creating_field_name = true;
+ $field_id = 1; // NOTE(review): not read anywhere in this method
+ } else {
+ $template_name .= $c;
+ }
+ } else {
+ if ( $c == "|" || $c == "}" ) {
+ if ( $field_has_name ) {
+ $curTemplate->addNamedField( $field_name, $field_value );
+ $field_value = "";
+ $field_has_name = false;
+ } else {
+ // "field_name" is actually the value
+ $curTemplate->addUnnamedField( $field_name );
+ }
+ $creating_field_name = true;
+ $field_name = "";
+ } elseif ( $c == "=" ) {
+ // handle case of = in value
+ if ( ! $creating_field_name ) {
+ $field_value .= $c;
+ } else {
+ $creating_field_name = false;
+ $field_has_name = true;
+ }
+ } elseif ( $creating_field_name ) {
+ $field_name .= $c;
+ } else {
+ $field_value .= $c;
+ }
+ }
+ }
+ } else { // greater than 2: only track the depth, and copy the character into the field value
+ if ( $c == "}" ) {
+ $uncompleted_curly_brackets--;
+ } elseif ( $c == "{" ) {
+ $uncompleted_curly_brackets++;
+ }
+ $field_value .= $c;
+ }
+ }
+ }
+
+	/**
+	 * Helper function for mergeInPageStructure().
+	 *
+	 * @return array names of the templates that are called exactly once
+	 *   in this page structure, in order of first appearance
+	 */
+	private function getSingleInstanceTemplates() {
+		$callCounts = array();
+		foreach ( $this->mComponents as $component ) {
+			if ( !$component->mIsTemplate ) {
+				continue;
+			}
+			$name = $component->mTemplateName;
+			$callCounts[$name] = isset( $callCounts[$name] ) ? $callCounts[$name] + 1 : 1;
+		}
+
+		// keep only the names whose tally is 1
+		return array_keys( $callCounts, 1 );
+	}
+
+	/**
+	 * Finds the first component whose template name equals (loosely
+	 * compared) the given name.
+	 *
+	 * @param string $templateName
+	 * @return int|null index within $this->mComponents, or null if no
+	 *   component matches
+	 */
+	private function getIndexOfTemplateName( $templateName ) {
+		$foundIndex = null;
+		foreach ( $this->mComponents as $position => $component ) {
+			if ( $component->mTemplateName == $templateName ) {
+				$foundIndex = $position;
+				break;
+			}
+		}
+		return $foundIndex;
+	}
+
+	/**
+	 * Used when doing a "merge" in an XML or CSV import.
+	 *
+	 * A template that is called exactly once in both page structures has
+	 * its field values here overwritten by (or extended with) the values
+	 * from the second structure. Every other component of the second
+	 * structure is appended to this one.
+	 *
+	 * @param DTPageStructure $secondPageStructure structure to merge in
+	 */
+	public function mergeInPageStructure( $secondPageStructure ) {
+		$sharedSingleTemplates = array_intersect(
+			$this->getSingleInstanceTemplates(),
+			$secondPageStructure->getSingleInstanceTemplates()
+		);
+
+		foreach ( $secondPageStructure->mComponents as $incoming ) {
+			if ( !in_array( $incoming->mTemplateName, $sharedSingleTemplates ) ) {
+				$this->mComponents[] = $incoming;
+				continue;
+			}
+			$targetIndex = $this->getIndexOfTemplateName( $incoming->mTemplateName );
+			foreach ( $incoming->mFields as $fieldName => $fieldValue ) {
+				$this->mComponents[$targetIndex]->mFields[$fieldName] = $fieldValue;
+			}
+		}
+	}
+
+	/**
+	 * Returns the wikitext of all components, one per line, with leading
+	 * and trailing whitespace removed from the result.
+	 *
+	 * @return string
+	 */
+	public function toWikitext() {
+		$pieces = array();
+		foreach ( $this->mComponents as $component ) {
+			$pieces[] = $component->toWikitext();
+		}
+		return trim( implode( "\n", $pieces ) );
+	}
+
+	/**
+	 * Returns the XML for this page: a page element holding the page's
+	 * ID and title followed by the XML of every component.
+	 *
+	 * @param bool $isSimplified if true, ID and title are written as
+	 *   child elements; otherwise as attributes of the page element
+	 * @return string
+	 */
+	public function toXML( $isSimplified ) {
+		$page_str = str_replace( ' ', '_', wfMessage( 'dt_xml_page' )->inContentLanguage()->text() );
+		$id_str = str_replace( ' ', '_', wfMessage( 'dt_xml_id' )->inContentLanguage()->text() );
+		$title_str = str_replace( ' ', '_', wfMessage( 'dt_xml_title' )->inContentLanguage()->text() );
+
+		$bodyXML = '';
+		foreach ( $this->mComponents as $pageComponent ) {
+			$bodyXML .= $pageComponent->toXML( $isSimplified );
+		}
+		$articleID = $this->mPageTitle->getArticleID();
+		$pageName = $this->mPageTitle->getText();
+		if ( $isSimplified ) {
+			// ID and title are plain text, so they go through
+			// Xml::element(), which escapes them; Xml::tags() would
+			// write a "&" in a page title out raw and break the XML.
+			$headerXML = Xml::element( $id_str, null, (string)$articleID ) .
+				Xml::element( $title_str, null, $pageName );
+			return Xml::tags( $page_str, null, $headerXML . $bodyXML );
+		} else {
+			return Xml::tags( $page_str, array( $id_str => $articleID, $title_str => $pageName ), $bodyXML );
+		}
+	}
+
+}
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_Utils.php b/www/wiki/extensions/DataTransfer/includes/DT_Utils.php
new file mode 100644
index 00000000..03af8e6a
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_Utils.php
@@ -0,0 +1,86 @@
+<?php
+
+/**
+ * Utility functions for the Data Transfer extension.
+ *
+ * Each print* function returns a fragment of HTML for an import form.
+ *
+ * @author Yaron Koren
+ */
+class DTUtils {
+
+	/**
+	 * @return string paragraph holding the "importing" message
+	 */
+	static function printImportingMessage() {
+		return "\t" . Xml::element( 'p', null, wfMessage( 'dt_import_importing' )->text() ) . "\n";
+	}
+
+	/**
+	 * @param string $fileType file type named in the prompt message
+	 * @return string prompt, file input named "file_name", and a rule
+	 */
+	static function printFileSelector( $fileType ) {
+		$text = "\n\t" . Xml::element( 'p', null, wfMessage( 'dt_import_selectfile', $fileType )->text() ) . "\n";
+		$text .= <<<END
+ <p><input type="file" name="file_name" size="25" /></p>
+
+END;
+		$text .= "\t" . '<hr style="margin: 10px 0 10px 0" />' . "\n";
+		return $text;
+	}
+
+	/**
+	 * Returns the radio buttons (named "pagesThatExist") for choosing
+	 * what to do with pages that already exist: overwrite (preselected),
+	 * merge, skip or append.
+	 *
+	 * @return string
+	 */
+	static function printExistingPagesHandling() {
+		$text = "\t" . Xml::element( 'p', null, wfMessage( 'dt_import_forexisting' )->text() ) . "\n";
+		// The labels are placed straight into the HTML, so they are
+		// fetched with escaped() rather than text().
+		$existingPagesText = "\n\t" .
+			self::existingPagesRadio( 'overwrite', true ) . "\n" .
+			"\t" . wfMessage( 'dt_import_overwriteexisting' )->escaped() . "<br />" . "\n" .
+			"\t" . self::existingPagesRadio( 'merge' ) . "\n" .
+			"\t" . wfMessage( 'dt_import_mergeintoexisting' )->escaped() . "<br />" . "\n\t" .
+			"\t" . self::existingPagesRadio( 'skip' ) . "\n" .
+			"\t" . wfMessage( 'dt_import_skipexisting' )->escaped() . "<br />" . "\n" .
+			"\t" . self::existingPagesRadio( 'append' ) . "\n" .
+			"\t" . wfMessage( 'dt_import_appendtoexisting' )->escaped() . "<br />" . "\n\t";
+		$text .= "\t" . Xml::tags( 'p', null, $existingPagesText ) . "\n";
+		$text .= "\t" . '<hr style="margin: 10px 0 10px 0" />' . "\n";
+		return $text;
+	}
+
+	/**
+	 * Builds one "pagesThatExist" radio button.
+	 *
+	 * @param string $value value submitted when this button is chosen
+	 * @param bool $checked whether the button is preselected
+	 * @return string
+	 */
+	private static function existingPagesRadio( $value, $checked = false ) {
+		$attribs = array(
+			'type' => 'radio',
+			'name' => 'pagesThatExist',
+			'value' => $value,
+		);
+		if ( $checked ) {
+			$attribs['checked'] = 'checked';
+		}
+		return Xml::element( 'input', $attribs );
+	}
+
+	/**
+	 * @param string $fileType file type named in the default edit summary
+	 * @return string description plus a text input named "import_summary"
+	 */
+	static function printImportSummaryInput( $fileType ) {
+		$importSummaryText = "\t" . Xml::element( 'input',
+			array(
+				'type' => 'text',
+				'id' => 'wpSummary', // ID is necessary for CSS formatting
+				'class' => 'mw-summary',
+				'name' => 'import_summary',
+				'value' => wfMessage( 'dt_import_editsummary', $fileType )->inContentLanguage()->text()
+			)
+		) . "\n";
+		// Xml::tags() does not escape its contents, so the description
+		// is fetched with escaped().
+		return "\t" . Xml::tags( 'p', null,
+			wfMessage( 'dt_import_summarydesc' )->escaped() . "\n" .
+			$importSummaryText ) . "\n";
+	}
+
+	/**
+	 * @return string paragraph holding the submit button, named "import_file"
+	 */
+	static function printSubmitButton() {
+		$formSubmitText = Xml::element( 'input',
+			array(
+				'type' => 'submit',
+				'name' => 'import_file',
+				'value' => wfMessage( 'import-interwiki-submit' )->text()
+			)
+		);
+		return "\t" . Xml::tags( 'p', null, $formSubmitText ) . "\n";
+	}
+}
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_WikiPage.php b/www/wiki/extensions/DataTransfer/includes/DT_WikiPage.php
new file mode 100644
index 00000000..1bfc848f
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_WikiPage.php
@@ -0,0 +1,40 @@
+<?php
+/**
+ * Represents a wiki page as an ordered list of elements, each being either
+ * a template call or a piece of free text.
+ *
+ * @author Yaron Koren
+ * @ingroup DataTransfer
+ */
+
+class DTWikiPage {
+	private $mPageName = null;
+	private $mElements = array();
+
+	/**
+	 * @param string $name name of the page
+	 */
+	public function __construct( $name ) {
+		$this->mPageName = $name;
+	}
+
+	/**
+	 * @return string name given to the constructor
+	 */
+	function getName() {
+		return $this->mPageName;
+	}
+
+	/**
+	 * Appends a template call to the page.
+	 */
+	function addTemplate( $template ) {
+		$this->mElements[] = $template;
+	}
+
+	/**
+	 * Appends a piece of free text to the page.
+	 */
+	function addFreeText( $free_text ) {
+		$this->mElements[] = $free_text;
+	}
+
+	/**
+	 * Returns the page text: all elements in the order they were added,
+	 * with DTWikiTemplate objects rendered through their createText().
+	 *
+	 * @return string
+	 */
+	function createText() {
+		$pieces = array();
+		foreach ( $this->mElements as $element ) {
+			$pieces[] = ( $element instanceof DTWikiTemplate ) ? $element->createText() : $element;
+		}
+		return implode( '', $pieces );
+	}
+} \ No newline at end of file
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_WikiTemplate.php b/www/wiki/extensions/DataTransfer/includes/DT_WikiTemplate.php
new file mode 100644
index 00000000..9ab2864e
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_WikiTemplate.php
@@ -0,0 +1,37 @@
+<?php
+/**
+ * Represents one template call within a wiki page: the template's name
+ * plus its fields.
+ *
+ * @author Yaron Koren
+ * @ingroup DataTransfer
+ */
+
+class DTWikiTemplate {
+	private $mName = null;
+	private $mFields = array();
+
+	/**
+	 * @param string $name name of the template
+	 */
+	public function __construct( $name ) {
+		$this->mName = $name;
+	}
+
+	/**
+	 * Sets a field; a later call with the same name replaces the value.
+	 */
+	function addField( $name, $value ) {
+		$this->mFields[$name] = $value;
+	}
+
+	/**
+	 * Returns the wikitext of the call, ending in a newline. Fields with
+	 * numeric names are written inline as bare values; all others go on
+	 * their own line as name=value, in which case the closing brackets
+	 * are put on their own line too.
+	 *
+	 * @return string
+	 */
+	function createText() {
+		$hasNamedField = false;
+		$text = '{{' . $this->mName;
+		foreach ( $this->mFields as $fieldName => $fieldValue ) {
+			$isNamed = !is_numeric( $fieldName );
+			$text .= $isNamed ? "\n|$fieldName=$fieldValue" : "|$fieldValue";
+			$hasNamedField = $hasNamedField || $isNamed;
+		}
+		return $text . ( $hasNamedField ? "\n" : '' ) . '}}' . "\n";
+	}
+} \ No newline at end of file
diff --git a/www/wiki/extensions/DataTransfer/includes/DT_XMLParser.php b/www/wiki/extensions/DataTransfer/includes/DT_XMLParser.php
new file mode 100644
index 00000000..259e6f5b
--- /dev/null
+++ b/www/wiki/extensions/DataTransfer/includes/DT_XMLParser.php
@@ -0,0 +1,215 @@
+<?php
+/**
+ * Class for parsing XML representing wiki pages and their template calls
+ *
+ * The parser works as a state machine: each element handler checks the tag
+ * it was given and then installs the handlers for the next expected level
+ * (pages > page > template / free text > field). Completed pages are
+ * collected in $mPages as DTWikiPage objects.
+ *
+ * @author Yaron Koren
+ * @ingroup DataTransfer
+ */
+
+class DTXMLParser {
+	var $mDebug = false;
+	var $mSource = null;
+	var $mCurFieldName = null;
+	var $mCurFieldValue = '';
+	var $mCurTemplate = null;
+	var $mCurPage = null; // new DTWikiPage();
+	var $mPages = array();
+	// Counters that in_template() resets to 0; nothing in this class reads
+	// them. Declared so that they are not created as dynamic properties.
+	var $workRevisionCount = 0;
+	var $workSuccessCount = 0;
+	var $uploadCount = 0;
+	var $uploadSuccessCount = 0;
+
+	/**
+	 * @param object $source object to read the XML from; it must provide
+	 *   readChunk() and atEnd()
+	 */
+	function __construct( $source ) {
+		$this->mSource = $source;
+	}
+
+	function debug( $text ) {
+		// print "$text. ";
+	}
+
+	/**
+	 * Reports a problem with the XML by printing the HTML-escaped message.
+	 */
+	function throwXMLerror( $text ) {
+		print htmlspecialchars( $text );
+	}
+
+	/**
+	 * Returns the content-language text of a message with spaces turned
+	 * into underscores, i.e. in the form used for tag and attribute names.
+	 *
+	 * @param string $messageKey
+	 * @return string
+	 */
+	private function getXMLName( $messageKey ) {
+		return str_replace( ' ', '_', wfMessage( $messageKey )->inContentLanguage()->text() );
+	}
+
+	/**
+	 * Reads the source chunk by chunk and parses it, filling $mPages.
+	 * A parse failure is written to the debug log and ends the parsing.
+	 */
+	function doParse() {
+		$parser = xml_parser_create( "UTF-8" );
+
+		# case folding violates XML standard, turn it off
+		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
+
+		xml_set_object( $parser, $this );
+		xml_set_element_handler( $parser, "in_start", "" );
+
+		do {
+			$chunk = $this->mSource->readChunk();
+			if ( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
+				wfDebug( "DTXMLParser::doParse encountered XML parsing error: " .
+					xml_error_string( xml_get_error_code( $parser ) ) .
+					" at line " . xml_get_current_line_number( $parser ) . "\n" );
+				// The parser cannot recover from an error, so feeding
+				// it further chunks would only repeat the failure.
+				break;
+			}
+		} while ( $chunk !== false && !$this->mSource->atEnd() );
+		xml_parser_free( $parser );
+	}
+
+	function donothing( $parser, $x, $y = "" ) {
+		# $this->debug( "donothing" );
+	}
+
+	/**
+	 * Start handler for the root element, which should be the "pages" tag.
+	 */
+	function in_start( $parser, $name, $attribs ) {
+		// $this->debug( "in_start $name" );
+		$pages_str = $this->getXMLName( 'dt_xml_pages' );
+		if ( $name != $pages_str ) {
+			// $name comes from the imported file, so it is reported
+			// through throwXMLerror(), which escapes it.
+			$this->throwXMLerror( "Expected '$pages_str', got '$name'" );
+		}
+		xml_set_element_handler( $parser, "in_pages", "out_pages" );
+	}
+
+	/**
+	 * Start handler within the "pages" element; expects a "page" tag.
+	 */
+	function in_pages( $parser, $name, $attribs ) {
+		$this->debug( "in_pages $name" );
+		$this->startPage( $parser, $name, $attribs );
+	}
+
+	function out_pages( $parser, $name ) {
+		$this->debug( "out_pages $name" );
+		xml_set_element_handler( $parser, "donothing", "donothing" );
+	}
+
+	/**
+	 * Start handler within a "category" element; expects a "page" tag.
+	 */
+	function in_category( $parser, $name, $attribs ) {
+		$this->debug( "in_category $name" );
+		$this->startPage( $parser, $name, $attribs );
+	}
+
+	/**
+	 * Begins a new current page from a "page" start tag, which must carry
+	 * the title attribute, and switches to the page-level handlers.
+	 */
+	private function startPage( $parser, $name, $attribs ) {
+		$page_str = $this->getXMLName( 'dt_xml_page' );
+		if ( $name != $page_str ) {
+			$this->throwXMLerror( "Expected <$page_str>, got <$name>" );
+			return;
+		}
+		$title_str = $this->getXMLName( 'dt_xml_title' );
+		if ( !array_key_exists( $title_str, $attribs ) ) {
+			$this->throwXMLerror( "'$title_str' attribute missing for page" );
+			return;
+		}
+		$this->mCurPage = new DTWikiPage( $attribs[$title_str] );
+		xml_set_element_handler( $parser, "in_page", "out_page" );
+	}
+
+	function out_category( $parser, $name ) {
+		$this->debug( "out_category $name" );
+		if ( $name != "category" ) {
+			$this->throwXMLerror( "Expected </category>, got </$name>" );
+			return;
+		}
+		xml_set_element_handler( $parser, "donothing", "donothing" );
+	}
+
+	/**
+	 * Start handler within a "page" element; expects either a template
+	 * tag (with the name attribute) or a free-text tag.
+	 */
+	function in_page( $parser, $name, $attribs ) {
+		$this->debug( "in_page $name" );
+		$template_str = $this->getXMLName( 'dt_xml_template' );
+		$name_str = $this->getXMLName( 'dt_xml_name' );
+		$free_text_str = $this->getXMLName( 'dt_xml_freetext' );
+		if ( $name == $template_str ) {
+			if ( array_key_exists( $name_str, $attribs ) ) {
+				$this->mCurTemplate = new DTWikiTemplate( $attribs[$name_str] );
+				xml_set_element_handler( $parser, "in_template", "out_template" );
+			} else {
+				$this->throwXMLerror( "'$name_str' attribute missing for template" );
+			}
+		} elseif ( $name == $free_text_str ) {
+			xml_set_element_handler( $parser, "in_freetext", "out_freetext" );
+			xml_set_character_data_handler( $parser, "freetext_value" );
+		} else {
+			$this->throwXMLerror( "Expected <$template_str>, got <$name>" );
+		}
+	}
+
+	/**
+	 * End handler for a "page" element: stores the finished page.
+	 */
+	function out_page( $parser, $name ) {
+		$this->debug( "out_page $name" );
+		$page_str = $this->getXMLName( 'dt_xml_page' );
+		if ( $name != $page_str ) {
+			$this->throwXMLerror( "Expected </$page_str>, got </$name>" );
+			return;
+		}
+		$this->mPages[] = $this->mCurPage;
+		xml_set_element_handler( $parser, "in_pages", "out_pages" );
+	}
+
+	/**
+	 * Start handler within a template element; expects a field tag
+	 * carrying the name attribute.
+	 */
+	function in_template( $parser, $name, $attribs ) {
+		$this->debug( "in_template $name" );
+		$field_str = $this->getXMLName( 'dt_xml_field' );
+		if ( $name != $field_str ) {
+			$this->throwXMLerror( "Expected <$field_str>, got <$name>" );
+			return;
+		}
+		$name_str = $this->getXMLName( 'dt_xml_name' );
+		if ( !array_key_exists( $name_str, $attribs ) ) {
+			$this->throwXMLerror( "'$name_str' attribute missing for field" );
+			return;
+		}
+		$this->mCurFieldName = $attribs[$name_str];
+		// The character handler stays installed between fields, so
+		// whitespace found between two field elements has been added
+		// to the buffer; discard it before this field's value arrives.
+		$this->mCurFieldValue = '';
+		// $this->push( $name );
+		$this->workRevisionCount = 0;
+		$this->workSuccessCount = 0;
+		$this->uploadCount = 0;
+		$this->uploadSuccessCount = 0;
+		xml_set_element_handler( $parser, "in_field", "out_field" );
+		xml_set_character_data_handler( $parser, "field_value" );
+	}
+
+	/**
+	 * End handler for a template element: adds the finished template to
+	 * the current page.
+	 */
+	function out_template( $parser, $name ) {
+		$this->debug( "out_template $name" );
+		$template_str = $this->getXMLName( 'dt_xml_template' );
+		if ( $name != $template_str ) {
+			$this->throwXMLerror( "Expected </$template_str>, got </$name>" );
+			return;
+		}
+		$this->mCurPage->addTemplate( $this->mCurTemplate );
+		xml_set_element_handler( $parser, "in_page", "out_page" );
+	}
+
+	function in_field( $parser, $name, $attribs ) {
+		// xml_set_element_handler( $parser, "donothing", "donothing" );
+	}
+
+	/**
+	 * End handler for a field element: adds the collected value to the
+	 * current template.
+	 */
+	function out_field( $parser, $name ) {
+		$this->debug( "out_field $name" );
+		$field_str = $this->getXMLName( 'dt_xml_field' );
+		if ( $name != $field_str ) {
+			$this->throwXMLerror( "Expected </$field_str>, got </$name>" );
+			return;
+		}
+		$this->mCurTemplate->addField( $this->mCurFieldName, $this->mCurFieldValue );
+		$this->mCurFieldValue = '';
+		xml_set_element_handler( $parser, "in_template", "out_template" );
+	}
+
+	/**
+	 * Character handler for fields; collects the value piece by piece.
+	 */
+	function field_value( $parser, $data ) {
+		$this->mCurFieldValue .= $data;
+	}
+
+	function in_freetext( $parser, $name, $attribs ) {
+		// xml_set_element_handler( $parser, "donothing", "donothing" );
+	}
+
+	function out_freetext( $parser, $name ) {
+		xml_set_element_handler( $parser, "in_page", "out_page" );
+	}
+
+	/**
+	 * Character handler for free text; each piece of character data is
+	 * added to the current page as its own free-text element.
+	 */
+	function freetext_value( $parser, $data ) {
+		$this->mCurPage->addFreeText( $data );
+	}
+}