diff options
author | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
---|---|---|
committer | Yaco <franco@reevo.org> | 2020-06-04 11:01:00 -0300 |
commit | fc7369835258467bf97eb64f184b93691f9a9fd5 (patch) | |
tree | daabd60089d2dd76d9f5fb416b005fbe159c799d /www/wiki/extensions/DataTransfer/specials |
first commit
Diffstat (limited to 'www/wiki/extensions/DataTransfer/specials')
4 files changed, 612 insertions, 0 deletions
<?php
/**
 * File: www/wiki/extensions/DataTransfer/specials/DT_ImportCSV.php
 *
 * Lets the user import a CSV file to turn into wiki pages
 *
 * @author Yaron Koren
 */

class DTImportCSV extends SpecialPage {

	/**
	 * Constructor
	 *
	 * @param string $name Name under which the special page is registered
	 */
	public function __construct( $name = 'ImportCSV' ) {
		parent::__construct( $name );
	}

	/**
	 * Tells MediaWiki that this page modifies the wiki.
	 *
	 * @return bool Always true
	 */
	public function doesWrites() {
		return true;
	}

	/**
	 * Entry point: shows the upload form, or, when the form was
	 * submitted ("import_file" is present in the request), runs the import.
	 *
	 * @param string|null $query Subpage part of the URL (unused)
	 * @throws PermissionsError If the user lacks the 'datatransferimport' right
	 */
	public function execute( $query ) {
		$this->setHeaders();

		if ( !$this->getUser()->isAllowed( 'datatransferimport' ) ) {
			throw new PermissionsError( 'datatransferimport' );
		}

		if ( $this->getRequest()->getCheck( 'import_file' ) ) {
			$text = $this->importFromUploadAndModifyPages();
		} else {
			$text = $this->printForm();
		}

		$out = $this->getOutput();
		$out->addModuleStyles( 'ext.datatransfer' );
		$out->addHTML( $text );
	}

	/**
	 * Reads the uploaded file, turns it into a set of pages and queues
	 * one import job per page.
	 *
	 * @return string HTML describing the outcome (progress message, then
	 *  either an error or the success summary)
	 */
	protected function importFromUploadAndModifyPages() {
		$request = $this->getRequest();

		$text = DTUtils::printImportingMessage();
		$uploadResult = ImportStreamSource::newFromUpload( "file_name" );

		if ( !$uploadResult->isOK() ) {
			$text .= $this->getOutput()->parse( $uploadResult->getWikiText() );
			return $text;
		}

		$source = $uploadResult->value;
		$encoding = $request->getVal( 'encoding' );
		$pages = array();

		// importFromFile() returns null on success, or an error message
		// that is already safe to embed in HTML.
		$error_msg = $this->importFromFile( $source->mHandle, $encoding, $pages );
		if ( !is_null( $error_msg ) ) {
			$text .= $error_msg;
			return $text;
		}

		$importSummary = $request->getVal( 'import_summary' );
		$forPagesThatExist = $request->getVal( 'pagesThatExist' );

		// Called through $this (not self::) since modifyPages() is an
		// instance method.
		$text .= $this->modifyPages( $pages, $importSummary, $forPagesThatExist );

		return $text;
	}

	/**
	 * Builds the upload form: file selector, encoding selector, handling
	 * of existing pages, edit summary and submit button.
	 *
	 * @return string HTML of the form
	 */
	protected function printForm() {
		$formText = DTUtils::printFileSelector( $this->getFiletype() );
		$utf8OptionText = "\t" . Xml::element( 'option',
			array(
				'selected' => 'selected',
				'value' => 'utf8'
			), 'UTF-8' ) . "\n";
		$utf16OptionText = "\t" . Xml::element( 'option',
			array(
				'value' => 'utf16'
			), 'UTF-16' ) . "\n";
		$encodingSelectText = Xml::tags( 'select',
			array( 'name' => 'encoding' ),
			"\n" . $utf8OptionText . $utf16OptionText . "\t" ) . "\n\t";
		// Xml::tags() does not escape its contents, so the label must be
		// escaped here.
		$formText .= "\t" . Xml::tags( 'p', null, $this->msg( 'dt_import_encodingtype', 'CSV' )->escaped() . " " . $encodingSelectText ) . "\n";
		$formText .= "\t" . '<hr style="margin: 10px 0 10px 0" />' . "\n";
		$formText .= DTUtils::printExistingPagesHandling();
		$formText .= DTUtils::printImportSummaryInput( $this->getFiletype() );
		$formText .= DTUtils::printSubmitButton();
		$text = "\t" . Xml::tags( 'form',
			array(
				'enctype' => 'multipart/form-data',
				'action' => '',
				'method' => 'post'
			), $formText ) . "\n";
		return $text;
	}

	/**
	 * Parses an open CSV file into rows and hands them to
	 * importFromArray(). The handle is closed before returning.
	 *
	 * @param resource|null $csv_file Open, readable handle of the uploaded file
	 * @param string|null $encoding 'utf16' to convert from UTF-16 first;
	 *  anything else reads the file as-is
	 * @param array &$pages Receives one DTPage object per data row
	 * @return string|null Null on success, else an HTML-safe error message
	 */
	protected function importFromFile( $csv_file, $encoding, &$pages ) {
		if ( is_null( $csv_file ) ) {
			return wfMessage( 'emptyfile' )->escaped();
		}

		$table = array();
		if ( $encoding == 'utf16' ) {
			// Change encoding to UTF-8. fgetcsv() needs a stream, so the
			// converted text goes through a temporary file.
			$csv_string = stream_get_contents( $csv_file );
			$utf8_string = ( $csv_string === false ) ? false : iconv( 'UTF-16', 'UTF-8', $csv_string );
			$tempfile = ( $utf8_string === false ) ? false : tmpfile();
			// If conversion or temp file creation failed, $table stays
			// empty and is reported as an empty file below.
			if ( $tempfile !== false ) {
				fwrite( $tempfile, $utf8_string );
				fseek( $tempfile, 0 );
				$table = $this->readCSVRows( $tempfile );
				fclose( $tempfile );
			}
		} else {
			$table = $this->readCSVRows( $csv_file );
		}
		fclose( $csv_file );

		if ( count( $table ) == 0 ) {
			return wfMessage( 'emptyfile' )->escaped();
		}

		// Get rid of the "byte order mark", if it's there - this is
		// a three-character string sometimes put at the beginning
		// of files to indicate its encoding.
		// Code copied from:
		// http://www.dotvoid.com/2010/04/detecting-utf-bom-byte-order-mark/
		$byteOrderMark = pack( "CCC", 0xef, 0xbb, 0xbf );
		if ( isset( $table[0][0] ) && 0 == strncmp( $table[0][0], $byteOrderMark, 3 ) ) {
			$table[0][0] = substr( $table[0][0], 3 );
			// If there were quotation marks around this value,
			// they didn't get removed, so remove them now.
			$table[0][0] = trim( $table[0][0], '"' );
		}

		return $this->importFromArray( $table, $pages );
	}

	/**
	 * Reads every CSV record from a stream, skipping blank lines.
	 *
	 * @param resource $handle Open, readable stream positioned at the start
	 * @return array List of rows, each a list of cell values
	 */
	private function readCSVRows( $handle ) {
		$rows = array();
		while ( $line = fgetcsv( $handle ) ) {
			// fgetcsv() reports a blank line as array( null ); such a
			// line holds no page data.
			if ( $line === array( null ) ) {
				continue;
			}
			$rows[] = $line;
		}
		return $rows;
	}

	/**
	 * Converts a table (first row = header) into DTPage objects.
	 *
	 * Each header cell must be the "title" label, the "free text" label,
	 * an empty string (column ignored), or of the form
	 * "TemplateName[FieldName]".
	 *
	 * @param array $table List of rows, each a list of cell values
	 * @param array &$pages Receives one DTPage object per data row
	 * @return string|null Null on success, else an HTML-safe error message
	 */
	protected function importFromArray( $table, &$pages ) {
		// Nothing to import without a header row.
		if ( !is_array( $table ) || empty( $table[0] ) || !is_array( $table[0] ) ) {
			return wfMessage( 'emptyfile' )->escaped();
		}
		$header = $table[0];

		// Check header line to make sure every term is in the
		// correct format.
		$titleLabels = array( wfMessage( 'dt_xml_title' )->inContentLanguage()->text() );
		$freeTextLabels = array( wfMessage( 'dt_xml_freetext' )->inContentLanguage()->text() );
		// Add the English-language values as well, if this isn't an
		// English-language wiki.
		if ( $this->getLanguage()->getCode() !== 'en' ) {
			$titleLabels[] = wfMessage( 'dt_xml_title' )->inLanguage( 'en' )->text();
			$freeTextLabels[] = wfMessage( 'dt_xml_freetext' )->inLanguage( 'en' )->text();
		}
		foreach ( $header as $i => $headerVal ) {
			if ( !in_array( $headerVal, $titleLabels )
				&& !in_array( $headerVal, $freeTextLabels )
				&& $headerVal !== ''
				&& !preg_match( '/^[^\[\]]+\[[^\[\]]+]$/', $headerVal ) ) {
				// $headerVal comes from the uploaded file, so the
				// message has to be escaped before it reaches the page.
				return wfMessage( 'dt_importcsv_badheader', $i, $headerVal, $titleLabels[0], $freeTextLabels[0] )->escaped();
			}
		}

		foreach ( $table as $i => $line ) {
			if ( $i == 0 ) {
				continue;
			}
			$page = new DTPage();
			foreach ( $line as $j => $val ) {
				// Ignore cells beyond the header's width as well as
				// columns with an empty header.
				if ( !isset( $header[$j] ) || $header[$j] === '' ) {
					continue;
				}
				if ( in_array( $header[$j], $titleLabels ) ) {
					$page->setName( $val );
				} elseif ( in_array( $header[$j], $freeTextLabels ) ) {
					$page->setFreeText( $val );
				} else {
					// "Template[Field]" - the format was validated above.
					list( $templateName, $fieldName ) = explode( '[', str_replace( ']', '', $header[$j] ) );
					$page->addTemplateField( $templateName, $fieldName, $val );
				}
			}
			$pages[] = $page;
		}

		return null;
	}

	/**
	 * Queues one DTImportJob per page that has a valid title.
	 *
	 * @param array $pages DTPage objects to import
	 * @param string|null $editSummary Edit summary for the created revisions
	 * @param string|null $forPagesThatExist Value of the "pagesThatExist"
	 *  form field, passed through to the jobs
	 * @return string HTML: one paragraph per invalid page name, followed by
	 *  the success message
	 */
	protected function modifyPages( $pages, $editSummary, $forPagesThatExist ) {
		$text = "";
		$jobs = array();
		$jobParams = array();
		$jobParams['user_id'] = $this->getUser()->getId();
		$jobParams['edit_summary'] = $editSummary;
		$jobParams['for_pages_that_exist'] = $forPagesThatExist;
		foreach ( $pages as $page ) {
			$title = Title::newFromText( $page->getName() );
			if ( is_null( $title ) ) {
				// The page name comes from the uploaded file - escape it.
				$text .= '<p>' . $this->msg( 'img-auth-badtitle', $page->getName() )->escaped() . "</p>\n";
				continue;
			}
			$jobParams['text'] = $page->createText();
			$jobs[] = new DTImportJob( $title, $jobParams );
		}
		JobQueueGroup::singleton()->push( $jobs );

		$text .= $this->msg( 'dt_import_success' )->numParams( count( $jobs ) )->params( $this->getFiletype() )->parseAsBlock();

		return $text;
	}

	/**
	 * @return string Localized name of the file type handled by this page
	 */
	protected function getFiletype() {
		return wfMessage( 'dt_filetype_csv' )->text();
	}
}

/**
 * File: www/wiki/extensions/DataTransfer/specials/DT_ImportSpreadsheet.php
 *
 * Lets the user import a spreadsheet file to turn into wiki pages
 *
 * @author Stephan Gambke
 * @ingroup DataTransfer
 */

class DTImportSpreadsheet extends DTImportCSV {

	/**
	 * Constructor
	 *
	 * @param string $name Name under which the special page is registered
	 */
	public function __construct( $name = 'ImportSpreadsheet' ) {
		parent::__construct( $name );
	}

	/**
	 * Builds the upload form; unlike the CSV form there is no encoding
	 * selector.
	 *
	 * @return string HTML of the form, or an error box if PHPExcel is missing
	 */
	protected function printForm() {
		if ( !class_exists( 'PHPExcel' ) ) {
			return '<div class="error">You must have the PHPExcel library installed to run this page.</div>';
		}

		$formText = DTUtils::printFileSelector( $this->getFiletype() );
		$formText .= DTUtils::printExistingPagesHandling();
		$formText .= DTUtils::printImportSummaryInput( $this->getFiletype() );
		$formText .= DTUtils::printSubmitButton();
		$text = "\t" . Xml::tags( 'form',
			array(
				'enctype' => 'multipart/form-data',
				'action' => '',
				'method' => 'post'
			), $formText ) . "\n";
		return $text;
	}

	/**
	 * Loads the first sheet of the uploaded spreadsheet through PHPExcel
	 * and hands its rows to importFromArray().
	 *
	 * @param resource|null $file Open handle of the uploaded file
	 * @param string|null $encoding Unused for spreadsheets
	 * @param array &$pages Receives one DTPage object per data row
	 * @return string|null Null on success, else an HTML-safe error message
	 */
	protected function importFromFile( $file, $encoding, &$pages ) {
		if ( is_null( $file ) ) {
			return wfMessage( 'emptyfile' )->escaped();
		}

		// A direct POST bypasses the check done in printForm().
		if ( !class_exists( 'PHPExcel' ) ) {
			return '<div class="error">You must have the PHPExcel library installed to run this page.</div>';
		}

		$metadata = stream_get_meta_data( $file );
		$filename = $metadata['uri'];

		try {
			// NOTE(review): "@" kept from the original - presumably it
			// hides notices raised inside PHPExcel; confirm it is still
			// needed. It does not stop exceptions, hence the try/catch.
			@$objPHPExcel = PHPExcel_IOFactory::load( $filename );
			$table = $objPHPExcel->getSheet( 0 )->toArray( '', true, true, false );
		} catch ( Exception $e ) {
			return '<div class="error">' . htmlspecialchars( $e->getMessage() ) . '</div>';
		}

		return $this->importFromArray( $table, $pages );
	}

	/**
	 * @return string Localized name of the file type handled by this page
	 */
	protected function getFiletype() {
		return wfMessage( 'dt_filetype_spreadsheet' )->text();
	}
}
<?php
/**
 * File: www/wiki/extensions/DataTransfer/specials/DT_ImportXML.php
 *
 * Lets the user import an XML file to turn into wiki pages
 *
 * @author Yaron Koren
 */

class DTImportXML extends SpecialPage {

	/**
	 * Constructor
	 *
	 * @param string $name Name under which the special page is registered
	 */
	public function __construct( $name = 'ImportXML' ) {
		parent::__construct( $name );
	}

	/**
	 * Tells MediaWiki that this page modifies the wiki.
	 *
	 * @return bool Always true
	 */
	public function doesWrites() {
		return true;
	}

	/**
	 * Entry point: shows the upload form, or, when the form was
	 * submitted ("import_file" is present in the request), runs the import.
	 *
	 * @param string|null $query Subpage part of the URL (unused)
	 * @throws PermissionsError If the user lacks the 'datatransferimport' right
	 */
	public function execute( $query ) {
		$this->setHeaders();

		if ( !$this->getUser()->isAllowed( 'datatransferimport' ) ) {
			throw new PermissionsError( 'datatransferimport' );
		}

		$request = $this->getRequest();
		if ( $request->getCheck( 'import_file' ) ) {
			$text = DTUtils::printImportingMessage();
			$uploadResult = ImportStreamSource::newFromUpload( "file_name" );
			if ( !$uploadResult->isOK() ) {
				// Same handling as the CSV import: report the upload
				// problem instead of parsing a missing source.
				$text .= $this->getOutput()->parse( $uploadResult->getWikiText() );
			} else {
				$source = $uploadResult->value;
				$importSummary = $request->getVal( 'import_summary' );
				$forPagesThatExist = $request->getVal( 'pagesThatExist' );
				$text .= $this->modifyPages( $source, $importSummary, $forPagesThatExist );
			}
		} else {
			$formText = DTUtils::printFileSelector( wfMessage( 'dt_filetype_xml' )->text() );
			$formText .= DTUtils::printExistingPagesHandling();
			$formText .= DTUtils::printImportSummaryInput( wfMessage( 'dt_filetype_xml' )->text() );
			$formText .= DTUtils::printSubmitButton();
			$text = "\t" . Xml::tags( 'form',
				array(
					'enctype' => 'multipart/form-data',
					'action' => '',
					'method' => 'post'
				), $formText ) . "\n";
		}

		$this->getOutput()->addHTML( $text );
	}

	/**
	 * Parses the uploaded XML and queues one DTImportJob per page that
	 * has a valid title.
	 *
	 * @param ImportStreamSource $source Source of the uploaded XML
	 * @param string|null $editSummary Edit summary for the created revisions
	 * @param string|null $forPagesThatExist Value of the "pagesThatExist"
	 *  form field, passed through to the jobs
	 * @return string HTML: one paragraph per invalid page name, followed by
	 *  the success message
	 */
	function modifyPages( $source, $editSummary, $forPagesThatExist ) {
		$text = "";
		$xml_parser = new DTXMLParser( $source );
		$xml_parser->doParse();
		$jobs = array();
		$job_params = array();
		$job_params['user_id'] = $this->getUser()->getId();
		$job_params['edit_summary'] = $editSummary;
		$job_params['for_pages_that_exist'] = $forPagesThatExist;

		foreach ( $xml_parser->mPages as $page ) {
			$title = Title::newFromText( $page->getName() );
			if ( is_null( $title ) ) {
				// Invalid page name in the file: report it (escaped) and
				// go on, as the CSV import does.
				$text .= '<p>' . $this->msg( 'img-auth-badtitle', $page->getName() )->escaped() . "</p>\n";
				continue;
			}
			$job_params['text'] = $page->createText();
			$jobs[] = new DTImportJob( $title, $job_params );
		}
		JobQueueGroup::singleton()->push( $jobs );

		$text .= $this->msg( 'dt_import_success' )->numParams( count( $jobs ) )->params( 'XML' )
			->parseAsBlock();
		return $text;
	}
}

/**
 * File: www/wiki/extensions/DataTransfer/specials/DT_ViewXML.php
 *
 * Displays an interface to let the user export pages from the wiki in XML form
 *
 * @author Yaron Koren
 */

class DTViewXML extends SpecialPage {

	/**
	 * Constructor
	 *
	 * @param string $name Name under which the special page is registered
	 */
	public function __construct( $name = 'ViewXML' ) {
		parent::__construct( $name );
	}

	/**
	 * Entry point of the special page.
	 *
	 * @param string|null $query Subpage part of the URL
	 */
	public function execute( $query ) {
		$this->setHeaders();
		$this->doSpecialViewXML( $query );
	}

	/**
	 * @return array Sorted list of all category names that have members
	 */
	static function getCategoriesList() {
		$dbr = wfGetDB( DB_SLAVE );
		$categorylinks = $dbr->tableName( 'categorylinks' );
		$res = $dbr->query( "SELECT DISTINCT cl_to FROM $categorylinks" );
		$categories = array();
		while ( $row = $dbr->fetchRow( $res ) ) {
			$categories[] = $row[0];
		}
		$dbr->freeResult( $res );
		sort( $categories );
		return $categories;
	}

	/**
	 * @return array List of the IDs of all namespaces that contain pages
	 */
	static function getNamespacesList() {
		$dbr = wfGetDB( DB_SLAVE );
		$page = $dbr->tableName( 'page' );
		$res = $dbr->query( "SELECT DISTINCT page_namespace FROM $page" );
		$namespaces = array();
		while ( $row = $dbr->fetchRow( $res ) ) {
			$namespaces[] = $row[0];
		}
		$dbr->freeResult( $res );
		return $namespaces;
	}

	/**
	 * Get all the pages that belong to a category and all its
	 * subcategories, down a certain number of levels - heavily based
	 * on SMW's SMWInlineQuery::includeSubcategories()
	 *
	 * @param string $top_category Name of the category to start from
	 * @param int $num_levels Maximum number of category levels to descend
	 * @return array List of Title objects; empty if $num_levels is not positive
	 */
	static function getPagesForCategory( $top_category, $num_levels ) {
		$titles = array();
		// Always return a list, so that callers can iterate over the result.
		if ( $num_levels <= 0 ) {
			return $titles;
		}

		$db = wfGetDB( DB_SLAVE );
		$fname = "getPagesForCategory";
		// Every category seen so far - guards against category loops.
		$categories = array( $top_category );
		// Categories whose members are fetched on the current level.
		$checkcategories = array( $top_category );
		for ( $level = $num_levels; $level > 0; $level-- ) {
			$newcategories = array();
			foreach ( $checkcategories as $category ) {
				$res = $db->select( // make the query
					array( 'categorylinks', 'page' ),
					array( 'page_id', 'page_title', 'page_namespace' ),
					array( 'cl_from = page_id',
						'cl_to = ' . $db->addQuotes( $category )
					),
					$fname
				);
				if ( !$res ) {
					continue;
				}
				while ( $row = $db->fetchRow( $res ) ) {
					if ( !array_key_exists( 'page_title', $row ) ) {
						continue;
					}
					if ( $row['page_namespace'] == NS_CATEGORY ) {
						$new_category = $row['page_title'];
						if ( !in_array( $new_category, $categories )
							&& !in_array( $new_category, $newcategories ) ) {
							$newcategories[] = $new_category;
						}
					} else {
						// newFromID() returns null if the page is gone.
						$title = Title::newFromID( $row['page_id'] );
						if ( !is_null( $title ) ) {
							$titles[] = $title;
						}
					}
				}
				$db->freeResult( $res );
			}
			if ( count( $newcategories ) == 0 ) {
				return $titles;
			}
			$categories = array_merge( $categories, $newcategories );
			$checkcategories = $newcategories;
		}
		return $titles;
	}

	/**
	 * @param int|string $namespace Namespace ID
	 * @return array List of Title objects for all pages in that namespace
	 */
	static function getPagesForNamespace( $namespace ) {
		$dbr = wfGetDB( DB_SLAVE );
		// The namespace originates from a request parameter: force it to
		// an integer and let the database layer build the condition
		// rather than interpolating it into raw SQL.
		$res = $dbr->select(
			'page',
			'page_id',
			array( 'page_namespace' => intval( $namespace ) ),
			__METHOD__
		);
		$titles = array();
		while ( $row = $dbr->fetchRow( $res ) ) {
			$title = Title::newFromID( $row[0] );
			if ( !is_null( $title ) ) {
				$titles[] = $title;
			}
		}
		$dbr->freeResult( $res );
		return $titles;
	}

	/**
	 * Helper function for getXMLForPage()
	 *
	 * Checks recursively whether $element is one of the keys of a nested
	 * array.
	 *
	 * @param array|null $tree Nested array, keyed by node
	 * @param mixed $element Key to look for (compared with ===)
	 * @return bool
	 */
	static function treeContainsElement( $tree, $element ) {
		// escape out if there's no tree (i.e., category)
		if ( $tree == null ) {
			return false;
		}

		foreach ( $tree as $node => $child_tree ) {
			if ( $node === $element ) {
				return true;
			}
			// Only descend into real subtrees; leaves may be scalars.
			if ( is_array( $child_tree ) && count( $child_tree ) > 0
				&& self::treeContainsElement( $child_tree, $element ) ) {
				return true;
			}
		}
		// no match found
		return false;
	}

	/**
	 * Turns a name into something usable as an XML element name: spaces
	 * become underscores (as before), and so do the characters that would
	 * otherwise break out of the tag.
	 *
	 * @param string $name
	 * @return string
	 */
	private static function makeXMLTagName( $name ) {
		return preg_replace( '/[\s<>&"\']/', '_', (string)$name );
	}

	/**
	 * @param Title|null $title Page to export
	 * @param mixed $simplified_format Truthy to produce the simplified format
	 * @param int $depth Recursion depth; nothing is returned beyond 5
	 * @return string XML for the page, or "" if there is no such title
	 */
	static function getXMLForPage( $title, $simplified_format, $depth = 0 ) {
		if ( $depth > 5 ) {
			return "";
		}
		// Invalid page names and deleted pages yield no Title object.
		if ( is_null( $title ) ) {
			return "";
		}

		$pageStructure = DTPageStructure::newFromTitle( $title );
		$text = $pageStructure->toXML( $simplified_format );

		// escape back the curly brackets that were escaped out at the beginning
		$text = str_replace( '&#123;', '{', $text );
		$text = str_replace( '&#125;', '}', $text );
		return $text;
	}

	/**
	 * Shows the selection form, or - if categories, namespaces or titles
	 * were requested - prints the XML export directly, bypassing the
	 * normal page output.
	 *
	 * @param string|null $query Subpage part of the URL (unused)
	 */
	function doSpecialViewXML( $query = null ) {
		global $wgContLang;

		$out = $this->getOutput();
		$request = $this->getRequest();

		$namespace_labels = $wgContLang->getNamespaces();
		$category_label = $namespace_labels[NS_CATEGORY];
		$name_str = str_replace( ' ', '_', wfMessage( 'dt_xml_name' )->inContentLanguage()->text() );
		$namespace_str = str_replace( ' ', '_', wfMessage( 'dt_xml_namespace' )->text() );
		$pages_str = str_replace( ' ', '_', wfMessage( 'dt_xml_pages' )->inContentLanguage()->text() );

		$cats = $request->getArray( 'categories' );
		$nses = $request->getArray( 'namespaces' );
		$requestedTitles = $request->getVal( 'titles' );
		$form_submitted = ( !empty( $cats ) || !empty( $nses ) || $requestedTitles != null );

		if ( $form_submitted ) {
			$out->disable();

			// Cancel output buffering and gzipping if set
			// This should provide safer streaming for pages with history
			wfResetOutputBuffers();
			header( "Content-type: application/xml; charset=utf-8" );

			$simplified_format = $request->getVal( 'simplified_format' );
			$text = "<$pages_str>";
			if ( $cats ) {
				// The category names are the array keys, and they come
				// straight from the request.
				foreach ( $cats as $cat => $val ) {
					$cat = (string)$cat;
					$cat_tag = self::makeXMLTagName( $cat );
					if ( $simplified_format ) {
						$text .= '<' . $cat_tag . ">\n";
					} else {
						$text .= "<$category_label $name_str=\"" . htmlspecialchars( $cat, ENT_QUOTES ) . "\">\n";
					}
					$titles = self::getPagesForCategory( $cat, 10 );
					foreach ( $titles as $title ) {
						$text .= self::getXMLForPage( $title, $simplified_format );
					}
					if ( $simplified_format ) {
						$text .= '</' . $cat_tag . ">\n";
					} else {
						$text .= "</$category_label>\n";
					}
				}
			}

			if ( $nses ) {
				foreach ( $nses as $ns => $val ) {
					// Only integer namespace IDs are meaningful.
					if ( !preg_match( '/^-?\d+$/', (string)$ns ) ) {
						continue;
					}
					$ns = (int)$ns;
					if ( $ns === 0 ) {
						$ns_name = "Main";
					} else {
						$ns_name = MWNamespace::getCanonicalName( $ns );
					}
					// Unknown namespace ID - nothing to export.
					if ( $ns_name === false || is_null( $ns_name ) ) {
						continue;
					}
					$ns_tag = self::makeXMLTagName( $ns_name );
					if ( $simplified_format ) {
						$text .= '<' . $ns_tag . ">\n";
					} else {
						$text .= "<$namespace_str $name_str=\"" . htmlspecialchars( $ns_name, ENT_QUOTES ) . "\">\n";
					}
					$titles = self::getPagesForNamespace( $ns );
					foreach ( $titles as $title ) {
						$text .= self::getXMLForPage( $title, $simplified_format );
					}
					if ( $simplified_format ) {
						$text .= '</' . $ns_tag . ">\n";
					} else {
						$text .= "</$namespace_str>\n";
					}
				}
			}

			// The user can specify a set of page names to view
			// the XML of, using a "titles=" parameter, separated
			// by "|", in the manner of the MediaWiki API.
			// Hm... perhaps all of Special:ViewXML should just
			// be replaced by an API action?
			if ( $requestedTitles ) {
				$pageNames = explode( '|', $requestedTitles );
				foreach ( $pageNames as $pageName ) {
					// An invalid name gives a null title, which
					// getXMLForPage() turns into an empty string.
					$title = Title::newFromText( $pageName );
					$text .= self::getXMLForPage( $title, $simplified_format );
				}
			}

			$text .= "</$pages_str>";
			print $text;
		} else {
			// set 'title' as hidden field, in case there's no URL niceness
			$special_namespace = $namespace_labels[NS_SPECIAL];
			$text = "\t" . '<form action="" method="get">' . "\n";
			$text .= "\t" . Html::input( 'title', "$special_namespace:ViewXML", 'hidden' ) . "\n\n";
			$text .= "<p>" . wfMessage( 'dt_viewxml_docu' )->escaped() . "</p>\n";
			$text .= "<h2>" . wfMessage( 'dt_viewxml_categories' )->escaped() . "</h2>\n";
			$categories = self::getCategoriesList();
			foreach ( $categories as $category ) {
				$text .= Html::input( "categories[$category]", null, 'checkbox' );
				$title = Title::makeTitle( NS_CATEGORY, $category );
				$link = Linker::link( $title, htmlspecialchars( $title->getText() ) );
				$text .= " $link<br />\n";
			}
			$text .= "<h2>" . wfMessage( 'dt_viewxml_namespaces' )->escaped() . "</h2>\n";
			$namespaces = self::getNamespacesList();
			foreach ( $namespaces as $nsCode ) {
				// Depending on the database driver the ID may arrive as
				// a string or as an integer.
				if ( (int)$nsCode === 0 ) {
					$nsName = wfMessage( 'blanknamespace' )->escaped();
				} else {
					$nsName = htmlspecialchars( $wgContLang->getFormattedNsText( $nsCode ) );
					if ( $nsName === '' ) {
						continue;
					}
				}
				$text .= Html::input( "namespaces[$nsCode]", null, 'checkbox' );
				$text .= ' ' . str_replace( '_', ' ', $nsName ) . "<br />\n";
			}
			$text .= "<br /><p><label><input type=\"checkbox\" name=\"simplified_format\" /> " . wfMessage( 'dt_viewxml_simplifiedformat' )->escaped() . "</label></p>\n";
			$text .= "<input type=\"submit\" value=\"" . wfMessage( 'viewxml' )->escaped() . "\">\n";
			$text .= "</form>\n";

			$out->addHTML( $text );
		}
	}
}