From 849e9d14b8f22d988a832ae37e3b151aa91d9417 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Wed, 27 Feb 2008 06:19:09 +0000 Subject: [PATCH] Replaced maintenance/dumpHTML.php with a message pointing to the DumpHTML extension. Deleted subsidiary files. --- maintenance/dumpHTML.inc | 1016 ----------------------------------- maintenance/dumpHTML.php | 161 +----- skins/disabled/HTMLDump.php | 232 -------- skins/htmldump/lookup.js | 91 ---- skins/htmldump/main.css | 9 - skins/htmldump/md5.js | 256 --------- skins/htmldump/utf8.js | 72 --- 7 files changed, 5 insertions(+), 1832 deletions(-) delete mode 100644 maintenance/dumpHTML.inc delete mode 100644 skins/disabled/HTMLDump.php delete mode 100644 skins/htmldump/lookup.js delete mode 100644 skins/htmldump/main.css delete mode 100644 skins/htmldump/md5.js delete mode 100644 skins/htmldump/utf8.js diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc deleted file mode 100644 index 5a695aaecc..0000000000 --- a/maintenance/dumpHTML.inc +++ /dev/null @@ -1,1016 +0,0 @@ - $value ) { - $this->$var = $value; - } - } - - function loadCheckpoints() { - if ( $this->checkpoints !== false ) { - return true; - } elseif ( !$this->checkpointFile ) { - return false; - } else { - $lines = @file( $this->checkpointFile ); - if ( $lines === false ) { - print "Starting new checkpoint file \"{$this->checkpointFile}\"\n"; - $this->checkpoints = array(); - } else { - $lines = array_map( 'trim', $lines ); - $this->checkpoints = array(); - foreach ( $lines as $line ) { - list( $name, $value ) = explode( '=', $line, 2 ); - $this->checkpoints[$name] = $value; - } - } - return true; - } - } - - function getCheckpoint( $type, $defValue = false ) { - if ( !$this->loadCheckpoints() ) { - return false; - } - if ( !isset( $this->checkpoints[$type] ) ) { - return false; - } else { - return $this->checkpoints[$type]; - } - } - - function setCheckpoint( $type, $value ) { - if ( !$this->checkpointFile ) { - return; - } - $this->checkpoints[$type] = $value; - $blob = ''; - foreach ( $this->checkpoints as $type => $value ) { - $blob .= "$type=$value\n"; - } - file_put_contents( $this->checkpointFile, $blob ); - } - - function doEverything() { - if ( $this->getCheckpoint( 'everything' ) == 'done' ) { - print "Checkpoint says everything is already done\n"; - return; - } - $this->doArticles(); - $this->doCategories(); - $this->doRedirects(); - if ( $this->sliceNumerator == 1 ) { - $this->doSpecials(); - } - $this->doLocalImageDescriptions(); - - if ( !$this->noSharedDesc ) { - $this->doSharedImageDescriptions(); - } - - $this->setCheckpoint( 'everything', 'done' ); - } - - /** - * Write a set of articles specified by start and end page_id - * Skip categories and images, they will be done separately - */ - function doArticles() { - if ( $this->endID === false ) { - $end = $this->getMaxPageID(); - } else { - $end = $this->endID; - } - $start = $this->startID; - - # Start from the checkpoint - $cp = $this->getCheckpoint( 'article' ); - if ( $cp == 'done' ) { - print "Articles already done\n"; - return; - } elseif ( $cp !== false ) { - $start = $cp; - print "Resuming article dump from checkpoint at page_id $start of $end\n"; - } else { - print "Starting from page_id $start of $end\n"; - } - - # Move the start point to the correct slice if it isn't there already - $start = $this->modSliceStart( $start ); - - $this->setupGlobals(); - - $mainPageObj = Title::newMainPage(); - $mainPage = $mainPageObj->getPrefixedDBkey(); - - for ( $id = $start, $i = 0; $id <= $end; $id += $this->sliceDenominator, $i++ ) { - wfWaitForSlaves( 20 ); - if ( !( $i % REPORTING_INTERVAL) ) { - print "Processing ID: $id\r"; - $this->setCheckpoint( 'article', $id ); - } - if ( !($i % (REPORTING_INTERVAL*10) ) ) { - print "\n"; - } - $title = Title::newFromID( $id ); - if ( $title ) { - $ns = $title->getNamespace() ; - if ( $ns != NS_CATEGORY && $ns != NS_MEDIAWIKI && - $title->getPrefixedDBkey() != $mainPage ) { - $this->doArticle( $title ); - } - } - } - $this->setCheckpoint( 'article', 'done' ); - print "\n"; - } - - function doSpecials() { - $this->doMainPage(); - - $this->setupGlobals(); - print "Special:Categories..."; - $this->doArticle( SpecialPage::getTitleFor( 'Categories' ) ); - print "\n"; - } - - /** Write the main page as index.html */ - function doMainPage() { - - print "Making index.html "; - - // Set up globals with no ../../.. in the link URLs - $this->setupGlobals( 0 ); - - $title = Title::newMainPage(); - $text = $this->getArticleHTML( $title ); - - # Parse the XHTML to find the images - $images = $this->findImages( $text ); - $this->copyImages( $images ); - - $file = fopen( "{$this->dest}/index.html", "w" ); - if ( !$file ) { - print "\nCan't open index.html for writing\n"; - return false; - } - fwrite( $file, $text ); - fclose( $file ); - print "\n"; - } - - function doImageDescriptions() { - $this->doLocalImageDescriptions(); - if ( !$this->noSharedDesc ) { - $this->doSharedImageDescriptions(); - } - } - - /** - * Dump image description pages that don't have an associated article, but do - * have a local image - */ - function doLocalImageDescriptions() { - $chunkSize = 1000; - - $dbr = wfGetDB( DB_SLAVE ); - - $cp = $this->getCheckpoint( 'local image' ); - if ( $cp == 'done' ) { - print "Local image descriptions already done\n"; - return; - } elseif ( $cp !== false ) { - print "Writing image description pages starting from $cp\n"; - $conds = array( 'img_name >= ' . $dbr->addQuotes( $cp ) ); - } else { - print "Writing image description pages for local images\n"; - $conds = false; - } - - $this->setupGlobals(); - $i = 0; - - do { - $res = $dbr->select( 'image', array( 'img_name' ), $conds, __METHOD__, - array( 'ORDER BY' => 'img_name', 'LIMIT' => $chunkSize ) ); - $numRows = $dbr->numRows( $res ); - - while ( $row = $dbr->fetchObject( $res ) ) { - # Update conds for the next chunk query - $conds = array( 'img_name > ' . $dbr->addQuotes( $row->img_name ) ); - - // Slice the result set with a filter - if ( !$this->sliceFilter( $row->img_name ) ) { - continue; - } - - wfWaitForSlaves( 10 ); - if ( !( ++$i % REPORTING_INTERVAL ) ) { - print "{$row->img_name}\n"; - if ( $row->img_name !== 'done' ) { - $this->setCheckpoint( 'local image', $row->img_name ); - } - } - $title = Title::makeTitle( NS_IMAGE, $row->img_name ); - if ( $title->getArticleID() ) { - // Already done by dumpHTML - continue; - } - $this->doArticle( $title ); - } - $dbr->freeResult( $res ); - } while ( $numRows ); - - $this->setCheckpoint( 'local image', 'done' ); - print "\n"; - } - - /** - * Dump images which only have a real description page on commons - */ - function doSharedImageDescriptions() { - list( $start, $end ) = $this->sliceRange( 0, 255 ); - - $cp = $this->getCheckpoint( 'shared image' ); - if ( $cp == 'done' ) { - print "Shared description pages already done\n"; - return; - } elseif ( $cp !== false ) { - print "Writing description pages for commons images starting from directory $cp/255\n"; - $start = $cp; - } else { - print "Writing description pages for commons images\n"; - } - - $this->setupGlobals(); - $i = 0; - for ( $hash = $start; $hash <= $end; $hash++ ) { - $this->setCheckpoint( 'shared image', $hash ); - - $dir = sprintf( "%s/%01x/%02x", $this->sharedStaticDirectory, - intval( $hash / 16 ), $hash ); - $handle = @opendir( $dir ); - while ( $handle && $file = readdir( $handle ) ) { - if ( $file[0] == '.' ) { - continue; - } - if ( !(++$i % REPORTING_INTERVAL ) ) { - print "$i\r"; - } - - $title = Title::makeTitleSafe( NS_IMAGE, $file ); - $this->doArticle( $title ); - } - if ( $handle ) { - closedir( $handle ); - } - } - $this->setCheckpoint( 'shared image', 'done' ); - print "\n"; - } - - function doCategories() { - $chunkSize = 1000; - - $this->setupGlobals(); - $dbr = wfGetDB( DB_SLAVE ); - - $cp = $this->getCheckpoint( 'category' ); - if ( $cp == 'done' ) { - print "Category pages already done\n"; - return; - } elseif ( $cp !== false ) { - print "Resuming category page dump from $cp\n"; - $conds = array( 'cl_to >= ' . $dbr->addQuotes( $cp ) ); - } else { - print "Starting category pages\n"; - $conds = false; - } - - $i = 0; - do { - $res = $dbr->select( 'categorylinks', 'DISTINCT cl_to', $conds, __METHOD__, - array( 'ORDER BY' => 'cl_to', 'LIMIT' => $chunkSize ) ); - $numRows = $dbr->numRows( $res ); - - while ( $row = $dbr->fetchObject( $res ) ) { - // Set conditions for next chunk - $conds = array( 'cl_to > ' . $dbr->addQuotes( $row->cl_to ) ); - - // Filter pages from other slices - if ( !$this->sliceFilter( $row->cl_to ) ) { - continue; - } - - wfWaitForSlaves( 10 ); - if ( !(++$i % REPORTING_INTERVAL ) ) { - print "{$row->cl_to}\n"; - if ( $row->cl_to != 'done' ) { - $this->setCheckpoint( 'category', $row->cl_to ); - } - } - $title = Title::makeTitle( NS_CATEGORY, $row->cl_to ); - $this->doArticle( $title ); - } - $dbr->freeResult( $res ); - } while ( $numRows ); - - $this->setCheckpoint( 'category', 'done' ); - print "\n"; - } - - function doRedirects() { - print "Doing redirects...\n"; - - $chunkSize = 10000; - $end = $this->getMaxPageID(); - $cp = $this->getCheckpoint( 'redirect' ); - if ( $cp == 'done' ) { - print "Redirects already done\n"; - return; - } elseif ( $cp !== false ) { - print "Resuming redirect generation from page_id $cp\n"; - $start = intval( $cp ); - } else { - $start = 1; - } - - $this->setupGlobals(); - $dbr = wfGetDB( DB_SLAVE ); - $i = 0; - - for ( $chunkStart = $start; $chunkStart <= $end; $chunkStart += $chunkSize ) { - $chunkEnd = min( $end, $chunkStart + $chunkSize - 1 ); - $conds = array( - 'page_is_redirect' => 1, - "page_id BETWEEN $chunkStart AND $chunkEnd" - ); - # Modulo slicing in SQL - if ( $this->sliceDenominator != 1 ) { - $n = intval( $this->sliceNumerator ); - $m = intval( $this->sliceDenominator ); - $conds[] = "page_id % $m = $n"; - } - $res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 'page_title' ), - $conds, __METHOD__ ); - - while ( $row = $dbr->fetchObject( $res ) ) { - $title = Title::makeTitle( $row->page_namespace, $row->page_title ); - if ( !(++$i % (REPORTING_INTERVAL*10) ) ) { - printf( "Done %d redirects (%2.3f%%)\n", $i, $row->page_id / $end * 100 ); - $this->setCheckpoint( 'redirect', $row->page_id ); - } - $this->doArticle( $title ); - } - $dbr->freeResult( $res ); - } - $this->setCheckpoint( 'redirect', 'done' ); - } - - /** Write an article specified by title */ - function doArticle( $title ) { - if ( $this->noOverwrite ) { - $fileName = $this->dest.'/'.$this->getHashedFilename( $title ); - if ( file_exists( $fileName ) ) { - return; - } - } - - $this->profile(); - - $this->rawPages = array(); - $text = $this->getArticleHTML( $title ); - - if ( $text === false ) { - return; - } - - # Parse the XHTML to find the images - $images = $this->findImages( $text ); - $this->copyImages( $images ); - - # Write to file - $this->writeArticle( $title, $text ); - - # Do raw pages - wfMkdirParents( "{$this->dest}/raw", 0755 ); - foreach( $this->rawPages as $record ) { - list( $file, $title, $params ) = $record; - - $path = "{$this->dest}/raw/$file"; - if ( !file_exists( $path ) ) { - $article = new Article( $title ); - $request = new FauxRequest( $params ); - $rp = new RawPage( $article, $request ); - $text = $rp->getRawText(); - - print "Writing $file\n"; - $file = fopen( $path, 'w' ); - if ( !$file ) { - print("Can't open file $path for writing\n"); - continue; - } - fwrite( $file, $text ); - fclose( $file ); - } - } - - wfIncrStats( 'dumphtml_article' ); - } - - /** Write the given text to the file identified by the given title object */ - function writeArticle( $title, $text ) { - $filename = $this->getHashedFilename( $title ); - - # Temporary hack for current dump, this should be moved to - # getFriendlyName() at the earliest opportunity. - # - # Limit filename length to 255 characters, so it works on ext3. - # Titles are in fact limited to 255 characters, but dumpHTML - # adds a suffix which may put them over the limit. - $length = strlen( $filename ); - if ( $length > 255 ) { - print "Warning: Filename too long ($length bytes). Skipping.\n"; - return; - } - - $fullName = "{$this->dest}/$filename"; - $fullDir = dirname( $fullName ); - - if ( $this->compress ) { - $fullName .= ".gz"; - $text = gzencode( $text, 9 ); - } - - wfMkdirParents( $fullDir, 0755 ); - - wfSuppressWarnings(); - $file = fopen( $fullName, 'w' ); - wfRestoreWarnings(); - - if ( !$file ) { - die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n"); - return; - } - - fwrite( $file, $text ); - fclose( $file ); - } - - /** Set up globals required for parsing */ - function setupGlobals( $currentDepth = NULL ) { - global $wgUser, $wgStylePath, $wgArticlePath, $wgMathPath; - global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; - global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; - global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer; - global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon, $wgEnableSidebarCache; - global $wgGenerateThumbnailOnParse; - - static $oldLogo = NULL; - - if ( !$this->setupDone ) { - $wgHooks['GetLocalURL'][] =& $this; - $wgHooks['GetFullURL'][] =& $this; - $wgHooks['SiteNoticeBefore'][] =& $this; - $wgHooks['SiteNoticeAfter'][] =& $this; - $this->oldArticlePath = $wgServer . $wgArticlePath; - } - - if ( is_null( $currentDepth ) ) { - $currentDepth = $this->depth; - } - - if ( $this->alternateScriptPath ) { - if ( $currentDepth == 0 ) { - $wgScriptPath = '.'; - } else { - $wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 ); - } - } else { - $wgScriptPath = '..' . str_repeat( '/..', $currentDepth ); - } - - $wgArticlePath = str_repeat( '../', $currentDepth ) . '$1'; - - # Logo image - # Allow for repeated setup - if ( !is_null( $oldLogo ) ) { - $wgLogo = $oldLogo; - } else { - $oldLogo = $wgLogo; - } - - if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) { - # If it's in the upload directory, rewrite it to the new upload directory - $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 ); - } elseif ( $wgLogo{0} == '/' ) { - # This is basically heuristic - # Rewrite an absolute logo path to one relative to the the script path - $wgLogo = $wgScriptPath . $wgLogo; - } - - # Another ugly hack - if ( !$this->setupDone ) { - $this->oldCopyrightIcon = $wgCopyrightIcon; - } - $wgCopyrightIcon = str_replace( 'src="/images', - 'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon ); - - $wgStylePath = "$wgScriptPath/skins"; - $wgUploadPath = "$wgScriptPath/{$this->imageRel}"; - $wgSharedUploadPath = "$wgUploadPath/shared"; - $wgMaxCredits = -1; - $wgHideInterlanguageLinks = !$this->interwiki; - $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; - $wgEnableParserCache = false; - $wgMathPath = "$wgScriptPath/math"; - $wgEnableSidebarCache = false; - $wgGenerateThumbnailOnParse = true; - - if ( !empty( $wgRightsText ) ) { - $wgRightsUrl = "$wgScriptPath/COPYING.html"; - } - - $wgUser = new User; - $wgUser->setOption( 'skin', $this->skin ); - $wgUser->setOption( 'editsection', 0 ); - - $this->destUploadDirectory = "{$this->dest}/{$this->imageRel}"; - if ( realpath( $this->destUploadDirectory ) == realpath( $wgUploadDirectory ) ) { - print "Disabling image snapshot because the destination is the same as the source\n"; - $this->makeSnapshot = false; - } - $this->sharedStaticDirectory = "{$this->destUploadDirectory}/shared"; - - $this->setupDone = true; - } - - /** Reads the content of a title object, executes the skin and captures the result */ - function getArticleHTML( $title ) { - global $wgOut, $wgTitle, $wgArticle, $wgUser; - - if ( $this->debug ) { - print $title->getPrefixedDBkey() . "\n"; - } - $linkCache =& LinkCache::singleton(); - $linkCache->clear(); - $wgTitle = $title; - if ( is_null( $wgTitle ) ) { - return false; - } - - $ns = $wgTitle->getNamespace(); - if ( $ns == NS_SPECIAL ) { - $wgOut = new OutputPage; - $wgOut->setParserOptions( new ParserOptions ); - SpecialPage::executePath( $wgTitle ); - } else { - /** @todo merge with Wiki.php code */ - if ( $ns == NS_IMAGE ) { - $wgArticle = new ImagePage( $wgTitle ); - } elseif ( $ns == NS_CATEGORY ) { - $wgArticle = new CategoryPage( $wgTitle ); - } else { - $wgArticle = new Article( $wgTitle ); - } - $rt = Title::newFromRedirect( $wgArticle->fetchContent() ); - if ( $rt != NULL ) { - return $this->getRedirect( $rt ); - } else { - $wgOut = new OutputPage; - $wgOut->setParserOptions( new ParserOptions ); - - $wgArticle->view(); - } - } - - - $sk =& $wgUser->getSkin(); - ob_start(); - $sk->outputPage( $wgOut ); - $text = ob_get_contents(); - ob_end_clean(); - - return $text; - } - - function getRedirect( $rt ) { - $url = $rt->escapeLocalURL(); - $text = $rt->getPrefixedText(); - return << - - - - - - -

Redirecting to $text

- - -ENDTEXT; - } - - /** Returns image paths used in an XHTML document */ - function findImages( $text ) { - global $wgOutputEncoding, $wgDumpImages; - $parser = xml_parser_create( $wgOutputEncoding ); - xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' ); - - $wgDumpImages = array(); - xml_parse( $parser, $text ); - xml_parser_free( $parser ); - - return $wgDumpImages; - } - - /** - * Copy a file specified by a URL to a given directory - * - * @param string $srcPath The source URL - * @param string $srcPathBase The base directory of the source URL - * @param string $srcDirBase The base filesystem directory of the source URL - * @param string $destDirBase The base filesystem directory of the destination URL - */ - function relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) { - $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); // +1 for slash - $sourceLoc = "$srcDirBase/$rel"; - $destLoc = "$destDirBase/$rel"; - #print "Copying $sourceLoc to $destLoc\n"; - if ( !file_exists( $destLoc ) ) { - wfMkdirParents( dirname( $destLoc ), 0755 ); - if ( function_exists( 'symlink' ) && !$this->forceCopy ) { - if ( !symlink( $sourceLoc, $destLoc ) ) { - print "Warning: unable to create symlink at $destLoc\n"; - } - } else { - if ( !copy( $sourceLoc, $destLoc ) ) { - print "Warning: unable to copy $sourceLoc to $destLoc\n"; - } - } - } - } - - /** - * Copy an image, and if it is a thumbnail, copy its parent image too - */ - function copyImage( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) { - global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath; - $this->relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ); - if ( substr( $srcPath, strlen( $srcPathBase ) + 1, 6 ) == 'thumb/' ) { - # The image was a thumbnail - # Copy the source image as well - $rel = substr( $srcPath, strlen( $srcPathBase ) + 1 ); - $parts = explode( '/', $rel ); - $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}"; - $newSrc = "$srcPathBase/$rel"; - $this->relativeCopy( $newSrc, $srcPathBase, $srcDirBase, $destDirBase ); - } - } - - /** - * Copy images (or create symlinks) from commons to a static directory. - * This is necessary even if you intend to distribute all of commons, because - * the directory contents is used to work out which image description pages - * are needed. - * - * Also copies math images, and full-sized images if the makeSnapshot option - * is specified. - * - */ - function copyImages( $images ) { - global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath, $wgSharedUploadDirectory, - $wgMathPath, $wgMathDirectory; - # Find shared uploads and copy them into the static directory - $sharedPathLength = strlen( $wgSharedUploadPath ); - $mathPathLength = strlen( $wgMathPath ); - $uploadPathLength = strlen( $wgUploadPath ); - foreach ( $images as $escapedImage => $dummy ) { - $image = urldecode( $escapedImage ); - - if ( substr( $image, 0, $sharedPathLength ) == $wgSharedUploadPath ) { - $this->copyImage( $image, $wgSharedUploadPath, $wgSharedUploadDirectory, $this->sharedStaticDirectory ); - } elseif ( substr( $image, 0, $mathPathLength ) == $wgMathPath ) { - $this->relativeCopy( $image, $wgMathPath, $wgMathDirectory, "{$this->dest}/math" ); - } elseif ( $this->makeSnapshot && substr( $image, 0, $uploadPathLength ) == $wgUploadPath ) { - $this->copyImage( $image, $wgUploadPath, $wgUploadDirectory, $this->destUploadDirectory ); - } - } - } - - function onGetFullURL( &$title, &$url, $query ) { - global $wgContLang, $wgArticlePath; - - $iw = $title->getInterwiki(); - if ( $title->isExternal() && $wgContLang->getLanguageName( $iw ) ) { - if ( $title->getDBkey() == '' ) { - $url = str_replace( '$1', "../$iw/index.html", $wgArticlePath ); - } else { - $url = str_replace( '$1', "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) ), - $wgArticlePath ); - } - $url .= $this->compress ? ".gz" : ""; - return false; - } else { - return true; - } - } - - function onGetLocalURL( &$title, &$url, $query ) { - global $wgArticlePath; - - if ( $title->isExternal() ) { - # Default is fine for interwiki - return true; - } - - $url = false; - if ( $query != '' ) { - $params = array(); - parse_str( $query, $params ); - if ( isset($params['action']) && $params['action'] == 'raw' ) { - if ( $params['gen'] == 'css' || $params['gen'] == 'js' ) { - $file = 'gen.' . $params['gen']; - } else { - $file = $this->getFriendlyName( $title->getPrefixedDBkey() ); - // Clean up Monobook.css etc. - $matches = array(); - if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) { - $file = $matches[1] . '.' . $matches[2]; - } - } - $this->rawPages[$file] = array( $file, $title, $params ); - $url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath ); - } - } - if ( $url === false ) { - $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); - } - $url .= $this->compress ? ".gz" : ""; - return false; - } - - function getHashedFilename( &$title ) { - if ( '' != $title->mInterwiki ) { - $dbkey = $title->getDBkey(); - } else { - $dbkey = $title->getPrefixedDBkey(); - } - - $mainPage = Title::newMainPage(); - if ( $mainPage->getPrefixedDBkey() == $dbkey ) { - return 'index.html'; - } - - return $this->getHashedDirectory( $title ) . '/' . - $this->getFriendlyName( $dbkey ) . '.html'; - } - - function getFriendlyName( $name ) { - global $wgLang; - # Replace illegal characters for Windows paths with underscores - $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' ); - - # Work out lower case form. We assume we're on a system with case-insensitive - # filenames, so unless the case is of a special form, we have to disambiguate - if ( function_exists( 'mb_strtolower' ) ) { - $lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) ); - } else { - $lowerCase = ucfirst( strtolower( $name ) ); - } - - # Make it mostly unique - if ( $lowerCase != $friendlyName ) { - $friendlyName .= '_' . substr(md5( $name ), 0, 4); - } - # Handle colon specially by replacing it with tilde - # Thus we reduce the number of paths with hashes appended - $friendlyName = str_replace( ':', '~', $friendlyName ); - - return $friendlyName; - } - - /** - * Get a relative directory for putting a title into - */ - function getHashedDirectory( &$title ) { - if ( '' != $title->getInterwiki() ) { - $pdbk = $title->getDBkey(); - } else { - $pdbk = $title->getPrefixedDBkey(); - } - - # Find the first colon if there is one, use characters after it - $p = strpos( $pdbk, ':' ); - if ( $p !== false ) { - $dbk = substr( $pdbk, $p + 1 ); - $dbk = substr( $dbk, strspn( $dbk, '_' ) ); - } else { - $dbk = $pdbk; - } - - # Split into characters - $m = array(); - preg_match_all( '/./us', $dbk, $m ); - - $chars = $m[0]; - $length = count( $chars ); - $dir = ''; - - for ( $i = 0; $i < $this->depth; $i++ ) { - if ( $i ) { - $dir .= '/'; - } - if ( $i >= $length ) { - $dir .= '_'; - } else { - $c = $chars[$i]; - if ( ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c ) ) { - if ( function_exists( 'mb_strtolower' ) ) { - $dir .= mb_strtolower( $c ); - } else { - $dir .= strtolower( $c ); - } - } else { - $dir .= sprintf( "%02X", ord( $c ) ); - } - } - } - return $dir; - } - - /** - * Calculate the start end end of a job based on the current slice - * @param integer $start - * @param integer $end - * @return array of integers - */ - function sliceRange( $start, $end ) { - $count = $end - $start + 1; - $each = $count / $this->sliceDenominator; - $sliceStart = $start + intval( $each * ( $this->sliceNumerator - 1 ) ); - if ( $this->sliceNumerator == $this->sliceDenominator ) { - $sliceEnd = $end; - } else { - $sliceEnd = $start + intval( $each * $this->sliceNumerator ) - 1; - } - return array( $sliceStart, $sliceEnd ); - } - - /** - * Adjust a start point so that it belongs to the current slice, where slices are defined by integer modulo - * @param integer $start - * @param integer $base The true start of the range; the minimum start - */ - function modSliceStart( $start, $base = 1 ) { - return $start - ( $start % $this->sliceDenominator ) + $this->sliceNumerator - 1 + $base; - } - - /** - * Determine whether a string belongs to the current slice, based on hash - */ - function sliceFilter( $s ) { - return crc32( $s ) % $this->sliceDenominator == $this->sliceNumerator - 1; - } - - /** - * No site notice - */ - function onSiteNoticeBefore( &$text ) { - $text = ''; - return false; - } - function onSiteNoticeAfter( &$text ) { - $text = ''; - return false; - } - - function getMaxPageID() { - if ( $this->maxPageID === false ) { - $dbr = wfGetDB( DB_SLAVE ); - $this->maxPageID = $dbr->selectField( 'page', 'max(page_id)', false, __METHOD__ ); - } - return $this->maxPageID; - } - - function profile() { - global $wgProfiler; - - if ( !$this->udpProfile ) { - return; - } - if ( !$this->udpProfileInit ) { - $this->udpProfileInit = true; - } elseif ( $this->udpProfileCounter == 1 % $this->udpProfile ) { - $wgProfiler->getFunctionReport(); - $wgProfiler = new DumpHTML_ProfilerStub; - } - if ( $this->udpProfileCounter == 0 ) { - $wgProfiler = new ProfilerSimpleUDP; - $wgProfiler->setProfileID( 'dumpHTML' ); - } - $this->udpProfileCounter = ( $this->udpProfileCounter + 1 ) % $this->udpProfile; - } -} - -class DumpHTML_ProfilerStub { - function profileIn() {} - function profileOut() {} - function getOutput() {} - function close() {} - function getFunctionReport() {} -} - -/** XML parser callback */ -function wfDumpStartTagHandler( $parser, $name, $attribs ) { - global $wgDumpImages; - - if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) { - $wgDumpImages[$attribs['SRC']] = true; - } -} - -/** XML parser callback */ -function wfDumpEndTagHandler( $parser, $name ) {} - -# vim: syn=php -?> diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index 87401dce4b..bd94958eb6 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -1,158 +1,7 @@ - destination directory - -s start ID - -e end ID - -k skin to use (defaults to htmldump) - --no-overwrite skip existing HTML files - --checkpoint use a checkpoint file to allow restarting of interrupted dumps - --slice split the job into m segments and do the n'th one - --images only do image description pages - --shared-desc only do shared (commons) image description pages - --no-shared-desc don't do shared image description pages - --categories only do category pages - --redirects only do redirects - --special only do miscellaneous stuff - --force-copy copy commons instead of symlink, needed for Wikimedia - --interlang allow interlanguage links - --image-snapshot copy all images used to the destination directory - --compress generate compressed version of the html pages - --udp-profile profile 1/N rendering operations using ProfilerSimpleUDP - -ENDS; - -$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice', 'udp-profile' ); -$options = array( 'help' ); -$profiling = false; - -if ( $profiling ) { - define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' ); - function wfSetupDump() { - global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate; - $wgProfiling = true; - $wgProfileToDatabase = false; - $wgProfileSampleRate = 1; - } -} - -if ( in_array( '--udp-profile', $argv ) ) { - define( 'MW_FORCE_PROFILE', 1 ); -} - -require_once( "commandLine.inc" ); -require_once( "dumpHTML.inc" ); - -error_reporting( E_ALL & (~E_NOTICE) ); - -if( isset( $options['help'] ) ) { - echo $usage; - exit; -} - -if ( !empty( $options['s'] ) ) { - $start = $options['s']; -} else { - $start = 1; -} - -if ( !empty( $options['e'] ) ) { - $end = $options['e']; -} else { - $dbr = wfGetDB( DB_SLAVE ); - $end = $dbr->selectField( 'page', 'max(page_id)', false ); -} - -if ( !empty( $options['d'] ) ) { - $dest = $options['d']; -} else { - $dest = "$IP/static"; -} - -$skin = isset( $options['k'] ) ? $options['k'] : 'htmldump'; - -if ( $options['slice'] ) { - $bits = explode( '/', $options['slice'] ); - if ( count( $bits ) != 2 || $bits[0] < 1 || $bits[0] > $bits[1] ) { - print "Invalid slice specification"; - exit; - } - $sliceNumerator = $bits[0]; - $sliceDenominator = $bits[1]; -} else { - $sliceNumerator = $sliceDenominator = 1; -} - -$wgHTMLDump = new DumpHTML( array( - 'dest' => $dest, - 'forceCopy' => $options['force-copy'], - 'alternateScriptPath' => $options['interlang'], - 'interwiki' => $options['interlang'], - 'skin' => $skin, - 'makeSnapshot' => $options['image-snapshot'], - 'checkpointFile' => $options['checkpoint'], - 'startID' => $start, - 'endID' => $end, - 'sliceNumerator' => $sliceNumerator, - 'sliceDenominator' => $sliceDenominator, - 'noOverwrite' => $options['no-overwrite'], - 'compress' => $options['compress'], - 'noSharedDesc' => $options['no-shared-desc'], - 'udpProfile' => $options['udp-profile'], - 'debug' => $options['debug'], -)); - - -if ( $options['special'] ) { - $wgHTMLDump->doSpecials(); -} elseif ( $options['images'] ) { - $wgHTMLDump->doImageDescriptions(); -} elseif ( $options['categories'] ) { - $wgHTMLDump->doCategories(); -} elseif ( $options['redirects'] ) { - $wgHTMLDump->doRedirects(); -} elseif ( $options['shared-desc'] ) { - $wgHTMLDump->doSharedImageDescriptions(); -} else { - print "Creating static HTML dump in directory $dest. \n"; - $dbr = wfGetDB( DB_SLAVE ); - $server = $dbr->getProperty( 'mServer' ); - print "Using database {$server}\n"; - - if ( !isset( $options['e'] ) ) { - $wgHTMLDump->doEverything(); - } else { - $wgHTMLDump->doArticles(); - } -} - -if ( isset( $options['debug'] ) ) { - #print_r($GLOBALS); - # Workaround for bug #36957 - $globals = array_keys( $GLOBALS ); - #sort( $globals ); - $sizes = array(); - foreach ( $globals as $name ) { - $sizes[$name] = strlen( serialize( $GLOBALS[$name] ) ); - } - arsort($sizes); - $sizes = array_slice( $sizes, 0, 20 ); - foreach ( $sizes as $name => $size ) { - printf( "%9d %s\n", $size, $name ); - } -} - -if ( $profiling ) { - echo $wgProfiler->getOutput(); -} +dumpHTML has moved to the DumpHTML extension. +WebDAV/SVN: +http://svn.wikimedia.org/svnroot/mediawiki/trunk/extensions/DumpHTML/ +Web: +http://svn.wikimedia.org/viewvc/mediawiki/trunk/extensions/DumpHTML/ diff --git a/skins/disabled/HTMLDump.php b/skins/disabled/HTMLDump.php deleted file mode 100644 index d5e2300efc..0000000000 --- a/skins/disabled/HTMLDump.php +++ /dev/null @@ -1,232 +0,0 @@ -template = 'HTMLDumpTemplate'; - } - - function buildSidebar() { - $sections = parent::buildSidebar(); - $badMessages = array( 'recentchanges-url', 'randompage-url' ); - $badUrls = array(); - foreach ( $badMessages as $msg ) { - $badUrls[] = self::makeInternalOrExternalUrl( wfMsgForContent( $msg ) ); - } - - foreach ( $sections as $heading => $section ) { - foreach ( $section as $index => $link ) { - if ( in_array( $link['href'], $badUrls ) ) { - unset( $sections[$heading][$index] ); - } - } - } - return $sections; - } - - function buildContentActionUrls() { - global $wgHTMLDump; - - $content_actions = array(); - $nskey = $this->getNameSpaceKey(); - $content_actions[$nskey] = $this->tabAction( - $this->mTitle->getSubjectPage(), - $nskey, - !$this->mTitle->isTalkPage() ); - - $content_actions['talk'] = $this->tabAction( - $this->mTitle->getTalkPage(), - 'talk', - $this->mTitle->isTalkPage(), - '', - true); - - if ( isset( $wgHTMLDump ) ) { - $content_actions['current'] = array( - 'text' => wfMsg( 'currentrev' ), - 'href' => str_replace( '$1', wfUrlencode( $this->mTitle->getPrefixedDBkey() ), - $wgHTMLDump->oldArticlePath ), - 'class' => false - ); - } - return $content_actions; - } - - function makeBrokenLinkObj( &$nt, $text = '', $query = '', $trail = '', $prefix = '' ) { - if ( !isset( $nt ) ) { - return "{$prefix}{$text}{$trail}"; - } - - if ( $nt->getNamespace() == NS_CATEGORY ) { - # Determine if the category has any articles in it - $dbr = wfGetDB( DB_SLAVE ); - $hasMembers = $dbr->selectField( 'categorylinks', '1', - array( 'cl_to' => $nt->getDBkey() ), __METHOD__ ); - if ( $hasMembers ) { - return $this->makeKnownLinkObj( $nt, $text, $query, $trail, $prefix ); - } - } - - if ( $text == '' ) { - $text = $nt->getPrefixedText(); - } - return $prefix . $text . $trail; - } -} - -/** - * @todo document - * @addtogroup Skins - */ -class HTMLDumpTemplate extends QuickTemplate { - /** - * Template filter callback for MonoBook skin. - * Takes an associative array of data set from a SkinTemplate-based - * class, and a wrapper for MediaWiki's localization database, and - * outputs a formatted page. - * - * @private - */ - function execute() { - wfSuppressWarnings(); -?> - - - - html('headlinks') ?> - <?php $this->text('pagetitle') ?> - - - - - - - - - - - data['jsvarurl' ]) { ?> - data['pagecss' ]) { ?> - data['usercss' ]) { ?> - data['userjs' ]) { ?> - data['userjsprev']) { ?> - - data['nsclass' ]) { ?>class="text('nsclass') ?>"> -
-
-
- - data['sitenotice']) { ?>
html('sitenotice') ?>
-

data['displaytitle']!=""?$this->html('title'):$this->text('title') ?>

-
-

msg('tagline') ?>

-
html('subtitle') ?>
- data['undelete']) { ?>
html('undelete') ?>
- data['newtalk'] ) { ?>
html('newtalk') ?>
- - html('bodytext') ?> - data['catlinks']) { ?> - -
-
-
-
-
-
-
Views
-
    - data['content_actions'] as $key => $action) { - ?>
  • class="" - >
  • -
-
- - - data['sidebar'] as $bar => $cont) { ?> -
-
-
-
    - $val) { ?> -
  • - -
-
-
- - - data['language_urls'] ) { ?>
-
msg('otherlanguages') ?>
-
-
    - data['language_urls'] as $langlink) { ?> -
  • - -
  • - -
-
-
- -
-
- -
- - - diff --git a/skins/htmldump/lookup.js b/skins/htmldump/lookup.js deleted file mode 100644 index 5fd8d019a1..0000000000 --- a/skins/htmldump/lookup.js +++ /dev/null @@ -1,91 +0,0 @@ -/** - * "Go" function for static HTML dump - */ -function goToStatic(depth) { - var url = getStaticURL(document.getElementById("searchInput").value, depth); - if (url != "") { - location = url; - } else { - alert("Invalid title"); - } -} - -/** - * Determine relative path for a given non-canonical title - */ -function getStaticURL(text, depth) { - var pdbk = getPDBK(text); - if (pdbk == "") { - return ""; - } else { - var i; - var path = getHashedDirectory(pdbk, depth) + "/" + getFriendlyName(pdbk) + ".html"; - if (!/(index\.html|\/)$/.exec(location)) { - for (i = 0; i < depth; i++) { - path = "../" + path; - } - } - return path; - } -} - -function getPDBK(text) { - // Spaces to underscores - text = text.replace(/ /g, "_"); - - // Trim leading and trailing space - text = text.replace(/^_+/g, ""); - text = text.replace(/_+$/g, ""); - - // Capitalise first letter - return ucfirst(text); -} - -function getHashedDirectory(pdbk, depth) { - // Find the first colon if there is one, use characters after it - var dbk = pdbk.replace(/^[^:]*:_*(.*)$/, "$1"); - var i, c, dir = ""; - - for (i=0; i < depth; i++) { - if (i) { - dir += "/"; - } - if (i >= dbk.length) { - dir += "_"; - } else { - c = dbk.charAt(i); - cc = dbk.charCodeAt(i); - - if (cc >= 128 || /[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/.exec(c)) { - dir += c.toLowerCase(); - } else { - dir += binl2hex([cc]).substr(0,2).toUpperCase(); - } - } - } - return dir; -} - -function ucfirst(s) { - return s.charAt(0).toUpperCase() + s.substring(1, s.length); -} - -function getFriendlyName(name) { - // Replace illegal characters for Windows paths with underscores - var friendlyName = name.replace(/[\/\\*?"<>|~]/g, "_"); - - // Work out lower case form. We assume we're on a system with case-insensitive - // filenames, so unless the case is of a special form, we have to disambiguate - var lowerCase = ucfirst(name.toLowerCase()); - - // Make it mostly unique - if (lowerCase != friendlyName) { - friendlyName += "_" + hex_md5(_to_utf8(name)).substring(0, 4); - } - // Handle colon specially by replacing it with tilde - // Thus we reduce the number of paths with hashes appended - friendlyName = friendlyName.replace(":", "~"); - - return friendlyName; -} - diff --git a/skins/htmldump/main.css b/skins/htmldump/main.css deleted file mode 100644 index d1b4a92b11..0000000000 --- a/skins/htmldump/main.css +++ /dev/null @@ -1,9 +0,0 @@ -@import "../monobook/main.css"; - -#footer li { - display: block; -} -head:first-child + body #footer li { white-space: normal; } -.usermessage { display: none; } -.editsection { display: none; } - diff --git a/skins/htmldump/md5.js b/skins/htmldump/md5.js deleted file mode 100644 index 46d2aab7d1..0000000000 --- a/skins/htmldump/md5.js +++ /dev/null @@ -1,256 +0,0 @@ -/* - * A JavaScript implementation of the RSA Data Security, Inc. MD5 Message - * Digest Algorithm, as defined in RFC 1321. - * Version 2.1 Copyright (C) Paul Johnston 1999 - 2002. - * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet - * Distributed under the BSD License - * See http://pajhome.org.uk/crypt/md5 for more info. - */ - -/* - * Configurable variables. You may need to tweak these to be compatible with - * the server-side, but the defaults work in most cases. - */ -var hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */ -var b64pad = ""; /* base-64 pad character. "=" for strict RFC compliance */ -var chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */ - -/* - * These are the functions you'll usually want to call - * They take string arguments and return either hex or base-64 encoded strings - */ -function hex_md5(s){ return binl2hex(core_md5(str2binl(s), s.length * chrsz));} -function b64_md5(s){ return binl2b64(core_md5(str2binl(s), s.length * chrsz));} -function str_md5(s){ return binl2str(core_md5(str2binl(s), s.length * chrsz));} -function hex_hmac_md5(key, data) { return binl2hex(core_hmac_md5(key, data)); } -function b64_hmac_md5(key, data) { return binl2b64(core_hmac_md5(key, data)); } -function str_hmac_md5(key, data) { return binl2str(core_hmac_md5(key, data)); } - -/* - * Perform a simple self-test to see if the VM is working - */ -function md5_vm_test() -{ - return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72"; -} - -/* - * Calculate the MD5 of an array of little-endian words, and a bit length - */ -function core_md5(x, len) -{ - /* append padding */ - x[len >> 5] |= 0x80 << ((len) % 32); - x[(((len + 64) >>> 9) << 4) + 14] = len; - - var a = 1732584193; - var b = -271733879; - var c = -1732584194; - var d = 271733878; - - for(var i = 0; i < x.length; i += 16) - { - var olda = a; - var oldb = b; - var oldc = c; - var oldd = d; - - a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936); - d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586); - c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819); - b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330); - a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897); - d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426); - c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341); - b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983); - a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416); - d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417); - c = md5_ff(c, d, a, b, x[i+10], 17, -42063); - b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162); - a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682); - d = md5_ff(d, a, b, c, x[i+13], 12, -40341101); - c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290); - b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329); - - a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510); - d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632); - c = md5_gg(c, d, a, b, x[i+11], 14, 643717713); - b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302); - a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691); - d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083); - c = md5_gg(c, d, a, b, x[i+15], 14, -660478335); - b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848); - a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438); - d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690); - c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961); - b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501); - a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467); - d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784); - c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473); - b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734); - - a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558); - d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463); - c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562); - b = md5_hh(b, c, d, a, x[i+14], 23, -35309556); - a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060); - d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353); - c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632); - b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640); - a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174); - d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222); - c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979); - b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189); - a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487); - d = md5_hh(d, a, b, c, x[i+12], 11, -421815835); - c = md5_hh(c, d, a, b, x[i+15], 16, 530742520); - b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651); - - a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844); - d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415); - c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905); - b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055); - a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571); - d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606); - c = md5_ii(c, d, a, b, x[i+10], 15, -1051523); - b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799); - a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359); - d = md5_ii(d, a, b, c, x[i+15], 10, -30611744); - c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380); - b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649); - a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070); - d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379); - c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259); - b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551); - - a = safe_add(a, olda); - b = safe_add(b, oldb); - c = safe_add(c, oldc); - d = safe_add(d, oldd); - } - return Array(a, b, c, d); - -} - -/* - * These functions implement the four basic operations the algorithm uses. - */ -function md5_cmn(q, a, b, x, s, t) -{ - return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s),b); -} -function md5_ff(a, b, c, d, x, s, t) -{ - return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t); -} -function md5_gg(a, b, c, d, x, s, t) -{ - return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t); -} -function md5_hh(a, b, c, d, x, s, t) -{ - return md5_cmn(b ^ c ^ d, a, b, x, s, t); -} -function md5_ii(a, b, c, d, x, s, t) -{ - return md5_cmn(c ^ (b | (~d)), a, b, x, s, t); -} - -/* - * Calculate the HMAC-MD5, of a key and some data - */ -function core_hmac_md5(key, data) -{ - var bkey = str2binl(key); - if(bkey.length > 16) bkey = core_md5(bkey, key.length * chrsz); - - var ipad = Array(16), opad = Array(16); - for(var i = 0; i < 16; i++) - { - ipad[i] = bkey[i] ^ 0x36363636; - opad[i] = bkey[i] ^ 0x5C5C5C5C; - } - - var hash = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz); - return core_md5(opad.concat(hash), 512 + 128); -} - -/* - * Add integers, wrapping at 2^32. This uses 16-bit operations internally - * to work around bugs in some JS interpreters. - */ -function safe_add(x, y) -{ - var lsw = (x & 0xFFFF) + (y & 0xFFFF); - var msw = (x >> 16) + (y >> 16) + (lsw >> 16); - return (msw << 16) | (lsw & 0xFFFF); -} - -/* - * Bitwise rotate a 32-bit number to the left. - */ -function bit_rol(num, cnt) -{ - return (num << cnt) | (num >>> (32 - cnt)); -} - -/* - * Convert a string to an array of little-endian words - * If chrsz is ASCII, characters >255 have their hi-byte silently ignored. - */ -function str2binl(str) -{ - var bin = Array(); - var mask = (1 << chrsz) - 1; - for(var i = 0; i < str.length * chrsz; i += chrsz) - bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (i%32); - return bin; -} - -/* - * Convert an array of little-endian words to a string - */ -function binl2str(bin) -{ - var str = ""; - var mask = (1 << chrsz) - 1; - for(var i = 0; i < bin.length * 32; i += chrsz) - str += String.fromCharCode((bin[i>>5] >>> (i % 32)) & mask); - return str; -} - -/* - * Convert an array of little-endian words to a hex string. - */ -function binl2hex(binarray) -{ - var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef"; - var str = ""; - for(var i = 0; i < binarray.length * 4; i++) - { - str += hex_tab.charAt((binarray[i>>2] >> ((i%4)*8+4)) & 0xF) + - hex_tab.charAt((binarray[i>>2] >> ((i%4)*8 )) & 0xF); - } - return str; -} - -/* - * Convert an array of little-endian words to a base-64 string - */ -function binl2b64(binarray) -{ - var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - var str = ""; - for(var i = 0; i < binarray.length * 4; i += 3) - { - var triplet = (((binarray[i >> 2] >> 8 * ( i %4)) & 0xFF) << 16) - | (((binarray[i+1 >> 2] >> 8 * ((i+1)%4)) & 0xFF) << 8 ) - | ((binarray[i+2 >> 2] >> 8 * ((i+2)%4)) & 0xFF); - for(var j = 0; j < 4; j++) - { - if(i * 8 + j * 6 > binarray.length * 32) str += b64pad; - else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F); - } - } - return str; -} diff --git a/skins/htmldump/utf8.js b/skins/htmldump/utf8.js deleted file mode 100644 index ea3b890c64..0000000000 --- a/skins/htmldump/utf8.js +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Obtained from http://homepage3.nifty.com/aokura/jscript/index.html - * The webpage says, among other things: - * * ソースコードの全てあるいは一部を使用したことにより生じた損害に関しては一切責任を負いません。 - * * ソースコードの使用、配布に制限はありません。ご自由にお使いください。 - * * 動作チェックが不充分な場合もありますので、注意してください。 - * - * Which, loosely translated, means: - * * The author takes no responsibility for damage which occurs due to the use of this code. - * * There is no restriction on the use and distribution of the source code. Please use freely. - * * Please be careful, testing may have been insufficient. - */ - - -/********************************************************************** - * - * Unicode ⇔ UTF-8 - * - * Copyright (c) 2005 AOK - * - **********************************************************************/ - -function _to_utf8(s) { - var c, d = ""; - for (var i = 0; i < s.length; i++) { - c = s.charCodeAt(i); - if (c <= 0x7f) { - d += s.charAt(i); - } else if (c >= 0x80 && c <= 0x7ff) { - d += String.fromCharCode(((c >> 6) & 0x1f) | 0xc0); - d += String.fromCharCode((c & 0x3f) | 0x80); - } else { - d += String.fromCharCode((c >> 12) | 0xe0); - d += String.fromCharCode(((c >> 6) & 0x3f) | 0x80); - d += String.fromCharCode((c & 0x3f) | 0x80); - } - } - return d; -} - -function _from_utf8(s) { - var c, d = "", flag = 0, tmp; - for (var i = 0; i < s.length; i++) { - c = s.charCodeAt(i); - if (flag == 0) { - if ((c & 0xe0) == 0xe0) { - flag = 2; - tmp = (c & 0x0f) << 12; - } else if ((c & 0xc0) == 0xc0) { - flag = 1; - tmp = (c & 0x1f) << 6; - } else if ((c & 0x80) == 0) { - d += s.charAt(i); - } else { - flag = 0; - } - } else if (flag == 1) { - flag = 0; - d += String.fromCharCode(tmp | (c & 0x3f)); - } else if (flag == 2) { - flag = 3; - tmp |= (c & 0x3f) << 6; - } else if (flag == 3) { - flag = 0; - d += String.fromCharCode(tmp | (c & 0x3f)); - } else { - flag = 0; - } - } - return d; -} - -- 2.20.1