From 22e60a4772bcdc4765ecb00b254644d26b8b16e9 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Sat, 22 Oct 2005 16:25:05 +0000 Subject: [PATCH] dumpHTML improvements: * Removed dumpHTML-related clutter from Title.php, using hook functions instead * Prominent link to the online copy * Better directory distribution * action=raw support * Fixed unicode case folding (hopefully) * Added cool JavaScript "go" button --- includes/RawPage.php | 44 ++++--- includes/Title.php | 167 +++++++---------------- maintenance/dumpHTML.inc | 194 ++++++++++++++++++++++++++- maintenance/dumpHTML.php | 18 +-- skins/disabled/HTMLDump.php | 46 ++++--- skins/htmldump/lookup.js | 91 +++++++++++++ skins/htmldump/md5.js | 256 ++++++++++++++++++++++++++++++++++++ skins/htmldump/utf8.js | 72 ++++++++++ 8 files changed, 722 insertions(+), 166 deletions(-) create mode 100644 skins/htmldump/lookup.js create mode 100644 skins/htmldump/md5.js create mode 100644 skins/htmldump/utf8.js diff --git a/includes/RawPage.php b/includes/RawPage.php index 147392293d..67b464ae42 100644 --- a/includes/RawPage.php +++ b/includes/RawPage.php @@ -18,19 +18,27 @@ require_once( 'Revision.php' ); * @package MediaWiki */ class RawPage { + var $mArticle, $mTitle, $mRequest; - function RawPage( $article ) { + function RawPage( &$article, $request = false ) { global $wgRequest, $wgInputEncoding, $wgSquidMaxage, $wgJsMimeType; + $allowedCTypes = array('text/x-wiki', $wgJsMimeType, 'text/css', 'application/x-zope-edit'); $this->mArticle =& $article; $this->mTitle =& $article->mTitle; + + if ( $request === false ) { + $this->mRequest =& $wgRequest; + } else { + $this->mRequest = $request; + } - $ctype = $wgRequest->getText( 'ctype' ); - $smaxage = $wgRequest->getInt( 'smaxage', $wgSquidMaxage ); - $maxage = $wgRequest->getInt( 'maxage', $wgSquidMaxage ); - $this->mOldId = $wgRequest->getInt( 'oldid' ); + $ctype = $this->mRequest->getText( 'ctype' ); + $smaxage = $this->mRequest->getInt( 'smaxage', $wgSquidMaxage ); + $maxage 
= $this->mRequest->getInt( 'maxage', $wgSquidMaxage ); + $this->mOldId = $this->mRequest->getInt( 'oldid' ); # special case for 'generated' raw things: user css/js - $gen = $wgRequest->getText( 'gen' ); + $gen = $this->mRequest->getText( 'gen' ); if($gen == 'css') { $this->mGen = $gen; if($smaxage == '') $smaxage = $wgSquidMaxage; @@ -53,7 +61,7 @@ class RawPage { } function view() { - global $wgUser, $wgOut, $wgScript; + global $wgOut, $wgScript; if( isset( $_SERVER['SCRIPT_URL'] ) ) { # Normally we use PHP_SELF to get the URL to the script @@ -91,24 +99,26 @@ class RawPage { header( "Content-type: ".$this->mContentType.'; charset='.$this->mCharset ); # allow the client to cache this for 24 hours header( 'Cache-Control: s-maxage='.$this->mSmaxage.', max-age='.$this->mMaxage ); + echo $this->getRawText(); + $wgOut->disable(); + } + + function getRawText() { + global $wgUser, $wgOut; if($this->mGen) { $sk = $wgUser->getSkin(); $sk->initPage($wgOut); if($this->mGen == 'css') { - echo $sk->getUserStylesheet(); + return $sk->getUserStylesheet(); } else if($this->mGen == 'js') { - echo $sk->getUserJs(); + return $sk->getUserJs(); } } else { - echo $this->getrawtext(); + return $this->getArticleText(); } - $wgOut->disable(); - } - - function getrawtext () { - global $wgInputEncoding, $wgContLang; - $fname = 'RawPage::getrawtext'; - + } + + function getArticleText () { if( $this->mTitle ) { # Special case for MediaWiki: messages; we can hit the message cache. if( $this->mTitle->getNamespace() == NS_MEDIAWIKI) { diff --git a/includes/Title.php b/includes/Title.php index 69e18a21d2..a6ae3cb18b 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -659,108 +659,33 @@ class Title { * @access public */ function getFullURL( $query = '' ) { - global $wgContLang, $wgServer, $wgScript, $wgMakeDumpLinks, $wgArticlePath; + global $wgContLang, $wgServer; if ( '' == $this->mInterwiki ) { - return $wgServer . 
$this->getLocalUrl( $query ); - } elseif ( $wgMakeDumpLinks && $wgContLang->getLanguageName( $this->mInterwiki ) ) { - if ( $this->getDBkey() == '' ) { - $url = str_replace( '$1', "../{$this->mInterwiki}/index.html", $wgArticlePath ); - } else { - $url = str_replace( '$1', "../{$this->mInterwiki}/" . $this->getHashedFilename() , - $wgArticlePath ); - } - return $url; + $url = $wgServer . $this->getLocalUrl( $query ); } else { $baseUrl = $this->getInterwikiLink( $this->mInterwiki ); - } - $namespace = $wgContLang->getNsText( $this->mNamespace ); - if ( '' != $namespace ) { - # Can this actually happen? Interwikis shouldn't be parsed. - $namespace .= ':'; - } - $url = str_replace( '$1', $namespace . $this->mUrlform, $baseUrl ); - if( $query != '' ) { - if( false === strpos( $url, '?' ) ) { - $url .= '?'; - } else { - $url .= '&'; + $namespace = $wgContLang->getNsText( $this->mNamespace ); + if ( '' != $namespace ) { + # Can this actually happen? Interwikis shouldn't be parsed. + $namespace .= ':'; } - $url .= $query; - } - if ( '' != $this->mFragment ) { - $url .= '#' . $this->mFragment; - } - return $url; - } - - /** - * Get a relative directory for putting an HTML version of this article into - */ - function getHashedDirectory() { - global $wgMakeDumpLinks, $wgInputEncoding; - if ( '' != $this->mInterwiki ) { - $pdbk = $this->mDbkeyform; - } else { - $pdbk = $this->getPrefixedDBkey(); - } - - # Split into characters - if ( $wgInputEncoding == 'UTF-8' ) { - preg_match_all( '/./us', $pdbk, $m ); - } else { - preg_match_all( '/./s', $pdbk, $m ); - } - $chars = $m[0]; - $length = count( $chars ); - $dir = ''; - - for ( $i = 0; $i < $wgMakeDumpLinks; $i++ ) { - $c = $chars[$i]; - if ( $i ) { - $dir .= '/'; + $url = str_replace( '$1', $namespace . $this->mUrlform, $baseUrl ); + if( $query != '' ) { + if( false === strpos( $url, '?' 
) ) { + $url .= '?'; + } else { + $url .= '&'; + } + $url .= $query; } - if ( $i >= $length ) { - $dir .= '_'; - } elseif ( ord( $c ) >= 128 || ctype_alnum( $c ) ) { - $dir .= strtolower( $c ); - } else { - $dir .= sprintf( "%02X", ord( $c ) ); + if ( '' != $this->mFragment ) { + $url .= '#' . $this->mFragment; } } - return $dir; - } - - function getHashedFilename() { - if ( '' != $this->mInterwiki ) { - $dbkey = $this->getDBkey(); - } else { - $dbkey = $this->getPrefixedDBkey(); - } - - $mainPage = Title::newMainPage(); - if ( $mainPage->getPrefixedDBkey() == $dbkey ) { - return 'index.html'; - } - - $dir = $this->getHashedDirectory(); - - # Replace illegal charcters for Windows paths with underscores - $friendlyName = strtr( $dbkey, '/\\*?"<>|~', '_________' ); - - # Work out lower case form. We assume we're on a system with case-insensitive - # filenames, so unless the case is of a special form, we have to disambiguate - $lowerCase = ucfirst( strtolower( $dbkey ) ); - - # Make it mostly unique - if ( $lowerCase != $friendlyName ) { - $friendlyName .= '_' . 
substr(md5( $dbkey ), 0, 4); - } - # Handle colon specially by replacing it with tilde - # Thus we reduce the number of paths with hashes appended - $friendlyName = str_replace( ':', '~', $friendlyName ); - return "$dir/$friendlyName.html"; + wfRunHooks( 'GetFullURL', array( &$this, &$url, $query ) ); + return $url; } /** @@ -772,40 +697,42 @@ class Title { * @access public */ function getLocalURL( $query = '' ) { - global $wgLang, $wgArticlePath, $wgScript, $wgMakeDumpLinks, $wgServer, $action; + global $wgArticlePath, $wgScript, $wgServer, $wgRequest; if ( $this->isExternal() ) { - return $this->getFullURL(); - } - - $dbkey = wfUrlencode( $this->getPrefixedDBkey() ); - if ( $wgMakeDumpLinks ) { - $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename() ), $wgArticlePath ); - } elseif ( $query == '' ) { - $url = str_replace( '$1', $dbkey, $wgArticlePath ); + $url = $this->getFullURL(); } else { - global $wgActionPaths; - if( !empty( $wgActionPaths ) && - preg_match( '/^(.*&|)action=([^&]*)(&(.*)|)$/', $query, $matches ) ) { - $action = urldecode( $matches[2] ); - if( isset( $wgActionPaths[$action] ) ) { - $query = $matches[1]; - if( isset( $matches[4] ) ) $query .= $matches[4]; - $url = str_replace( '$1', $dbkey, $wgActionPaths[$action] ); - if( $query != '' ) $url .= '?' . $query; - return $url; + $dbkey = wfUrlencode( $this->getPrefixedDBkey() ); + if ( $query == '' ) { + $url = str_replace( '$1', $dbkey, $wgArticlePath ); + } else { + global $wgActionPaths; + $url = false; + if( !empty( $wgActionPaths ) && + preg_match( '/^(.*&|)action=([^&]*)(&(.*)|)$/', $query, $matches ) ) + { + $action = urldecode( $matches[2] ); + if( isset( $wgActionPaths[$action] ) ) { + $query = $matches[1]; + if( isset( $matches[4] ) ) $query .= $matches[4]; + $url = str_replace( '$1', $dbkey, $wgActionPaths[$action] ); + if( $query != '' ) $url .= '?' . 
$query; + } + } + if ( $url === false ) { + if ( $query == '-' ) { + $query = ''; + } + $url = "{$wgScript}?title={$dbkey}&{$query}"; } } - if ( $query == '-' ) { - $query = ''; + + if ($wgRequest->getText('action') == 'render') { + $url = $wgServer . $url; } - $url = "{$wgScript}?title={$dbkey}&{$query}"; } - - if ($action == 'render') - return $wgServer . $url; - else - return $url; + wfRunHooks( 'GetLocalURL', array( &$this, &$url, $query ) ); + return $url; } /** diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc index 8697f0434a..1549e470df 100644 --- a/maintenance/dumpHTML.inc +++ b/maintenance/dumpHTML.inc @@ -8,6 +8,7 @@ define( 'REPORTING_INTERVAL', 10 ); require_once( 'includes/ImagePage.php' ); require_once( 'includes/CategoryPage.php' ); +require_once( 'includes/RawPage.php' ); class DumpHTML { # Destination directory @@ -32,6 +33,15 @@ class DumpHTML { # the destination var $alternateScriptPath = false; + # Original article path, for "current version" links + var $oldArticlePath = false; + + # Has setupGlobals been called? 
+ var $setupDone = false; + + # List of raw pages used in the current article + var $rawPages; + function DumpHTML( $settings ) { foreach ( $settings as $var => $value ) { $this->$var = $value; @@ -52,6 +62,9 @@ class DumpHTML { $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname ); } + $mainPageObj = Title::newMainPage(); + $mainPage = $mainPageObj->getPrefixedDBkey(); + for ($id = $start; $id <= $end; $id++) { wfWaitForSlaves( 20 ); @@ -64,7 +77,7 @@ class DumpHTML { $title = Title::newFromID( $id ); if ( $title ) { $ns = $title->getNamespace() ; - if ( $ns != NS_CATEGORY ) { + if ( $ns != NS_CATEGORY && $title->getPrefixedDBkey() != $mainPage ) { $this->doArticle( $title ); } } @@ -207,9 +220,15 @@ class DumpHTML { /** Write an article specified by title */ function doArticle( $title ) { + // Testing + if ( $title->getNamespace() == 8 ) { + return; + } + global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory; global $wgUploadDirectory; - + + $this->rawPages = array(); $text = $this->getArticleHTML( $title ); if ( $text === false ) { @@ -222,11 +241,34 @@ class DumpHTML { # Write to file $this->writeArticle( $title, $text ); + + # Do raw pages + wfMkdirParents( "{$this->dest}/raw", 0755 ); + foreach( $this->rawPages as $record ) { + list( $file, $title, $params ) = $record; + + $path = "{$this->dest}/raw/$file"; + if ( !file_exists( $path ) ) { + $article = new Article( $title ); + $request = new FauxRequest( $params ); + $rp = new RawPage( $article, $request ); + $text = $rp->getRawText(); + + print "Writing $file\n"; + $file = fopen( $path, 'w' ); + if ( !$file ) { + print("Can't open file $path for writing\n"); # report $path, the file fopen() was given + continue; + } + fwrite( $file, $text ); + fclose( $file ); + } + } } /** Write the given text to the file identified by the given title object */ function writeArticle( &$title, $text ) { - $filename = $title->getHashedFilename(); + $filename = $this->getHashedFilename( $title ); $fullName = "{$this->dest}/$filename"; $fullDir 
= dirname( $fullName ); @@ -247,10 +289,17 @@ class DumpHTML { global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath; global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; - global $wgSharedThumbnailScriptPath, $wgEnableParserCache; + global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer; + global $wgRightsUrl, $wgRightsText; static $oldLogo = NULL; + if ( !$this->setupDone ) { + $wgHooks['GetLocalURL'][] =& $this; + $wgHooks['GetFullURL'][] =& $this; + $this->oldArticlePath = $wgServer . $wgArticlePath; + } + if ( is_null( $depth ) ) { $wgMakeDumpLinks = $this->depth; } else { @@ -294,6 +343,10 @@ class DumpHTML { $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; $wgEnableParserCache = false; $wgMathPath = "$wgScriptPath/math"; + + if ( !empty( $wgRightsText ) ) { + $wgRightsUrl = "$wgScriptPath/COPYING.html"; + } $wgUser = new User; $wgUser->setOption( 'skin', 'htmldump' ); @@ -301,6 +354,7 @@ class DumpHTML { $this->sharedStaticPath = "$wgUploadDirectory/shared"; + $this->setupDone = true; } /** Reads the content of a title object, executes the skin and captures the result */ @@ -438,6 +492,138 @@ ENDTEXT; } } } + + function onGetFullURL( &$title, &$url, $query ) { + global $wgContLang, $wgArticlePath; + + $iw = $title->getInterwiki(); + if ( $title->isExternal() && $wgContLang->getLanguageName( $iw ) ) { + if ( $title->getDBkey() == '' ) { + $url = str_replace( '$1', "../$iw/index.html", $wgArticlePath ); + } else { + $url = str_replace( '$1', "../$iw/" . 
wfUrlencode( $this->getHashedFilename( $title ) ), + $wgArticlePath ); + } + return false; + } else { + return true; + } + } + + function onGetLocalURL( &$title, &$url, $query ) { + global $wgArticlePath; + + if ( $title->isExternal() ) { + # Default is fine for interwiki + return true; + } + + $url = false; + if ( $query != '' ) { + parse_str( $query, $params ); + if ( isset($params['action']) && $params['action'] == 'raw' ) { + if ( isset( $params['gen'] ) && ( $params['gen'] == 'css' || $params['gen'] == 'js' ) ) { # 'gen' may be missing from the query + $file = 'gen.' . $params['gen']; + } else { + $file = $this->getFriendlyName( $title->getPrefixedDBkey() ); + // Clean up Monobook.css etc. + if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) { + $file = $matches[1] . '.' . $matches[2]; + } + } + $this->rawPages[$file] = array( $file, $title, $params ); + $url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath ); + } + } + if ( $url === false ) { + $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); + } + return false; + } + + function getHashedFilename( &$title ) { + if ( '' != $title->mInterwiki ) { + $dbkey = $title->getDBkey(); + } else { + $dbkey = $title->getPrefixedDBkey(); + } + + $mainPage = Title::newMainPage(); + if ( $mainPage->getPrefixedDBkey() == $dbkey ) { + return 'index.html'; + } + + return $this->getHashedDirectory( $title ) . '/' . + $this->getFriendlyName( $dbkey ) . '.html'; + } + + function getFriendlyName( $name ) { + # Replace illegal characters for Windows paths with underscores + $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' ); + + # Work out lower case form. We assume we're on a system with case-insensitive + # filenames, so unless the case is of a special form, we have to disambiguate + $lowerCase = ucfirst( strtolower( $name ) ); + + # Make it mostly unique + if ( $lowerCase != $friendlyName ) { + $friendlyName .= '_' . 
substr(md5( $name ), 0, 4); + } + # Handle colon specially by replacing it with tilde + # Thus we reduce the number of paths with hashes appended + $friendlyName = str_replace( ':', '~', $friendlyName ); + + return $friendlyName; + } + + /** + * Get a relative directory for putting a title into + */ + function getHashedDirectory( &$title ) { + if ( '' != $title->getInterwiki() ) { + $pdbk = $title->getDBkey(); + } else { + $pdbk = $title->getPrefixedDBkey(); + } + + # Find the first colon if there is one, use characters after it + $p = strpos( $pdbk, ':' ); + if ( $p !== false ) { + $dbk = substr( $pdbk, $p + 1 ); + $dbk = substr( $dbk, strspn( $dbk, '_' ) ); + } else { + $dbk = $pdbk; + } + + # Split into characters + preg_match_all( '/./us', $dbk, $m ); + + $chars = $m[0]; + $length = count( $chars ); + $dir = ''; + + for ( $i = 0; $i < $this->depth; $i++ ) { + if ( $i ) { + $dir .= '/'; + } + if ( $i >= $length ) { + $dir .= '_'; + } else { + $c = $chars[$i]; + if ( ord( $c ) >= 128 || ctype_alnum( $c ) ) { + if ( function_exists( 'mb_strtolower' ) ) { + $dir .= mb_strtolower( $c ); + } else { + $dir .= strtolower( $c ); + } + } else { + $dir .= sprintf( "%02X", ord( $c ) ); + } + } + } + return $dir; + } + } /** XML parser callback */ diff --git a/maintenance/dumpHTML.php b/maintenance/dumpHTML.php index f8d7b3b61c..3ffdf814fc 100644 --- a/maintenance/dumpHTML.php +++ b/maintenance/dumpHTML.php @@ -60,7 +60,7 @@ if ( !empty( $options['d'] ) ) { $dest = 'static'; } -$d = new DumpHTML( array( +$wgHTMLDump = new DumpHTML( array( 'dest' => $dest, 'forceCopy' => $options['force-copy'], 'alternateScriptPath' => $options['interlang'], @@ -69,13 +69,13 @@ $d = new DumpHTML( array( if ( $options['special'] ) { - $d->doSpecials(); + $wgHTMLDump->doSpecials(); } elseif ( $options['images'] ) { - $d->doImageDescriptions(); + $wgHTMLDump->doImageDescriptions(); } elseif ( $options['categories'] ) { - $d->doCategories(); + $wgHTMLDump->doCategories(); } elseif ( 
$options['redirects'] ) { - $d->doRedirects(); + $wgHTMLDump->doRedirects(); } else { print("Creating static HTML dump in directory $dest. \n". "Starting from page_id $start of $end.\n"); @@ -83,11 +83,11 @@ if ( $options['special'] ) { $dbr =& wfGetDB( DB_SLAVE ); print "Using database {$dbr->mServer}\n"; - $d->doArticles( $start, $end ); + $wgHTMLDump->doArticles( $start, $end ); if ( !isset( $options['e'] ) ) { - $d->doImageDescriptions(); - $d->doCategories(); - $d->doSpecials(); + $wgHTMLDump->doImageDescriptions(); + $wgHTMLDump->doCategories(); + $wgHTMLDump->doSpecials(); } /* diff --git a/skins/disabled/HTMLDump.php b/skins/disabled/HTMLDump.php index 58970621c1..0a9ac24d97 100644 --- a/skins/disabled/HTMLDump.php +++ b/skins/disabled/HTMLDump.php @@ -42,6 +42,8 @@ class SkinHTMLDump extends SkinTemplate { } function buildContentActionUrls() { + global $wgHTMLDump; + $content_actions = array(); $nskey = $this->getNameSpaceKey(); $content_actions[$nskey] = $this->tabAction( @@ -55,6 +57,15 @@ class SkinHTMLDump extends SkinTemplate { $this->mTitle->isTalkPage(), '', true); + + if ( isset( $wgHTMLDump ) ) { + $content_actions['current'] = array( + 'text' => wfMsg( 'currentrev' ), + 'href' => str_replace( '$1', wfUrlencode( $this->mTitle->getPrefixedDBkey() ), + $wgHTMLDump->oldArticlePath ), + 'class' => false + ); + } return $content_actions; } @@ -89,20 +100,6 @@ class HTMLDumpTemplate extends QuickTemplate { * @access private */ function execute() { - $this->modifySetup(); - $this->reallyExecute(); - } - - - function modifySetup() { - /* - foreach ( $this->data['navigation_urls'] as $index => $link ) { - if ( $link['text'] == 'recentchanges' ) { - unset( $this->data['navigation_urls'][$index] ); - } elseif ( $link['text'] */ - } - - function reallyExecute() { wfSuppressWarnings(); ?> @@ -110,7 +107,7 @@ class HTMLDumpTemplate extends QuickTemplate { html('headlinks') ?> <?php $this->text('pagetitle') ?> - + @@ -119,6 +116,10 @@ class HTMLDumpTemplate 
extends QuickTemplate { data['jsvarurl' ]) { ?> + + + + data['pagecss' ]) { ?> data['nsclass' ]) { ?>class="text('nsclass') ?>"> @@ -171,6 +172,20 @@ class HTMLDumpTemplate extends QuickTemplate { + data['language_urls'] ) { ?>
msg('otherlanguages') ?>
@@ -201,7 +216,6 @@ class HTMLDumpTemplate extends QuickTemplate {
- html('reporttime') ?> = dbk.length) { + dir += "_"; + } else { + c = dbk.charAt(i); + cc = dbk.charCodeAt(i); + + if (cc >= 128 || /[a-zA-Z0-9]/.exec(c)) { + dir += c.toLowerCase(); + } else { + dir += (cc < 16 ? "0" : "") + cc.toString(16).toUpperCase(); // two uppercase hex digits, like sprintf("%02X") in dumpHTML.inc + } + } + } + return dir; +} + +function ucfirst(s) { + return s.charAt(0).toUpperCase() + s.substring(1, s.length); +} + +function getFriendlyName(name) { + // Replace illegal characters for Windows paths with underscores + var friendlyName = name.replace(/[\/\\*?"<>|~]/g, "_"); + + // Work out lower case form. We assume we're on a system with case-insensitive + // filenames, so unless the case is of a special form, we have to disambiguate + var lowerCase = ucfirst(name.toLowerCase()); + + // Make it mostly unique + if (lowerCase != friendlyName) { + friendlyName += "_" + hex_md5(name).substring(0, 4); + } + // Handle colon specially by replacing it with tilde + // Thus we reduce the number of paths with hashes appended + friendlyName = friendlyName.replace(/:/g, "~"); // every colon, like str_replace() in dumpHTML.inc + + return friendlyName; +} + diff --git a/skins/htmldump/md5.js b/skins/htmldump/md5.js new file mode 100644 index 0000000000..46d2aab7d1 --- /dev/null +++ b/skins/htmldump/md5.js @@ -0,0 +1,256 @@ +/* + * A JavaScript implementation of the RSA Data Security, Inc. MD5 Message + * Digest Algorithm, as defined in RFC 1321. + * Version 2.1 Copyright (C) Paul Johnston 1999 - 2002. + * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet + * Distributed under the BSD License + * See http://pajhome.org.uk/crypt/md5 for more info. + */ + +/* + * Configurable variables. You may need to tweak these to be compatible with + * the server-side, but the defaults work in most cases. + */ +var hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */ +var b64pad = ""; /* base-64 pad character. "=" for strict RFC compliance */ +var chrsz = 8; /* bits per input character. 
8 - ASCII; 16 - Unicode */ + +/* + * These are the functions you'll usually want to call + * They take string arguments and return either hex or base-64 encoded strings + */ +function hex_md5(s){ return binl2hex(core_md5(str2binl(s), s.length * chrsz));} +function b64_md5(s){ return binl2b64(core_md5(str2binl(s), s.length * chrsz));} +function str_md5(s){ return binl2str(core_md5(str2binl(s), s.length * chrsz));} +function hex_hmac_md5(key, data) { return binl2hex(core_hmac_md5(key, data)); } +function b64_hmac_md5(key, data) { return binl2b64(core_hmac_md5(key, data)); } +function str_hmac_md5(key, data) { return binl2str(core_hmac_md5(key, data)); } + +/* + * Perform a simple self-test to see if the VM is working + */ +function md5_vm_test() +{ + return hex_md5("abc") == "900150983cd24fb0d6963f7d28e17f72"; +} + +/* + * Calculate the MD5 of an array of little-endian words, and a bit length + */ +function core_md5(x, len) +{ + /* append padding */ + x[len >> 5] |= 0x80 << ((len) % 32); + x[(((len + 64) >>> 9) << 4) + 14] = len; + + var a = 1732584193; + var b = -271733879; + var c = -1732584194; + var d = 271733878; + + for(var i = 0; i < x.length; i += 16) + { + var olda = a; + var oldb = b; + var oldc = c; + var oldd = d; + + a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936); + d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586); + c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819); + b = md5_ff(b, c, d, a, x[i+ 3], 22, -1044525330); + a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897); + d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426); + c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341); + b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983); + a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416); + d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417); + c = md5_ff(c, d, a, b, x[i+10], 17, -42063); + b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162); + a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682); + d = md5_ff(d, a, b, c, x[i+13], 12, -40341101); + c = md5_ff(c, d, a, b, x[i+14], 17, 
-1502002290); + b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329); + + a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510); + d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632); + c = md5_gg(c, d, a, b, x[i+11], 14, 643717713); + b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302); + a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691); + d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083); + c = md5_gg(c, d, a, b, x[i+15], 14, -660478335); + b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848); + a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438); + d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690); + c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961); + b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501); + a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467); + d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784); + c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473); + b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734); + + a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558); + d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463); + c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562); + b = md5_hh(b, c, d, a, x[i+14], 23, -35309556); + a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060); + d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353); + c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632); + b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640); + a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174); + d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222); + c = md5_hh(c, d, a, b, x[i+ 3], 16, -722521979); + b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189); + a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487); + d = md5_hh(d, a, b, c, x[i+12], 11, -421815835); + c = md5_hh(c, d, a, b, x[i+15], 16, 530742520); + b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651); + + a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844); + d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415); + c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905); + b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055); + a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571); + d = md5_ii(d, a, b, c, x[i+ 3], 10, 
-1894986606); + c = md5_ii(c, d, a, b, x[i+10], 15, -1051523); + b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799); + a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359); + d = md5_ii(d, a, b, c, x[i+15], 10, -30611744); + c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380); + b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649); + a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070); + d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379); + c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259); + b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551); + + a = safe_add(a, olda); + b = safe_add(b, oldb); + c = safe_add(c, oldc); + d = safe_add(d, oldd); + } + return Array(a, b, c, d); + +} + +/* + * These functions implement the four basic operations the algorithm uses. + */ +function md5_cmn(q, a, b, x, s, t) +{ + return safe_add(bit_rol(safe_add(safe_add(a, q), safe_add(x, t)), s),b); +} +function md5_ff(a, b, c, d, x, s, t) +{ + return md5_cmn((b & c) | ((~b) & d), a, b, x, s, t); +} +function md5_gg(a, b, c, d, x, s, t) +{ + return md5_cmn((b & d) | (c & (~d)), a, b, x, s, t); +} +function md5_hh(a, b, c, d, x, s, t) +{ + return md5_cmn(b ^ c ^ d, a, b, x, s, t); +} +function md5_ii(a, b, c, d, x, s, t) +{ + return md5_cmn(c ^ (b | (~d)), a, b, x, s, t); +} + +/* + * Calculate the HMAC-MD5, of a key and some data + */ +function core_hmac_md5(key, data) +{ + var bkey = str2binl(key); + if(bkey.length > 16) bkey = core_md5(bkey, key.length * chrsz); + + var ipad = Array(16), opad = Array(16); + for(var i = 0; i < 16; i++) + { + ipad[i] = bkey[i] ^ 0x36363636; + opad[i] = bkey[i] ^ 0x5C5C5C5C; + } + + var hash = core_md5(ipad.concat(str2binl(data)), 512 + data.length * chrsz); + return core_md5(opad.concat(hash), 512 + 128); +} + +/* + * Add integers, wrapping at 2^32. This uses 16-bit operations internally + * to work around bugs in some JS interpreters. 
+ */ +function safe_add(x, y) +{ + var lsw = (x & 0xFFFF) + (y & 0xFFFF); + var msw = (x >> 16) + (y >> 16) + (lsw >> 16); + return (msw << 16) | (lsw & 0xFFFF); +} + +/* + * Bitwise rotate a 32-bit number to the left. + */ +function bit_rol(num, cnt) +{ + return (num << cnt) | (num >>> (32 - cnt)); +} + +/* + * Convert a string to an array of little-endian words + * If chrsz is ASCII, characters >255 have their hi-byte silently ignored. + */ +function str2binl(str) +{ + var bin = Array(); + var mask = (1 << chrsz) - 1; + for(var i = 0; i < str.length * chrsz; i += chrsz) + bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (i%32); + return bin; +} + +/* + * Convert an array of little-endian words to a string + */ +function binl2str(bin) +{ + var str = ""; + var mask = (1 << chrsz) - 1; + for(var i = 0; i < bin.length * 32; i += chrsz) + str += String.fromCharCode((bin[i>>5] >>> (i % 32)) & mask); + return str; +} + +/* + * Convert an array of little-endian words to a hex string. + */ +function binl2hex(binarray) +{ + var hex_tab = hexcase ? 
"0123456789ABCDEF" : "0123456789abcdef"; + var str = ""; + for(var i = 0; i < binarray.length * 4; i++) + { + str += hex_tab.charAt((binarray[i>>2] >> ((i%4)*8+4)) & 0xF) + + hex_tab.charAt((binarray[i>>2] >> ((i%4)*8 )) & 0xF); + } + return str; +} + +/* + * Convert an array of little-endian words to a base-64 string + */ +function binl2b64(binarray) +{ + var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + var str = ""; + for(var i = 0; i < binarray.length * 4; i += 3) + { + var triplet = (((binarray[i >> 2] >> 8 * ( i %4)) & 0xFF) << 16) + | (((binarray[i+1 >> 2] >> 8 * ((i+1)%4)) & 0xFF) << 8 ) + | ((binarray[i+2 >> 2] >> 8 * ((i+2)%4)) & 0xFF); + for(var j = 0; j < 4; j++) + { + if(i * 8 + j * 6 > binarray.length * 32) str += b64pad; + else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F); + } + } + return str; +} diff --git a/skins/htmldump/utf8.js b/skins/htmldump/utf8.js new file mode 100644 index 0000000000..ea3b890c64 --- /dev/null +++ b/skins/htmldump/utf8.js @@ -0,0 +1,72 @@ +/** + * Obtained from http://homepage3.nifty.com/aokura/jscript/index.html + * The webpage says, among other things: + * * ソースコードの全てあるいは一部を使用したことにより生じた損害に関しては一切責任を負いません。 + * * ソースコードの使用、配布に制限はありません。ご自由にお使いください。 + * * 動作チェックが不充分な場合もありますので、注意してください。 + * + * Which, loosely translated, means: + * * The author takes no responsibility for damage which occurs due to the use of this code. + * * There is no restriction on the use and distribution of the source code. Please use freely. + * * Please be careful, testing may have been insufficient. 
+ */ + + +/********************************************************************** + * + * Unicode ⇔ UTF-8 + * + * Copyright (c) 2005 AOK + * + **********************************************************************/ + +function _to_utf8(s) { + var c, d = ""; + for (var i = 0; i < s.length; i++) { + c = s.charCodeAt(i); + if (c <= 0x7f) { + d += s.charAt(i); + } else if (c >= 0x80 && c <= 0x7ff) { + d += String.fromCharCode(((c >> 6) & 0x1f) | 0xc0); + d += String.fromCharCode((c & 0x3f) | 0x80); + } else { + d += String.fromCharCode((c >> 12) | 0xe0); + d += String.fromCharCode(((c >> 6) & 0x3f) | 0x80); + d += String.fromCharCode((c & 0x3f) | 0x80); + } + } + return d; +} + +function _from_utf8(s) { + var c, d = "", flag = 0, tmp; + for (var i = 0; i < s.length; i++) { + c = s.charCodeAt(i); + if (flag == 0) { + if ((c & 0xe0) == 0xe0) { + flag = 2; + tmp = (c & 0x0f) << 12; + } else if ((c & 0xc0) == 0xc0) { + flag = 1; + tmp = (c & 0x1f) << 6; + } else if ((c & 0x80) == 0) { + d += s.charAt(i); + } else { + flag = 0; + } + } else if (flag == 1) { + flag = 0; + d += String.fromCharCode(tmp | (c & 0x3f)); + } else if (flag == 2) { + flag = 3; + tmp |= (c & 0x3f) << 6; + } else if (flag == 3) { + flag = 0; + d += String.fromCharCode(tmp | (c & 0x3f)); + } else { + flag = 0; + } + } + return d; +} + -- 2.20.1