return $in;
}
+ function firstChar( $s ) {
+     # Get the first character of a string. In ASCII this is simply the
+     # first byte of the string. UTF8 and others have to override this.
+     #
+     # $s: the string to inspect.
+     # Returns the first byte as a one-character string, or "" when
+     # there is no first byte (e.g. $s is the empty string).
+     #
+     # isset() guards the offset access: reading $s[0] on an empty string
+     # raises an "Uninitialized string offset" diagnostic.
+     return isset( $s[0] ) ? $s[0] : "";
+ }
function setAltEncoding() {
# Some languages may have an alternate char encoding option
# Fall back to English if local list is incomplete
$raw =& Language::getMagicWords();
}
- $rawEntry = $raw[$mw->mId];
+ $rawEntry = $raw[$mw->mId];
$mw->mCaseSensitive = $rawEntry[0];
$mw->mSynonyms = array_slice( $rawEntry, 1 );
}
return "<em>$text</em>";
}
- # returns additional Regex for the tokenizer. See LanguageFr.php for an example
- function tokenizerRegex()
- {
- return "";
- }
-
- # Process the token generated from the tokenizer by the above regex. Return
- # NULL if the token is unknown, and the text to be added to the output otherwise
- function processToken( &$token , &$tokenStack)
- {
- return NULL;
- }
# Normally we use the plain ASCII digits. Some languages such as Arabic will
# want to output numbers using script-appropriate characters: override this
return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
}
+
+ function firstChar( $s ) {
+     # Get the first character of a multibyte (UTF-8 style) string: either a
+     # single 0x00-0x7f byte, or a lead byte in 0xc0-0xf7 followed by the
+     # one, two or three 0x80-0xbf continuation bytes that lead byte calls for.
+     #
+     # $s: the string to inspect.
+     # Returns the matched byte sequence, or "" when $s is empty or does not
+     # begin with one of the sequences above.
+     #
+     # The pattern must end with its closing "/" delimiter; without it
+     # preg_match() fails ("No ending delimiter '/' found") and nothing is
+     # ever captured. No /u modifier is used: the character classes are
+     # meant to be matched byte by byte.
+     preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
+         '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches );
+
+     return isset( $matches[1] ) ? $matches[1] : "";
+ }
}
} # ifdef MEDIAWIKI