6f1b5787586d3d04e80aba54085916c6ad58d62a
4 * Class representing a MediaWiki site.
11 * @licence GNU GPL v2+
12 * @author John Erling Blad < jeblad@gmail.com >
13 * @author Daniel Kinzler
14 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
16 class MediaWikiSite
extends SiteObject
{
18 const PATH_FILE
= 'file_path';
19 const PATH_PAGE
= 'page_path';
24 * @param integer $globalId
26 * @return MediaWikiSite
28 public static function newFromGlobalId( $globalId ) {
29 return SitesTable
::singleton()->newRow( array(
30 'type' => Site
::TYPE_MEDIAWIKI
,
31 'global_key' => $globalId,
36 * Returns the database form of the given title.
40 * @param String $title the target page's title, in normalized form.
public function toDBKey( $title ) {
	// The database form of a title uses an underscore wherever the
	// normalized title has a space; nothing else is altered.
	$dbKey = str_replace( ' ', '_', $title );

	return $dbKey;
}
49 * Returns the normalized form of the given page title, using the normalization rules of the given site.
50 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
52 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
53 * on an external service.
55 * @note : If MW_PHPUNIT_TEST is defined or $egWBRemoteTitleNormalization is set to false, the call to the
56 * external site is skipped, and the title is normalized using the local normalization rules as
57 * implemented by the Title class.
59 * @see Site::normalizePageName
63 * @param string $pageName
68 public function normalizePageName( $pageName ) {
69 global $egWBRemoteTitleNormalization;
71 // Check if we have strings as arguments.
72 if ( !is_string( $pageName ) ) {
73 throw new MWException( '$pageName must be a string' );
76 // Decide whether to call the external site or to normalize locally
77 if ( defined( 'MW_PHPUNIT_TEST' ) ) {
78 // If the code is under test, don't call out to other sites, just normalize locally.
79 // Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!
81 $t = Title
::newFromText( $pageName );
82 return $t->getPrefixedText();
85 // Make sure the string is normalized into NFC (due to the bug 40017)
86 // but do nothing to the whitespaces, that should work appropriately.
87 // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
88 $pageName = UtfNormal
::cleanUp( $pageName );
90 // Build the args for the specific call
95 'converttitles' => true,
97 'titles' => $pageName,
98 //@todo: options for maxlag and maxage
99 // Note that maxlag will lead to a long delay before a reply is made,
100 // but that maxage can avoid the extreme delay. On the other hand
101 // maxage could be nice to use anyhow as it stops unnecessary requests.
102 // Also consider smaxage if maxage is used.
105 $url = $this->getFileUrl( 'api.php' ) . '?' . wfArrayToCgi( $args );
107 // Go on call the external site
108 //@todo: we need a good way to specify a timeout here.
109 $ret = Http
::get( $url );
112 if ( $ret === false ) {
113 wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
117 $data = FormatJson
::decode( $ret, true );
119 if ( !is_array( $data ) ) {
120 wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
124 $page = static::extractPageRecord( $data, $pageName );
126 if ( isset( $page['missing'] ) ) {
127 wfDebugLog( "MediaWikiSite", "call to <$url> returned a missing page title! " . $ret );
131 if ( !isset( $page['title'] ) ) {
132 wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
136 return $page['title'];
141 * Get normalization record for a given page title from an API response.
145 * @param array $externalData A reply from the API on an external server.
146 * @param string $pageTitle Identifies the page at the external site, needing normalization.
148 * @return array|false a 'page' structure representing the page identified by $pageTitle.
150 private static function extractPageRecord( $externalData, $pageTitle ) {
151 // If there is a special case with only one returned page
152 // we can cheat, and only return
153 // the single page in the "pages" substructure.
154 if ( isset( $externalData['query']['pages'] ) ) {
155 $pages = array_values( $externalData['query']['pages'] );
156 if ( count( $pages) === 1 ) {
160 // This is only used during internal testing, as a more optimal
161 // (and lossless) storage is assumed.
162 // Make initial checks and return if prerequisites are not met.
163 if ( !is_array( $externalData ) ||
!isset( $externalData['query'] ) ) {
166 // Loop over the three differently named structures, that otherwise are similar
168 'normalized' => 'from',
169 'converted' => 'from',
170 'redirects' => 'from',
173 foreach ( $structs as $listId => $fieldId ) {
174 // Check if the substructure exists at all.
175 if ( !isset( $externalData['query'][$listId] ) ) {
178 // Filter the substructure down to what we actually are using.
179 $collectedHits = array_filter(
180 array_values( $externalData['query'][$listId] ),
181 function( $a ) use ( $fieldId, $pageTitle ) {
182 return $a[$fieldId] === $pageTitle;
185 // If still looping over normalization, conversion or redirects,
186 // then we need to keep the new page title for later rounds.
187 if ( $fieldId === 'from' && is_array( $collectedHits ) ) {
188 switch ( count( $collectedHits ) ) {
192 $pageTitle = $collectedHits[0]['to'];
198 // If on the pages structure we should prepare for returning.
199 elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) {
200 switch ( count( $collectedHits ) ) {
204 return array_shift( $collectedHits );
210 // should never be here
215 * @see Site::getLinkPathType
216 * Returns MediaWikiSite::PATH_PAGE
public function getLinkPathType() {
	// The link path type reported by this class is its page path type.
	$linkPathType = self::PATH_PAGE;

	return $linkPathType;
}
227 * Returns the relative page path.
public function getRelativePagePath() {
	// Look up the path registered under the page path type, then keep
	// only the path component of that URL.
	$pagePath = $this->getPath( self::PATH_PAGE );

	return parse_url( $pagePath, PHP_URL_PATH );
}
238 * Returns the relative file path.
public function getRelativeFilePath() {
	// Look up the path registered under the file path type, then keep
	// only the path component of that URL.
	$filePath = $this->getPath( self::PATH_FILE );

	return parse_url( $filePath, PHP_URL_PATH );
}
249 * Sets the relative page path.
253 * @param string $path
public function setPagePath( $path ) {
	// Store the given path under the page path type.
	$pathType = self::PATH_PAGE;

	$this->setPath( $pathType, $path );
}
260 * Sets the relative file path.
264 * @param string $path
public function setFilePath( $path ) {
	// Store the given path under the file path type.
	$pathType = self::PATH_FILE;

	$this->setPath( $pathType, $path );
}
271 * @see Site::getPagePath
273 * This implementation returns a URL constructed using the path returned by getLinkPath().
274 * In addition to the default behaviour implemented by SiteObject::getPageUrl(), this
275 * method converts the $pageName to DBKey-format by replacing spaces with underscores
276 * before using it in the URL.
280 * @param string|false $pageName
284 public function getPageUrl( $pageName = false ) {
285 $url = $this->getLinkPath();
287 if ( $url === false ) {
291 if ( $pageName !== false ) {
292 $pageName = $this->toDBKey( trim( $pageName ) );
293 $url = str_replace( '$1', wfUrlencode( $pageName ), $url ) ;
300 * Returns the full file path (ie site url + relative file path).
301 * The path should go at the $1 marker. If the $path
302 * argument is provided, the marker will be replaced by its value.
306 * @param string|false $path
310 public function getFileUrl( $path = false ) {
311 $filePath = $this->getPath( self
::PATH_FILE
);
313 if ( $filePath !== false ) {
314 $filePath = str_replace( '$1', $path, $filePath );