Enable UTF-8 lower/upper case operations in SearchEngine,
[lhc/web/wiklou.git] / includes / SearchEngine.php
1 <?php
2 /**
3 * Contains the SearchEngine base class and its related search result classes
4 * @package MediaWiki
5 * @subpackage Search
6 */
7
/**
 * Base search engine.
 *
 * The query and index-maintenance methods defined here (searchText(),
 * searchTitle(), update(), updateTitle()) are no-op stubs; the remaining
 * methods provide shared helpers such as near-match title lookup, search
 * string filtering and backend selection.
 *
 * @package MediaWiki
 */
class SearchEngine {
	/** Maximum number of results to return */
	var $limit = 10;
	/** Number of results to skip before the first one returned */
	var $offset = 0;
	var $searchTerms = array();
	/** Namespace index numbers the search should include */
	var $namespaces = array( NS_MAIN );
	var $showRedirects = false;

	/**
	 * Perform a full text search query and return a result set.
	 * If text searches are not supported or disabled, return null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchText( $term ) {
		return null;
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * If title searches are not supported or disabled, return null.
	 *
	 * @param string $term - Raw search term
	 * @return SearchResultSet
	 * @access public
	 * @abstract
	 */
	function searchTitle( $term ) {
		return null;
	}

	/**
	 * If an exact title match can be found, or a very slightly close match,
	 * return the title. If no match, returns NULL.
	 *
	 * Lookup order: the term as given (special pages are returned without
	 * an existence check), then several case variants of the term, then
	 * the IP address / user page shortcuts, and finally the term with
	 * surrounding double quotes removed.
	 *
	 * @static
	 * @param string $term
	 * @return Title
	 * @private
	 */
	function getNearMatch( $term ) {
		global $wgContLang, $wgCapitalLinks;

		# Exact match? No need to look further.
		$exact = Title::newFromText( $term );
		if ( is_null( $exact ) ) {
			return NULL;
		}
		if ( $exact->getNamespace() == NS_SPECIAL || $exact->exists() ) {
			return $exact;
		}

		# Case variants, tried in order:
		#  lc           - all lower case (i.e. first letter capitalized)
		#  ucwords      - capitalized string
		#  uc           - all upper case
		#  ucwordbreaks - Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
		$variants = array( 'lc', 'ucwords', 'uc', 'ucwordbreaks' );
		if ( !$wgCapitalLinks ) {
			// Catch differs-by-first-letter-case-only
			$variants[] = 'ucfirst';
			$variants[] = 'lcfirst';
		}
		foreach ( $variants as $method ) {
			// Each variant is computed only when all earlier ones failed.
			$candidate = Title::newFromText( $wgContLang->$method( $term ) );
			// Title::newFromText() may return null (see the check on the raw
			// term above), so guard before calling exists().
			if ( !is_null( $candidate ) && $candidate->exists() ) {
				return $candidate;
			}
		}

		# From here on we work with the title parsed from the raw term again.
		$title = $exact;

		# Entering an IP address goes to the contributions page
		if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
			|| User::isIP( trim( $term ) ) ) {
			return Title::makeTitle( NS_SPECIAL, "Contributions/" . $title->getDbkey() );
		}

		# Entering a user goes to the user page whether it's there or not
		if ( $title->getNamespace() == NS_USER ) {
			return $title;
		}

		# Quoted term? Try without the quotes...
		if ( preg_match( '/^"([^"]+)"$/', $term, $matches ) ) {
			return SearchEngine::getNearMatch( $matches[1] );
		}

		return NULL;
	}

	/**
	 * Character-class fragment listing the characters allowed in a search
	 * string; filter() interpolates it into a negated regex class.
	 *
	 * @return string
	 */
	function legalSearchChars() {
		return "A-Za-z_'0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 * Both values are coerced with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 * @access public
	 */
	function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Give an array of namespace index numbers.
	 *
	 * @param array $namespaces
	 * @access public
	 */
	function setNamespaces( $namespaces ) {
		$this->namespaces = $namespaces;
	}

	/**
	 * Make a list of searchable namespaces and their canonical names.
	 * Only namespaces whose index is NS_MAIN or greater are included.
	 *
	 * @return array Namespace index => name
	 * @access public
	 */
	function searchableNamespaces() {
		global $wgContLang;
		$arr = array();
		foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
			if ( $ns >= NS_MAIN ) {
				$arr[$ns] = $name;
			}
		}
		return $arr;
	}

	/**
	 * Return a 'cleaned up' search string: every character outside
	 * legalSearchChars() is replaced by a space and the result is trimmed.
	 *
	 * @param string $text
	 * @return string
	 * @access public
	 */
	function filter( $text ) {
		$lc = $this->legalSearchChars();
		return trim( preg_replace( "/[^{$lc}]/", " ", $text ) );
	}

	/**
	 * Load up the appropriate search engine class for the currently
	 * active database backend, and return a configured instance.
	 *
	 * $wgSearchType, when set, names the class directly. Otherwise
	 * $wgDBtype 'mysql' selects SearchMySQL4, 'postgres' selects
	 * SearchPostgres, and anything else falls back to SearchEngineDummy.
	 * The instance is constructed with wfGetDB( DB_SLAVE ) and its limit
	 * and offset are both set to 0.
	 *
	 * @return SearchEngine
	 * @private
	 */
	function create() {
		global $wgDBtype, $wgSearchType;
		if ( $wgSearchType ) {
			$class = $wgSearchType;
		} elseif ( $wgDBtype == 'mysql' ) {
			$class = 'SearchMySQL4';
		} elseif ( $wgDBtype == 'postgres' ) {
			$class = 'SearchPostgres';
		} else {
			$class = 'SearchEngineDummy';
		}
		$search = new $class( wfGetDB( DB_SLAVE ) );
		$search->setLimitOffset( 0, 0 );
		return $search;
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 * @abstract
	 */
	function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 *
	 * @param int $id
	 * @param string $title
	 * @abstract
	 */
	function updateTitle( $id, $title ) {
		// no-op
	}
}
228
/**
 * Default (empty) result set: every accessor here reports "nothing found".
 *
 * @package MediaWiki
 */
class SearchResultSet {
	/**
	 * Tell whether this set contains any results.
	 *
	 * @return bool Always false in this base implementation
	 * @abstract
	 */
	function hasResults() {
		return false;
	}

	/**
	 * Number of rows in this set.
	 *
	 * @return int Always 0 in this base implementation
	 */
	function numRows() {
		return 0;
	}

	/**
	 * Fetch the next search result.
	 *
	 * @return SearchResult The next result, or false when there is none
	 *  (this base implementation always returns false)
	 * @access public
	 * @abstract
	 */
	function next() {
		return false;
	}

	/**
	 * Regular expression fragments that match the search terms, as parsed
	 * by the engine, inside a text extract.
	 *
	 * @return array Empty in this base implementation
	 * @access public
	 * @abstract
	 */
	function termMatches() {
		return array();
	}

	/**
	 * Total hit count for the query across the entire article database,
	 * for search modes that provide one. The count may include pages in
	 * namespaces that the current settings would not match.
	 *
	 * @return int Null when no total hit count is supported
	 * @access public
	 */
	function getTotalHits() {
		return null;
	}

	/**
	 * Whether a suggested alternate term is available. Some search modes
	 * offer one when there are no exact hits.
	 *
	 * @return bool
	 * @access public
	 */
	function hasSuggestion() {
		return false;
	}

	/**
	 * The suggested alternate term offered by some search modes when there
	 * are no exact hits. Call hasSuggestion() first.
	 *
	 * @return string
	 * @access public
	 */
	function getSuggestion() {
		return '';
	}
}
303
/**
 * A single search hit, wrapping the Title built from a result row.
 *
 * @package MediaWiki
 */
class SearchResult {
	/** Title built from the row's page_namespace and page_title */
	var $mTitle;

	/**
	 * Build the result from a row object.
	 *
	 * Declared as __construct() because PHP 8 no longer treats a method
	 * named after the class as a constructor; without it the title would
	 * never be set on construction.
	 *
	 * @param object $row Row exposing page_namespace and page_title
	 */
	function __construct( $row ) {
		$this->mTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	}

	/**
	 * Old-style constructor name, kept so existing explicit calls to
	 * SearchResult() keep working. Delegates to __construct().
	 *
	 * @param object $row Row exposing page_namespace and page_title
	 */
	function SearchResult( $row ) {
		$this->__construct( $row );
	}

	/**
	 * @return Title
	 * @access public
	 */
	function getTitle() {
		return $this->mTitle;
	}

	/**
	 * @return double or null if not supported
	 */
	function getScore() {
		return null;
	}
}
325
/**
 * Do-nothing search backend. SearchEngine::create() falls back to this
 * class when no search type is configured and the database type is
 * neither 'mysql' nor 'postgres'. Every method is a no-op returning null.
 *
 * @package MediaWiki
 */
class SearchEngineDummy {
	/**
	 * @param string $term
	 * @return null
	 */
	function search( $term ) {
		return null;
	}
	function setLimitOffset($l, $o) {}
	function legalSearchChars() {}
	function update() {}
	/**
	 * No-op counterpart of SearchEngine::updateTitle(), provided alongside
	 * update() so title-only index updates do not fail on this backend.
	 */
	function updateTitle() {}
	function setnamespaces() {}
	function searchtitle() {}
	function searchtext() {}
}
340 ?>