* rmed redundant code
[lhc/web/wiklou.git] / maintenance / entities2literals.pl
1 #!/usr/bin/env perl
2 # Takes STDIN and converts hexadecimal, decimal and named HTML
3 # entities to their respective literals.
4 #
5 # Usage: perl entities2literals.pl < file_to_convert [> outfile]
6 # Reference: http://www.w3.org/TR/REC-html40/sgml/entities.html
7 # Copyright 2005 Ævar Arnfjörð Bjarmason <avarab@gmail.com> No rights reserved
8
use strict;
use warnings;

# Decode everything read (STDIN and files named on the command line) and
# encode everything printed as UTF-8, so that literals above U+00FF are
# written consistently instead of triggering "Wide character" warnings.
# NOTE(review): this assumes the input is UTF-8 (or plain ASCII) -- confirm
# for any legacy Latin-1 files before running the script on them.
use open qw(:std :encoding(UTF-8));

# Build the entity-name => code-point table from the "<number> <name>"
# lines in the DATA section.
my %codepoint_for;
while ( my $line = <DATA> ) {
    my ( $number, $entity ) = split ' ', $line;
    next if !defined $entity;    # skip blank or malformed lines
    $codepoint_for{$entity} = $number;
}

# Slurp all input. Every line still carries its own newline, so the
# pieces are joined with the empty string.
my $file = join '', <>;

print decode_entities( $file, \%codepoint_for );

# decode_entities($text, \%codepoint_for)
#
# Returns a copy of $text in which decimal (&#65;), hexadecimal (&#x41;)
# and named (&amp;) character references are replaced by the characters
# they denote. Names are looked up in %codepoint_for (name => code point).
#
# All three kinds of reference are handled in one left-to-right pass, and
# text produced by a replacement is never scanned again, so "&amp;lt;"
# becomes "&lt;" rather than "<". References with an unknown name or a
# code point that is not a valid Unicode scalar value are left untouched.
sub decode_entities {
    my ( $text, $codepoint_for ) = @_;

    # [0-9] rather than \d: on decoded text \d would also match non-ASCII
    # digits, which do not numify. The digit-count limits keep the values
    # small enough that neither chr nor hex can overflow.
    $text =~ s{
        (                                   # $1: the whole reference
            &
            (?: \#      ( [0-9]{1,7} )              # $2: decimal digits
              | \# [xX] ( [0-9a-fA-F]{1,6} )        # $3: hexadecimal digits
              |         ( [A-Za-z][A-Za-z0-9]* )    # $4: entity name
            )
            ;
        )
    }{
        my ( $whole, $dec, $hex, $name ) = ( $1, $2, $3, $4 );
        my $code_point
            = defined $dec ? $dec
            : defined $hex ? hex $hex
            :                $codepoint_for->{$name};
        _is_unicode_scalar($code_point) ? chr $code_point : $whole;
    }gex;

    return $text;
}

# _is_unicode_scalar($code_point)
#
# True when $code_point is defined, no larger than U+10FFFF and not in the
# surrogate range U+D800..U+DFFF.
sub _is_unicode_scalar {
    my ($code_point) = @_;

    return 0 if !defined $code_point;
    return 0 if $code_point > 0x10FFFF;
    return 0 if $code_point >= 0xD800 && $code_point <= 0xDFFF;
    return 1;
}
22
23 __DATA__
24 34 quot
25 38 amp
26 60 lt
27 62 gt
28 160 nbsp
29 161 iexcl
30 162 cent
31 163 pound
32 164 curren
33 165 yen
34 166 brvbar
35 167 sect
36 168 uml
37 169 copy
38 170 ordf
39 171 laquo
40 172 not
41 173 shy
42 174 reg
43 175 macr
44 176 deg
45 177 plusmn
46 178 sup2
47 179 sup3
48 180 acute
49 181 micro
50 182 para
51 183 middot
52 184 cedil
53 185 sup1
54 186 ordm
55 187 raquo
56 188 frac14
57 189 frac12
58 190 frac34
59 191 iquest
60 192 Agrave
61 193 Aacute
62 194 Acirc
63 195 Atilde
64 196 Auml
65 197 Aring
66 198 AElig
67 199 Ccedil
68 200 Egrave
69 201 Eacute
70 202 Ecirc
71 203 Euml
72 204 Igrave
73 205 Iacute
74 206 Icirc
75 207 Iuml
76 208 ETH
77 209 Ntilde
78 210 Ograve
79 211 Oacute
80 212 Ocirc
81 213 Otilde
82 214 Ouml
83 215 times
84 216 Oslash
85 217 Ugrave
86 218 Uacute
87 219 Ucirc
88 220 Uuml
89 221 Yacute
90 222 THORN
91 223 szlig
92 224 agrave
93 225 aacute
94 226 acirc
95 227 atilde
96 228 auml
97 229 aring
98 230 aelig
99 231 ccedil
100 232 egrave
101 233 eacute
102 234 ecirc
103 235 euml
104 236 igrave
105 237 iacute
106 238 icirc
107 239 iuml
108 240 eth
109 241 ntilde
110 242 ograve
111 243 oacute
112 244 ocirc
113 245 otilde
114 246 ouml
115 247 divide
116 248 oslash
117 249 ugrave
118 250 uacute
119 251 ucirc
120 252 uuml
121 253 yacute
122 254 thorn
123 255 yuml
124 338 OElig
125 339 oelig
126 352 Scaron
127 353 scaron
128 376 Yuml
129 402 fnof
130 710 circ
131 732 tilde
132 913 Alpha
133 914 Beta
134 915 Gamma
135 916 Delta
136 917 Epsilon
137 918 Zeta
138 919 Eta
139 920 Theta
140 921 Iota
141 922 Kappa
142 923 Lambda
143 924 Mu
144 925 Nu
145 926 Xi
146 927 Omicron
147 928 Pi
148 929 Rho
149 931 Sigma
150 932 Tau
151 933 Upsilon
152 934 Phi
153 935 Chi
154 936 Psi
155 937 Omega
156 945 alpha
157 946 beta
158 947 gamma
159 948 delta
160 949 epsilon
161 950 zeta
162 951 eta
163 952 theta
164 953 iota
165 954 kappa
166 955 lambda
167 956 mu
168 957 nu
169 958 xi
170 959 omicron
171 960 pi
172 961 rho
173 962 sigmaf
174 963 sigma
175 964 tau
176 965 upsilon
177 966 phi
178 967 chi
179 968 psi
180 969 omega
181 977 thetasym
182 978 upsih
183 982 piv
184 8194 ensp
185 8195 emsp
186 8201 thinsp
187 8204 zwnj
188 8205 zwj
189 8206 lrm
190 8207 rlm
191 8211 ndash
192 8212 mdash
193 8216 lsquo
194 8217 rsquo
195 8218 sbquo
196 8220 ldquo
197 8221 rdquo
198 8222 bdquo
199 8224 dagger
200 8225 Dagger
201 8226 bull
202 8230 hellip
203 8240 permil
204 8242 prime
205 8243 Prime
206 8249 lsaquo
207 8250 rsaquo
208 8254 oline
209 8260 frasl
210 8364 euro
211 8465 image
212 8472 weierp
213 8476 real
214 8482 trade
215 8501 alefsym
216 8592 larr
217 8593 uarr
218 8594 rarr
219 8595 darr
220 8596 harr
221 8629 crarr
222 8656 lArr
223 8657 uArr
224 8658 rArr
225 8659 dArr
226 8660 hArr
227 8704 forall
228 8706 part
229 8707 exist
230 8709 empty
231 8711 nabla
232 8712 isin
233 8713 notin
234 8715 ni
235 8719 prod
236 8721 sum
237 8722 minus
238 8727 lowast
239 8730 radic
240 8733 prop
241 8734 infin
242 8736 ang
243 8743 and
244 8744 or
245 8745 cap
246 8746 cup
247 8747 int
248 8756 there4
249 8764 sim
250 8773 cong
251 8776 asymp
252 8800 ne
253 8801 equiv
254 8804 le
255 8805 ge
256 8834 sub
257 8835 sup
258 8836 nsub
259 8838 sube
260 8839 supe
261 8853 oplus
262 8855 otimes
263 8869 perp
264 8901 sdot
265 8968 lceil
266 8969 rceil
267 8970 lfloor
268 8971 rfloor
269 9001 lang
270 9002 rang
271 9674 loz
272 9824 spades
273 9827 clubs
274 9829 hearts
275 9830 diams