001package org.json; 002 003/* 004Copyright (c) 2002 JSON.org 005 006Permission is hereby granted, free of charge, to any person obtaining a copy 007of this software and associated documentation files (the "Software"), to deal 008in the Software without restriction, including without limitation the rights 009to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 010copies of the Software, and to permit persons to whom the Software is 011furnished to do so, subject to the following conditions: 012 013The above copyright notice and this permission notice shall be included in all 014copies or substantial portions of the Software. 015 016The Software shall be used for Good, not Evil. 017 018THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 019IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 020FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 021AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 022LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 023OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 024SOFTWARE. 025 */ 026 027/** 028 * The XMLTokener extends the JSONTokener to provide additional methods 029 * for the parsing of XML texts. 030 * @author JSON.org 031@version $Id$ 032@since Ptolemy II 10.0 033 * @version 2010-01-30 034 */ 035public class XMLTokener extends JSONTokener { 036 037 /** The table of entity values. It initially contains Character values for 038 * amp, apos, gt, lt, quot. 039 */ 040 public static final java.util.HashMap entity; 041 042 static { 043 entity = new java.util.HashMap(8); 044 entity.put("amp", XML.AMP); 045 entity.put("apos", XML.APOS); 046 entity.put("gt", XML.GT); 047 entity.put("lt", XML.LT); 048 entity.put("quot", XML.QUOT); 049 } 050 051 /** 052 * Construct an XMLTokener from a string. 053 * @param s A source string. 054 */ 055 public XMLTokener(String s) { 056 super(s); 057 } 058 059 /** 060 * Get the text in the CDATA block. 061 * @return The string up to the <code>]]></code>. 062 * @exception JSONException If the <code>]]></code> is not found. 063 */ 064 public String nextCDATA() throws JSONException { 065 char c; 066 int i; 067 StringBuffer sb = new StringBuffer(); 068 for (;;) { 069 c = next(); 070 if (end()) { 071 throw syntaxError("Unclosed CDATA"); 072 } 073 sb.append(c); 074 i = sb.length() - 3; 075 if (i >= 0 && sb.charAt(i) == ']' && sb.charAt(i + 1) == ']' 076 && sb.charAt(i + 2) == '>') { 077 sb.setLength(i); 078 return sb.toString(); 079 } 080 } 081 } 082 083 /** 084 * Get the next XML outer token, trimming whitespace. There are two kinds 085 * of tokens: the '<' character which begins a markup tag, and the content 086 * text between markup tags. 087 * 088 * @return A string, or a '<' Character, or null if there is no more 089 * source text. 090 * @exception JSONException 091 */ 092 public Object nextContent() throws JSONException { 093 char c; 094 StringBuffer sb; 095 do { 096 c = next(); 097 } while (Character.isWhitespace(c)); 098 if (c == 0) { 099 return null; 100 } 101 if (c == '<') { 102 return XML.LT; 103 } 104 sb = new StringBuffer(); 105 for (;;) { 106 if (c == '<' || c == 0) { 107 back(); 108 return sb.toString().trim(); 109 } 110 if (c == '&') { 111 sb.append(nextEntity(c)); 112 } else { 113 sb.append(c); 114 } 115 c = next(); 116 } 117 } 118 119 /** 120 * Return the next entity. These entities are translated to Characters: 121 * <code>& ' > < "</code>. 122 * @param a An ampersand character. 123 * @return A Character or an entity String if the entity is not recognized. 124 * @exception JSONException If missing ';' in XML entity. 125 */ 126 public Object nextEntity(char a) throws JSONException { 127 StringBuffer sb = new StringBuffer(); 128 for (;;) { 129 char c = next(); 130 if (Character.isLetterOrDigit(c) || c == '#') { 131 sb.append(Character.toLowerCase(c)); 132 } else if (c == ';') { 133 break; 134 } else { 135 throw syntaxError("Missing ';' in XML entity: &" + sb); 136 } 137 } 138 String s = sb.toString(); 139 Object e = entity.get(s); 140 return e != null ? e : a + s + ";"; 141 } 142 143 /** 144 * Returns the next XML meta token. This is used for skipping over <!...> 145 * and <?...?> structures. 146 * @return Syntax characters (<code>< > / = ! ?</code>) are returned as 147 * Character, and strings and names are returned as Boolean. We don't care 148 * what the values actually are. 149 * @exception JSONException If a string is not properly closed or if the XML 150 * is badly structured. 151 */ 152 public Object nextMeta() throws JSONException { 153 char c; 154 char q; 155 do { 156 c = next(); 157 } while (Character.isWhitespace(c)); 158 switch (c) { 159 case 0: 160 throw syntaxError("Misshaped meta tag"); 161 case '<': 162 return XML.LT; 163 case '>': 164 return XML.GT; 165 case '/': 166 return XML.SLASH; 167 case '=': 168 return XML.EQ; 169 case '!': 170 return XML.BANG; 171 case '?': 172 return XML.QUEST; 173 case '"': 174 case '\'': 175 q = c; 176 for (;;) { 177 c = next(); 178 if (c == 0) { 179 throw syntaxError("Unterminated string"); 180 } 181 if (c == q) { 182 return Boolean.TRUE; 183 } 184 } 185 default: 186 for (;;) { 187 c = next(); 188 if (Character.isWhitespace(c)) { 189 return Boolean.TRUE; 190 } 191 switch (c) { 192 case 0: 193 case '<': 194 case '>': 195 case '/': 196 case '=': 197 case '!': 198 case '?': 199 case '"': 200 case '\'': 201 back(); 202 return Boolean.TRUE; 203 } 204 } 205 } 206 } 207 208 /** 209 * Get the next XML Token. These tokens are found inside of angle 210 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it 211 * may be a string wrapped in single quotes or double quotes, or it may be a 212 * name. 213 * @return a String or a Character. 214 * @exception JSONException If the XML is not well formed. 215 */ 216 public Object nextToken() throws JSONException { 217 char c; 218 char q; 219 StringBuffer sb; 220 do { 221 c = next(); 222 } while (Character.isWhitespace(c)); 223 switch (c) { 224 case 0: 225 throw syntaxError("Misshaped element"); 226 case '<': 227 throw syntaxError("Misplaced '<'"); 228 case '>': 229 return XML.GT; 230 case '/': 231 return XML.SLASH; 232 case '=': 233 return XML.EQ; 234 case '!': 235 return XML.BANG; 236 case '?': 237 return XML.QUEST; 238 239 // Quoted string 240 241 case '"': 242 case '\'': 243 q = c; 244 sb = new StringBuffer(); 245 for (;;) { 246 c = next(); 247 if (c == 0) { 248 throw syntaxError("Unterminated string"); 249 } 250 if (c == q) { 251 return sb.toString(); 252 } 253 if (c == '&') { 254 sb.append(nextEntity(c)); 255 } else { 256 sb.append(c); 257 } 258 } 259 default: 260 261 // Name 262 263 sb = new StringBuffer(); 264 for (;;) { 265 sb.append(c); 266 c = next(); 267 if (Character.isWhitespace(c)) { 268 return sb.toString(); 269 } 270 switch (c) { 271 case 0: 272 return sb.toString(); 273 case '>': 274 case '/': 275 case '=': 276 case '!': 277 case '?': 278 case '[': 279 case ']': 280 back(); 281 return sb.toString(); 282 case '<': 283 case '"': 284 case '\'': 285 throw syntaxError("Bad character in a name"); 286 } 287 } 288 } 289 } 290 291 /** 292 * Skip characters until past the requested string. 293 * If it is not found, we are left at the end of the source with a result of false. 294 * @param to A string to skip past. 295 * @exception JSONException 296 */ 297 public boolean skipPast(String to) throws JSONException { 298 boolean b; 299 char c; 300 int i; 301 int j; 302 int offset = 0; 303 int n = to.length(); 304 char[] circle = new char[n]; 305 306 /* 307 * First fill the circle buffer with as many characters as are in the 308 * to string. If we reach an early end, bail. 309 */ 310 311 for (i = 0; i < n; i += 1) { 312 c = next(); 313 if (c == 0) { 314 return false; 315 } 316 circle[i] = c; 317 } 318 /* 319 * We will loop, possibly for all of the remaining characters. 320 */ 321 for (;;) { 322 j = offset; 323 b = true; 324 /* 325 * Compare the circle buffer with the to string. 326 */ 327 for (i = 0; i < n; i += 1) { 328 if (circle[j] != to.charAt(i)) { 329 b = false; 330 break; 331 } 332 j += 1; 333 if (j >= n) { 334 j -= n; 335 } 336 } 337 /* 338 * If we exit the loop with b intact, then victory is ours. 339 */ 340 if (b) { 341 return true; 342 } 343 /* 344 * Get the next character. If there isn't one, then defeat is ours. 345 */ 346 c = next(); 347 if (c == 0) { 348 return false; 349 } 350 /* 351 * Shove the character in the circle buffer and advance the 352 * circle offset. The offset is mod n. 353 */ 354 circle[offset] = c; 355 offset += 1; 356 if (offset >= n) { 357 offset -= n; 358 } 359 } 360 } 361}