001package org.json;
002
003/*
004Copyright (c) 2002 JSON.org
005
006Permission is hereby granted, free of charge, to any person obtaining a copy
007of this software and associated documentation files (the "Software"), to deal
008in the Software without restriction, including without limitation the rights
009to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
010copies of the Software, and to permit persons to whom the Software is
011furnished to do so, subject to the following conditions:
012
013The above copyright notice and this permission notice shall be included in all
014copies or substantial portions of the Software.
015
016The Software shall be used for Good, not Evil.
017
018THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
019IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
020FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
021AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
022LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
023OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
024SOFTWARE.
025 */
026
027/**
028 * The XMLTokener extends the JSONTokener to provide additional methods
029 * for the parsing of XML texts.
030 * @author JSON.org
031@version $Id$
032@since Ptolemy II 10.0
033 * @version 2010-01-30
034 */
035public class XMLTokener extends JSONTokener {
036
037    /** The table of entity values. It initially contains Character values for
038     * amp, apos, gt, lt, quot.
039     */
040    public static final java.util.HashMap entity;
041
042    static {
043        entity = new java.util.HashMap(8);
044        entity.put("amp", XML.AMP);
045        entity.put("apos", XML.APOS);
046        entity.put("gt", XML.GT);
047        entity.put("lt", XML.LT);
048        entity.put("quot", XML.QUOT);
049    }
050
051    /**
052     * Construct an XMLTokener from a string.
053     * @param s A source string.
054     */
055    public XMLTokener(String s) {
056        super(s);
057    }
058
059    /**
060     * Get the text in the CDATA block.
061     * @return The string up to the <code>]]&gt;</code>.
062     * @exception JSONException If the <code>]]&gt;</code> is not found.
063     */
064    public String nextCDATA() throws JSONException {
065        char c;
066        int i;
067        StringBuffer sb = new StringBuffer();
068        for (;;) {
069            c = next();
070            if (end()) {
071                throw syntaxError("Unclosed CDATA");
072            }
073            sb.append(c);
074            i = sb.length() - 3;
075            if (i >= 0 && sb.charAt(i) == ']' && sb.charAt(i + 1) == ']'
076                    && sb.charAt(i + 2) == '>') {
077                sb.setLength(i);
078                return sb.toString();
079            }
080        }
081    }
082
083    /**
084     * Get the next XML outer token, trimming whitespace. There are two kinds
085     * of tokens: the '&lt;' character which begins a markup tag, and the content
086     * text between markup tags.
087     *
088     * @return  A string, or a '&lt;' Character, or null if there is no more
089     * source text.
090     * @exception JSONException
091     */
092    public Object nextContent() throws JSONException {
093        char c;
094        StringBuffer sb;
095        do {
096            c = next();
097        } while (Character.isWhitespace(c));
098        if (c == 0) {
099            return null;
100        }
101        if (c == '<') {
102            return XML.LT;
103        }
104        sb = new StringBuffer();
105        for (;;) {
106            if (c == '<' || c == 0) {
107                back();
108                return sb.toString().trim();
109            }
110            if (c == '&') {
111                sb.append(nextEntity(c));
112            } else {
113                sb.append(c);
114            }
115            c = next();
116        }
117    }
118
119    /**
120     * Return the next entity. These entities are translated to Characters:
121     *     <code>&amp;  &apos;  &gt;  &lt;  &quot;</code>.
122     * @param a An ampersand character.
123     * @return  A Character or an entity String if the entity is not recognized.
124     * @exception JSONException If missing ';' in XML entity.
125     */
126    public Object nextEntity(char a) throws JSONException {
127        StringBuffer sb = new StringBuffer();
128        for (;;) {
129            char c = next();
130            if (Character.isLetterOrDigit(c) || c == '#') {
131                sb.append(Character.toLowerCase(c));
132            } else if (c == ';') {
133                break;
134            } else {
135                throw syntaxError("Missing ';' in XML entity: &" + sb);
136            }
137        }
138        String s = sb.toString();
139        Object e = entity.get(s);
140        return e != null ? e : a + s + ";";
141    }
142
143    /**
144     * Returns the next XML meta token. This is used for skipping over &lt;!...&gt;
145     * and &lt;?...?&gt; structures.
146     * @return Syntax characters (<code>&lt; &gt; / = ! ?</code>) are returned as
147     *  Character, and strings and names are returned as Boolean. We don't care
148     *  what the values actually are.
149     * @exception JSONException If a string is not properly closed or if the XML
150     *  is badly structured.
151     */
152    public Object nextMeta() throws JSONException {
153        char c;
154        char q;
155        do {
156            c = next();
157        } while (Character.isWhitespace(c));
158        switch (c) {
159        case 0:
160            throw syntaxError("Misshaped meta tag");
161        case '<':
162            return XML.LT;
163        case '>':
164            return XML.GT;
165        case '/':
166            return XML.SLASH;
167        case '=':
168            return XML.EQ;
169        case '!':
170            return XML.BANG;
171        case '?':
172            return XML.QUEST;
173        case '"':
174        case '\'':
175            q = c;
176            for (;;) {
177                c = next();
178                if (c == 0) {
179                    throw syntaxError("Unterminated string");
180                }
181                if (c == q) {
182                    return Boolean.TRUE;
183                }
184            }
185        default:
186            for (;;) {
187                c = next();
188                if (Character.isWhitespace(c)) {
189                    return Boolean.TRUE;
190                }
191                switch (c) {
192                case 0:
193                case '<':
194                case '>':
195                case '/':
196                case '=':
197                case '!':
198                case '?':
199                case '"':
200                case '\'':
201                    back();
202                    return Boolean.TRUE;
203                }
204            }
205        }
206    }
207
208    /**
209     * Get the next XML Token. These tokens are found inside of angle
210     * brackets. It may be one of these characters: <code>/ &gt; = ! ?</code> or it
211     * may be a string wrapped in single quotes or double quotes, or it may be a
212     * name.
213     * @return a String or a Character.
214     * @exception JSONException If the XML is not well formed.
215     */
216    public Object nextToken() throws JSONException {
217        char c;
218        char q;
219        StringBuffer sb;
220        do {
221            c = next();
222        } while (Character.isWhitespace(c));
223        switch (c) {
224        case 0:
225            throw syntaxError("Misshaped element");
226        case '<':
227            throw syntaxError("Misplaced '<'");
228        case '>':
229            return XML.GT;
230        case '/':
231            return XML.SLASH;
232        case '=':
233            return XML.EQ;
234        case '!':
235            return XML.BANG;
236        case '?':
237            return XML.QUEST;
238
239        // Quoted string
240
241        case '"':
242        case '\'':
243            q = c;
244            sb = new StringBuffer();
245            for (;;) {
246                c = next();
247                if (c == 0) {
248                    throw syntaxError("Unterminated string");
249                }
250                if (c == q) {
251                    return sb.toString();
252                }
253                if (c == '&') {
254                    sb.append(nextEntity(c));
255                } else {
256                    sb.append(c);
257                }
258            }
259        default:
260
261            // Name
262
263            sb = new StringBuffer();
264            for (;;) {
265                sb.append(c);
266                c = next();
267                if (Character.isWhitespace(c)) {
268                    return sb.toString();
269                }
270                switch (c) {
271                case 0:
272                    return sb.toString();
273                case '>':
274                case '/':
275                case '=':
276                case '!':
277                case '?':
278                case '[':
279                case ']':
280                    back();
281                    return sb.toString();
282                case '<':
283                case '"':
284                case '\'':
285                    throw syntaxError("Bad character in a name");
286                }
287            }
288        }
289    }
290
291    /**
292     * Skip characters until past the requested string.
293     * If it is not found, we are left at the end of the source with a result of false.
294     * @param to A string to skip past.
295     * @exception JSONException
296     */
297    public boolean skipPast(String to) throws JSONException {
298        boolean b;
299        char c;
300        int i;
301        int j;
302        int offset = 0;
303        int n = to.length();
304        char[] circle = new char[n];
305
306        /*
307         * First fill the circle buffer with as many characters as are in the
308         * to string. If we reach an early end, bail.
309         */
310
311        for (i = 0; i < n; i += 1) {
312            c = next();
313            if (c == 0) {
314                return false;
315            }
316            circle[i] = c;
317        }
318        /*
319         * We will loop, possibly for all of the remaining characters.
320         */
321        for (;;) {
322            j = offset;
323            b = true;
324            /*
325             * Compare the circle buffer with the to string.
326             */
327            for (i = 0; i < n; i += 1) {
328                if (circle[j] != to.charAt(i)) {
329                    b = false;
330                    break;
331                }
332                j += 1;
333                if (j >= n) {
334                    j -= n;
335                }
336            }
337            /*
338             * If we exit the loop with b intact, then victory is ours.
339             */
340            if (b) {
341                return true;
342            }
343            /*
344             * Get the next character. If there isn't one, then defeat is ours.
345             */
346            c = next();
347            if (c == 0) {
348                return false;
349            }
350            /*
351             * Shove the character in the circle buffer and advance the
352             * circle offset. The offset is mod n.
353             */
354            circle[offset] = c;
355            offset += 1;
356            if (offset >= n) {
357                offset -= n;
358            }
359        }
360    }
361}