1    // Derived from org.mozilla.javascript.TokenStream [NPL]
2    
3    /**
4     * The contents of this file are subject to the Netscape Public
5     * License Version 1.1 (the "License"); you may not use this file
6     * except in compliance with the License. You may obtain a copy of
7     * the License at http://www.mozilla.org/NPL/
8     *
9     * Software distributed under the License is distributed on an "AS
10    * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11    * implied. See the License for the specific language governing
12    * rights and limitations under the License.
13    *
14    * The Initial Developer of the Original Code is Netscape
15    * Communications Corporation.
16    *
17    * Contributor(s): Roger Lawrence, Mike McCabe
18    */
19   
20   package org.xwt.js;
21   import java.io.*;
22   
23   /** Lexes a stream of characters into a stream of Tokens */
24   class Lexer implements Tokens {
25   
26       /** for debugging */
27       public static void main(String[] s) throws Exception {
28           Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0);
29           int tok = 0;
30           while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]);
31       }
32   
33       /** the token that was just parsed */
34       protected int op;
35    
36      /** the most recently parsed token, <i>regardless of pushbacks</i> */
37       protected int mostRecentlyReadToken;
38   
39       /** if the token just parsed was a NUMBER, this is the numeric value */
40       protected Number number = null;
41   
42       /** if the token just parsed was a NAME or STRING, this is the string value */
43       protected String string = null;
44   
45       /** the line number of the most recently <i>lexed</i> token */
46       private int line = 0;
47   
48       /** the line number of the most recently <i>parsed</i> token */
49       protected int parserLine = 0;
50   
51       /** the column number of the current token */
52       protected int col = 0;
53   
54       /** the name of the source code file being lexed */
55       protected String sourceName;
56   
57       private SmartReader in;
58       public Lexer(Reader r, String sourceName, int line) throws IOException {
59           this.sourceName = sourceName;
60           this.line = line;
61           this.parserLine = line;
62           in = new SmartReader(r);
63       }
64   
65   
66       // Predicates ///////////////////////////////////////////////////////////////////////
67   
68       private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); }
69       private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); }
70       private static int xDigitToInt(int c) {
71           if ('0' <= c && c <= '9') return c - '0';
72           else if ('a' <= c && c <= 'f') return c - ('a' - 10);
73           else if ('A' <= c && c <= 'F') return c - ('A' - 10);
74           else return -1;
75       }
76   
77       
78       // Token Subtype Handlers /////////////////////////////////////////////////////////
79   
80       private int getKeyword(String name) throws IOException {
81           //#switch(name)
82           case "if": return IF;
83           case "lt": return LT;
84           case "gt": return GT;
85           case "in": return IN;
86           case "do": return DO;
87           case "and": return AND;
88           case "or": return OR;
89           case "for": return FOR;
90           case "int": return RESERVED;
91           case "new": return RESERVED;
92           case "try": return TRY;
93           case "var": return VAR;
94           case "byte": return RESERVED;
95           case "case": return CASE;
96           case "char": return RESERVED;
97           case "else": return ELSE;
98           case "enum": return RESERVED;
99           case "goto": return RESERVED;
100          case "long": return RESERVED;
101          case "null": return NULL;
102          case "true": return TRUE;
103          case "with": return RESERVED;
104          case "void": return RESERVED;
105          case "class": return RESERVED;
106          case "break": return BREAK;
107          case "while": return WHILE;
108          case "false": return FALSE;
109          case "const": return RESERVED;
110          case "final": return RESERVED;
111          case "super": return RESERVED;
112          case "throw": return THROW;
113          case "catch": return CATCH;
114          case "class": return RESERVED;
115          case "delete": return RESERVED;
116          case "return": return RETURN;
117          case "throws": return RESERVED;
118          case "double": return RESERVED;
119          case "assert": return ASSERT;
120          case "public": return RESERVED;
121          case "switch": return SWITCH;
122          case "typeof": return TYPEOF;
123          case "package": return RESERVED;
124          case "default": return DEFAULT;
125          case "finally": return FINALLY;
126          case "boolean": return RESERVED;
127          case "private": return RESERVED;
128          case "extends": return RESERVED;
129          case "abstract": return RESERVED;
130          case "continue": return CONTINUE;
131          case "debugger": return RESERVED;
132          case "function": return FUNCTION;
133          case "volatile": return RESERVED;
134          case "interface": return RESERVED;
135          case "protected": return RESERVED;
136          case "transient": return RESERVED;
137          case "implements": return RESERVED;
138          case "instanceof": return RESERVED;
139          case "synchronized": return RESERVED;
140          //#end
141          return -1;
142      }
143  
144      private int getIdentifier(int c) throws IOException {
145          in.startString();
146          while (Character.isJavaIdentifierPart((char)(c = in.read())));
147          in.unread();
148          String str = in.getString();
149          int result = getKeyword(str);
150          if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in XWT scripts");
151          if (result != -1) return result;
152          this.string = str.intern();
153          return NAME;
154      }
155      
156      private int getNumber(int c) throws IOException {
157          int base = 10;
158          in.startString();
159          double dval = Double.NaN;
160          long longval = 0;
161          boolean isInteger = true;
162          
163          // figure out what base we're using
164          if (c == '0') {
165              if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); }
166              else if (isDigit(c)) base = 8;
167          }
168          
169          while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();
170          if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
171              isInteger = false;
172              if (c == '.') do { c = in.read(); } while (isDigit(c));
173              if (c == 'e' || c == 'E') {
174                  c = in.read();
175                  if (c == '+' || c == '-') c = in.read();
176                  if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value");
177                  do { c = in.read(); } while (isDigit(c));
178              }
179          }
180          in.unread();
181  
182          String numString = in.getString();
183          if (base == 10 && !isInteger) {
184              try { dval = (Double.valueOf(numString)).doubleValue(); }
185              catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
186          } else {
187              if (isInteger) {
188                  longval = Long.parseLong(numString, base);
189                  dval = (double)longval;
190              } else {
191                  dval = Double.parseDouble(numString);
192                  longval = (long) dval;
193                  if (longval == dval) isInteger = true;
194              }
195          }
196          
197          if (!isInteger) this.number = JS.N(dval);
198          else this.number = JS.N(longval);
199          return NUMBER;
200      }
201      
202      private int getString(int c) throws IOException {
203          StringBuffer stringBuf = null;
204          int quoteChar = c;
205          int val = 0;
206          c = in.read();
207          in.startString(); // start after the first "
208          while(c != quoteChar) {
209              if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
210              if (c == '\\') {
211                  if (stringBuf == null) {
212                      in.unread();   // Don't include the backslash
213                      stringBuf = new StringBuffer(in.getString());
214                      in.read();
215                  }
216                  switch (c = in.read()) {
217                  case 'b': c = '\b'; break;
218                  case 'f': c = '\f'; break;
219                  case 'n': c = '\n'; break;
220                  case 'r': c = '\r'; break;
221                  case 't': c = '\t'; break;
222                  case 'v': c = '\u000B'; break;
223                  case '\\': c = '\\'; break;
224                  case 'u': {
225                      int v = 0;
226                      for(int i=0; i<4; i++) {
227                          int ci = in.read();
228                          if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F')))
229                              throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence");
230                          v = (v << 8) | Integer.parseInt(ci + "", 16);
231                      }
232                      c = (char)v;
233                      break;
234                  }
235                  default:
236                      // just use the character that was escaped
237                      break;
238                  }
239              }
240              if (stringBuf != null) stringBuf.append((char) c);
241              c = in.read();
242          }
243          if (stringBuf != null) this.string = stringBuf.toString().intern();
244          else {
245              in.unread(); // miss the trailing "
246              this.string = in.getString().intern();
247              in.read();
248          }
249          return STRING;
250      }
251  
252      private int _getToken() throws IOException {
253          int c;
254          do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' );
255          if (c == -1) return -1;
256          if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
257          if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
258          if (c == '"' || c == '\'') return getString(c);
259          switch (c) {
260          case ';': return SEMI;
261          case '[': return LB;
262          case ']': return RB;
263          case '{': return LC;
264          case '}': return RC;
265          case '(': return LP;
266          case ')': return RP;
267          case ',': return COMMA;
268          case '?': return HOOK;
269          case ':': return !in.match(':') ? COLON : in.match('=') ? GRAMMAR : le(":: is not a valid token");
270          case '.': return DOT;
271          case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
272          case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
273          case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
274          case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
275          case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
276          case '%': return in.match('=') ? ASSIGN_MOD : MOD;
277          case '~': return BITNOT;
278          case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? INC : ADD;
279          case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? DEC : SUB;
280          case '*': return in.match('=') ? ASSIGN_MUL : MUL;
281          case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
282          case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
283              in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
284          case '/':
285              if (in.match('=')) return ASSIGN_DIV;
286              if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
287              if (!in.match('*')) return DIV;
288              while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
289                  if (c == '\n' || c != '/' || !in.match('*')) continue;
290                  if (in.match('/')) return getToken();
291                  throw new LexerException("nested comments are not permitted");
292              }
293              if (c == -1) throw new LexerException("unterminated comment");
294              return getToken();  // `goto retry'
295          default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
296          }
297      }
298  
299      private int le(String s) throws LexerException { if (true) throw new LexerException(s); return 0; }
300  
301      // SmartReader ////////////////////////////////////////////////////////////////
302  
303      /** a Reader that tracks line numbers and can push back tokens */
304      private class SmartReader {
305          PushbackReader reader = null;
306          int lastread = -1;
307  
308          public SmartReader(Reader r) { reader = new PushbackReader(r); }
309          public void unread() throws IOException { unread((char)lastread); }
310          public void unread(char c) throws IOException {
311              reader.unread(c);
312              if(c == '\n') col = -1;
313              else col--;
314              if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
315          }
316          public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; }
317          public int peek() throws IOException {
318              int peeked = reader.read();
319              if (peeked != -1) reader.unread((char)peeked);
320              return peeked;
321          }
322          public int read() throws IOException {
323              lastread = reader.read();
324              if (accumulator != null) accumulator.append((char)lastread);
325              if (lastread != '\n' && lastread != '\r') col++;
326              if (lastread == '\n') {
327                  // col is -1 if we just unread a newline, this is sort of ugly
328                  if (col != -1) parserLine = ++line;
329                  col = 0;
330              }
331              return lastread;
332          }
333  
334          // FEATURE: could be much more efficient
335          StringBuffer accumulator = null;
336          public void startString() {
337              accumulator = new StringBuffer();
338              accumulator.append((char)lastread);
339          }
340          public String getString() throws IOException {
341              String ret = accumulator.toString().intern();
342              accumulator = null;
343              return ret;
344          }
345      }
346  
347  
348      // Token PushBack code ////////////////////////////////////////////////////////////
349  
350      private int pushBackDepth = 0;
351      private int[] pushBackInts = new int[10];
352      private Object[] pushBackObjects = new Object[10];
353  
354      /** push back a token */
355      public final void pushBackToken(int op, Object obj) {
356          if (pushBackDepth >= pushBackInts.length - 1) {
357              int[] newInts = new int[pushBackInts.length * 2];
358              System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length);
359              pushBackInts = newInts;
360              Object[] newObjects = new Object[pushBackObjects.length * 2];
361              System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length);
362              pushBackObjects = newObjects;
363          }
364          pushBackInts[pushBackDepth] = op;
365          pushBackObjects[pushBackDepth] = obj;
366          pushBackDepth++;
367      }
368  
369      /** push back the most recently read token */
370      public final void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); }
371  
372      /** read a token but leave it in the stream */
373      public final int peekToken() throws IOException {
374          int ret = getToken();
375          pushBackToken();
376          return ret;
377      }
378  
379      /** read a token */
380      public final int getToken() throws IOException {
381          number = null;
382          string = null;
383          if (pushBackDepth == 0) {
384              mostRecentlyReadToken = op;
385              return op = _getToken();
386          }
387          pushBackDepth--;
388          op = pushBackInts[pushBackDepth];
389          if (pushBackObjects[pushBackDepth] != null) {
390              number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null;
391              string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null;
392          }
393          return op;
394      }
395  
396      class LexerException extends IOException {
397          public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); }
398      }
399  }
400