001 // Copyright (c) 2001 Hursh Jain (http://www.mollypages.org)
002 // The Molly framework is freely distributable under the terms of an
003 // MIT-style license. For details, see the molly pages web site at:
004 // http://www.mollypages.org/. Use, modify, have fun !
005
006 package fc.web.page;
007
008 import java.io.*;
009 import java.util.*;
010
011 import fc.io.*;
012 import fc.util.*;
013
014 /**
015 A Reader suitable for lexing. Supports all of: <code>peek</code>,
016 <code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has
017 all of those). Mark/reset is not supported because it's too complex to
018 implement given the current <i>fixed-buffer</i> implementation of this
019 class. (on the flip-side this implementation does allow to read
020 <i>very</i> large files without risk of running out of JDK memory).
021 <p>
022 <xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-]
023 <p>
024 Note 1: If this class is invoked from the command line, setting the dbg
025 flag in the code to <code>true</code> is useful.
026
027 @author hursh jain
028 */
029 public final class PageReader extends Reader
030 {
031 /*
032 Old implemenation had a rolling buffer window and a previous spillover buffer.
033 It was too complicated, so that implemenation is pastured.
034
035 It is MUCH simpler to read the entire file into one character array...(unread,
036 read, mark, reset) etc, become trivial. By contrast, if using a
037 limited-size-read-buffer implementation (as before) all of those become much
038 harder since we loose the existing buffer contents when the buffer is *refilled*
039 (so how do you go back in the buffer if the buffer is gone ?).
040 */
041 static final boolean dbg = false;
042
043 //our own buf/pos because most/all reader subclasses dont have mark/reset/unread
044 char[] buf = null;
045 int pos = 0;
046 int count = 0;
047 int markpos = 0;
048 //line, col and other tracking
049 int line = 1;
050 int col = 0;
051 int lastcol = 1; //for unread past a newline
052 boolean pushBackNL = false;
053 boolean lastWasCR = false;
054 String encoding;
055 static String DEFAULT_ENCODING = "UTF-8";
056
057 /**
058 Creates a new PageReader wrapping the specified reader
059 */
060 public PageReader(Reader r) throws IOException
061 {
062 Argcheck.notnull(r, "specified reader was null");
063 buf = IOUtil.readerToCharArray(r);
064 this.encoding = DEFAULT_ENCODING;
065 }
066
067 /**
068 Creates a reader with the specified non-null encoding.
069 */
070 public PageReader(File file, String encoding) throws IOException
071 {
072 Argcheck.notnull(file, "specified file was null");
073 Argcheck.notnull(encoding, "specified encoding was null");
074 this.encoding = encoding;
075 buf = IOUtil.fileToCharArray(file, encoding);
076 }
077
078 /**
079 Creates a reader using the UTF-8 encoding.
080 */
081 public PageReader(File file) throws IOException
082 {
083 this(file, DEFAULT_ENCODING);
084 }
085
086 public void close() throws IOException
087 {
088 //no underlying stream since everything read into buffer. not much to do.
089 }
090
091 public int read() throws IOException
092 {
093 if (pos == buf.length) {
094 return -1;
095 }
096
097 char c = buf[pos++];
098
099 if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c));
100 adjustReadLineNum(c);
101
102 return c;
103 }
104
105 public int read(char[] buf, int start, int len) throws IOException
106 {
107 throw new IOException("not implemented, use the read() method instead");
108 }
109
110 /**
111 Unreads the current character (which could be EOF) so that the next read will
112 return the current character (or EOF) again.
113 */
114 public void unread() throws IOException
115 {
116 char c = 0;
117
118 if (pos == 0)
119 {
120 throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far");
121 }
122 else{
123 c = buf[--pos];
124 if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c));
125 }
126
127 adjustUnreadLineNum(c);
128 }
129
130 /**
131 Unreads the specified number of characters
132 */
133 public void unread(int count) throws IOException
134 {
135 for (int n = 0; n < count; n++) {
136 unread();
137 }
138 }
139
140 /**
141 Useful for inserting included files into the stream and then parsing that content in-line
142 with the rest of the file.
143 */
144 public void insertIntoStream(File file) throws IOException
145 {
146 char[] insert = IOUtil.fileToCharArray(file, encoding);
147
148 char[] result = new char[buf.length + insert.length];
149 System.arraycopy(buf, 0, result, 0, pos);
150 System.arraycopy(insert, 0, result, pos, insert.length);
151 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
152
153 buf = result;
154 }
155
156 /**
157 Useful for inserting included files into the stream and then parsing that content in-line
158 with the rest of the file.
159 */
160 public void insertIntoStream(Reader r) throws IOException
161 {
162 char[] insert = IOUtil.readerToCharArray(r);
163
164 char[] result = new char[buf.length + insert.length];
165 System.arraycopy(buf, 0, result, 0, pos);
166 System.arraycopy(insert, 0, result, pos, insert.length);
167 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos);
168
169 buf = result;
170 }
171
172
173 void adjustReadLineNum(char c)
174 {
175 // we can read: \r, \r\n , \n all of which increase line count by exactly 1
176 switch (c)
177 {
178 case '\n':
179 if (! lastWasCR) {
180 line++;
181 lastcol=col;
182 col=1;
183 }
184 else {
185 lastWasCR = false;
186 }
187 break;
188
189 case '\r':
190 line++;
191 lastcol=col;
192 col=1;
193 lastWasCR = true;
194 break;
195
196 case '\t':
197 col = col + 4;
198 break;
199
200 default:
201 col++;
202 }
203 }
204
205
206 void adjustUnreadLineNum(char c)
207 {
208 // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1
209 switch (c) {
210 case '\n':
211 pushBackNL = true;
212 line--;
213 col=lastcol;
214 break;
215 case '\r':
216 if (! pushBackNL) {
217 line--;
218 col=lastcol;
219 }
220 else{
221 pushBackNL = false;
222 }
223 break;
224 case '\t':
225 col = col - 4;
226 break;
227 default:
228 col--;
229 }
230 }
231
232 public int peek() throws IOException
233 {
234 return buf[pos];
235 }
236
237
238 /**
239 Skips all whitespace characters such that the next {@link read} will
240 return the <b>next</b> non-whitespace character (or EOF if there are no
241 more characters).
242 */
243 public void skipWhitespace() throws IOException
244 {
245 int c = -1;
246 while (true)
247 {
248 c = read();
249
250 if (c == -1) {
251 break;
252 }
253
254 if (! Character.isWhitespace(c)) {
255 unread();
256 break;
257 }
258 }
259 }
260
261
262
263 /**
264 Tries to read/consumes the specified char and returns true
265 if successful. If the specified char is not found, does not
266 consume anything and returns false.
267 */
268 public boolean match(int target) throws IOException
269 {
270 int c = read();
271
272 if (c == target)
273 return true;
274 else
275 unread();
276
277 return false;
278 }
279
280 /**
281 Tries to read/consumes the specified non-null string and returns true
282 if successful. If the specified string is not found, does not
283 consume anything and returns false.
284 */
285 public boolean match(String target) throws IOException
286 {
287 if (target == null)
288 throw new IllegalArgumentException("Specified target string was null");
289
290 int c = -1;
291 for (int i = 0; i < target.length(); i++)
292 {
293 c = read();
294
295 if ( c == -1 || c != target.charAt(i)) {
296 unread(i+1);
297 return false;
298 }
299 }
300
301 return true;
302 }
303
304 public boolean matchIgnoreCase(String target) throws IOException
305 {
306 if (target == null)
307 throw new IllegalArgumentException("Specified target string was null");
308
309 int c = -1;
310 for (int i = 0; i < target.length(); i++)
311 {
312 c = read();
313
314 if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) {
315 unread(i+1);
316 return false;
317 }
318
319 }
320
321 return true;
322 }
323
324 public boolean markSupported()
325 {
326 return false;
327 }
328
329 public int getLine() {
330 return line;
331 }
332
333 public int getCol() {
334 return col;
335 }
336
337 char[] getBuf() { return buf; }
338 int getPos() { return pos; }
339
340 //other utility methods
341
342 public static void main (String args[]) throws IOException
343 {
344 //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */
345 StringReader sr = null;
346 PageReader lex = null;
347 int c = -1;
348
349 System.out.println("Reading an empty string.....");
350 sr = new StringReader("");
351 lex = new PageReader(sr);
352 while ( (c = lex.read()) != -1) {
353 testprint(lex, c);
354 }
355
356 System.out.println("----------------- TEST 2 --------------");
357 sr = new StringReader("abc");
358 lex = new PageReader(sr);
359 while ( (c = lex.read()) != -1) {
360 testprint(lex, c);
361 //System.out.print(c + " ");
362 }
363
364 System.out.println("----------------- TEST 3 --------------");
365 sr = new StringReader("abcde");
366 lex = new PageReader(sr);
367 try {
368 c = lex.read();
369 testprint(lex, c);
370 lex.unread();
371 testprint(lex, -10);
372 lex.unread();
373 testprint(lex, -10);
374 c = lex.read();
375 testprint(lex, c);
376 }
377 catch (Exception e) {
378 e.printStackTrace();
379 }
380
381 System.out.println("----------------- TEST 4 --------------");
382 sr = new StringReader("abcd\ne");
383 lex = new PageReader(sr);
384 try {
385 c = lex.read();
386 testprint(lex, c);
387 lex.unread();
388 testprint(lex, -10);
389
390 for (int i = 0; i < 5; i++) {
391 c = lex.read();
392 testprint(lex, c);
393 }
394
395 for (int i = 0; i < 5; i++) {
396 lex.unread();
397 testprint(lex, -10);
398 }
399
400 for (int i = 0; i < 5; i++) {
401 c = lex.read();
402 testprint(lex, c);
403 }
404
405 c = lex.read();
406 testprint(lex, c);
407 }
408 catch (Exception e) {
409 e.printStackTrace();
410 }
411
412 System.out.println("----------------- TEST 5 --------------");
413 sr = new StringReader("abcd\r\ne");
414 lex = new PageReader(sr);
415 try {
416 c = lex.read();
417 testprint(lex, c, lex.peek());
418 lex.unread();
419 testprint(lex, -10, lex.peek());
420
421 for (int i = 0; i < 5; i++) {
422 c = lex.read();
423 testprint(lex, c, lex.peek());
424 }
425
426 for (int i = 0; i < 5; i++) {
427 lex.unread();
428 testprint(lex, -10, lex.peek());
429 }
430
431 for (int i = 0; i < 5; i++) {
432 c = lex.read();
433 testprint(lex, c, lex.peek());
434 }
435
436 c = lex.read();
437 testprint(lex, c, lex.peek());
438 }
439 catch (Exception e) {
440 e.printStackTrace();
441 }
442
443 System.out.println("--------- TEST 6 ---(insert into stream middle)-------");
444 sr = new StringReader("abc");
445 lex = new PageReader(sr);
446
447 try {
448 c = lex.read();
449 testprint(lex, c);
450
451 StringReader insert = new StringReader("123");
452 System.out.println("inserting \"123\" into the stream\n");
453 lex.insertIntoStream(insert);
454
455 while ( (c = lex.read()) != -1) {
456 testprint(lex, c);
457 }
458 }
459 catch (Exception e) {
460 e.printStackTrace();
461 }
462
463
464 System.out.println("--------- TEST 7 ---(insert into stream begin)-------");
465 sr = new StringReader("abc");
466 lex = new PageReader(sr);
467
468 try {
469 StringReader insert = new StringReader("123");
470 System.out.println("inserting \"123\" into the beginning of stream\n");
471 lex.insertIntoStream(insert);
472
473 while ( (c = lex.read()) != -1) {
474 testprint(lex, c);
475 }
476 }
477 catch (Exception e) {
478 e.printStackTrace();
479 }
480
481 System.out.println("--------- TEST 8 ---(insert into stream end)-------");
482 sr = new StringReader("abc");
483 lex = new PageReader(sr);
484
485 try {
486 while ( (c = lex.read()) != -1) {
487 testprint(lex, c);
488 }
489 StringReader insert = new StringReader("123");
490 System.out.println("inserting \"123\" into the end of the stream\n");
491 lex.insertIntoStream(insert);
492
493 while ( (c = lex.read()) != -1) {
494 testprint(lex, c);
495 }
496 }
497 catch (Exception e) {
498 e.printStackTrace();
499 }
500
501 }
502
503 private static void testprint(PageReader lex, int c, int peek)
504 {
505 if (c == -1) {
506 System.out.println("====> recieved EOF (-1) from read().......");
507 }
508
509 System.out.format(
510 "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]",
511 StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length,
512 lex.getLine(), lex.getCol(),
513 (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c));
514
515 if (peek != -2)
516 System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek));
517
518 System.out.print("\n\n");
519 }
520
521 private static void testprint(PageReader lex, int c)
522 {
523 testprint(lex, c, -2);
524 }
525
526 }