/* * Copyright (c) 1998 Sojourn Software Design Lab, Inc. All Rights Reserved. * * This software is the confidential and proprietary information of Sojourn * Software Design Lab, Inc. ("Confidential Information"). You shall not * disclose such Confidential Information and shall use it only in * accordance with the terms of the license agreement you entered into * with Sojourn Labs. * * Sojourn Labs MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. * * **************************************************************************** * * Author Jenny Lam * version 0.1 8/7/98 * * **************************************************************************** * * Grammar file for HTML 4.0 . This file will be the input to SableCC, which * will generate a framework for a parser. 
* * ***************************************************************************/

/*
 * Helpers: named character codes and character classes that the token
 * definitions below refer to. They are building blocks only and are not
 * tokens themselves.
 */
Helpers
    ht = 0x0009;    /* horizontal tab */
    lf = 0x000a;    /* line feed */
    ff = 0x000c;    /* form feed */
    cr = 0x000d;    /* carriage return */
    /* NOTE(review): 0x0012 is the DC2 control character, not a line break
       code. It is used in line_terminator below - confirm the intended value. */
    nl = 0x0012;
    sp = ' ';       /* a single space */
    number = ['0'..'9'];
    letter = ['a'..'z'] + ['A'..'Z'];
    /* Underscore plus the same upper and lower case ranges as letter. */
    nonnumeric = ['_' + [['a'..'z'] + ['A'..'Z']]];
    /* Any single line-break code above, or the two-character sequence cr lf. */
    line_terminator = lf | cr | nl | cr lf;
    /* NOTE(review): this literal is empty as found. The name suggests a
       comment-opening delimiter that was lost - TODO restore from the
       original grammar. */
    st_comment = '';
    greater_than = '>';

/* Lexer states declared for this grammar. */
States
    normal, tag, elelist, attlist, attval;

/*
 * Tokens. A brace prefix such as {normal} names the lexer state(s) in which
 * the token that follows it is defined.
 */
Tokens
    /* Runs of space, tab, form feed, the greater-than sign and line breaks.
       Listed under Ignored Tokens further down.
       NOTE(review): the trailing star also permits an empty match, and
       greater_than being part of the ignored whitespace looks unintended -
       confirm both. */
    {normal} space_terminator = (sp | ht | ff | greater_than | line_terminator)*;
    /* NOTE(review): end_comment is not declared anywhere in the visible
       Helpers, and unicode_input_character is declared as a token on the next
       line rather than as a helper. SableCC token expressions normally
       resolve names against Helpers only - confirm and move or restore these
       definitions. */
    comment = st_comment (unicode_input_character)* end_comment;
    unicode_input_character = [0..0xffff];
    STAG = '<';
    /*
     * NOTE(review): everything from ETAG to the end of this section of the
     * file is kept exactly as found, on one line. The ETAG literal has no
     * closing quote and the state lists in front of ALIGN appear cut off, so
     * the quoting of every later literal is shifted; reflowing or annotating
     * inside that text could change what the lexer generator reads. The tail
     * contains the attribute-name tokens (ALIGN through WIDTH), EQUAL with an
     * attlist to attval state transition, the color token, the Ignored Tokens
     * list (space_terminator, comment) and the start of Productions, which
     * ends mid-rule at the htmlo element name. SCHEME is bound to the literal
     * meta, which may be a further symptom of lost text. TODO restore this
     * region from the original grammar.
     */
    ETAG = 'elelist} attlist} ALIGN = 'align'; ALINK = 'alink'; ALT = 'alt'; ARCHIVE = 'archive'; BACKGROUND = 'background'; BGCOLOR = 'bgcolor'; BORDER = 'border'; CLEAR = 'clear'; CODE = 'code'; CODEBASE = 'codebase'; COLOR = 'color'; CONTENT = 'content'; COORDS = 'coords'; FACE = 'face'; HEIGHT = 'height'; HREF = 'href'; ID = 'id'; ISMAP = 'ismap'; LABEL = 'label'; LINK = 'link'; NAME = 'name'; REL = 'rel'; REV = 'rev'; SCHEME = 'meta'; SIZE = 'size'; SRC = 'src'; TARGET = 'target'; TEXT = 'text'; TYPE = 'type'; VERSION = 'version'; VLINK = 'vlink'; VSPACE = 'vspace'; WIDTH = 'width'; {attlist->attval} EQUAL = '='; color = '#' ((number* letter*) | (letter* number*)); Ignored Tokens space_terminator, comment; Productions htmldoc = {content} html_structure; html_structure = {struct} STAG [htmlo]: