/* parser.mly - parse simple HTML, aware of tags that don't need an end */

%{
(* Helper code copied verbatim into the generated parser. *)

(* Tracing hook called from every semantic action.  Tracing is switched off;
   swap in the commented-out definition below to print each reduction. *)
(* let debug msg = print_string msg; print_newline () *)
let debug _msg = ()

(* [kill_quotes str] strips the double quotes from an attribute value.

   Attribute values reach this function through the text_item rule, which
   appends one padding space to a STRING token.  A quoted value therefore
   looks like: quote, contents, quote, space.  Dropping the first character
   and the last two (hence the length minus 3) leaves just the contents.

   Anything that does not start with a double quote is returned unchanged.
   Inputs shorter than three characters are also returned unchanged: they
   cannot hold both quotes plus the padding, and String.sub would raise
   Invalid_argument on them.

   NOTE(review): an unquoted value keeps the padding space added by
   text_item (for example an IDENT value ends with a space) while a quoted
   one does not.  Confirm with the consumers of the attribute list whether
   that asymmetry is intended before changing it. *)
let kill_quotes str =
  let len = String.length str in
  if len >= 3 && str.[0] = '"' then String.sub str 1 (len - 3) else str
%}

/* Tag delimiters and the slash; these tokens carry no value. */
%token LEFT RIGHT SLASH

/* Names.  IDENT is used for tag and attribute names; NB_IDENT names the tags
   that are built as Node.STag (presumably the tags that need no end tag).
   Both carry the matched text, which the actions below use as a string. */
%token <string> IDENT NB_IDENT

%token EQUAL

/* STRING and COMMENT carry their text (the actions concatenate it or store
   it in a Node).  NOTE(review): OTHER is not referenced by any rule in this
   file; it is kept in the same directive, so it shares the string payload.
   Confirm against the lexer. */
%token <string> STRING OTHER COMMENT

%token EOF

%start html

/* NOTE(review): ocamlyacc expects a type expression in angle brackets between
   the type keyword and the start symbol.  The result is a list of the values
   built with Node.Text, Node.Comment, Node.Tag, Node.STag and Node.CloseTag,
   but the name of that type is not visible from this file, so the
   declaration is left exactly as found.  Fill it in from the Node module. */
%type html

%%

/* Entry point: a whole document is a flat list of content items, or the empty
   list when the input holds nothing but EOF. */
html:
    content EOF                     { debug "Got HTML!"; $1 }
  | EOF                             { [] }

/* One or more content items, collected in source order. */
content:
  | content_item content            { debug "Got content_item [more]"; $1 :: $2 }
  | content_item                    { debug "Got content_item"; [$1] }

/* A single item.  Opening and closing tags are returned as separate items;
   no nesting is built here.  Every node records the values held by
   Node.lineNum and Node.curFile at the moment its rule is reduced. */
content_item:
  | open_tag                        { debug "Open Tag"; $1 }
  | close_tag                       { debug "Close Tag"; $1 }
  | text                            { debug "Text";
                                      Node.Text ($1, !Node.lineNum, !Node.curFile) }
  | COMMENT                         { debug "Comment";
                                      Node.Comment ($1, !Node.lineNum, !Node.curFile) }

/* Opening tags, with or without attributes.  An IDENT name gives a Node.Tag
   whose third component starts out as the empty list; an NB_IDENT name gives
   a Node.STag, which has no such component. */
open_tag:
  | LEFT IDENT attrs RIGHT          { debug "open_tag with attrs";
                                      Node.Tag ($2, $3, [], !Node.lineNum, !Node.curFile) }
  | LEFT IDENT RIGHT                { debug "open_tag with no attrs";
                                      Node.Tag ($2, [], [], !Node.lineNum, !Node.curFile) }
  | LEFT NB_IDENT attrs RIGHT       { debug "NB tag";
                                      Node.STag ($2, $3, !Node.lineNum, !Node.curFile) }
  | LEFT NB_IDENT RIGHT             { debug "NB tag";
                                      Node.STag ($2, [], !Node.lineNum, !Node.curFile) }

/* Closing tags.  Both kinds of name produce a Node.CloseTag. */
close_tag:
  | LEFT SLASH IDENT RIGHT          { debug "close_tag";
                                      Node.CloseTag ($3, !Node.lineNum, !Node.curFile) }
  | LEFT SLASH NB_IDENT RIGHT       { debug "Close NB Tag";
                                      Node.CloseTag ($3, !Node.lineNum, !Node.curFile) }

/* One or more name=value pairs, returned as (name, value) tuples in source
   order.  The value is a single text_item passed through kill_quotes. */
attrs:
  | IDENT EQUAL text_item attrs     { debug "attr";
                                      let noquotes = kill_quotes $3 in
                                      ($1, noquotes) :: $4 }
  | IDENT EQUAL text_item           { debug "attr";
                                      let noquotes = kill_quotes $3 in
                                      [($1, noquotes)] }

/* A run of text items concatenated into one string. */
text:
  | text_item text                  { debug "text_item [more]"; $1 ^ $2 }
  | text_item                       { debug "text_item"; $1 }

/* A single token treated as text.  STRING, IDENT and NB_IDENT yield their
   text followed by one space; SLASH yields a bare slash with no space. */
text_item:
  | STRING                          { debug ("string" ^ $1); $1 ^ " " }
  | IDENT                           { debug ("ident: " ^ $1); $1 ^ " " }
  | NB_IDENT                        { debug ("nb_ident: " ^ $1); $1 ^ " " }
  | SLASH                           { debug ("text - slash"); "/" }