/* parser.mly - parse simple HTML, aware of tags that don't need an end */
%{
(* Helper code used by the semantic actions of the grammar below. *)

(* [debug msg] is the trace hook called at the start of every semantic
   action. It is a no-op here; to print each message on its own line,
   replace the definition with:

     let debug msg =
       print_string msg;
       print_newline ()
*)
let debug _ = ()
(* [kill_quotes str] removes the quoting from an attribute value.

   When [str] begins with a double quote, the result is [str] without its
   first character and without its last two characters. text_item appends
   one space to every STRING, so those last two characters are presumably
   the closing quote followed by that space; the closing quote itself is
   not checked. Any other string is returned unchanged.

   A string with fewer than three characters cannot hold both quotes and
   the space, so it is returned unchanged instead of raising
   Invalid_argument. *)
let kill_quotes str =
  let len = String.length str in
  if len >= 3 && String.get str 0 = '"' then String.sub str 1 (len - 3)
  else str
%}
/* Tokens without a semantic value. LEFT and RIGHT delimit a tag, SLASH
   marks a closing tag (and is also accepted as text), EQUAL separates an
   attribute name from its value. */
%token LEFT RIGHT SLASH
/* Names. The actions use their values as strings, so they must be declared
   with a string payload. NB_IDENT is the name used by the STag productions;
   presumably it is a tag that needs no end tag - confirm against the lexer. */
%token <string> IDENT NB_IDENT
%token EQUAL
/* String-valued tokens. STRING and COMMENT are consumed by the rules below;
   OTHER is not referenced by any rule visible in this file. */
%token <string> STRING OTHER COMMENT
%token EOF
/* Entry point of the grammar: the generated parser exposes a function
   named html. */
%start html
/* NOTE(review): ocamlyacc expects the form %type <typexpr> symbol for every
   start symbol, and no type expression is visible here. Judging by the
   actions, html yields a list of values built with the Node constructors;
   confirm the exact type name against the Node module and add it. */
%type html
%%
/* Start symbol: a whole document. Yields the list produced by content, or
   the empty list when the input consists of EOF alone. */
html:
  content EOF
    { let () = debug "Got HTML!" in
      $1 }
| EOF
    { [] }
/* One or more content items, in input order. The rule is right-recursive:
   the first item is consed onto the list built from the remaining ones. */
content:
| content_item content
    { let () = debug "Got content_item [more]" in
      $1 :: $2 }
| content_item
    { let () = debug "Got content_item" in
      [ $1 ] }
/* A single element of the document: an opening tag, a closing tag, a run of
   text, or a comment. Tags are passed through as built by their own rules;
   text and comments are wrapped in Node.Text / Node.Comment together with
   the current contents of Node.lineNum and Node.curFile. */
content_item:
| open_tag
    { let () = debug "Open Tag" in
      $1 }
| close_tag
    { let () = debug "Close Tag" in
      $1 }
| text
    { let () = debug "Text" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.Text ($1, line, file) }
| COMMENT
    { let () = debug "Comment" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.Comment ($1, line, file) }
/* An opening tag, with or without attributes.
   - A tag named by IDENT becomes Node.Tag with the attribute list and an
     empty list as third argument (presumably the children, filled in
     elsewhere - confirm against the Node module).
   - A tag named by NB_IDENT becomes Node.STag, which has no such third
     argument.
   Both record the current contents of Node.lineNum and Node.curFile. */
open_tag:
| LEFT IDENT attrs RIGHT
    { let () = debug "open_tag with attrs" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.Tag ($2, $3, [], line, file) }
| LEFT IDENT RIGHT
    { let () = debug "open_tag with no attrs" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.Tag ($2, [], [], line, file) }
| LEFT NB_IDENT attrs RIGHT
    { let () = debug "NB tag" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.STag ($2, $3, line, file) }
| LEFT NB_IDENT RIGHT
    { let () = debug "NB tag" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.STag ($2, [], line, file) }
/* A closing tag: LEFT SLASH name RIGHT. Whether the name is an IDENT or an
   NB_IDENT, the result is Node.CloseTag carrying the name and the current
   contents of Node.lineNum and Node.curFile. */
close_tag:
| LEFT SLASH IDENT RIGHT
    { let () = debug "close_tag" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.CloseTag ($3, line, file) }
| LEFT SLASH NB_IDENT RIGHT
    { let () = debug "Close NB Tag" in
      let line = !Node.lineNum and file = !Node.curFile in
      Node.CloseTag ($3, line, file) }
/* One or more attributes of the form name = value, returned as a list of
   (name, value) pairs in input order. The value is a single text_item run
   through kill_quotes.
   NOTE(review): text_item appends a space to STRING, IDENT and NB_IDENT
   values, and kill_quotes returns strings that do not start with a double
   quote unchanged, so unquoted values appear to keep that trailing space -
   confirm whether callers rely on it. */
attrs:
| IDENT EQUAL text_item attrs
    { let () = debug "attr" in
      ($1, kill_quotes $3) :: $4 }
| IDENT EQUAL text_item
    { let () = debug "attr" in
      [ ($1, kill_quotes $3) ] }
/* A run of one or more text items, concatenated left to right into a single
   string. */
text:
| text_item text
    { let () = debug "text_item [more]" in
      $1 ^ $2 }
| text_item
    { let () = debug "text_item" in
      $1 }
/* A single token treated as text. STRING, IDENT and NB_IDENT yield their
   value followed by one space; SLASH yields "/" with no space added. The
   debug labels all use the same "kind: value" layout. */
text_item:
| STRING
    { let () = debug ("string: " ^ $1) in
      $1 ^ " " }
| IDENT
    { let () = debug ("ident: " ^ $1) in
      $1 ^ " " }
| NB_IDENT
    { let () = debug ("nb_ident: " ^ $1) in
      $1 ^ " " }
| SLASH
    { let () = debug "text - slash" in
      "/" }