(* Lexer for complete deterministic automata in XML form.
   Copyright (C) 2003, 2008 Jean Goubault-Larrecq and LSV, CNRS UMR 8643 & ENS Cachan.

   This file is part of h1.

   h1 is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   h1 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with h1; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*)
%{
open "xmlauto_tab_h";

val xa_value = ref (xanone ());

fun unstringify s =
    let val |[put, convert, ...]| = outstring ""
        fun unstringify1 ("\\"::l) =
	    (case l of
		 "\""::l' => (put "\""; unstringify1 l')
	       | "\\"::l' =>  (put "\\"; unstringify1 l')
	       | "n"::l' => (put "\n"; unstringify1 l')
	       | "t"::l' => (put "\t"; unstringify1 l')
	       | "v"::l' => (put "\v"; unstringify1 l')
	       | "f"::l' => (put "\f"; unstringify1 l')
	       | "^"::c::l' => (put (chr (ord c-ord "@")); unstringify1 l')
               | c::l' =>
		 if ord c >= ord "0" andalso ord c <= ord "9"
		     then let val c' :: c'' :: l'' = l'
			      val n = (ord c - ord "0") * 100
			      val n' = n + (ord c' - ord "0") * 10
			      val n'' = n' + (ord c'' - ord "0")
			  in
			      put (chr n'')
			  end
		 else skip l)
          | unstringify1 ("\"":: _) = ()
          | unstringify1 (c::l) = (put c; unstringify1 l)
          | unstringify1 nil = ()
        and skip ("\\"::l) = unstringify1 l
	  | skip (_ :: l) = skip l
    in
       unstringify1 (tl (explode s)); (* extremely inefficient, but so what? *)
       convert ()
    end;

%}

SPACE [\ \t\f\n\r]+
DIG [0-9]
FMT [\ \t\n\f]
CAPITAL [A-Z]
SMALL [a-z]
LETTER ({CAPITAL}|{SMALL})
ALPHA [A-Za-z0-9\'\_]
SYMB [\!\%\&\$\#\+\-\/\:\<\=\>\?\@\\\~\'\^\|\*\`]
INT ({DIG}+)

%x MODEL
%x XMLCOMMENT
%x XML
%x INTG

%%

<INITIAL>{
\<\?xml{SPACE}version=\"[0-9.]*\"{SPACE}encoding=\"[^"]*\"\?\>	{ glex_begin (yyd, XML); return kw_xml }
"+++ BEGIN MODEL" { glex_begin(yyd,MODEL); return kw_model }
{SPACE} continue
. continue
}

<XML>{
"<!--"	{ glex_begin (yyd, XMLCOMMENT); continue }
{SPACE}	continue
"<definitions"	return kw_open_definitions
"</definitions"	return kw_close_definitions
"<definition"	return kw_open_definition
"</definition"	return kw_close_definition
"<states"	return kw_open_states
"</states"	return kw_close_states
">"		return kw_end_tag
"/>"		return kw_short_end_tag
"<state"	return kw_open_state
"</state"	return kw_close_state
"<satisfies"	return kw_open_satisfies
"</satisfies"	return kw_close_satisfies
"<tables"	return kw_open_tables
"</tables"	return kw_close_tables
"<table"	return kw_open_table
"</table"	return kw_close_table
"<entry"	return kw_open_entry
"</entry"	return kw_close_entry
"<result"	return kw_open_result
"</result"	return kw_close_result
"<arg"		return kw_open_arg
"</arg"		return kw_close_arg
"name"		return kw_name
"arity"		return kw_arity
"arg"		return kw_arg
"cond"		return kw_cond
"result"	return kw_result
"="		return kw_equal
\"([^\"\\]|(\\([\"\\ntvf]|(\^[\@A-Z\[\\\]\^\_])|({DIG}{DIG}{DIG})|({FMT}*\\))))*\"	{
  xa_value := xastring (unstringify (glex_text yyd)); return string_constant }
.	{ #put stderr "Unrecognized character: ";
          #put stderr (glex_text yyd);
          #put stderr "\n"; continue }
<<EOF>>	{ glex_begin (yyd, 0); glex_flush (yyd, glex_current_buffer yyd);
                  raise EOFEncounteredEvt }
}

<XMLCOMMENT>{
  "-->"	{ glex_begin (yyd, XML); continue }
  {SPACE} continue
  . continue
  <<EOF>>	{ glex_begin (yyd, 0); glex_flush (yyd, glex_current_buffer yyd);
                  raise XmlUnterminatedCommentEvt }
}

<INTG>{
{INT}	{ xa_value := xaint (intofstring (glex_text yyd)); glex_begin(yyd,MODEL); return intg }
[\ \t]+	continue
(.|\n\r)	{ #put stderr "Expecting integer, got character: ";
          #put stderr (glex_text yyd);
          #put stderr "\n"; continue }
<<EOF>>	{ glex_begin (yyd, 0); glex_flush (yyd, glex_current_buffer yyd);
                  raise EOFEncounteredEvt }
}

<MODEL>{
"+++ END MODEL" { glex_begin(yyd,INITIAL); continue }
{SPACE}	continue
\%[ \t]*domain[ \t]+size[ \t]+is	{ glex_begin(yyd,INTG);
					 (* #put stderr "Domain size found\n";
					  #flush stderr (); *)
					 return kw_domain_size }
\%[ \t]*\n	continue
\%[ \t]*[^ \td].*\n	continue
"("	return kw_open_paren
")"	return kw_close_paren
","	return kw_comma
"="	return kw_equal
"<=>"	return kw_iff
"$true"	return kw_true
"$false"	return kw_false
\!{INT}	{ xa_value := xastring (glex_text yyd); return statename }
\_*{SMALL}{ALPHA}*	{ xa_value := xastring (glex_text yyd); return identifier }
\_*{CAPITAL}{ALPHA}*	{ xa_value := xastring (glex_text yyd); return VAR }
\_*{INT}		{ xa_value := xastring (glex_text yyd); return identifier }
.	{ #put stderr "Unrecognized character: ";
          #put stderr (glex_text yyd);
          #put stderr "\n";
          #flush stderr (); continue }
<<EOF>>	{ glex_begin (yyd, 0); glex_flush (yyd, glex_current_buffer yyd);
                  raise EOFEncounteredEvt }
}

%%
