-- HamsterSpeak Compiler

-- (C) Copyright 2001-2024 James Paige, Ralph Versteegen, and the OHRRPGCE Developers
-- Dual licensed under the GNU GPL v2+ and MIT Licenses. Read LICENSE.txt for terms and disclaimer of liability.

-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com

-- This code is written in Euphoria 4.0. You can get the Free Open-Source
-- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend
-- David Cuny's EE editor

---------------------------------------------------------------------------

--Changelog
--3W  2024-02-06 Add srcpos debug info, local var names, and srcfiles.txt
--3Vb 2024-02-06 Disallow local/subscript or script/global with the same name
--    2024-01-11 Fix lexing of identifiers ending in : & | or -
--3Va 2022-04-08 Add exit
--3V  2022-04-07 Add showvalueof, and tracevalueof alias
--3Ug 2021-12-12 Change license to Dual GPL+MIT (with consent of all contributors)
--3Uf 2020-02-29 Accept non-breaking space as whitespace
--3Ue 2019-09-09 Windows: fix double-inclusion detection
--3Ud 2019-07-12 Windows: double-include detection now case-insensitive
--3Uc 2019-03-25 Enforce at most 32 args allowed
--3Ub 2018-11-17 Fix some error messages
--3Ua 2018-08-18 Script can also have attributes
--3U  2018-08-12 @obsolete attribute
--3Tl 2018-05-14 Delete (possibly-bad) .hs output file on error
--3Tk 2018-03-18 Optimise binary_compile; puts nodes in opposite order
--3Tj 2018-03-18 Fix some number and - lexing bugs
--3Ti 2018-03-17 More compiler global_scope optimisations
--3Th 2018-03-15 Optimize compiler, rewriting global_scope lookup
--3Tg 2017-12-31 Fix crash due to missing KIND_LONGNAMES[KIND_KEYWORD]
--3Tf 2017-11-08 Specialise assert(x==y) to use _asserteq
--3Te 2017-10-29 Look for plotscr.hsd/scancode.hsi first in global dirs
--3Td 2017-10-07 Add parent ID to scripts.bin, for --reuse-ids
--3Tc 2017-10-02 Make -j wait for warnings too
--3Tb 2017-09-28 Added -j option; warn on unknown options
--3Ta 2017-07-28 Increased MAXGLOBAL to 50000
--    2017-06-13 0x, 0o and 0b number prefixes
--3T  2017-06-11 Allow subscript vars/args to shadow outer locals
--3Se 2017-05-17 Disallow extra commas
--3Sd 2017-03-17 --incdir option; check ../share/games/ohrrpgce/ instead
--3Sc 2016-04-11 Check /usr/share/games/ohrrpgce/ for include files
--3Sb 2014-11-11 Better 'include' line lexing & garbage detection
--3Sa 2014-08-23 Fix printing of a lexer error message
--3S  2014-07-22 Automatically include plotscr.hsd and ignore
--               double inclusion; added --include and -b
--3Rg 2013-02-23 Fix a subscript related crash
--3Rf 2013-02-17 Fix several 31-bit int overflows
--3Re 2014-02-17 Trivial change to math builtin warnings
----- 2013-11-09 Prevent misuse of betweenable-operators in
--               declarations, and allow constants for global ids
--3Rc 2013-10-11 subscript implementation changes (add nonlocal kind)
--3Rb 2013-10-10 Remove variable() later, for syntax checking
--3Ra 2013-10-09 Fixed \r lineendings (AGAIN) and slow \r\n handling
--3R  2013-10-05 Add subscript
--3Qc 2013-10-04 Disallow flow statements used as expressions
--3Qb 2013-07-24 Require begin after then, do, etc. (else excluded)
--3Qa 2013-04-11 Warn when plotscr.hsd not included
----- 2013-02-28 Allow non-constant expressions in case blocks again
--3Pe 2013-01-03 Faster compile times
--3Pd 2012-10-24 Fix \x escape code
--3Pc 2012-10-09 Change srcpos encoding to remove source size limits
--3Pb 2012-10-08 \x escape code
--3Pa 2012-08-24 --reuse-ids option
--3P  2012-08-18 Change way include is lexed: quotes no longer needed
--               [], {}, . now totally disallowed
--               Add < and > operators
--               Add plotscrversion block and new HS header lump
--               New switch syntax: do after case optional, case(else)
--               Added elseif
--3Of 2012-08-18 Lexer bugfix
--3Oe 2012-04-11 Add \n and \t escape codes
--3Od 2012-03-03 Change directory search order slightly
--3Oc 2012-02-07 Increased MAXGLOBAL to 16383
--3Ob 2012-02-07 Make the max script global number set by a constant
--3Oa 2012-01-20 Fix lexer allowing newlines in strings
--3O  2012-01-19 Unicode support
--3Nd 2011-10-16 Fix commands.bin generation bug
--3Nc 2011-08-28 Additional expression optimisations
--3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow
--3Na 2011-07-25 Add -x option for Hamster Whisper's benefit
--3N  2011-07-12 Add assert
--3Ma 2010-10-21 Correctly strip \r to fix newlines ... again.
--3M  2010-05-02 abs, sign, sqrt; fix error reporting
--3L  2010-04-29 tracevalue support, involving a few internal changes
--3Ke 2010-04-18 Add -t commandline option
--3Kd 2010-04-14 Quite a few bugfixes in optimizer
--3Kc 2010-04-14 Start checking well-formedness of numbers with lexer
--3Kb 2010-04-12 Whole lot of bugfixes in script declarations and
--               checking for unreserved identifier names
--3Ka 2010-04-12 Throw error if source file contains binary
--3K 2010-04-12 Error column reporting (introducing srcpos); lump
--              original source files into source.lumped
--3Ja 2010-04-07 Wrote a (nearly) proper lexer
--3J 2009-12-05 Multiple function names may now alias the same id
--3I 2009-07-27 Export a backup copy of full source to source.txt lump
--              disable with -n option
--3H 2009-06-18 Export function names to commands.bin lump
--3Gk 2009-05-25 Fix short references crash
--3Gj 2009-01-27 Show future-compatability warning when using square
--               brackets [] and curly brackets {} in names
--3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake
--3Gh 2008-07-14 Disable warnings for unused variables by default, and
--               Add -u option to enable warnings for unused variables.
--3Gg 2008-03-01 Raise global variable limits
--3Gf 2008-02-11 Give user another chance on missing Y key :P
--3Ge 2007-07-01 String conversion rewrite to allow more whitespace
--3Gd 2007-07-01 Allow constants for new syntax default args
--3Gc 2007-05-29 Handle empty paratheses correctly, and use
--               lowercase ".hs" extension
--3Gb 2007-05-09 Throw error on missing include file
--3Ga 2007-05-09 String length not limited to 40
--3G 2007-02-09 Offset to string literal tables given as a 32-bit
--              int, script format version++ to 2.
--3F 2006-12-11 Support for variable number of arguments to functions
--              by giving -1 as the number of args in the define block
--3Ec 2006-11-30 while(true) no longer produces a warning
--3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes
--3Ea 2006-09-03 Actually export triggers in scripts.bin
--3E 2006-08-30 "define trigger" support
--3D 2006-08-16 String literal lookup tables added to end of
--              HSZ lumps replacing setstring + appendstring
--3C 2006-08-15 New syntax for giving default script arguments,
--              and for leaving out definescript
--3B 2006-08-13 HSX lumps renamed to HSZ
--3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format
--              version added to HSX header. Current is 1
--2Na 2006-07-07 Minor update to add new logical operators
--2N 2006-07-07 Added not() logic function
--2M 2006-07-06 Set exit code on warnings
--2L 2006-05-13 Added switch statement (+ case keyword)
--2K 2006-05-01 Added @scriptname and @globalvariable syntax to
--              return script or global ID number at compile-time
--              not run-time (for use with "run script by ID" and
--              "read global" and "write global")
--2J 2006-04-10 Added break, continue, exitscript, exitreturning
--              flow statements. Also fixed some return bugs
--2I 2006-04-04 Extended HSX header to include number of arguments
--              to a script, to really fix arguments-overflow-into
--              -locals bug
--2H 2006-03-29 Display better help and wait for keypress when run
--              by double-clicking the icon. Added -k command line
--              option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $=
--              Mention GPL in help text
--2F 2005-07-24 Strings implemented:
--              $id="..." -> setstring
--              $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
--              Added += and -= operators
--2D 2002-08-03 Only a small change, strip out \r from lines of
--              the script as we read them to avoid errors related
--              to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
--              non-printable characters exist in the input file.
--              (naturally the script will not compile right, but
--              at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
--              defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2             First Release

---------------------------------------------------------------------------

without warning       --to avoid annoying warnings
without type_check    --for a small speed boost
--with profile_time     --time profiling

include hsspiffy.e --various routines, sequence manipulation - James Paige
include machine.e  --needed for int_to_bytes
include std/math.e --needed for mod
include std/graphics.e --needed for color output
include std/filesys.e
include std/map.e
include euphoria/unicode.e
include euphoria/ucstypes.e
include euphoria/io_with_unicode.e

---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1

-- increment COMPILER_VERSION only for major language-altering changes
constant COMPILER_VERSION="3"
-- For small changes increment the second letter of COMPILER_SUB_VERSION
-- For large changes increment the first letter and reset the second letter
--*Please make sure this is 2 chararacters long! Append a space if required.*
constant COMPILER_SUB_VERSION="W "
constant COPYRIGHT_DATE="2024"

--Known single-character commandline options
constant VALID_OPTIONS="abcdfjknstwuyz"

--This is the oldest version of plotscr.hsd which is supported (feel free to
--increment this whenever using an older plotscr.hsd means you're missing out
--on new but nonessential features; there's no harm)
--*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION*
constant MIN_PLOTSCR_VERSION="3Tf"

--these constants are color-flags. May add more colors, up to #F8FF
--These values are also passed directly to Hamster Whisper.
--(these codes are at the end of the first private use area)
constant COL_FIRST=#F8E0
constant COLYEL=#F8E0
constant COLRED=#F8E1
constant COLPNK=#F8E2
constant COLWHI=#F8E3
constant COLBWHI=#F8E4
constant COL_LAST=#F8FF
--Map from the above to the color constants in std/graphics.e
--(which incidentally vary from the constants in graphics.e)
constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE}

--These are character classes used in lexer_table
constant LEX_CHAR=0       --Most character which are valid in identifiers. Everything above char 255
constant LEX_SPACE=1      --space and tab
constant LEX_BEGIN=2      --(
constant LEX_END=3        --)
constant LEX_OPERATOR=4   --The first character of an operator
constant LEX_COMMENT=5    --#
constant LEX_STRING=6     --"
constant LEX_COMMA=7      --,
constant LEX_WARN=8       --[]{}.
constant LEX_BINARY=9     --Nonprintable characters
constant LEX_DIGIT=10     --0-9
constant LEX_HEXDIGIT=11  --a-fA-F
constant LEX_OPERATOR2=12 --Only used in lexer_table_key2!!
constant LEX_NEWLINE=13   --\n

constant FILE_NAME=1          --With whatever path was needed to open it
constant FILE_TEXT=2          --Entire contents of the file
constant FILE_LINE_EXTENTS=3  --List of {start,end} pairs; inclusive indices, not including newlines
constant FILE_OFFSET=4        --Used by srcpos's
constant FILE_LUMP=5
--POS_* index sequence returned by decode_srcpos
constant POS_FILENAME=1  --FILE_NAME of the file
constant POS_LINE=2      --1-based line number
constant POS_COLUMN=3    --0-based column number
constant POS_LENGTH=4    --token length in characters (might be 0)
constant POS_LINETEXT=5  --The line containing the token
constant POS_TOKTEXT=6   --The text of the token
constant CMD_TEXT=1
constant CMD_POS=2
--For global_scope
constant SYM_KIND=1     --KIND_*
constant SYM_ID=2       --ID or 0
constant SYM_POS=3      --srcpos or 0
constant PAIR_NUM=1
constant PAIR_NAME=2
constant PAIR_START=1
constant PAIR_END=2
constant OPER_TRUENAME=3
constant OPER_POS=4
--FUNC_* are for elements of math_list, function_list or script_list
--(When changing these, also update create_function() and math_list)
constant FUNC_ID=1      --(aka PAIR_NUM)
constant FUNC_NAME=2    --(aka PAIR_NAME)
constant FUNC_ARGS=3    --Either list of default values (each an integer or NO_DEFAULT), or VAR_ARGS
constant FUNC_ATTRS=4   --List of attributes as a list of {string,TokenList} pairs where the
                        --TokenList are the arguments. Like a Node except children are Tokens instead of Nodes.
constant FUNC_FLAGS=5   --Bitvector of FLAG_* bits. Also indicates presence of attributes. See attribute_list
constant FUNC_POS=6     --srcpos of start of function/script. Not used by math_list
--ATTR_* are for attribute_list
constant ATTR_NAME=1
constant ATTR_FLAGS=2   --An integer which gets OR'd into FUNC_FLAGS
constant FLAG_OBSOLETE=#01  --Has @obsolete attribute
constant GLB_POS=3      --srcpos of globalvariable definition
constant CONST_VALUE=1
constant CONST_POS=2
constant VAR_FRAME=3    --VarToken index, integer giving frame number of a variable (0 is current script, 1 is enclosing...)
constant VAR_ID=4       --VarToken index, integer giving ID in frame
constant REUSE_PARENT_AND_NAME=2

constant NO_DEFAULT={}  --when appears in script_list[FUNC_ARGS], indicates arg with no default value
constant VAR_ARGS=0     --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args

--KIND_* constants are used BOTH to describe the types of raw tokens, and tokens
--which been parsed into Nodes in an AST, although many of them can only appear
--as one or the other.
constant KIND_NUMBER=1      --int32. ID is the value
constant KIND_FLOW=2        --Flow control (flow_list), including begin and end.
constant KIND_GLOBAL=3      --Global variable (global_list)
constant KIND_LOCAL=4       --Local variable
constant KIND_MATH=5        --Math builtin function (math_list)
constant KIND_FUNCTION=6    --Builtin command (function_list)
constant KIND_SCRIPT=7      --Call to script (script_list)
constant KIND_NONLOCAL=8    --Nonlocal variable
--The following never appear in compiled scripts
constant KIND_REFERENCE=9   --Reference to a global or script. Converted to KIND_NUMBER in compiled script
constant KIND_OPERATOR=10   --Infix binary operator created by defineoperator. (operator_list)
                            --If a name like 'mod' is both operator and something else, it gets added
                            --to global_scope as the other thing, not KIND_OPERATOR.
constant KIND_PARENS=11     --Node created by extra pair of brackets around an expression
constant KIND_MACRO=12      --Expanded by hspeak, e.g. assert
constant KIND_KEYWORD=13    --Other builtin keywords that can appear in scripts but aren't flow constructs.
                            --Currently just 'variable' and 'subscript'.
constant KIND_CONSTANT=14   --A constant which hasn't been translated to a KIND_NUMBER yet
constant KIND_SEPARATOR=15  --Auto-delimiting tokens like +, if they haven't been turned into a
                            --KIND_OPERATOR yet. Can only be used in defineoperator. (separator_list)
constant KIND_ATTRIBUTE=16  --An @attribute
constant KIND_LAST_VALID_IN_SCRIPT=16
constant KIND_TOPLEVEL=17   --Toplevel keyword such as defineconstant, or script trigger, eg. plotscript
constant KIND_UNIMPLEMENTED=18
constant KIND_LAST=18

constant KIND_LONGNAMES={"number"
                        ,"flow control statement"
                        ,"global variable"
                        ,"local variable"
                        ,"built-in function"
                        ,"hard-coded function"
                        ,"script"
                        ,"non-local variable"
                        ,"reference"
                        ,"operator"
                        ,"order-of-operations-enforcing parentheses"
                        ,"hard-coded function/macro"
                        ,"keyword"
                        ,"constant"
                        ,"untranslated operator"
                        ,"attribute declaration"
                        ,"top-level declaration"
                        ,"unimplemented keyword"
                        ,"function attribute"
                        }

constant nil_sym={0,0,0}

constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant TIMING_DEPTH=1
constant TIMING_DESCRIPTION=2
constant TIMING_TIME=3
constant TIMING_ACTIVE=4

--Version number for the .hs/.hsp format (not including HSZ files!)
constant HSP_FORMAT_VERSION=1

constant CODE_START_BYTE_OFFSET=32
constant HSZ_FORMAT_VERSION=3

constant MAXGLOBAL=50000  --largest ID
constant MAX_LOCALS=100   --max number of locals per script (not including nonlocals)
constant MAX_ARGS=32     --max number of arguments to a script, function or builting

constant MAX_NEST_DEPTH=4

---------------------------------------------------------------------------
--globals--               --initializations--
string compiler_dir       compiler_dir=""
string include_dir        include_dir=""
string source_file        source_file=""
string dest_file          dest_file=""
integer dest_file_fh      dest_file_fh=0          --For use by error handlers only!
integer cleanup_dest      cleanup_dest=false      --Whether to delete dest_file on error
sequence optlist          optlist={}
sequence file_list        file_list={}            --type {FILE_NAME,FILE_TEXT,FILE_LINE_EXTENTS}
sequence additional_includes
                          additional_includes={}
srcpos max_srcpos         max_srcpos=0            --Maximum possible value of a valid srcpos (for typechecking)
srcpos script_position    script_position=0         --All srcpos's emitted to a .hsz are relative to this. (Length 0)
integer debug_info        debug_info=false
integer total_lines       total_lines=0
sequence cmd              cmd={}                  --TokenList (but type checking is very slow)
sequence reuse_ids        reuse_ids={}            --type {PAIR_NUM, REUSE_PARENT_AND_NAME={parent_id, PAIR_NAME}}
map constant_list         constant_list=map:new()
sequence trigger_list     trigger_list={}         --type {PAIR_NUM, PAIR_NAME}
sequence operator_list    operator_list={}        --type {PAIR_NUM, PAIR_NAME, OPER_TRUENAME, OPER_POS}
sequence function_list    function_list={}        --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS, FUNC_ATTRS, FUNC_FLAGS}
sequence global_list      global_list={{},{},{}}  --NOTE: three lists {PAIR_NUM, PAIR_NAME, GLB_POS}, not a list of globals
sequence string_list      string_list={}          --list of strings
sequence script_list      script_list={}          --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS, FUNC_ATTRS, FUNC_FLAGS}
--See also math_list, below
                                                  --(PAIR_NAME is the full name)
map global_scope          global_scope=map:new()  --Maps all names in global scope EXCEPT operators to {SYM_KIND, SYM_ID, SYM_POS} tuples

--SCRIPT_* are for all_scripts.
--The number of data fields used is variable, between _SCRIPT_MIN_LAST and _SCRIPT_MAX_LAST
constant SCRIPT_TRIGGER_TOK=1     --Trigger token (type {CMD_TEXT, CMD_POS}), e.g. "script"
constant SCRIPT_NAME=2            --String, not fully-scoped
constant SCRIPT_POS=3             --srcpos of script name token
constant SCRIPT_FULL_NAME=4       --Full-scoped script name
constant SCRIPT_ARG_TOKS=5        --List of argument name tokens (type {CMD_TEXT, CMD_POS})
                                  --Initially the tokens may include default values (like "a=4"), which are later removed
constant SCRIPT_BODY_TOKS=6       --List of tokens (type {CMD_TEXT, CMD_POS}) in the body, of the form do,begin,...,end
constant SCRIPT_PARENT_IDX=7      --Index in all_scripts of parent, or 0 for none
constant SCRIPT_NEST_DEPTH=8      --Nesting depth. A script is 0, subscript 1, sub-subscript 2...
constant SCRIPT_ATTRIBUTES=9      --Temporary store, do not use. These attributes get copied to script_list's FUNC_ATTRS
constant SCRIPT_END_POS=10        --A srcpos of the last token of the script ('end' or ')')
constant _SCRIPT_MIN_LAST=10
--The following are added later
constant SCRIPT_ID=11             --Script ID. Value not appended until all IDs are assigned in check_script_declarations
constant SCRIPT_TRIGGER_ID=12     --trigger ID. SCRIPT_TRIGGER_ID and later not appended until compile_a_script
constant SCRIPT_NUM_NONLOCALS=13

constant SCRIPT_VARIABLES=14      --VarList of locals and nonlocals
constant SCRIPT_AST=15
constant SCRIPT_BINARY=16
constant _SCRIPT_MAX_LAST=16

sequence all_scripts      all_scripts={}       --list of 'Script's == {SCRIPT_*} (variable length)

atom start_time           start_time=time()
atom run_time             run_time=0
sequence timing_data      timing_data={}
integer timing_depth      timing_depth=-1
integer get_cmd_pointer   get_cmd_pointer=0
integer autonumber_id     autonumber_id=32767
sequence flow_list        flow_list={
                              {0,"do"}
                             ,{1,"begin"}
                             ,{2,"end"}
                             ,{3,"return"}
                             ,{4,"if"}
                             ,{5,"then"}
                             ,{6,"else"}
                             ,{7,"for"}
                             ,{10,"while"}
                             ,{11,"break"}
                             ,{12,"continue"}
                             ,{13,"exitscript"}
                             ,{14,"exitreturning"}
                             ,{15,"switch"}
                             ,{16,"case"}  --never appears in compiled script
                             ,{17,"subscript"}  --never appears in compiled script
                          }
                          --elseif is not a flow type
sequence flow_requiring_brackets  --else is excluded, because of case(else)
                          flow_requiring_brackets={
                             "do",
                             "if",
                             "then",
                             "elseif",
                             "for",
                             "while",
                             "exitreturning",
                             "switch",
                             "case"
                          }
sequence math_list        math_list={
                            --{FUNC_ID, FUNC_NAME, FUNC_ARGS, FUNC_ATTRS, FUNC_FLAGS} (FUNC_POS omitted)
                              {0,"random",                {0,1}, {}, 0}
                             ,{1,"exponent",              {0,2}, {}, 0}
                             ,{2,"modulus",               {0,1}, {}, 0}
                             ,{3,"divide",                {0,1}, {}, 0}
                             ,{4,"multiply",              {0,0}, {}, 0}
                             ,{5,"subtract",              {0,0}, {}, 0}
                             ,{6,"add",                   {0,0}, {}, 0}
                             ,{7,"xor",                   {0,0}, {}, 0}
                             ,{8,"or",                    {0,0}, {}, 0}
                             ,{9,"and",                   {0,0}, {}, 0}
                             ,{10,"equal",                {0,0}, {}, 0}
                             ,{11,"notequal",             {0,0}, {}, 0}
                             ,{12,"lessthan",             {0,0}, {}, 0}
                             ,{13,"greaterthan",          {0,0}, {}, 0}
                             ,{14,"lessthanorequalto",    {0,0}, {}, 0}
                             ,{15,"greaterthanorequalto", {0,0}, {}, 0}
                             ,{16,"setvariable",          {0,0}, {}, 0}
                             ,{17,"increment",            {0,1}, {}, 0}
                             ,{18,"decrement",            {0,1}, {}, 0}
                             ,{19,"not",                  {0},   {}, 0}
                             ,{20,"logand",               {0,0}, {}, 0}
                             ,{21,"logor",                {0,0}, {}, 0}
                             ,{22,"logxor",               {0,0}, {}, 0}
                             ,{23,"abs",                  {0},   {}, 0}
                             ,{24,"sign",                 {0},   {}, 0}
                             ,{25,"sqrt",                 {0},   {}, 0}
                          }
--update is_simple_math_op on adding new operators

sequence attribute_list   attribute_list={     --type {ATTR_NAME, ATTR_FLAGS}
                             {"@obsolete",FLAG_OBSOLETE}
                          }

sequence separator_list   separator_list={
                             "+=","-=","$+","$=","$","+","--","/","*","^^","^"
                             ,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||"
                          }
sequence lexer_table      lexer_table={}      --maps from ASCII value+1 (note +1!) to LEX_* constant
sequence lexer_table_key2 lexer_table_key2={}
string hex_chars          hex_chars="0123456789abcedfABCDEF"
sequence hex_char_values  hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F}

sequence compiled_data    compiled_data={}    --used inside binary_compile only; compiled data for the current script
map node_lookup                               --used inside binary_compile only; map from serialised nodes to offsets
string current_script     current_script=""   --name of current script
integer cur_script_num    cur_script_num=0    --index into all_scripts of current script
integer colors_enabled    colors_enabled=true
integer simple_colorcodes simple_colorcodes=false
integer error_file        error_file=false
map used_globals          used_globals=map:new()  --set of ids (keys are ids, values are true)
map used_locals           used_locals=map:new()   --set of ids (keys are ids, values are true)
sequence string_table     string_table={}     --binary data
string plotscr_version    plotscr_version=""
integer max_used_function max_used_function=0 --maximum id of a function used in a script
integer fast_mode         fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
integer was_warnings      was_warnings=false

---------------------------------------------------------------------------
--types--

--A position in the script source (where newlines are one character).
--Encoded in 32-bit integer, where:
--bits 0..7 : Length of the token in characters, including whitespace
--            (aside from leading/trailing whitespace), capped to 254.
--            (255 reserved for future use)
--bit  8    : Virtual flag
--bits 9..31: The (1-based) character number of the start of the token in the file
--            plus the file's FILE_OFFSET
--
--The first file has FILE_OFFSET=1, to ensure srcpos 0 is never used.
--Offset FILE_OFFSET+0 is reserved to refer to the file as a whole (unused);
--FILE_OFFSET+1 is the start of the file.
--
--See also encode_srcpos and decode_srcpos
type srcpos(object pos)
  --need to allow 0 as a dummy value, and ignore max_srcpos if it's uninitialised)
  if atom(pos) and pos=floor(pos) and pos>=0 and (max_srcpos=0 or pos<=max_srcpos) then
    --odd, 'or' and 'and' only seem to shortcut when used in if condition, not
    --in a general expression
    return true
  else
    return false
  end if
end type

--Not the same as Euphoria's string type, which is limited to extended ASCII
type string(object str)
  if not sequence(str) then
    return false
  end if
  --Too slow
  -- for i=1 to length(str) do
  --   if not integer(str[i]) or str[i]<0 then
  --     return false
  --   end if
  -- end for
  return true
end type

type Kind(integer kind)
  return kind>=0 and kind<=KIND_LAST
end type

--SYM_KIND,SYM_ID,SYM_POS tuple
type Symbol(object sym)
  return length(sym)=3 and Kind(sym[SYM_KIND]) and srcpos(sym[SYM_POS])
end type

--A CMD_TEXT,CMD_POS pair
type Token(object tok)
  if sequence(tok) and length(tok)=2 and string(tok[1]) and srcpos(tok[2]) then
    return true
  else
    return false
  end if
end type

--A CMD_TEXT,CMD_POS,VAR_FRAME,VAR_ID tuple describing a local/nonlocal variable
type VarToken(object var)
  if length(var)=4 and string(var[1]) and srcpos(var[2]) and integer(var[3]) and integer(var[4]) then
    return true
  end if
  return false
end type

--sequence of Tokens
type TokenList(object list)
  if not sequence(list) then
    return false
  end if
  for i=1 to length(list) do
    if not Token(list[i]) then
      return false
    end if
  end for
  return true
end type

--sequence of VarTokens
type VarList(object vars)
  if not sequence(vars) then
    return false
  end if
  for i=1 to length(vars) do
    if not VarToken(vars[i]) then
      return false
    end if
  end for
  return true
end type

--{Token,children}
type Node(object node)
  if sequence(node) and length(node)=2 and Token(node[1]) and sequence(node[2]) then
    for i=1 to length(node[2]) do
      if not Node(node[2][i]) then
        return false
      end if
    end for
    return true
  else
    return false
  end if 
end type

--sequence of Nodes
type NodeList(object list)
  if not sequence(list) then
    return false
  end if
  for i=1 to length(list) do
    if not Node(list[i]) then
      return false
    end if
  end for
  return true
end type

--An element of all_scripts
type Script(object script)
  if sequence(script) and length(script)>=_SCRIPT_MIN_LAST and length(script)<=_SCRIPT_MAX_LAST then
    return true
  else
    return false
  end if
end type

---------------------------------------------------------------------------

--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
  atom skip_time
  integer key
  skip_time=time()
  key=wait_key()
  skip_time=time()-skip_time
  start_time+=skip_time
  for i=1 to length(timing_data) do
    if timing_data[i][TIMING_ACTIVE] then
      timing_data[i][TIMING_TIME]+=skip_time
    end if
  end for
  return(key)
end function

---------------------------------------------------------------------------

--Euphoria's built-in sprintf truncates all string elements to 8 bits,
--so this is an Unicode-enabled replacement. Supports %d, %s, %g, %%
--Beware! This will be way slower than sprintf, but luckily it's not
--needed anywhere where speed matters.
function sprintf_utf(string s, sequence printf_args)
  integer seg_start, seg_end
  integer arg_ctr
  integer code
  string ret
  if length(printf_args)=0 then
    return s
  end if

  ret=""
  arg_ctr=1
  seg_start=1
  seg_end=find('%',s)
  while seg_end do
    ret&=s[seg_start..seg_end-1]
    code=s[seg_end+1]
    if code='s' then
      ret&=printf_args[arg_ctr]
      arg_ctr+=1
    elsif code='d' or code='g' then
      ret&=sprintf({'%',code},printf_args[arg_ctr])
      arg_ctr+=1
    elsif code='%' then
      ret&='%'
    else
      simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]}))
    end if
    seg_start=seg_end+2
    seg_end=find('%',s,seg_start)
  end while
  if seg_start<=length(s) then
    ret&=s[seg_start..$]
  end if

  if arg_ctr!=length(printf_args)+1 then
    simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1}))
  end if
  return ret
end function

---------------------------------------------------------------------------

--fprintf as UTF8
procedure print_utf(object fh, string s, sequence printf_args)
  if length(printf_args) then
    s=sprintf_utf(s,printf_args)
  end if
  s=toUTF(s,utf_32,utf_8)
  puts(fh,s)
end procedure

---------------------------------------------------------------------------

--prints a string with printf to stdout converting color codes
procedure color_print(string s, sequence printf_args)
  string buffer
  s=sprintf_utf(s,printf_args)
  if simple_colorcodes then
    buffer=s
  else
    buffer=""
    for i=1 to length(s) do
      if s[i]>=COL_FIRST and s[i]<=COL_LAST then
        print_utf(stdout,buffer,{})
        buffer=""
        if colors_enabled then
          text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1])
        end if
      else
        buffer&=s[i]
      end if
    end for
  end if
  if length(buffer) then
    print_utf(stdout,buffer,{})
  end if
end procedure

---------------------------------------------------------------------------

procedure opt_wait_for_key(integer error_or_warning)
  if (not error_or_warning and find('j',optlist)) or find('k',optlist) then
    --skip
  else
    color_print("[Press Any Key]\n",{})
    timeless_wait_key()
  end if
end procedure

---------------------------------------------------------------------------

procedure enter_timing_zone(string description)
  if not find('t',optlist) then
    return
  end if
  timing_depth+=1
  timing_data=append(timing_data,{timing_depth,description,-time(),true})
end procedure

---------------------------------------------------------------------------

procedure reenter_timing_zone(string description)
  if not find('t',optlist) then
    return
  end if
  for i=1 to length(timing_data) do
    if equal(timing_data[i][TIMING_DESCRIPTION],description) then
      timing_depth+=1
      timing_data[i][TIMING_DEPTH]=timing_depth  --could change...
      timing_data[i][TIMING_TIME]-=time()
      timing_data[i][TIMING_ACTIVE]=true
      return
    end if
  end for
  enter_timing_zone(description)
end procedure

---------------------------------------------------------------------------

procedure exit_timing_zone()
  if not find('t',optlist) then
    return
  end if
  for i=length(timing_data) to 1 by -1 do
    --there can only be one active timing job at each depth at a time
    if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then
      timing_data[i][TIMING_TIME]+=time()
      timing_data[i][TIMING_ACTIVE]=false
      exit
    end if
  end for
  timing_depth-=1
end procedure

---------------------------------------------------------------------------

procedure print_timing_data()
  string indent
  sequence data
  string tmp1, tmp2
  if run_time=0 then
    --so that all percentages show as 0%
    run_time=1e100
  end if
  for i=1 to length(timing_data) do
    data=timing_data[i]
    indent=repeat(' ',data[TIMING_DEPTH]*4)
    tmp1=sprintf("%.2f",{data[TIMING_TIME]})
    tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time})
    color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%%  %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]})
  end for
end procedure

---------------------------------------------------------------------------

function html_char_convert(string s)
  string buffer
  string result
  result=""
  buffer=""
  for i=1 to length(s) do
    if s[i]=' ' and i>1 then
      if s[i-1]=' ' then
        buffer&="&nbsp;"
      else
        buffer&=s[i]
      end if
    elsif s[i]='<' then
      buffer&="&lt;"
    elsif s[i]='>' then
      buffer&="&gt;"
    else
      buffer&=s[i]
    end if
  end for
  if length(buffer) then
    result&=buffer
  end if
  return(result)
end function

---------------------------------------------------------------------------

function error_string_convert(string s)
  string buffer
  string result
  result=""
  buffer=""
  for i=1 to length(s) do
    if s[i]>=COL_FIRST and s[i]<=COL_LAST then
      if s[i]=COLYEL then
        buffer&="<font color=\"#F0F000\">"
      elsif s[i]=COLRED then
        buffer&="</font>"
      elsif s[i]=COLPNK then
        buffer&="<font color=\"#F07070\">"
      elsif s[i]=COLWHI then
        buffer&="<font color=\"#909090\">"
      elsif s[i]=COLBWHI then
        buffer&="<font color=\"#F0F0F0\">"
      end if
    elsif s[i]='\n' then
      buffer&="<br>\n"
    else
      buffer&=s[i]
    end if
  end for
  if length(buffer) then
    result&=buffer
  end if
  return(result)
end function

---------------------------------------------------------------------------

--prints a long string wrapped at 80 columns
procedure wrap_print(string s, sequence arguments)
  string outstring
  s=sprintf_utf(s,arguments)
  while length(s) do
    outstring=before_wrap_point(s)
    s=after_wrap_point(s)
    color_print("%s\n",{outstring})
  end while
end procedure

---------------------------------------------------------------------------

procedure error_file_print(string s)
  integer fh
  if error_file then
    fh=open(compiler_dir&"hs_error.htm","a")
    if fh!=failure then
      print_utf(fh,error_string_convert(s)&"\n",{})
      close(fh)
    end if
  end if
end procedure

---------------------------------------------------------------------------

--prints out warning message in red with word wrap
procedure simple_warn(string s)
  sequence pos
  if not find('w',optlist) then
    --do not warn if -w is set
    pos=get_position()
    if pos[2]>1 then
      print_utf(stdout,"\n",{})
    end if
    wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
    error_file_print(sprintf_utf("<Font color=\"#FF0000\">WARNING: %s</font>",{html_char_convert(s)}))
    was_warnings = true
  end if
end procedure

---------------------------------------------------------------------------

--prints out an error message in red with word wrap, then aborts
procedure simple_error(string s)
  sequence pos
  pos=get_position()
  if pos[2]>1 then
    print_utf(stdout,"\n",{})
  end if
  wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
  error_file_print(sprintf_utf("<font color=\"#FF0000\">ERROR: %s</font>",{html_char_convert(s)}))
  if end_anchor_kludge then
    error_file_print("</a>\n<hr>\n")
  end if
  opt_wait_for_key(true)
  --Custom doesn't check hspeak's error code (due to difficulty retrieving it after spawning),
  --it only checks whether the output .hs file was created.
  if cleanup_dest then
    if dest_file_fh then
      close(dest_file_fh)
    end if
    delete_file(dest_file)
  end if
  abort(1)
end procedure

---------------------------------------------------------------------------

--prints out usage info if not enough arguments
procedure check_arg_count(sequence args)
  if length(args)=2 then
    wrap_print("HamsterSpeak Compiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION})
    wrap_print("(C)2001-%s James Paige, Ralph Versteegen, and the OHRRPGCE Developers\n",{COPYRIGHT_DATE})
    wrap_print("Please read LICENSE.txt for GNU GPL & MIT dual license details and disclaimer of liability",{})
    wrap_print(COLYEL&"%s [-%s] [long-options] source.hss [dest.hs]"&COLWHI&"\n\n",{file_only(args[2]),VALID_OPTIONS})
    color_print("   -f fast mode. Disables some optimization\n",{})
    color_print("   -j don't wait for a keypress when finished if there's no error or warning\n",{})
    color_print("   -k don't wait for a keypress when finished\n",{})
    color_print("   -n don't add debug info, or a copy of the source code, to .hs file\n",{})
    color_print("   -w suppress warnings\n",{})
    color_print("   -u show warnings for unused variables\n",{})
    color_print("   -y overwrite the destination file without asking\n",{})
    color_print("   --incdir <path>\n" &
                "      also search this directory for include files\n",{})
    color_print("   --include <filename>\n" &
                "      include an additional file\n",{})
    color_print("\nAdvanced options (not intended for normal use):\n",{})
    color_print("   -b don't include plotscr.hsd automatically\n",{})
    color_print("   -c colors will be disabled\n",{})
    color_print("   -d dump debug report to hs_debug.txt\n",{})
    color_print("   -s print the name of each plotscript as it is compiled\n",{})
    color_print("     -a same as -s, but printing all scripts\n",{})
    color_print("   -t print detailed timing info (for HSpeak developers)\n",{})
    color_print("   -z write error messages to hs_error.htm\n",{})
    --Undocumented arguments:
    --              --unicode-cols
    --                 used by Hamster Whisper for piped output
    --              --reuse-ids scripts.bin
    --                 Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping
    wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
    wrap_print("\nThis is a command-line program. You should either run it from the command-line, or drag and drop your script file onto it. You don't ever need to run HSpeak yourself; Custom will do that for you.\n",{})
    opt_wait_for_key(false)
    abort(0)
  end if
end procedure

---------------------------------------------------------------------------

--Build the lexer tables, which for each code point 0<=i<256, contains the type
--of that character in entry i+1 (have to handle NUL)
--Other characters above code 255 are always LEX_CHAR
procedure init_lexer()
  sequence table_insert

  --main table
  --(\xa0 is non-breaking space)
  table_insert={" (),\n#\t\"[]{}.\xa0",
                {LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_NEWLINE,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_SPACE}}
  --by default each character is either binary, or an allowed identifier
  --character. This is for backwards compatibility
  --NOTE the +1!
  lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1)
  for i=1 to length(table_insert[1]) do
    lexer_table[table_insert[1][i]+1]=table_insert[2][i]
  end for
  for i=0 to 9 do
    lexer_table['0'+i+1]=LEX_DIGIT
  end for
  for i=0 to 5 do
    lexer_table['a'+i+1]=LEX_HEXDIGIT
    lexer_table['A'+i+1]=LEX_HEXDIGIT
  end for
  --add the one-character prefix of each operator
  for i=1 to length(separator_list) do
    lexer_table[separator_list[i][1]+1]=LEX_OPERATOR
  end for

  --keyword suffix parsing table
  lexer_table_key2=repeat(LEX_CHAR,256)
  lexer_table_key2[' '+1]=LEX_SPACE
  lexer_table_key2['\t'+1]=LEX_SPACE
  --2nd character of each operator
  for i=1 to length(separator_list) do
    if length(separator_list[i])=2 then
      lexer_table_key2[separator_list[i][2]+1]=LEX_OPERATOR2
    end if
  end for

  --when floating point is added will want a table for number parsing
end procedure

---------------------------------------------------------------------------

--initializes global variables, and generally gets things ready to roll
procedure init()
  sequence args
  integer index
  integer key
  integer fh
  sequence sym

  --Process arguments
  args=command_line()
  compiler_dir=path_only(args[2])
  check_arg_count(args)

  wrap_print("HamsterSpeak Compiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION})

  optlist={}
  index=3
  while index<=length(args) do
    --These are undocumented
    if equal(args[index],"--unicode-cols") then
      simple_colorcodes=true
      args=delete_element(args,index)
    elsif equal(args[index],"--reuse-ids") and index+1<=length(args) then
      color_print(COLWHI&"Reading script IDs from previous compile\n",{})
      reuse_ids=read_scripts_dot_bin(args[index+1])
      args=delete_slice(args,index,index+1)
    elsif equal(args[index],"--include") and index+1<=length(args) then
      additional_includes&={args[index+1]}
      args=delete_slice(args,index,index+1)
    elsif equal(args[index],"--incdir") and index+1<=length(args) then
      include_dir=args[index+1]
      args=delete_slice(args,index,index+1)
    elsif args[index][1]='-' then
      optlist=optlist&hs_lower(args[index][2..length(args[index])])
      args=delete_element(args,index)
    else
      index+=1
    end if
  end while
  for i=1 to length(optlist) do
    if not find(optlist[i], VALID_OPTIONS) then
      simple_warn("Unknown commandline option -" & optlist[i])
    end if
  end for
  check_arg_count(args)
  source_file=normalize_filename(args[3])
  if length(args)>3 then
    dest_file=normalize_filename(args[4])
  else
    dest_file=normalize_filename(alter_extension(source_file,"hs"))
  end if
  if find('f',optlist) then
    fast_mode=true
    color_print("Using fast mode. Some size optimization disabled\n",{})
  end if
  if find('c',optlist) then
    colors_enabled=false
  end if
  if not find('n',optlist) then
    debug_info=true
  end if
  --'x' used by old versions of Hamster Whisper to specify color code format
  --However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway
  --Replaced by --unicode-cols
  --the semi-undocumented command line argument -z writes a file called
  --hs_error.htm formatted for HssEd to read
  if find('z',optlist) then
    error_file=true
    if file_exists(compiler_dir&"hs_error.htm") then
      fh=open(compiler_dir&"hs_error.htm","w")
      if fh!=failure then
        puts(fh,"")
        close(fh)
      end if
    end if
  end if

  wrap_print("Compiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
  if file_exists(dest_file) then
    if find(extension_only(dest_file),{"hss","txt"}) then
      simple_error("Refusing to write output to "&dest_file&" because only input files should have hss/txt extension!")
    end if
    if find('y',optlist) then
      --found the -y command line arg, overwrite automatically
    else
      while true do
        --prompt the user to overwrite
        wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
        key=timeless_wait_key()
        color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})

        if hs_lower(key)='y' then
          exit
        elsif hs_lower(key)='n' then
          simple_error("output file overwrite cancelled by user")
        else
          wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)})
        end if
      end while
    end if
  end if
  --From now on, delete dest_file on error
  cleanup_dest=true

  --Init global data tables
  global_scope=map:new_from_kvpairs({
           {"defineconstant",{KIND_TOPLEVEL,0,0}}
          ,{"definetrigger" ,{KIND_TOPLEVEL,0,0}}
          ,{"defineoperator",{KIND_TOPLEVEL,0,0}}
          ,{"globalvariable",{KIND_TOPLEVEL,0,0}}
          ,{"definefunction",{KIND_TOPLEVEL,0,0}}
          ,{"definescript"  ,{KIND_TOPLEVEL,0,0}}
          ,{"plotscrversion",{KIND_TOPLEVEL,0,0}}
          ,{"include"       ,{KIND_TOPLEVEL,0,0}}  --should never appear, in theory
          ,{"cfor"          ,{KIND_UNIMPLEMENTED,0,0}}
          ,{"foreach"       ,{KIND_UNIMPLEMENTED,0,0}}
          ,{"="             ,{KIND_UNIMPLEMENTED,0,0}}
          ,{"elseif"        ,{KIND_FLOW,-1,0}}  --not in flow_list... shouldn't that be a problem?
          ,{"exit"          ,{KIND_FLOW,-1,0}}  --not in flow_list
          --keywords have special case handling
          ,{"subscript"     ,{KIND_KEYWORD,0,0}} --subscript is also in flow_list!
          ,{"variable"      ,{KIND_KEYWORD,0,0}}
          ,{"tracevalue"    ,{KIND_MACRO,0,0}}
          ,{"tracevalueof"  ,{KIND_MACRO,0,0}}
          ,{"showvalueof"   ,{KIND_MACRO,0,0}}
          ,{"assert"        ,{KIND_MACRO,0,0}}
          -- Stray "$"'s are picked up by the lexer (not totally ideal maybe)
        })

  for i=1 to length(flow_list) do
    sym=flow_list[i]
    if map:has(global_scope, sym[PAIR_NAME])=false then  --omit subscript
      map:put(global_scope, sym[PAIR_NAME], {KIND_FLOW,sym[PAIR_NUM],0})
    end if
  end for
  for i=1 to length(math_list) do
    sym=math_list[i]
    map:put(global_scope, sym[PAIR_NAME], {KIND_MATH,sym[PAIR_NUM],0})
  end for
  -- Adding the separators (operators) here prevents them from being used before
  -- defineoperator is handled (either a block before include,plotscr.hsd, or in
  -- e.g. defineconstant)
  for i=1 to length(separator_list) do
    if map:has(global_scope,separator_list[i])=false then  --omit =
      map:put(global_scope,separator_list[i],{KIND_SEPARATOR,i,0})
    end if
  end for
  for i=1 to length(attribute_list) do
    map:put(global_scope,attribute_list[i][ATTR_NAME],{KIND_ATTRIBUTE,i,0})
  end for
  --Append subscript trigger id (this is needed for scripts.bin)
  trigger_list=append(trigger_list,{-1,"subscript"})
  --Lexer finite state transition tables
  init_lexer()
end procedure

---------------------------------------------------------------------------

--Returns offset component of a srcpos (usually you want to use decode_srcpos_file
--instead if you want to know the file offset)
function srcpos_offset(srcpos pos)
  integer ret
  --It appears to be a Euphoria euc bug that it's necessary to explicitly cast to integer
  ret=floor(pos/power(2,9))
  return ret
end function

---------------------------------------------------------------------------

--Returns length of a srcpos (between 0 and 254)
function srcpos_length(srcpos pos)
  return and_bits(pos,power(2,8)-1)
end function

---------------------------------------------------------------------------

--returns {filenumber,file position} pair, later being 1-based character offset
function decode_srcpos_file(srcpos pos)
  integer len, offset

  --0 is an invalid srcpos
  if pos=0 then
    simple_error("compiler bug: can't decode uninitialised srcpos")
  end if

  integer point
  point=srcpos_offset(pos)
  if not integer(point) or point<=0 then
    simple_error(sprintf("compiler bug: bad srcpos offset %d", {point}))
  end if

  for i=1 to length(file_list) do
    len=length(file_list[i][FILE_TEXT])
    offset=file_list[i][FILE_OFFSET]
    if point<=offset+len then
      return {i,point-offset}
      exit
    end if
  end for
  simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos}))
end function

---------------------------------------------------------------------------

function srcpos_file_number(srcpos pos)
  sequence temp
  temp=decode_srcpos_file(pos)
  return temp[1]
end function

---------------------------------------------------------------------------

--Returns 1-based character number in file
function srcpos_file_offset(srcpos pos)
  sequence temp
  temp=decode_srcpos_file(pos)
  return temp[2]
end function

---------------------------------------------------------------------------

--Combine base/file offset (usually FILE_OFFSET), token offset (in
--characters from start of file counting from 1) and token length into
--a non-virtual a srcpos.  The redundancy between baseoffset and
--offset is for clarity.
function encode_srcpos(integer baseoffset, integer offset, integer len)
  if len > 254 then
    len = 254
  end if
  if offset < 1 or len < 0 then
    --offset 0 is allowed, it refers to the file. But if you wanted it you'd
    --just write file_list[i][FILE_OFFSET]. So disallow it here.
    simple_error(sprintf("Compiler bug! encode_srcpos(%d,%d,%d)\n",{baseoffset,offset,len}))
  end if
  return (baseoffset+offset)*power(2,9)+len
end function

---------------------------------------------------------------------------

--This transforms a srcpos into one marked as virtual. Indicates a roughly relevant position, but
--not something that actually occurs in the source. Use for inserted dummy code
function virtual_pos(srcpos pos)
  return(or_bits(pos,256))
end function

---------------------------------------------------------------------------

--Return just the original text of a token.
--Unlike decode_srcpos, this is quite fast
function get_srcpos_text(srcpos pos)
  integer point, tokenlen, filenum
  sequence temp

  temp=decode_srcpos_file(pos)
  filenum=temp[1]
  point=temp[2]
  tokenlen=srcpos_length(pos)
  if tokenlen=0 then
    return ""
  else
    return file_list[filenum][FILE_TEXT][point..point+tokenlen-1]
  end if
end function

---------------------------------------------------------------------------

--Probably slow: avoid heavy use.
--Returns {file name, line number, column number, token length, line text, token text}: index with POS_*
--Column number returned is 0-based
function decode_srcpos(srcpos pos)
  integer point, tokenlen, filenum, columnnum
  integer lineno, line_start, line_end
  string filetext, linetext, tokentext
  sequence lines
  sequence temp

  if pos=0 then
    simple_error("compiler bug: can't decode uninitialised srcpos")
  end if

  temp=decode_srcpos_file(pos)
  filenum=temp[1]
  point=temp[2]
  tokenlen=srcpos_length(pos)

  --Find the line it's on
  filetext=file_list[filenum][FILE_TEXT]
  lines=file_list[filenum][FILE_LINE_EXTENTS]
  for i=1 to length(lines) do
    if point>=lines[i][PAIR_START] then
      lineno=i
    else
      exit
    end if
  end for

  line_start=lines[lineno][PAIR_START]
  line_end=lines[lineno][PAIR_END]
  linetext=filetext[line_start..line_end]
  -- +1 for 1-based column indexing
  columnnum=point-line_start+1
  if tokenlen=0 then
    tokentext=""
  else
    tokentext=linetext[columnnum..min({length(linetext),columnnum+tokenlen-1})]
  end if

  return({file_list[filenum][FILE_NAME],lineno,columnnum,tokenlen,linetext,tokentext})
end function

---------------------------------------------------------------------------

function form_error_text(string s, srcpos pos)
  integer poscol, poslen, posend
  string posline, line_display
  sequence src_position
  string column_display, caret_display
  integer tab_compensate, token_tab_compensate
  src_position=decode_srcpos(pos)
  --?pos
  --pretty_print(1,src_position,{2})

  posline=src_position[POS_LINETEXT]
  poscol=src_position[POS_COLUMN]
  poslen=src_position[POS_LENGTH]

  --Expand tabs, because we don't know how the terminal displays then.
  line_display=substring_replace(posline,"\t","    ")
  posline&=" "  --Used to point to the newline at the end
  posend=poscol+large(0,poslen-1)
  if posend>length(posline) then  --One past the end of the actual line is allowed
    simple_warn(sprintf_utf("Compiler bug: srcpos past end of line %d", {src_position[POS_LINE]}))
    posend=length(posline)
  end if
  --Count number of tabs in the line before and in the token,
  --so that we can determine their display length
  tab_compensate=3*count('\t',posline[1..poscol-1])
  token_tab_compensate=3*count('\t',posline[poscol..posend])
  caret_display=repeat('^',large(1,poslen)+token_tab_compensate)
  column_display=repeat(' ',poscol-1+tab_compensate)

  error_file_print(sprintf_utf("<a href=\"%s#%d\">\n",{src_position[POS_FILENAME],src_position[POS_LINE]}))
  if length(current_script) then
    return(
      sprintf_utf(
         "in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n"
         &COLBWHI&"%s\n"
         &"%s%s\n"
         &COLRED&"%s\n"
        ,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line_display,column_display,caret_display,s}
      )
    )
  else
    return(
      sprintf_utf(
         "in line %d of "&COLPNK&"%s"&COLRED&"\n"
         &COLBWHI&"%s\n"
         &"%s%s\n"
         &COLRED&"%s\n"
        ,{src_position[POS_LINE],src_position[POS_FILENAME],line_display,column_display,caret_display,s}
      )
    )
  end if
  --note that the calling procedure must send/deal with closing tags to error_file_print
end function

---------------------------------------------------------------------------

procedure src_warn(string s, srcpos pos)
  if not find('w',optlist) then
    --do not warn if -w is set
    simple_warn(form_error_text(s,pos))
    error_file_print("</a>\n<hr>\n")
  end if
end procedure

---------------------------------------------------------------------------

procedure src_error(string s, srcpos pos)
  end_anchor_kludge=true
  simple_error(form_error_text(s,pos))
end procedure

---------------------------------------------------------------------------

--Find newlines in a file (possibly mixed type), returns line extents
--({start of line, end of line} pairs, both inclusive, newlines excluded)
--and the file_text with Mac lineendings converted to Unix, because the engine's
--decode_srcpos expects source.lumped contents don't use Mac lineendings.
--Note also read_file() adds a newline at the end if none.
function split_lines(string file_text)
  integer line_start=1
  integer last_13=-1
  sequence line_extents = {}
  for i=1 to length(file_text) do
    if file_text[i]=13 then
      line_extents=append(line_extents, {line_start,i-1})
      last_13=i
      line_start=i+1
      if i+1<=length(file_text) and file_text[i+1]!=10 then -- A \r lineending
        file_text[i]=10
      end if
    elsif file_text[i]=10 then
      if last_13!=i-1 then  -- handle {13,10} "\r\n"
        line_extents=append(line_extents, {line_start,i-1})
      end if
      line_start=i+1
    end if
  end for

  if line_start<=length(file_text) then
    line_extents=append(line_extents, {line_start,length(file_text)})
  end if
  return {line_extents,file_text}
end function

---------------------------------------------------------------------------

--Read a file, adding to file_list.
--pos is used only for location of include statement, may be 0.
procedure load_source(string filename, string reading_how, srcpos pos)
  integer fh
  string file_text
  sequence line_extents  -- {start of line, end of line} pairs
  integer offset  -- FILE_OFFSET value

  -- First file starts at FILE_OFFSET=1, to ensure srcpos 0 is never used,
  -- and each file's FILE_OFFSET+0 is reserved. First char of first file
  -- has offset 2.
  offset=1
  if length(file_list) then
    offset=file_list[$][FILE_OFFSET]+length(file_list[$][FILE_TEXT])+1
  end if

  fh=open(filename,"r")
  if fh!=failure then
    wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})

    reenter_timing_zone("read_file")
    --Autodetect the encoding
    file_text=read_file(fh, TEXT_MODE, UTF)
    exit_timing_zone()
    close(fh)

    sequence temp=split_lines(file_text)
    line_extents=temp[1]
    file_text=temp[2]

    total_lines+=length(line_extents)
    file_list=append(file_list,{filename,file_text,line_extents,offset,""})
    --Maximum valid srcpos points to length 1 virtual token at end of file.
    --max_srcpos is used to do srcpos type checking... so we need to disable that
    --check before we can assign to it!
    max_srcpos=0
    max_srcpos=virtual_pos(encode_srcpos(offset,length(file_text),1))
  else
    if pos then
      src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos)
    else
      simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}))
    end if
  end if
end procedure

---------------------------------------------------------------------------

--Load and lex, recursively finding includes
procedure load_and_lex_source(string filename, string reading_how, srcpos pos, sequence include_stack)
  string canon_filename

  --We don't really support \ in paths on Unix.
  canon_filename=normalize_filename(filename)
  ifdef WINDOWS then
    canon_filename=hs_lower(canon_filename)
  end ifdef

  if find(canon_filename,include_stack) then
    src_error(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included recursively",{filename}),pos)
  end if
  include_stack&={canon_filename}

  if find(canon_filename,column(file_list,FILE_NAME)) then
    --This exact same include file has already been included.
    --Silently ignore this include. Double includes are allowed because of automatic inclusion.
    --src_warn(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included multiple times",{include_name}),pos)
    return
  end if

  --Reads one additional file
  reenter_timing_zone("load_and_lex_source/reading")
  load_source(canon_filename,reading_how,pos)
  exit_timing_zone()
  --Lex that file, and load and lex all included files
  lex_file(length(file_list), include_stack)
end procedure

---------------------------------------------------------------------------

function find_include_file_globally(string include_name)
  if file_exists(compiler_dir&include_name) then
    return normalize_filename(compiler_dir&include_name)
  end if
  --in case we're installed at $prefix/games, try $prefix/share/games/ohrrpgce/
  if file_exists(compiler_dir&"../share/games/ohrrpgce/"&include_name) then
    return normalize_filename(compiler_dir&"../share/games/ohrrpgce/"&include_name)
  end if
  --Failure: return filename, used for error messages
  return include_name
end function

---------------------------------------------------------------------------

--Search for an include file
function find_include_file(string include_name)
  sequence ret
  if equal(include_name,"plotscr.hsd") or equal(include_name,"scancode.hsi") then
    --Prefer to use fundamental include files found in global locations, because
    --plotscr.hsd and scancode.hsi will be exported too, when exporting scripts
    --from Custom.
    return find_include_file_globally(include_name)
  end if
  --try source directory
  if file_exists(path_only(source_file)&include_name) then
    return normalize_filename(path_only(source_file)&include_name)
  end if
  if file_exists(include_dir&include_name) then
    return include_dir&include_name
  end if
  --try current directory (which will be where the .rpg is)
  if file_exists(include_name) then
    return include_name
  end if
  --try global locations
  return find_include_file_globally(include_name)
end function

---------------------------------------------------------------------------

procedure show_source_info()
  if total_lines then
    wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
  else
    simple_error("no data to compile\n")
  end if
  if file_list[$][FILE_OFFSET]+length(file_list[$][FILE_TEXT])>power(2,23) then
    simple_warn("You have over 8 MB of scripts! Unfortunately compile and in-game errors will report the wrong line numbers for some errors.")
  end if
end procedure

---------------------------------------------------------------------------

procedure lexer_binary_error(srcpos pos)
  --better use src_error, to track down garbage if accidentally creeping in
  src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos)
end procedure

---------------------------------------------------------------------------

--Lex a string, processing the escape codes.
--s is the source text (typically whole file contents), i is the starting offset in s,
--1 past the opening ", and stop is last permissible offset in s to include.
--Returns {closing_quote_offset,quoted_string} (i.e. enclosed in quote marks, but with escape codes processed)
function lexer_read_string(string s, integer i, integer stop, integer posoff)
  string string
  integer escaping
  integer at1, at2
  escaping=false
  string="\""
  while true do
    if i>stop then  --did not find a closing " so not a valid string
      src_error("Expected \" at end of line to end string (multi-line strings are not supported)", encode_srcpos(posoff,i,1))
    end if
    if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then
      lexer_binary_error(encode_srcpos(posoff,i,1))
    end if
    if escaping then
      if s[i]='"' or s[i]='\\' or s[i]='\'' then
        --including ' for closer compatibility with Python
        string&=s[i]
      elsif s[i]='n' then
        string&=10
      elsif s[i]='t' then
        string&=9
      elsif s[i]='x' then
        --escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes)
        at1=0
        at2=0
        if i+2<=stop then
          at1=find(s[i+1],hex_chars)
          at2=find(s[i+2],hex_chars)
        end if
        if at1=0 or at2=0 then
          src_error("Expected two hexidecimal characters to follow \\x escape code",encode_srcpos(posoff,i-1,4))
        end if
        string&={16*hex_char_values[at1]+hex_char_values[at2]}
        i+=2
      else
        --invalid sequence
        string&='\\'
        string&=s[i]
      end if
      escaping=false
    else
      if s[i]='"' then
        return({i,string&'"'})
      elsif s[i]='\\' then
        escaping=true
      else
        string&=s[i]
      end if
    end if
    i+=1
  end while
end function

---------------------------------------------------------------------------

--Lex a signed or unsigned integer literal.
--Returns {newi, text} with text is the string representation of an integer.
function lexer_read_number(string s, integer i, integer stop, integer posoff)
  integer state
  atom val
  integer sign,base
  string text
  integer textstart
  integer textend
  integer digits

  textstart=i
  textend=i
  --Process negative prefix, because it isn't treated as a unary operator
  if s[i]='-' then
    text="-"
    i+=1
    sign=-1
  else
    text=""
    sign=1
  end if
  --Skip whitespace
  while i<=stop and (s[i]=' ' or s[i]='\t') do
    i+=1
  end while
  --Check for base prefix 0x 0o or 0b. No separating space allowed!
  base=10
  if i+1<=stop and s[i]='0' then
    if s[i+1]='x' or s[i+1]='X' then
      base=16
      i+=2
    elsif s[i+1]='o' or s[i+1]='O' then
      base=8
      i+=2
    elsif s[i+1]='b' or s[i+1]='B' then
      base=2
      i+=2
    end if
  end if
  val=0
  digits=0
  while i<=stop do
    if s[i]<256 then
      state=lexer_table[s[i]+1]
    else
      state=LEX_CHAR
    end if
    --color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state})
    if state=LEX_DIGIT then  --0-9
      integer digit
      digit=s[i]-'0'
      if digit>=base then
        src_error(sprintf_utf("Found digit "&COLYEL&"%s"&COLRED&" which is not permitted in a base-%d number",{s[i],base}),encode_srcpos(posoff,i,1))
      end if
      text&=s[i]
      textend=i
      val=val*base+digit
      digits+=1
    elsif base=16 and state=LEX_HEXDIGIT then  --a-fA-F
      text&=s[i]
      textend=i
      val=val*16+(toLower(s[i])-'a'+10)
      digits+=1
    elsif state=LEX_SPACE then  --yes, allowed in numbers too
    elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_OPERATOR or state=LEX_COMMENT or state=LEX_BINARY or state=LEX_NEWLINE then
      --backtrack one character
      --FIXME: bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead
      i-=1
      exit
    else  --currently, state=LEX_CHAR or state=LEX_HEXDIGIT or state=LEX_WARN or state=LEX_STRING
      if s[i]='.' then
        src_error("Non-whole (decimal) numbers are not yet implemented", encode_srcpos(posoff,textstart,i-textstart+1))
      end if
      if equal(text,"-") then
        exit  --Jumps down to the 'if digits=0' error case
      end if
      src_error(
        sprintf_utf("Found unexpected character "&COLYEL&"%s"&COLRED&" while reading a number, "&COLYEL&"%s"&COLRED&". Names such as variables can't start with a digit. Or maybe you forgot a comma or operator?"
                ,{s[i],text})
        ,encode_srcpos(posoff,i,1)
      )
    end if
    i+=1
  end while
  if digits=0 then
    --Encountered just '-' or '0x' or '-0x' (etc)
    if equal(text,"-") then
      src_error(COLYEL&"-"&COLRED&" can only be used to take the negation of a number, not other values/expressions (sorry!). Write "&COLYEL&"-1 * ..."&COLRED&" instead. Write "&COLYEL&"x -- y"&COLRED&" for subtraction.", encode_srcpos(posoff, textstart, 1))
    else
      src_error(sprintf_utf(COLYEL&"%s"&COLRED&" looks like the start of a number, but there are no digits!",{s[textstart..i-1]}), encode_srcpos(posoff, textstart, i-textstart+1))
    end if
  end if
  if base!=10 then
    --Special behaviour for non-decimals: silently overflow large 32 bit unsigned values to negatives
    if val>=#80000000 and val<#100000000 then
      val=val-#100000000
    end if
    --Convert to text
    text=sprintf("%d",{sign*val})
  end if
  if not int32(sign*val) then
    src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32-bit signed integer. Numbers must be between -2147483648 and 2147483647",{text}),encode_srcpos(posoff,textstart,textend-textstart+1))
  end if
  return({textend,text})
end function

---------------------------------------------------------------------------

--Clean the rest of a line for seek_include after 'include' occurs
--s is the whole line, i is end of the 'include', posoff is the file position at start of line
function lexer_read_include_line(TokenList tokens, string s, integer i, integer stop, integer posoff)
  integer at
  integer whitespace_len
  sequence temp
  string filename_string
  srcpos pos
  integer isquoted

  if not equal(tokens[1][CMD_TEXT],"include") then
    src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS])
  end if

  filename_string=""
  --Trim the 'include', comments, and whitespace,
  i+=1  --Skip the comma or whatever character ended the 'include'
  s=s[i..stop]
  if length(s) then
    --Need to increment i while stripping
    while s[1]=' ' or s[1]='\t' do
      s=s[2..length(s)]
      i+=1
    end while
    whitespace_len=0
    --Optionally allow quotes around the filename.
    --A quote mark in the middle of a filename doesn't need any escaping
    isquoted=(s[1]='"')
    if isquoted then
      temp=lexer_read_string(s,2,length(s),posoff+i-1)
      -- temp is {closing_quote_offset,quoted_string}
      pos=encode_srcpos(posoff,i,temp[1])

      s=s[temp[1]+1..$]
      --s is now whatever is after the string
      i+=temp[1]  --1 past the closing "
      filename_string=temp[2]  --enclosed in quotes
      whitespace_len=length(s)

      --Trim comment and whitespace to check for anything after the filename string
      at=find('#',s)
      if at then
        s=s[1..at-1]
        whitespace_len=at-1
      end if
      s=trim_whitespace(s)
      if length(s) then
        src_error("Found unexpected text after "&COLYEL&"include, \"filename\""&COLRED,encode_srcpos(posoff,i,whitespace_len))
      end if

    else
      --Trim comment and whitespace: for an unquoted filename this delimits the filename
      at=find('#',s)
      if at then
        s=s[1..at-1]
      end if
      s=trim_whitespace(s)  --Trims only from the right because we already striped left

      filename_string=s
      pos=encode_srcpos(posoff,i,length(s))
    end if

    tokens=append(tokens,{filename_string,pos})
  end if

  if length(tokens)=1 then
    src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS])
  end if
  if length(tokens)>2 then
    src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS])
  end if

  return(tokens)
end function

---------------------------------------------------------------------------

-- s is usually the text for the whole file, the interval i..stop is a single line,
-- and posoff is the srcpos offset for the beginning of the file
function lex_line(sequence s, integer i, integer stop, integer posoff)
  integer state
  TokenList tokens
  string ident  --identifier text being built up
  sequence temp
  string masked
  integer found
  integer textstart  --the offset at which this identifier starts, otherwise 0
  integer textend    --the offset at which the last non-whitespace character was read
  string remem_ident
  integer remem_textstart
  integer backtracked
  integer last_state   --A LEX_* constant; the last state that was not LEX_SPACE
  integer last_state_i --Value at i where last_state was set

  ident=""
  last_state=LEX_NEWLINE
  last_state_i=0
  textstart=0
  tokens={}

  --pretty_print(stdout, {"lex_line posoff: ", posoff, "i:", i, "srcpos:", decode_srcpos(encode_srcpos(posoff,i,0))}, {2})

  while i<=stop do
    if s[i]<256 then
      state=lexer_table[s[i]+1]
    else
      state=LEX_CHAR  --other unicode
    end if
    --color_print("lexing i=%d %d=%s state %d ident=%s\n",{i,s[i],{s[i]},state,ident})
    if state=LEX_CHAR or state=LEX_HEXDIGIT then
      ident=ident & toLower(s[i])
      if textstart=0 then
        textstart=i
      end if
      textend=i
    elsif state=LEX_SPACE then
    elsif state=LEX_DIGIT then
      if length(ident) then
        ident=ident & s[i]
        textend=i
      else
        --Start of a number not starting with - (those are parsed below)
        temp=lexer_read_number(s,i,stop,posoff)  --{offset of end of number, number string}
        tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
        i=temp[1]
      end if
    elsif state=LEX_WARN then
      ident=ident & toLower(s[i])
      if textstart=0 then
        textstart=i
      end if
      textend=i
      src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), encode_srcpos(posoff,i,1))
    else
      --We've reached the end of a token... unless it's only the first character
      --of a two-character operator.
      remem_ident=ident  --for backtracking
      remem_textstart=textstart
      backtracked=false
      if length(ident) then
        tokens=append(tokens,{ident,encode_srcpos(posoff,textstart,textend+1-textstart)})
      end if
      ident=""
      textstart=0
      if state=LEX_BEGIN then  --(
        tokens=append(tokens,{"begin",encode_srcpos(posoff,i,1)})
      elsif state=LEX_END then  --)
        if last_state=LEX_COMMA then
          --foo(1,). This probably doesn't particularly need to be disallowed
          --Could also check for <op>) but that will be caught later
        end if
        tokens=append(tokens,{"end",encode_srcpos(posoff,i,1)})
      elsif state=LEX_COMMA then
        --Check we didn't just finish an identifier, to allow using : (which is LEX_OPERATOR) on the end of an identifier.
        if length(remem_ident)=0 and (last_state=LEX_COMMA or last_state=LEX_BEGIN or last_state=LEX_OPERATOR or last_state=LEX_OPERATOR2) then
          --foo(, 1) or foo(1, , 2) or <op>, but not ),
          src_error(
            sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}),
            encode_srcpos(posoff,i,1))
        end if
      elsif state=LEX_NEWLINE then
      elsif state=LEX_COMMENT then
        exit
      elsif state=LEX_STRING then
        temp=lexer_read_string(s,i+1,stop,posoff)  --{final offset, string}
        tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
        i=temp[1]
      elsif state=LEX_OPERATOR then
        --A character which the start of some auto-separating sequence such as +=.
        --Read the next non-space character. If it's LEX_OPERATOR2 we greedily
        --add it, otherwise we do nothing (we only support length 1 and 2
        --separators).  However, we might have have been too greedy (not a value
        --two-char separator), and need to put it back.
        textstart=i
        masked=s[i..i]
        integer lookahead_state
        --This while loop is in order to allow whitespace in the middle of separators.
        while i<stop do
          i+=1
          if s[i]<256 then
            lookahead_state=lexer_table_key2[s[i]+1]
          else
            lookahead_state=LEX_CHAR
          end if
          if lookahead_state=LEX_OPERATOR2 then
            --Could be a second character. Stop
            masked&=s[i]
            exit
          elsif lookahead_state=LEX_CHAR then
            --includes binary gunk, warning characters and comments
            i-=1
            exit
          end if
          --otherwise, it's a space character
        end while
        --now i+1 is the start of another token, or is > stop
        --Lookup for separators which are prefixes of masked
        found=false
        for j=1 to length(separator_list) do
          if match(separator_list[j],masked)=1 then
            --very ugly
            if length(separator_list[j])=1 then
              --Backtrack if necessary to first character in masked, otherwise remain at second
              i=textstart
            end if
            tokens=append(tokens,{separator_list[j],encode_srcpos(posoff,textstart,i-textstart+1)})
            found=true
            exit
          end if
        end for
        if found=false then
          --need to backtrack, in particular for -
          --color_print("backtracking on '%s'\n", {masked})
          backtracked=true
          ident=remem_ident & masked[1]
          i=textstart
          if length(remem_ident) then
            --This character appeared inside a normal token, resume reading that token
            tokens=tokens[1..$-1]
            textstart=remem_textstart
          elsif equal(ident,"-") then
            --'-' appears at the start of a token: this is only allowed to be a number.
            --We haven't appended anything to tokens that needs removing.
            ident=""
            temp=lexer_read_number(s,i,stop,posoff)
            tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
            i=temp[1]
            textstart=0
            state=LEX_DIGIT  --Sensible last_state to avoid 'extra comma' error
          else
            --a keyword character followed by garbage
            src_error(
              "Expected a two-character operator (like "&COLYEL&":="&COLRED&") but the second character (e.g. "&COLYEL&"="&COLRED&") is missing",
              encode_srcpos(posoff,i,1))
          end if
        else
          textstart=0
          if last_state=LEX_COMMA then
            -- ,<op>
            --(Could check for (<op> but that will be detected later anyway.)
            --Allowed if it's a unary operator. There's only one, since - is not an operator.
            if s[i]!='$' then
              src_error(
                "Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",
                encode_srcpos(posoff,last_state_i,1))
            end if
          end if
        end if
      elsif state=LEX_BINARY then
        lexer_binary_error(encode_srcpos(posoff,i,1))
      end if
      if backtracked=false then
        if equal(remem_ident,"include") then
          --include is a special case, as normal lexing rules don't apply for rest of line
          if length(ident) then
            simple_error("compiler bug! [before lexer_read_include_line]\n")
          end if
          return lexer_read_include_line(tokens,s,i,stop,posoff)
        end if
      end if
      --End of a complete token
    end if
    if state!=LEX_SPACE then
      last_state_i=i
      last_state=state
    end if
    i+=1
  end while
  if length(ident) then
    --ought to check whether ident is 'include'...
    tokens=append(tokens,{ident,encode_srcpos(posoff,textstart,textend+1-textstart)})
  end if
  return(tokens)
end function

---------------------------------------------------------------------------

--this also checks for rogue strings and $'s
function translate_plotstrings(TokenList tokens)
  integer state
  integer start_token
  integer i
  Token string_func
  Token string_token
  srcpos pos,vpos
  state=0
  i=1
  while i<=length(tokens) do
    if compare(tokens[i][CMD_TEXT],"$")=0 then
      pos=tokens[i][CMD_POS]
      vpos=virtual_pos(pos)
      if state=0 then
        state=1
        start_token=i
      else
        --jump to error throw
        state=1
        exit
      end if
    end if
    if state=2 then
      if tokens[i][CMD_TEXT][1]='"' then
        string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1])
        string_token={sprintf("@$string%d",{length(string_list)}),tokens[i][CMD_POS]}
        tokens=tokens[1..start_token-1]&{string_func,{"begin",vpos}}&tokens[start_token+1..i-2]&{string_token,{"end",vpos}}&tokens[i+1..$]
        state=0  --may have multiple strings on one line
      else  --we saw a + (or illegal =) inside the string number expression
        state=1
      end if
    end if
    if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then
      string_func={"setstringfromtable",pos}
      state=2
    elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then
      string_func={"appendstringfromtable",pos}
      state=2
    end if
    i+=1
  end while

  if state!=0 then
    --we've seen a surplus $, not right
    src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos)
  end if
  return(tokens)
end function

---------------------------------------------------------------------------

--Given a command tree, get the original text from the file
--OK, this function is horribly complicated just so tracevalue can print
--"hero X (me)" instead of "herox(me)". It could be implemented much simpler if
--macros were expanded in the lexer, but then build_ast and
--convert_macros would have to be as well
function tree_original_text(Node tree)
  TokenList script_toks
  Node subtree
  srcpos leftmost, rightmost
  integer left_at, right_at
  integer filenum
  integer left_point, right_point
  integer depth
  TokenList tokens
  string text

  script_toks=all_scripts[cur_script_num][SCRIPT_BODY_TOKS]

  --Find leftmost and rightmost tokens making up the expression, not including brackets
  --(Do we care about virtual flags?)
  --NOTE: we're making an assumption about srcpos encoding here, that
  --if token A begins before token B, then srcpos A < srcpos B

  leftmost=tree[TREE_TRUNK][CMD_POS]
  subtree=tree
  while length(subtree[TREE_BRANCHES]) do
    subtree=subtree[TREE_BRANCHES][1]
    if subtree[TREE_TRUNK][CMD_POS]<leftmost then
      leftmost=subtree[TREE_TRUNK][CMD_POS]
    end if
  end while

  rightmost=tree[TREE_TRUNK][CMD_POS]
  subtree=tree
  while length(subtree[TREE_BRANCHES]) do
    subtree=subtree[TREE_BRANCHES][$]
    rightmost=subtree[TREE_TRUNK][CMD_POS]
  end while

  --grab those tokens from the script's token list
  left_at=find_in_column(leftmost,script_toks,CMD_POS)
  right_at=find_in_column(rightmost,script_toks,CMD_POS)
  tokens=script_toks[left_at..right_at]
  --pretty_print(stdout,{"tree_original_text tokens: ",tokens},{2})

  --keep grabbing extra tokens until brackets are balanced
  --note: this will read 'foo(bar())' but will read 'foo()' as 'foo'
  depth=count("begin",column(tokens,CMD_TEXT))-count("end",column(tokens,CMD_TEXT))
  if depth<0 then
    src_error("Compiler bug: negative depth in tree_original_text",tokens[1][CMD_POS])
  end if
  while depth do
    right_at+=1
    if equal(script_toks[right_at][CMD_TEXT],"begin") then
      depth+=1
    else  --only other possibility is "end"
      depth-=1
    end if
  end while

  --grab original text corresponding to this range of tokens
  filenum=srcpos_file_number(leftmost)
  left_point=srcpos_file_offset(leftmost)

  --this +1 is safe, guaranteed to be followed by either another argument or "end"
  right_point=srcpos_file_offset(script_toks[right_at+1][CMD_POS])
  text=file_list[filenum][FILE_TEXT][left_point..right_point-1]
  --pretty_print(stdout, {"tree_original_text left/right_point:", left_point, right_point, "text:", text, decode_srcpos(leftmost)},{2})

  --NOTE: should be able to make use of srcpos lengths instead. (Again, how to handle virtuals?)

  --clean up the grabbed text
  text=trim_whitespace(substitute(text,'\n',','))
  while find(text[$],", \t") do
    text=text[1..$-1]
  end while

  return(text)
end function

---------------------------------------------------------------------------

function seek_include(TokenList tokens)
  string ret
  if length(tokens) and equal(tokens[1][CMD_TEXT],"include") then
    if length(tokens)=1 or length(tokens[2][CMD_TEXT])=0 then
      src_error(COLYEL&"include"&COLRED&" is a keyword which should be followed on the same line by the filename to include",tokens[1][CMD_POS])
    end if
    ret=tokens[2][CMD_TEXT]
    if ret[1]='"' then
      --trim quote marks
      ret=ret[2..$-1]
    end if
    return(ret)
    -- --Alternatively use original text (escape sequences won't work)... so might be useful on Wdinwos
    -- srcdata=decode_srcpos(s[at+1][CMD_POS])
    -- return(srcdata[POS_LINETEXT][srcdata[POS_COLUMN]+1..srcdata[POS_COLUMN]+srcdata[POS_LENGTH]])
  end if
  return("")
end function

---------------------------------------------------------------------------

procedure lex_file(integer file_num, sequence include_stack)
  TokenList broken
  string include_name
  sequence lines
  integer posoff
  -- object temp
  posoff=file_list[file_num][FILE_OFFSET]

  lines=file_list[file_num][FILE_LINE_EXTENTS]
  for i=1 to length(lines) do
    broken=lex_line(file_list[file_num][FILE_TEXT],lines[i][PAIR_START],lines[i][PAIR_END],posoff)

    -- for j=1 to length(broken) do
    --   temp = decode_srcpos(broken[j][CMD_POS])
    --   pretty_print(stdout,{broken[j][CMD_TEXT], temp},{2})
    -- end for

    include_name=seek_include(broken)
    if length(include_name) then
      load_and_lex_source(find_include_file(include_name),"including",broken[2][CMD_POS],include_stack)
    else
      --this is probably not the best place to do this translation, but for now we simply emulate old string translation
      --this also checks for stray strings and $'s
      cmd&=translate_plotstrings(broken)
    end if
  end for
end procedure

---------------------------------------------------------------------------

procedure load_and_lex_all()
  enter_timing_zone("Loading & lexing files")
  if find('b',optlist)=0 then
    load_and_lex_source(find_include_file("plotscr.hsd"),"reading",0,{})
  end if
  --Load any additional includes first, because it is almost certainly
  --an exported .hsi of constants. Due to the two-pass parsing
  --the order of includes matters for almost nothing aside from defineconstant,
  --eg including the .hsi first allows "defineconstant(song:foobar, victory song)"
  for i=1 to length(additional_includes) do
    load_and_lex_source(additional_includes[i],"reading",0,{})
  end for
  --Load the main file
  load_and_lex_source(source_file,"reading",0,{})
  exit_timing_zone()
end procedure

---------------------------------------------------------------------------

--Warning: you normally want to use check_undefined_string instead of this
--This does not check whether s is defined as a constant, subscript, or local variable,
--and may miss scripts too.
procedure check_for_reserved(string s, srcpos pos, string expect)
  string extra
  Symbol sym
  sequence src_position
  if map:has(global_scope,s) then
    extra=""
    sym=map:get(global_scope,s)
    if sym[SYM_POS] then
      src_position=decode_srcpos(sym[SYM_POS])
      extra&=sprintf_utf(
          " (which was declared on line %d in "&COLPNK&"%s"&COLRED&")"
          ,{src_position[POS_LINE],src_position[POS_FILENAME]}
        )
    end if
    if equal("top-level declaration",expect) then
      extra&=". Perhaps there is an extra "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" earlier in the file"
    end if
    src_error(
      sprintf_utf(
        "Expected %s, but found %s "&COLYEL&"%s"&COLRED&"%s"
        ,{expect, KIND_LONGNAMES[sym[SYM_KIND]], s, extra}
      )
      ,pos
    )
  end if
end procedure

---------------------------------------------------------------------------

procedure mustnt_be_a_number(Token s)
  --note that identifiers are allowed to start with -
  if length(exclude(s[CMD_TEXT],"-0123456789"))=0 and count('-',s[CMD_TEXT])!=length(s[CMD_TEXT]) then
    --Work backwards to figured out if constant substitution has occurred on s[CMD_TEXT]
    sequence src_position
    sequence const_src_position
    string original_text
    string msg
    src_position=decode_srcpos(s[CMD_POS])
    original_text=src_position[POS_TOKTEXT]

    object const_data
    const_data=map:get(constant_list,original_text,{})
    if length(const_data) then
      const_src_position=decode_srcpos(const_data[CONST_POS])
      msg=sprintf_utf("Expected a name, but found "&COLYEL&"%s"&COLRED&" which is defined as a constant on line %d of "&COLPNK&"%s"&COLRED&" with the value "&COLYEL&"%d"&COLRED,
                      {
                        original_text,
                        const_src_position[POS_LINE],
                        const_src_position[POS_FILENAME],
                        const_data[CONST_VALUE]
                      })
    else
      msg="Expected a name, but found a number "&COLYEL &s[CMD_TEXT] &COLRED
    end if

    src_error(msg, s[CMD_POS])
  end if
end procedure

---------------------------------------------------------------------------

procedure check_undefined_constant(Token s, atom value)
   sequence const_data
   sequence src_position
   string err_string
   string whats_happening
   if map:has(constant_list,s[CMD_TEXT]) then
     --constant is already defined
     const_data=map:get(constant_list,s[CMD_TEXT])
     src_position=decode_srcpos(const_data[CONST_POS])
     if const_data[CONST_VALUE]=value then
       whats_happening="will be ignored because"
     else
       whats_happening="is being redefined;"
     end if
     err_string=sprintf_utf("Constant "&COLYEL&"%s"&COLRED&" %s it is already defined in line %d of "&COLPNK&"%s"&COLRED&" with the value "&COLYEL&"%d"&COLRED
                   ,{
                      s[CMD_TEXT]
                     ,whats_happening
                     ,src_position[POS_LINE]
                     ,src_position[POS_FILENAME]
                     ,const_data[CONST_VALUE]
                   }
                )
     if const_data[CONST_VALUE]=value then
       src_warn(err_string,s[CMD_POS])
     else
       src_error(err_string,s[CMD_POS])
     end if
     --if it's in constant_list, then it's also in global_scope.
   else
     check_for_reserved(s[CMD_TEXT],s[CMD_POS],"constant name")
   end if
   mustnt_be_a_number(s)
end procedure

---------------------------------------------------------------------------

--Check whether an identifier is unused and available. If cur_script_num is set, also
--checks local variables and subscripts in the current scope, but not nonlocal vars.
procedure check_undefined_string(Token s, string expect)
  sequence src_position
  integer at

  --Check global_scope
  check_for_reserved(s[CMD_TEXT],s[CMD_POS],expect)
  --Check constant_list and for number literals
  mustnt_be_a_number(s)

  --Search for an in-scope subscript of that name (subscripts aren't shadowed)
  --Also, when check_for_reserved is called from inside parse_script, script names haven't been added to global_scope yet
  --so non-subscripts are missed by check_for_reserved.
  at=lookup_scoped_script_name(s[CMD_TEXT])
  if at then
    src_position=decode_srcpos(all_scripts[at][SCRIPT_POS])
    src_error(
      sprintf_utf(
        COLYEL&"%s"&COLRED&" can't be used as a %s because it's already declared as a %s on line %d of "&COLPNK&"%s"&COLRED,
        {s[CMD_TEXT], expect, all_scripts[at][SCRIPT_TRIGGER_TOK][CMD_TEXT],
         src_position[POS_LINE], src_position[POS_FILENAME]}
      )
      ,s[CMD_POS]
    )
  end if

  if cur_script_num!=0 then
    --Search for a local with that name. Ignore non-locals because they are allowed to be shadowed.
    VarList local_vars
    --Might be in the middle of building this script and not have SCRIPT_VARIABLES yet
    if length(all_scripts[cur_script_num]) >= SCRIPT_VARIABLES then
      local_vars=all_scripts[cur_script_num][SCRIPT_VARIABLES]
      for idx=1 to length(local_vars) do
        if equal(local_vars[idx][CMD_TEXT],s[CMD_TEXT]) then
          if local_vars[idx][VAR_FRAME]=0 then  --is local
            src_position=decode_srcpos(local_vars[idx][CMD_POS])
            src_error(
              sprintf_utf(
                COLYEL&"%s"&COLRED&" can't be used as a %s because it's already defined as a local variable/argument on line %d of "&COLPNK&"%s"&COLRED,
                {s[CMD_TEXT],expect,src_position[POS_LINE],src_position[POS_FILENAME]}
              )
              ,s[CMD_POS]
            )
          end if
        end if
      end for
    end if
  end if
end procedure

---------------------------------------------------------------------------

function force_16_bit(atom n, srcpos pos)
  if n>32767 then
    src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos)
    n=32767
  elsif n<-32768 then
    src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos)
    n=-32768
  end if
  return(n)
end function

---------------------------------------------------------------------------

function try_string_to_number(Token s)
  atom result
  result=floor(string_to_object(s[CMD_TEXT],0))
  if not string_is_int32(s[CMD_TEXT]) then
    src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
  end if
  return(result)
end function

---------------------------------------------------------------------------

function enforce_constants(string s)
  --enforces both constants
  object v
  v=map:get(constant_list,s,{})
  if length(v) then
    v=v[CONST_VALUE]
    if int32(v) then
      return(sprintf("%d",{v}))
    end if
  end if
  return(s)
end function

---------------------------------------------------------------------------

function get_cmd()
  Token result
  if get_cmd_pointer>length(cmd) then
    src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
  end if
  result=cmd[get_cmd_pointer]
  get_cmd_pointer+=1
  result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
  return(result)
end function

---------------------------------------------------------------------------

function get_cmd_no_constants()
  Token result
  if get_cmd_pointer>length(cmd) then
    src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
  end if
  result=cmd[get_cmd_pointer]
  get_cmd_pointer+=1
  result[CMD_TEXT]=result[CMD_TEXT]
  return(result)
end function

---------------------------------------------------------------------------

--Returns a list of tokens enclosed in begin,end, with no nested begin,end allowed.
function get_cmd_block(integer convert_constants)
  Token this
  TokenList result
  result={}
  this=get_cmd()
  if compare("begin",this[CMD_TEXT])!=0 then
    src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS])
  end if
  while true do
    if convert_constants then
      this=get_cmd()
    else
      this=get_cmd_no_constants()
    end if
    if compare("begin",this[CMD_TEXT])=0 then
      src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS])
    elsif compare("end",this[CMD_TEXT])=0 then
      exit--break out of the while
    else
      result=append(result,this)
    end if
  end while
  return(result)
end function

---------------------------------------------------------------------------

--Get a block containing balanced begin,end pairs.
--Returns {ptr,TokenList}
--Initial ptr is 1 if we already grabbed the opening 'begin'.
function get_cmd_depth(integer ptr, TokenList data, integer depth)
  TokenList result
  Token this
  result={}
  while true do
    --if get_key()=27 then abort(1/0) end if
    if ptr>length(data) then
      src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS])
    end if
    this=data[ptr]
    ptr+=1
    if compare("end",this[CMD_TEXT])=0 then
      depth-=1
    elsif compare("begin",this[CMD_TEXT])=0 then
      depth+=1
    end if
    if depth=0 then
      exit --break out of the while
    else
      result=append(result,this)
    end if
  end while
  return({ptr,result})
end function

---------------------------------------------------------------------------

--Like get_cmd_block, but allowed to contain balanced begin,end pairs.
function get_cmd_block_nestable(integer convert_constants)
  sequence ptr_and_block
  TokenList block
  ptr_and_block=get_cmd_depth(get_cmd_pointer,cmd,0)
  get_cmd_pointer=ptr_and_block[1]
  block=ptr_and_block[2][2..$]  --trim leading begin
  if convert_constants then
    for i=1 to length(block) do
      block[i][CMD_TEXT]=enforce_constants(block[i][CMD_TEXT])
    end for
  end if
  return block
end function

---------------------------------------------------------------------------

procedure parse_constant_block(TokenList block)
  atom num
  string name
  srcpos pos
  for i=1 to length(block) by 2 do
    if i+1>length(block) then
      src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS])
    end if
    num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
    check_undefined_constant(block[i+1],num)
    name=block[i+1][CMD_TEXT]
    pos=block[i+1][CMD_POS]
    map:put(constant_list,name,{num,pos})
    map:put(global_scope,name,{KIND_CONSTANT,num,pos})
  end for
end procedure

---------------------------------------------------------------------------

procedure parse_trigger_block(TokenList block)
  integer num
  string name
  srcpos pos
  for i=1 to length(block) by 2 do
    if i+1>length(block) then
      src_error("Expected name but script trigger definition block ended",block[i][CMD_POS])
    end if
    num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
    check_undefined_string(block[i+1],"script trigger name")
    name=block[i+1][CMD_TEXT]
    pos=block[i+1][CMD_POS]
    trigger_list=append(trigger_list,{num,name})
    map:put(global_scope,name,{KIND_TOPLEVEL,0,pos})
  end for
end procedure

---------------------------------------------------------------------------

procedure parse_version_block(TokenList block)
  string req_version
  if length(block)<2 then
    --Don't have a srcpos
    simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)")
  end if
  plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time)
  req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd
  --trim quote marks
  if plotscr_version[1]!='"' or req_version[1]!='"' then
    simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')")
  end if
  plotscr_version=plotscr_version[2..$-1]
  req_version=req_version[2..$-1]
  if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then
    simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
  end if
  --printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION})
  if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then
    simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
  end if
end procedure

---------------------------------------------------------------------------

procedure create_global(atom id, string name, srcpos pos)
  integer at
  at=find(id,global_list[PAIR_NUM])
  if at then
    src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos)
  else
    if id>=0 and id<=MAXGLOBAL then
      global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
      global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
      global_list[GLB_POS]=append(global_list[GLB_POS],pos)
      map:put(global_scope,name,{KIND_GLOBAL,id,pos})
    else
      src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos)
    end if
  end if
end procedure

---------------------------------------------------------------------------

procedure parse_global_block(TokenList block)
  atom num
  for i=1 to length(block) by 2 do
    if i+1>length(block) then
      src_error("expected name but globalvariable block ended",block[i][CMD_POS])
    end if
    num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
    check_undefined_string(block[i+1],"global variable name")
    create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS])
  end for
end procedure

---------------------------------------------------------------------------

procedure parse_operator_block(TokenList block)
  atom num
  Symbol sym
  Kind kind
  string name,truename
  srcpos pos
  for i=1 to length(block) by 3 do
    if i+2>length(block) then
      src_error("expected name but defineoperator block ended",block[i][CMD_POS])
    end if
    num=try_string_to_number(block[i])
    mustnt_be_a_number(block[i+1])
    mustnt_be_a_number(block[i+2])
    name=block[i+1][CMD_TEXT]
    truename=block[i+2][CMD_TEXT]
    pos=block[i+2][CMD_POS]
    operator_list=append(operator_list,{num,name,truename,pos})
    sym=map:get(global_scope,name,nil_sym)
    kind=sym[SYM_KIND]
    if kind then
      if kind!=KIND_SEPARATOR and kind!=KIND_MATH then
        src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" may not be used as an operator",{KIND_LONGNAMES[kind],name}),pos)
      else
        --Existing names do not get added to global_scope, because we need to be able to use
        --global_scope to map names like 'mod' to the math function rather than the operator.
        --Instead, operator_list is consulted directly to lookup operators.
      end if
    else
      --Add to global_scope to prevent it from being reused for anything else.
      map:put(global_scope,name,{KIND_OPERATOR,length(operator_list),pos})
    end if
  end for
end procedure

---------------------------------------------------------------------------

--Decide which ID number to assign to an autonumbered script.
--name isn't fully-scoped.
function pick_script_id(integer parent_id, string name)
  integer at, id
  sequence encoded

  if length(reuse_ids) then
    encoded=encode_ohr(name)
    if length(encoded)>36 then
      encoded=encoded[1..36]
    end if
    at=find_in_column({parent_id,encoded},reuse_ids,REUSE_PARENT_AND_NAME)
    if at then
      return reuse_ids[at][PAIR_NUM]
    end if
  end if

  loop do
    autonumber_id-=1
    id=autonumber_id
    until find_in_column(id,reuse_ids,PAIR_NUM)=0
  end loop
  return id
end function

---------------------------------------------------------------------------

--Compute bits to add to FUNC_FLAGS from a FUNC_ATTRS sequence
function compute_attribute_bits(sequence attributes)
  integer at
  integer attr_bits
  attr_bits=0
  for i=1 to length(attributes) do
    at=find_in_column(attributes[i][CMD_TEXT],attribute_list,ATTR_NAME)
    if at=0 then
      simple_error("Compiler bug! compute_attribute_bits can't handle "&attributes[i][CMD_TEXT])
    end if
    attr_bits=or_bits(attr_bits,attribute_list[at][ATTR_FLAGS])
  end for
  return attr_bits
end function

---------------------------------------------------------------------------

--arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to
--VAR_ARGS indicating a variable (unlimited) number of arguments.
--id may only be -1 (autonumbered) when there is no parent script (so that the unscoped
--and fully-scoped name are the same), otherwise pick_script_id must be called explicitly!
function create_function(sequence list, integer id, string name, object arglist, integer func_type, srcpos pos, sequence attributes)
  integer at, attr_bits
  if func_type=KIND_SCRIPT then
    at=find(id,column(list,PAIR_NUM))
    if at then
      src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{KIND_LONGNAMES[func_type],id,list[at][PAIR_NAME]}),pos)
    end if
    if id=0 or id<-1 then
      src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos)
    elsif id=-1 then
      --autonumber
      id=pick_script_id(0,name)
    end if
  end if
  attr_bits=compute_attribute_bits(attributes)
  list=append(list,{id,name,arglist,attributes,attr_bits,pos})
  map:put(global_scope,name,{func_type,id,pos})
  return(list)
end function

---------------------------------------------------------------------------

--Read zero or more attributes (token prefixed with @ to set them apart, optionally with parameters)
--from current position 'ptr' in 'block'.
--We assume that this is never called on a token that might be a reference instead of an attribute
--(eg by first checking for KIND_ATTRIBUTE)
function parse_attributes(TokenList block, integer ptr)
  sequence attributes
  sequence attr_args, ptr_and_block
  string attr_name
  integer at
  attributes={}
  while ptr<=length(block) and block[ptr][CMD_TEXT][1]='@' do
    attr_name=block[ptr][CMD_TEXT]
    at=find_in_column(attr_name,attribute_list,ATTR_NAME)
    if at=0 then
      src_error("Unrecognised function attribute "&COLYEL&attr_name&COLRED, block[ptr][CMD_POS])
    end if

    if ptr+1<length(block) and equal(block[ptr+1][CMD_TEXT],"begin") then
      --FIXME: should probably enforce constants in the params, because when called from parse_top_level,
      --that hasn't been done yet.
      ptr_and_block=get_cmd_depth(ptr+2,block,1)
      ptr=ptr_and_block[1]
      attr_args=ptr_and_block[2]
    else
      ptr+=1
      attr_args={}
    end if
    attributes&={{attr_name,attr_args}}
  end while
  return {attributes,ptr}
end function

---------------------------------------------------------------------------

function parse_define_block(TokenList block, sequence list, integer func_type)
  atom num
  string name
  atom args
  object arglist  --sequence of strings, or VAR_ARGS
  sequence attributes
  sequence attrs_and_ptr
  srcpos name_pos
  integer i
  i=1
  while i<=length(block) do

    --Parse any attributes
    attrs_and_ptr=parse_attributes(block,i)
    attributes=attrs_and_ptr[1]
    i=attrs_and_ptr[2]

    if length(attributes)>0 and i>length(block) then
      src_error(sprintf_utf("expected %s id to follow, but define block ended",{KIND_LONGNAMES[func_type]}),block[i-1][CMD_POS])
    end if

    --ID
    num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
    if i+1>length(block) then
      src_error(sprintf_utf("expected %s name but define block ended",{KIND_LONGNAMES[func_type]}),block[i][CMD_POS])
    end if

    --Name
    i+=1
    check_undefined_string(block[i],KIND_LONGNAMES[func_type]&" name")
    name=block[i][CMD_TEXT]
    name_pos=block[i][CMD_POS]
    if i+1>length(block) then
      src_error("expected argument count but define block ended",block[i][CMD_POS])
    end if

    --Get arglist
    i+=1
    args=try_string_to_number(block[i])
    arglist={}
    if args<0 then
      if func_type=KIND_SCRIPT then
        src_error("Number of arguments for a script can't be negative (variable number of arguments aren't supported)",block[i][CMD_POS])
      end if
      arglist=VAR_ARGS
    else
      for j=1 to args do
        if i+1>length(block) then
          src_error("expected argument default but define block ended",block[i][CMD_POS])
        end if
        i+=1
        arglist=append(arglist,try_string_to_number(block[i]))
      end for
      if args>MAX_ARGS then
        src_error(sprintf_utf("Number of arguments (%d) is more than allowed (%d)",{args,MAX_ARGS}),block[i][CMD_POS])
      end if
    end if
    i+=1

    list=create_function(list,num,name,arglist,func_type,name_pos,attributes)
  end while
  return(list)
end function

---------------------------------------------------------------------------

procedure preliminary_parse_pass()
  Token this
  enter_timing_zone("Preliminary pass")
  color_print("preliminary pass\n",{})
  get_cmd_pointer=1
  while get_cmd_pointer<=length(cmd) do
    --read a top-level command
    this=get_cmd()
    if equal("defineconstant",this[CMD_TEXT]) then
      parse_constant_block(get_cmd_block(false))
    elsif equal("definetrigger",this[CMD_TEXT]) then
      parse_trigger_block(get_cmd_block(false))
    elsif equal("plotscrversion",this[CMD_TEXT]) then
      parse_version_block(get_cmd_block(false))
    end if
  end while
  --Check for very old plotscr.hsd
  if equal(plotscr_version,"") then
    if find("plotscr.hsd",column(file_list,FILE_NAME)) then
      simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE..")
    end if
    --If plotscr.hsd is not included then we're compiling with -b, so don't complain
  end if
  exit_timing_zone()
end procedure

---------------------------------------------------------------------------

--Parse a script or subscript, finding the list of tokens (NOT yet transformed into a tree),
--and appending an entry to all_scripts.
--parent_name is a fully-scoped name of parent script, or ""
--if this script isn't a subscript of another.
--Returns Token with name of parsed script
function parse_script(Token trigger, string parent_name, integer parent_idx, sequence attributes)
  Token name
  string full_name
  TokenList arglist
  TokenList body
  Token this
  string err_string
  string previous_script
  sequence src_position
  string msg
  integer depth
  integer nest_depth
  Symbol sym
  integer temp
  integer idx
  sequence subscript_attributes
  sequence attrs_and_ptr

  name=get_cmd()
  if parent_idx=0 then
    full_name=name[CMD_TEXT]
    nest_depth=0
  else
    full_name=sprintf_utf("%s<-%s",{parent_name,name[CMD_TEXT]})
    nest_depth=all_scripts[parent_idx][SCRIPT_NEST_DEPTH]+1
    if nest_depth>MAX_NEST_DEPTH then
      src_error(
        sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is nested too deeply (%d layers deep), which is more than is supported. Reorganise your scripts to not use so much nesting."
          ,{trigger[CMD_TEXT],name[CMD_TEXT],nest_depth})
        ,name[CMD_POS]
      )
    end if
  end if

  --Check name not already in use... unless it appears in a definescript block; if it did the name is already reserved.
  --Note that we won't add it to script_list or global_scope yet; that happens later in check_script_declarations
  if not find_in_column(full_name,script_list,PAIR_NAME) then
    cur_script_num=parent_idx --temporarily to check for subscripts
    check_undefined_string(name, COLYEL &trigger[CMD_TEXT] &COLRED &" name")
    cur_script_num=0 --This probably isn't needed
  end if

  previous_script=current_script
  current_script=full_name --name[CMD_TEXT]

  --Parse arguments
  arglist={}
  while true do
    if get_cmd_pointer>length(cmd) then
      src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{trigger[CMD_TEXT],name[CMD_TEXT]}),name[CMD_POS])
    end if
    this=get_cmd()
    if compare("begin",this[CMD_TEXT])=0 then
      exit--break the while
    end if
    --Note that arglist isn't the actual list of arguments, it's just a string of tokens right now (including defaults).
    --The actual arglist is built in process_arglist, while compiling each script.
    arglist=append(arglist,this)
  end while

  --Create all_scripts entry now for recursive calls; SCRIPT_BODY_TOKS and SCRIPT_END_POS filled in later,
  --and more fields are appended in compile_a_script
  all_scripts=append(all_scripts,{
           trigger
          ,name[CMD_TEXT]
          ,name[CMD_POS]
          ,full_name
          ,arglist
          ,{}  --SCRIPT_BODY_TOKS
          ,parent_idx
          ,nest_depth
          ,attributes
          ,0  --SCRIPT_END_POS
  })
  idx=length(all_scripts)

  --Read the body
  --Every script is nested inside a big fat do() block
  body={{"do",virtual_pos(name[CMD_POS])}}
  depth=0
  subscript_attributes={}
  while true do
    --If we just read some attribute statements, they must precede a subscript
    if length(subscript_attributes)>0 and equal("subscript",this[CMD_TEXT])=0 then
      src_error(
        sprintf_utf(
          "Expected a subscript to follow function attribute "&COLYEL&"%s"&COLRED&", but found "&COLYEL&"%s"&COLRED
          ,{subscript_attributes[$][CMD_TEXT],this[CMD_TEXT]}
        )
        , this[CMD_POS])
    end if

    body=append(body,this)
    if compare("end",this[CMD_TEXT])=0 then
      depth-=1
      if depth=0 then
        all_scripts[idx][SCRIPT_END_POS]=this[CMD_POS]
        exit--break while
      end if
    elsif compare("begin",this[CMD_TEXT])=0 then
      depth+=1
    elsif compare("subscript",this[CMD_TEXT])=0 then
      --Recurse, so body of subscript doesn't appear in this script.
      --The subscript node stays, for syntax checking, but is removed in normalize_flow_control
      parse_script(this,full_name,idx,subscript_attributes)
      subscript_attributes={}
      body&={{"begin",this[CMD_POS]},{"end",this[CMD_POS]}}
    else
      sym=map:get(global_scope,this[CMD_TEXT],nil_sym)
      temp=sym[SYM_KIND]
      if temp=KIND_ATTRIBUTE then
        --Read all subscript attributes, and don't put them in this script's body
        body=body[1..$-1]
        attrs_and_ptr=parse_attributes(cmd,get_cmd_pointer-1)
        subscript_attributes=attrs_and_ptr[1]
        get_cmd_pointer=attrs_and_ptr[2]
      elsif temp>KIND_LAST_VALID_IN_SCRIPT then
        err_string=sprintf_utf(
          "%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",
          {KIND_LONGNAMES[temp], this[CMD_TEXT]}
        )
        if temp=KIND_TOPLEVEL then
          err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" or is missing a "&COLYEL&")"&COLRED&" or "&COLYEL&"end"&COLRED&".",{name[CMD_TEXT]})
        end if
        src_error(err_string,this[CMD_POS])
      end if
    end if
    if get_cmd_pointer>length(cmd) then
      src_error(
        sprintf_utf(
           "%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
          ,{trigger[CMD_TEXT],name[CMD_TEXT]}
        )
        ,name[CMD_POS]
      )
    end if
    this=get_cmd()
  end while
  all_scripts[idx][SCRIPT_BODY_TOKS]=body

  current_script=previous_script
  return name
end function

---------------------------------------------------------------------------

procedure parse_top_level()
  Token this
  sequence triggers, attributes
  sequence attrs_and_ptr
  integer triggeridx
  enter_timing_zone("Top level pass")
  color_print("parsing top-level\n",{})
  triggers=column(trigger_list,PAIR_NAME)
  attributes={}
  get_cmd_pointer=1
  while get_cmd_pointer<=length(cmd) do
    --read a top-level command
    this=get_cmd()
    triggeridx=find(this[CMD_TEXT],triggers)

    --If we just read some attribute statements, they must precede a script
    if length(attributes)>0 and triggeridx=0 then
      src_error(
        sprintf_utf(
          "Expected a script to follow function attribute "&COLYEL&"%s"&COLRED&", but found "&COLYEL&"%s"&COLRED
          ,{attributes[$][CMD_TEXT],this[CMD_TEXT]}
        )
        , this[CMD_POS])
    end if

    if compare("defineconstant",this[CMD_TEXT])=0 then
      get_cmd_block(true)
    elsif compare("definetrigger",this[CMD_TEXT])=0 then
      get_cmd_block(true)
    elsif compare("globalvariable",this[CMD_TEXT])=0 then
      parse_global_block(get_cmd_block(true))
    elsif compare("defineoperator",this[CMD_TEXT])=0 then
      parse_operator_block(get_cmd_block(true))
    elsif compare("definefunction",this[CMD_TEXT])=0 then
      function_list=parse_define_block(get_cmd_block_nestable(true),function_list,KIND_FUNCTION)
    elsif compare("definescript",this[CMD_TEXT])=0 then
      script_list=parse_define_block(get_cmd_block(true),script_list,KIND_SCRIPT)
    elsif compare("plotscrversion",this[CMD_TEXT])=0 then
      --ignore
      get_cmd_block(false)
    elsif triggeridx>0 then
      --"script"/etc. Negative trigger IDs are subscripts
      if trigger_list[triggeridx][PAIR_NUM]<0 then
        src_error(
          sprintf_utf(
            COLYEL&"%s"&COLRED&" can only occur inside another script. Use "&COLYEL&"script"&COLRED&" instead for regular scripts."
            ,{this[CMD_TEXT]}
          )
          ,this[CMD_POS]
        )
      else
        parse_script(this,{},0,attributes)
        attributes={}
      end if
    elsif this[CMD_TEXT][1]='@' then
      --This will keep reading all consecutive attributes until there are no more
      attrs_and_ptr=parse_attributes(cmd,get_cmd_pointer-1)
      attributes=attrs_and_ptr[1]
      get_cmd_pointer=attrs_and_ptr[2]
    else
      check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration")
      src_error(
        sprintf_utf(
          "Expected top-level declaration but found "&COLYEL&"%s"&COLRED
          ,{this[CMD_TEXT]}
        )
        ,this[CMD_POS]
      )
    end if
  end while
  if length(attributes)>0 then
    --If we just read some attribute statements, they must precede a script
    src_error("Expected a script to follow function attribute, but file ended", this[CMD_POS])
  end if

  cmd={}  --Free memory
  exit_timing_zone()
end procedure

---------------------------------------------------------------------------

procedure dump_script_and_function_info(integer fh, sequence list)
  sequence this
  string id_string
  sequence src_position
  for i=1 to length(list) do
    this=list[i]
    if this[PAIR_NUM]>autonumber_id then
      id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]})
    else
      id_string=sprintf("ID=%d",{this[PAIR_NUM]})
    end if
    src_position=decode_srcpos(this[FUNC_POS])
    print_utf(fh,"%s %d\t%s\t%s(",{
       src_position[POS_FILENAME]
      ,src_position[POS_LINE]
      ,id_string
      ,this[PAIR_NAME]
    })
    if equal(this[FUNC_ARGS],VAR_ARGS) then
      print_utf(fh,"VARIABLE ARGS",{})
    else
      for j=1 to length(this[FUNC_ARGS]) do
        if j>1 then
          print_utf(fh,",",{})
        end if
        if equal(this[FUNC_ARGS][j],NO_DEFAULT) then
          print_utf(fh,"REQ",{})
        else
          print_utf(fh,"%d",{this[FUNC_ARGS][j]})
        end if
      end for
    end if
    print_utf(fh,")\n",{})
  end for
end procedure

---------------------------------------------------------------------------

function seek_string_by_id(integer id, sequence list, string name)
  integer at
  at=find_in_column(id,list,PAIR_NUM)
  if at then
    return(list[at][PAIR_NAME])
  else
    simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
  end if
  return("")
end function

---------------------------------------------------------------------------

--Used while dumping script binary
function name_lookup(sequence pair, VarList locals)
  integer at
  integer frame,id
  if pair[1]=KIND_NUMBER then
    return(sprintf("%d",{pair[2]}))
  elsif pair[1]=KIND_LOCAL or pair[1]=KIND_NONLOCAL then
    frame=and_bits(pair[2],#FF00)/#100
    id=and_bits(pair[2],#FF)
    for i=1 to length(locals) do
      if locals[i][VAR_FRAME]=frame and locals[i][VAR_ID]=id then
        return locals[i][CMD_TEXT]
      end if
    end for
    simple_error(sprintf_utf("HSpeak bug: decompiler found illegal variable ID "&COLYEL&"%d*256+%d"&COLRED,{frame,id}))
  elsif pair[1]=KIND_GLOBAL then
    at=find(pair[2],global_list[PAIR_NUM])
    if at then
      return(global_list[PAIR_NAME][at])
    else
      simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
    end if
  elsif pair[1]=KIND_FLOW then
    return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
  elsif pair[1]=KIND_SCRIPT then
    return(seek_string_by_id(pair[2],script_list,"user script"))
  elsif pair[1]=KIND_FUNCTION then
    return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
  elsif pair[1]=KIND_MATH then
    return(seek_string_by_id(pair[2],math_list,"built-in function"))
  else
    simple_error(sprintf_utf("HSpeak bug: decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
  end if
end function

---------------------------------------------------------------------------

function binstring_to_int(sequence encoded)
  --bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles
  atom temp
  temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000
  if and_bits(temp,#80000000) then
    return(temp-#100000000)
  end if
  return(temp)
end function

---------------------------------------------------------------------------

function read_word(sequence encoded)
  --opposite of output_word. encoded should be length 2 or more
  integer temp
  temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100
  if and_bits(temp,#8000) then
    return(temp-#10000)
  end if
  return(temp)
end function

---------------------------------------------------------------------------

--Convenience wrapper around read_word, to read an int16 with offset given
--in int16 words and starting from 0 instead of 1.
function read_int16(sequence encoded, integer offset)
  return read_word(encoded[1+2*offset..2+2*offset])
end function

---------------------------------------------------------------------------

function dump_script_binary(sequence bin, integer offset, integer depth, VarList locals)
  string result
  sequence kind_and_id
  integer kind
  integer argcount
  integer new_offset
  result=""
  kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])}
  kind=kind_and_id[1]
  result&=sprintf_utf("%s%s",{
     repeat(' ',depth)  --indent
    ,name_lookup(kind_and_id,locals)
  })
  if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
    argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11])
    if argcount then
      result&="(\n"
      for i=0 to argcount-1 do
        new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3])
        result&=dump_script_binary(bin,new_offset,depth+2,locals)
      end for
      result&=repeat(' ',depth)&")\n"
    else
      result&="()\n"
    end if
  else
    result&="\n"
  end if

  return(result)
end function

---------------------------------------------------------------------------

function dump_script_tree(NodeList tree, integer depth)
  string result
  result=""
    for i=1 to length(tree)  do
      result&=sprintf_utf("%s%s",{
         repeat(' ',depth)--indent
        ,tree[i][TREE_TRUNK][CMD_TEXT]
      })
      if length(tree[i][TREE_BRANCHES])>0 then
        result&="(\n"
        result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
        result&=repeat(' ',depth)&")\n"
      else
        result&="\n"
      end if
    end for
  return(result)
end function

---------------------------------------------------------------------------

function dump_script_strings(sequence bin)
  string result
  integer table_start
  integer offset
  integer len
  result=""
  table_start=binstring_to_int(bin[9..12])
  if table_start=0 then
    return("")
  end if
  bin=bin[table_start+1..$]
  offset=1
  while offset<length(bin) do
    len=binstring_to_int(bin[offset..offset+3])
    result&=sprintf("string offset %-4d: \"",{offset/4})
    result&=escape_string(bin[offset+4..offset+4+len-1]) & "\"\n"
    offset+=4+len
    if remainder(len,4) then
      offset+=4-remainder(len,4)
    end if
  end while
  return(result)
end function

---------------------------------------------------------------------------

procedure dump_debug_report()
  integer fh
  string debug_file
  sequence src_position
  Script script
  integer idx
  VarList locals
  --only do this if the -d debug option was on the command line
  if find('d',optlist) then
    enter_timing_zone("Writing hs_debug.txt")
    if length(path_only(dest_file))>1 then
      debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
    else
      debug_file="hs_debug.txt"
    end if
    fh=open(debug_file,"w")
    if fh!=failure then
      wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
      -------------------------------------
      print_utf(fh,"[Scripts]\n",{})
      dump_script_and_function_info(fh,script_list)
      print_utf(fh,"\n",{})
      -------------------------------------
      print_utf(fh,"[Global Variables]\n",{})
      for i=1 to length(global_list[PAIR_NUM]) do
        src_position=decode_srcpos(global_list[GLB_POS][i])
        print_utf(fh,"%s %d\tID=%d\t%s\n",{
           src_position[POS_FILENAME]
          ,src_position[POS_LINE]
          ,global_list[PAIR_NUM][i]
          ,global_list[PAIR_NAME][i]
        })
      end for
      print_utf(fh,"\n",{})
      -------------------------------------
      print_utf(fh,"[Builtin Functions]\n",{})
      dump_script_and_function_info(fh,function_list)
      print_utf(fh,"\n",{})
      -------------------------------------
      print_utf(fh,"[Operators]\n",{})
      for i=1 to length(operator_list) do
        src_position=decode_srcpos(operator_list[i][OPER_POS])
        print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
           src_position[POS_FILENAME]
          ,src_position[POS_LINE]
          ,operator_list[i][PAIR_NAME]
          ,operator_list[i][OPER_TRUENAME]
          ,operator_list[i][PAIR_NUM]
        })
      end for
      print_utf(fh,"\n",{})
      -------------------------------------
      print_utf(fh,"[Script Dumps]\n",{})
      for i=1 to length(all_scripts) do
        script=all_scripts[i]
        src_position=decode_srcpos(script[SCRIPT_POS])
        print_utf(fh,"%s %d\tID=%d\t%s (%s)\n",{
           src_position[POS_FILENAME]
          ,src_position[POS_LINE]
          ,script[SCRIPT_ID]
          ,script[SCRIPT_NAME]
          ,script[SCRIPT_FULL_NAME]
        })
        idx=script[SCRIPT_PARENT_IDX]
        if idx then
          src_position=decode_srcpos(script[SCRIPT_POS])
          print_utf(fh,"%s %d\tparent=ID %d\t%s (access to %d nonlocals)\n",{
             src_position[POS_FILENAME]
            ,src_position[POS_LINE]
            ,all_scripts[idx][SCRIPT_ID]
            ,all_scripts[idx][SCRIPT_NAME]
            ,script[SCRIPT_NUM_NONLOCALS]
          })
        end if
        src_position=decode_srcpos(script[SCRIPT_TRIGGER_TOK][CMD_POS])
        print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{
           src_position[POS_FILENAME]
          ,src_position[POS_LINE]
          ,script[SCRIPT_TRIGGER_ID]
          ,script[SCRIPT_TRIGGER_TOK][CMD_TEXT]
        })
        locals=script[SCRIPT_VARIABLES]
        for j=1 to length(locals) do
          src_position=decode_srcpos(locals[j][CMD_POS])
          print_utf(fh,"%s %d\tvar(frame %d id %d)=%s\n",{
             src_position[POS_FILENAME]
            ,src_position[POS_LINE]
            ,locals[j][VAR_FRAME]
            ,locals[j][VAR_ID]
            ,locals[j][CMD_TEXT]
          })
        end for
        print_utf(fh,"%d bytes compiled\n",{length(script[SCRIPT_BINARY])})
--        print_utf(fh,"%s\n\n",{dump_script_tree(script[SCRIPT_AST],0)})
        print_utf(fh,"%s",{dump_script_binary(script[SCRIPT_BINARY][CODE_START_BYTE_OFFSET+1..$],0,0,script[SCRIPT_VARIABLES])})
        print_utf(fh,"%s\n\n",{dump_script_strings(script[SCRIPT_BINARY])})
      end for
      print_utf(fh,"\n",{})
      -------------------------------------
      close(fh)
    else
      wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
    end if
    exit_timing_zone()
  end if
end procedure

---------------------------------------------------------------------------

--Look up the name of a [sub]script in all current scopes, returning all_scripts
--index if found, or 0 if not
function lookup_scoped_script_name(string name)
  string s
  integer at
  integer idx
  idx=cur_script_num
  while true do
    if idx then
      s=all_scripts[idx][SCRIPT_FULL_NAME]&"<-"&name
    else
      s=name
    end if
    at=find_in_column(s,all_scripts,SCRIPT_FULL_NAME)
    if at then
      return at
    end if

    if idx=0 then
      return 0
    end if
    idx=all_scripts[idx][SCRIPT_PARENT_IDX]
  end while
end function

---------------------------------------------------------------------------

--identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis
function what_kind_and_id(Token command, VarList local_vars, integer look_for_operators)
  integer kind
  integer at
  atom id
  Symbol sym
  string s
  s=command[CMD_TEXT]
  if length(s)=0 then
    kind=KIND_PARENS
    id=0
  elsif (s[1] >= '0' and s[1] <= '9') or s[1] = '-' then
    kind=KIND_NUMBER
    id=string_to_object(s,{})
  elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then
    kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
    id=0
  else
    sym=map:get(global_scope,s,nil_sym)
    if sym[SYM_KIND] then
      kind=sym[SYM_KIND]
      id=sym[SYM_ID]
    elsif s[1] = '@' then
      kind=KIND_REFERENCE
      id=0 -- ID always resolves to 0 for references here, since it is too early to know all
           -- script IDs (really? Surely false). The real work is done in binary_compile_recurse
    elsif s[1] = '"' then
      src_error(
        sprintf_utf(
          "The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
          {shorten_string(s,15)}),
        command[CMD_POS]
      )
    else
      --Local variables can't shadow any names in global scope, they only shadow
      --nonlocal variables. So cen check this last
      at=find_in_column(s,local_vars,CMD_TEXT)
      if at then
        if local_vars[at][VAR_FRAME]=0 then
          kind=KIND_LOCAL
        else
          kind=KIND_NONLOCAL
        end if
        id=local_vars[at][VAR_ID] + 256*local_vars[at][VAR_FRAME]
      else
        --It may be a subscript, in which case the scoped name needs to be looked up
        at=lookup_scoped_script_name(s)
        if at then
          kind=KIND_SCRIPT
          id=all_scripts[at][SCRIPT_ID]
        else
          src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
        end if
      end if
    end if
  end if
  return({kind,id})
end function

---------------------------------------------------------------------------

--Check whether a math operator is not an assignment, a compare operator, or 'random'
function is_simple_math_op(integer id)
  return (id >= 1 and id <= 9) or id >= 19
end function

---------------------------------------------------------------------------

--Given name (CMD_TEXT) of a Node, return whether it is a block: all its arguments are
--statements as opposed to expressions (if, for, while, switch, which take a mix need special handling)
function block_command(string name)
  return compare("do",name)=0 or compare("then",name)=0 or compare("else",name)=0
end function

---------------------------------------------------------------------------

procedure check_is_expression(Node node, string parent)
  Symbol sym
  Kind kind
  --Local variables are always expressions
  sym=map:get(global_scope,node[TREE_TRUNK][CMD_TEXT],nil_sym)
  kind=sym[SYM_KIND]
  --This function is always called after convert_macros, so we don't need to
  --worry about whether a particular macro is an expression
  if kind=KIND_FLOW or kind=KIND_KEYWORD then
    src_error(
      sprintf_utf(COLYEL&"%s"&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&"%s"&COLRED&")"
                  ,{node[TREE_TRUNK][CMD_TEXT],parent})
      ,node[TREE_TRUNK][CMD_POS])
  end if
end procedure

---------------------------------------------------------------------------

--this function not used anywhere
function how_many_args(string name, integer kind)
  integer result
  integer at
  if kind=KIND_PARENS then
    result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
  elsif kind=KIND_FLOW then
    result=-1 --flow supports an unknown number of args
  elsif kind=KIND_OPERATOR then
    result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
  elsif kind=KIND_MATH then
    at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME)
    result=length(math_list[at][FUNC_ARGS])
  elsif kind=KIND_FUNCTION then
    at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME)
    if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then
      result=-1
    else
      result=length(function_list[at][FUNC_ARGS])
    end if
  elsif kind=KIND_SCRIPT then
    at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME)
    result=length(script_list[at][FUNC_ARGS])
  elsif kind=KIND_MACRO then
    result=-1 --unknown number of args
  else
    --numbers, variables, etc do not permit args
    result=0
  end if
  return(result)
end function

---------------------------------------------------------------------------

--Something that potentially takes args
function takes_args(integer kind)
  if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO or kind=KIND_KEYWORD then
    return(true)
  else
    return(false)
  end if
end function

---------------------------------------------------------------------------

--Get next AST node from a list of tokens by pairing up begin,end
--Returns {new_ptr,{token,children}} where children is a TokenList not a NodeList
function get_script_node(integer ptr, TokenList data, VarList vars)
  Token command
  sequence ptr_and_block
  TokenList block
  sequence temp
  integer kind
  integer followed_by_begin
  block={}
  command=data[ptr]
  ptr+=1
  if compare("end",command[CMD_TEXT])=0 then
    src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS])
  elsif compare("begin",command[CMD_TEXT])=0 then
    --floaty brackets for order-of-operations-enforcement
    ptr-=1
    command[CMD_TEXT]=""
  end if

  temp=what_kind_and_id(command,vars,true)  --check for KIND_OPERATOR too
  kind=temp[1]
  followed_by_begin=false
  if ptr<=length(data) then
    --there is room for args
    followed_by_begin=equal("begin",data[ptr][CMD_TEXT])

    --distinguishing between functions with and without args means wait() would be ok but noop() would not
    if takes_args(kind) then
      if followed_by_begin then
        --yes, it has args
        ptr+=1--only increment the pointer when we have args
        ptr_and_block=get_cmd_depth(ptr,data,1)
        ptr=ptr_and_block[1]
        block=ptr_and_block[2]
        if length(block)=0 and kind=KIND_PARENS then
          src_error("found empty parentheses not associated with a function call",command[CMD_POS])
        end if
      end if
    end if
  end if

  if kind=KIND_FLOW then
    --Require certain flow control types to be followed by parentheses.
    --We have to check this here or earlier, because after building the parse tree
    --there will be no difference between "()" and omitted parentheses.
    --(Note that several later checks in normalize_flow_control are redundant to this)
    --FIXME: Ideally we wouldn't actually check anything here, but instead preserve presence
    --of empty parentheses to check later (it will matter in future anyway according to typing plan)
    --FIXME: excludes else, because of 'case(else)'
    if find(command[CMD_TEXT],flow_requiring_brackets) then
      if not followed_by_begin then
        src_error(sprintf_utf(COLYEL&"%s"&COLRED&" must be followed by "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{command[CMD_TEXT]}),command[CMD_POS])
      end if
    end if

    --else has a special check for 'else,if' mistake.
    --'not followed_by_begin' excludes 'else(...)if'
    if equal("else",command[CMD_TEXT]) and ptr<=length(data) and equal("if",data[ptr][CMD_TEXT]) and not followed_by_begin then
      src_error(COLYEL&"else if"&COLRED&" may not be written with a separator in-between",command[CMD_POS])
      --temp=get_script_node(ptr, data, vars)  --hack to allow if necessary
      --temp[2][1][CMD_TEXT]="elseif"
      --return temp
    end if
  end if

  return({ptr,{command,block}})
end function

---------------------------------------------------------------------------

--Returns a NodeList
function build_ast(TokenList script_body_toks, VarList vars)
  integer ptr
  sequence ptr_and_seminode
  sequence seminode  --Not a Node, has a TokenList instead of NodeList
  Node node
  NodeList result
  result={}
  ptr=1
  while true do
    ptr_and_seminode=get_script_node(ptr,script_body_toks,vars)
    ptr=ptr_and_seminode[1]
    seminode=ptr_and_seminode[2]
    if length(seminode[TREE_BRANCHES])>0 then
      --this seminode has arguments that need parsing
      node={seminode[TREE_TRUNK],build_ast(seminode[TREE_BRANCHES],vars)}
    else
      node=seminode
    end if
    result=append(result,node)
    if ptr>length(script_body_toks) then
      exit --break out of while when there is no more data
    end if
  end while
  return(result)
end function

---------------------------------------------------------------------------

--When variables are inherited by a subscript all frame numbers need to be incremented
function shift_variables_frame(VarList vars)
  for i=1 to length(vars) do
    vars[i][VAR_FRAME]+=1
  end for
  return vars
end function

---------------------------------------------------------------------------

--Given a VarList with all local & nonlocal variables, returns a VarList
--of just the locals, in-order
function filter_local_variables(VarList vars)
  sequence ret
  integer next_idx
  ret={}
  next_idx=0
  for i=length(vars) to 1 by -1 do
    if vars[i][VAR_FRAME]=0 then
      --Is local
      if vars[i][VAR_ID]!=next_idx then
        --This is really an assumption, not a bug
        simple_error("compiler bug! Local vars are not in order")
      end if
      next_idx+=1
      ret=append(ret,vars[i])
    end if
  end for
  return ret
end function

---------------------------------------------------------------------------

procedure add_local_variable(Token var)
  Script script
  VarList vars
  integer id
  script=all_scripts[cur_script_num]
  vars=script[SCRIPT_VARIABLES]
  --First check it's not already defined at global scope
  check_undefined_string(var,"local variable name")
  --Always add to scope of current script (frame == 0)
  id=length(vars)-script[SCRIPT_NUM_NONLOCALS]
  --Extend the Token into a VarToken, and prefix to the list, so that it takes
  --precedence over shadowed variables
  all_scripts[cur_script_num][SCRIPT_VARIABLES]={var & {0,id}} & vars
end procedure

---------------------------------------------------------------------------

--Find the variable declarations in a script
procedure gather_local_vars(TokenList data)
  Token this
  integer ptr
  ptr=1
  while true do
    this=data[ptr]
    ptr+=1
    if compare("variable",this[CMD_TEXT])=0 then
      if ptr>length(data) then
        src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
      end if
      this=data[ptr]
      ptr+=1
      if compare("begin",this[CMD_TEXT])=0 then
        while true do
          if ptr>length(data) then
            src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
          end if
          this=data[ptr]
          ptr+=1
          if compare("end",this[CMD_TEXT])=0 then
            exit--break the while
          end if
          add_local_variable(this)
        end while
      else
        src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
      end if
    end if
    if ptr>length(data) then
      exit --break out of the while
    end if
  end while
end procedure

---------------------------------------------------------------------------

--There are only two, hardcoded, macros right now: tracevalue, assert.
function expand_macros(NodeList tree)
  string cmd
  string tracecmd
  NodeList newtree
  NodeList newargs
  string new_string
  Node string_node, assertstr_node
  sequence src_position
  sequence temp
  srcpos pos, vpos
  integer ptr
  ptr=1
  while ptr<=length(tree) do
    --recurse first, because tree[ptr] might be split up (as in 'assert')
    if length(tree[ptr][TREE_BRANCHES]) then
      tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES])
    end if

    cmd=tree[ptr][TREE_TRUNK][CMD_TEXT]

    if equal(cmd,"tracevalue") or equal(cmd,"tracevalueof") then
      tracecmd="tracevalueinternal"
    elsif equal(cmd,"showvalueof") then
      tracecmd="showvalueofinternal"
    else
      tracecmd=""
    end if
    if length(tracecmd) then
      newargs={}
      tree[ptr][TREE_TRUNK][CMD_TEXT]=tracecmd
      pos=tree[ptr][TREE_TRUNK][CMD_POS]
      for i=1 to length(tree[ptr][TREE_BRANCHES]) do
        new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i])
        string_list=append(string_list,new_string)
        string_node={ {sprintf("@$string%d",{length(string_list)}),virtual_pos(pos)}, {} }
        newargs&={string_node,tree[ptr][TREE_BRANCHES][i]}
      end for
      tree[ptr][TREE_BRANCHES]=newargs
    end if

    if equal(cmd,"assert") then
      --assert(condition)  -->  if (not(condition)) then ($assert expression string="condition", assertfailure)
      -- and
      --assert(x == y)  -->  if(_asserteq(x, y, assert expression string, "condition")) then (assertfailure)
      pos=tree[ptr][TREE_TRUNK][CMD_POS]
      vpos=virtual_pos(pos)
      src_position=decode_srcpos(pos)
      if length(tree[ptr][TREE_BRANCHES])!=1 then
        src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos)
      end if

      new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])})
      string_list=append(string_list,new_string)
      string_node={ {sprintf("@$string%d",{length(string_list)}),vpos}, {} }

      assertstr_node={ {enforce_constants("assertexpressionstring"),vpos}, {} }

      newtree={{{"if",vpos},{}}, {{"then",vpos},{}}}

      if equal(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],"equal") then
        --Use _asserteq

        sequence x,y
        temp=tree[ptr][TREE_BRANCHES][1][TREE_BRANCHES]  --x, y
        x=temp[1]
        y=temp[2]
        --if(...)
        newtree[1][TREE_BRANCHES]={{ {"_asserteq",vpos}, {x,y,assertstr_node,string_node} }}
        --then(assertfailure)
        newtree[2][TREE_BRANCHES]={{ {"assertfailure",vpos}, {} }}

      else
        --if(not(condition))
        newtree[1][TREE_BRANCHES]={{ {"not",vpos}, {tree[ptr][TREE_BRANCHES][1]} }}

        temp={{{"setstringfromtable",vpos},{}}, {{"assertfailure",vpos},{}}}
        temp[1][TREE_BRANCHES]={assertstr_node,string_node}  --stringnum, string
        --then(...)
        newtree[2][TREE_BRANCHES]=temp
      end if

      tree=replace_slice(tree,ptr,ptr,newtree)  --tree[ptr..ptr]=newtree
      ptr+=1
    end if
    ptr+=1
  end while
  return(tree)
end function

---------------------------------------------------------------------------

--tree[ptr] is a partially constructed if Node (lacking else child), followed either by an else or elseif Node.
--Handle else/elseif, parenting to tree[pt], and return the new tree.
function build_else_node(NodeList tree, integer ptr)
  NodeList elseargs
  string nextcmd
  integer eptr
  srcpos pos

  nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
  pos=tree[ptr+1][TREE_TRUNK][CMD_POS]
  if equal("else",nextcmd) then
    --found else
    tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
    tree=delete_element(tree,ptr+1)
  else  -- equal("elseif",nextcmd)
    --Split off the if
    elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} }

    --Grab the rest of this if,elseif,...,else chain, to put inside the else
    eptr=ptr+2
    while eptr<=length(tree) do
      nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT]
      --Stop when we see an else but otherwise we don't check correctness until we recurse
      if equal(nextcmd,"else") then
        eptr+=1
        exit
      elsif find(nextcmd,{"elseif","then"}) then
        --continue
      else
        exit
      end if
      eptr+=1
    end while
    elseargs&=tree[ptr+2..eptr-1]
    tree=tree[1..ptr]&tree[eptr..$]
    tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} }
  end if
  return tree
end function

---------------------------------------------------------------------------

--Build switch AST Nodes, return new tree
function build_switch_node(NodeList tree, VarList vars, integer ptr)
  srcpos pos
  srcpos argpos
  NodeList args
  Token thiscmd
  NodeList rawargs
  NodeList caseargs
  Token casecmd
  string lastnodename
  sequence collected  --either a Node or {}
  sequence temp
  integer have_else
  srcpos elsepos
  integer expect_case
  integer kind
  atom id
  string else_error_msg
  else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block"

  pos=tree[ptr][TREE_TRUNK][CMD_POS]
  args=tree[ptr][TREE_BRANCHES]  --start with the condition, length 1

  if length(args)>1 then
    src_error(sprintf_utf(
                  COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one. (The body of the switch should go in its "&COLYEL&"do(...)"&COLRED&" block instead.)"
                  ,{length(args)}
                ),args[2][TREE_TRUNK][CMD_POS])
  elsif length(args)=0 then
    src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos)
  end if
  check_is_expression(args[1],"switch")
  if ptr>=length(tree) then
    src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos)
  end if
  thiscmd=tree[ptr+1][TREE_TRUNK]
  if equal("do",thiscmd[CMD_TEXT])=0 then
    src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS])
  end if

  --found do, processing starts here (build arguments to switch, after the expression)
  rawargs=tree[ptr+1][TREE_BRANCHES]  --contents of the 'do'
  lastnodename=""
  have_else=false
  expect_case=false
  collected={}

  for j=1 to length(rawargs) do
    thiscmd=rawargs[j][TREE_TRUNK]
    argpos=thiscmd[CMD_POS]
    if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then
      src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"."
                        ,{thiscmd[CMD_TEXT]}),argpos)
    end if
    --to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible
    if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then
      --Old syntax else(...) block
      if length(collected) then
        args&={collected}
        collected={}
      end if
      if have_else then
        src_error(else_error_msg,elsepos)
      end if
      expect_case=false

      if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
        --Last case is new-style syntax and wasn't followed by any statements
        --Lets just disallow this to prevent a little confusion
        src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "&
                  "Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions."
                  ,rawargs[j-1][TREE_TRUNK][CMD_POS])
      end if

      if j!=length(rawargs) then
        if length(rawargs[j][TREE_BRANCHES])=0 then
          else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?"
        end if
        src_error(else_error_msg,argpos)
      end if
      --convert it to a do
      args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} }
      elsepos=argpos
      have_else=true

    elsif equal("case",thiscmd[CMD_TEXT]) then
      if length(collected) then
        args&={collected}
        collected={}
      end if
      if have_else then
        --Note we disallow  case(else) case(1)  but allow  case(else,1)
        src_error(else_error_msg,elsepos)
      end if
      expect_case=false

      caseargs=rawargs[j][TREE_BRANCHES]

      --Call a couple passes early here, so that constant expressions are allowed
      caseargs=collapse_floaty_brackets(caseargs)
      caseargs=optimize_script(caseargs,vars)

      --Check contents are only either integers, or "else", and append to args
      for k=1 to length(caseargs) do
        casecmd=caseargs[k][TREE_TRUNK]
        if equal(casecmd[CMD_TEXT],"else") then
          if have_else then
            src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS])
          end if
          have_else=true
          elsepos=argpos
          --don't emit anything: next do is the 'else' one
        else
          temp=what_kind_and_id(casecmd,vars,false)
          kind=temp[1]
          id=temp[2]
          if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then
            src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED
                          ,{casecmd[CMD_TEXT]}),casecmd[CMD_POS])
          end if
          args&={caseargs[k]}
        end if
      end for
    else
      --Disallow "case() do() garbage"
      if expect_case then
        src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos)
      end if
      if length(collected)=0 then
        --Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it
        if equal("do",thiscmd[CMD_TEXT]) then
          args&={rawargs[j]}
          expect_case=true
        else
          collected={{"do",argpos},{rawargs[j]}}
        end if
      else
        --Continue collecting
        collected[TREE_BRANCHES]&={rawargs[j]}
      end if
    end if
    lastnodename=thiscmd[CMD_TEXT]
  end for
  if length(collected) then
    args&={collected}
  end if
  if length(args)>1 and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
    --Last case (possibly an else) is new-style syntax and wasn't followed by any statements,
    --so add the missing do. Don't need to do this, but I'd like to keep things tidy
    --(srcpos is of the last case)
    args&={ {{"do",virtual_pos(argpos)},{}} }
  end if
  if have_else=false then
    --Insert a dummy do default block since else has been left out
    args&={ {{"do",virtual_pos(pos)},{}} }
  end if

  tree[ptr][TREE_BRANCHES]=args
  --delete the do block
  return delete_element(tree,ptr+1)
end function

---------------------------------------------------------------------------

--Given a variable token for 'for' or 'setvariable', etc. (parent) and list of locals/non-locals 'vars',
--return a an integer ID token, or throw an error if not a variable.
function convert_variable_to_id(Token var, VarList locals, string parent)
  sequence kind_and_id
  atom id
  integer argkind
  integer var_at
  kind_and_id=what_kind_and_id(var,locals,false)
  argkind=kind_and_id[1]
  id=kind_and_id[2]
  if argkind=KIND_LOCAL or argkind=KIND_NONLOCAL then
    id=-1-id
  elsif argkind=KIND_GLOBAL then
    --id unmodified
  else
    --It's not a variable. bad!
    src_error(sprintf_utf(
      "first argument of "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
      ,{parent,KIND_LONGNAMES[argkind],var[CMD_TEXT]}
    ),var[CMD_POS])
  end if
  return {sprintf("%d",{id}),var[CMD_POS]}
end function

---------------------------------------------------------------------------

--parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements
--Note that several of these checks are now redundant to the check for 'begin' performed in get_script_node
function normalize_flow_control(NodeList tree, VarList vars, string parent)
  integer ptr
  string s
  string nextcmd
  NodeList args
  Token ctr
  srcpos pos
  Symbol sym
  Kind kind
  sequence temp
  integer var_at
  ptr=1

  while ptr<=length(tree) do
    s=tree[ptr][TREE_TRUNK][CMD_TEXT]
    pos=tree[ptr][TREE_TRUNK][CMD_POS]
    args=tree[ptr][TREE_BRANCHES]

    sym=map:get(global_scope,s,nil_sym)
    kind=sym[SYM_KIND]
    if kind=KIND_FLOW or kind=KIND_KEYWORD then
      if equal(s,"then") or equal(s,"else") or equal(s,"do") then
        --these are checked for correct parents below.
      elsif block_command(parent)=false then
        src_error(COLYEL&s&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&parent&COLRED&")",pos)
      end if
    end if

    if kind=KIND_FLOW then
      if compare("if",s)=0 then
        if length(args)>1 then
          src_error(sprintf_utf(
                        COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
                        ,{length(args)}
                      ),args[2][TREE_TRUNK][CMD_POS])
        elsif length(args)=0 then
          src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos)
        end if
        check_is_expression(args[1],s)
        if ptr<length(tree) then
          --there is room
          nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
          if compare("then",nextcmd)=0 then
            tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
            tree=delete_element(tree,ptr+1)
            if ptr<length(tree) then
              --there is room for else
              nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
              if equal("else",nextcmd) or equal("elseif",nextcmd) then
                tree=build_else_node(tree,ptr)
              else
                --no else found, but thats okay.
                --add dummy else
                tree[ptr][TREE_BRANCHES]&={ {{"else",virtual_pos(pos)},{}} }
              end if
            else
              --add dummy else
              tree[ptr][TREE_BRANCHES]&={ {{"else",virtual_pos(pos)},{}} }
            end if
          elsif equal("else",nextcmd) or equal("elseif",nextcmd) then
            --add dummy then before else
            tree[ptr][TREE_BRANCHES]&={ {{"then",virtual_pos(pos)},{}} }
            tree=build_else_node(tree,ptr)
          else
            --found neither then nor else
            src_error(sprintf_utf("expected "&COLYEL&"then"&COLRED&" or "&COLYEL&"else"&COLRED&" but found "&COLYEL&"%s"&COLRED,{nextcmd}),tree[ptr+1][TREE_TRUNK][CMD_POS])
          end if
        else
          --no room for then or else
          src_error(COLYEL&"if"&COLRED&" should be followed by "&COLYEL&"then"&COLRED&" or "&COLYEL&"else"&COLRED,pos)
        end if

      elsif compare("then",s)=0 then
        if compare("if",parent)!=0 or ptr!=2 then
          src_error(COLYEL&"then"&COLRED&" should follow an "&COLYEL&"if"&COLRED&" or "&COLYEL&"else if"&COLRED&" block",pos)
        end if

      elsif compare("else",s)=0 then
        if compare("if",parent)!=0 or ptr!=3 then
          src_error(COLYEL&"else"&COLRED&" should follow an "&COLYEL&"if"&COLRED&" or "&COLYEL&"then"&COLRED&" or "&COLYEL&"else if"&COLRED&" block",pos)
        end if

      elsif compare("elseif",s)=0 then
        --Should be absorbed by now
        src_error(COLYEL&"else if"&COLRED&" should follow an "&COLYEL&"if"&COLRED&" or "&COLYEL&"then"&COLRED&" block",pos)

      elsif compare("do",s)=0 then
        if equal(parent,"")=false and block_command(parent)=false then
          --These check their arguments are expressions, so a do which is a child of one of these is correct
          if equal(parent,"for")=false and equal(parent,"while")=false and equal(parent,"switch")=false then
            src_error(COLYEL&s&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&parent&COLRED&")",pos)
          end if
        end if

      elsif compare("while",s)=0 then
        if length(args)>1 then
          src_error(sprintf_utf(
                    COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
                    ,{length(args)}
                    ),args[2][TREE_TRUNK][CMD_POS])
        elsif length(args)=0 then
          src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos)
        end if
        check_is_expression(args[1],s)
        if ptr<length(tree) then
          --there is room
          if compare("do",tree[ptr+1][TREE_TRUNK][CMD_TEXT])=0 then
            --found do
            tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
            tree=delete_element(tree,ptr+1)
          else
            src_error(sprintf_utf(COLYEL&"while"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{tree[ptr+1][TREE_TRUNK][CMD_TEXT]}),tree[ptr+1][TREE_TRUNK][CMD_POS])
          end if
        else
          src_error(sprintf_utf(COLYEL&"while"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,{}),pos)
        end if

      elsif compare("for",s)=0 then
        --Handle regular arguments
        if length(args)<3 then
          src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement needs three or four arguments",{}),pos)
        elsif length(args)>4 then
          src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(args)}),args[5][TREE_TRUNK][CMD_POS])
        elsif length(args)=3 then
          --append default step value
          args&={ {{"1",virtual_pos(pos)},{}} }
        end if
        ctr=args[1][TREE_TRUNK]
        temp=what_kind_and_id(ctr,vars,false)
        kind=temp[1]
        if kind=KIND_LOCAL then
          map:put(used_locals,temp[2],true)
        elsif kind=KIND_GLOBAL then
          --warn, then translate into a numeric reference to a variable
          src_warn(sprintf_utf(
            "Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
           ,{ctr[CMD_TEXT]}
          ),ctr[CMD_POS])
        end if
        args[1][TREE_TRUNK]=convert_variable_to_id(ctr,vars,"for")
        check_is_expression(args[2],s)
        check_is_expression(args[3],s)
        check_is_expression(args[4],s)

        --Handle "do"
        if ptr<length(tree) then
          --there is room
          if compare("do",tree[ptr+1][TREE_TRUNK][CMD_TEXT])=0 then
            --found do
            args&={tree[ptr+1]}
            tree=delete_element(tree,ptr+1)
          else
            src_error(sprintf_utf(COLYEL&"for"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{tree[ptr+1][TREE_TRUNK][CMD_TEXT]}),tree[ptr+1][TREE_TRUNK][CMD_POS])
          end if
        else
          src_error(sprintf_utf(COLYEL&"for"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,{}),pos)
        end if
        tree[ptr][TREE_BRANCHES]=args

      elsif compare("exit",s)=0 then
        if length(args)>1 then
          src_error(sprintf_utf(
                        COLYEL&s&COLRED&" statement has %d arguments. It should have only zero or one."
                        ,{length(args)}
                      ),args[2][TREE_TRUNK][CMD_POS])
        elsif length(args)=1 then
          tree[ptr][TREE_TRUNK][CMD_TEXT]="exitreturning"
        elsif length(args)=0 then
          tree[ptr][TREE_TRUNK][CMD_TEXT]="exitscript"
        end if

      elsif compare("return",s)=0 or compare("exitreturning",s)=0 then
        if length(args)>1 then
          src_error(sprintf_utf(
                        COLYEL&s&COLRED&" statement has %d arguments. It should have only one."
                        ,{length(args)}
                      ),args[2][TREE_TRUNK][CMD_POS])
        elsif length(args)=0 then
          src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit"&COLRED&" or "&COLYEL&"exit script"&COLRED,{}),pos)
        end if

      elsif compare("break",s)=0 or compare("continue",s)=0 then
        if length(args)>1 then
          src_error(sprintf_utf(
                        COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one."
                        ,{length(args)}
                      ),args[2][TREE_TRUNK][CMD_POS])
        elsif length(args)=0 then
          --append default value
          tree[ptr][TREE_BRANCHES]={{{"1",virtual_pos(pos)},{}}}
        end if

      elsif compare("exitscript",s)=0 then
        if length(args)>0 then
          src_error(sprintf_utf(
                        COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit"&COLRED&" or "&COLYEL&"exit returning"&COLRED
                        ,{length(args)}
                      ),args[1][TREE_TRUNK][CMD_POS])
        end if

      elsif compare("switch",s)=0 then
        tree=build_switch_node(tree,vars,ptr)

      elsif compare("case",s)=0 then
        src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos)
      end if

    elsif kind=KIND_KEYWORD then

      if compare("subscript",s)=0 or compare("variable",s)=0 then
        --Does nothing; remove. (Kept until now to check they isn't placed in a silly place)
        --(Note: any attributes prefixing a subscript have already been removed in
        --parse_script())
        tree=delete_element(tree,ptr)
        continue
      end if

    end if

    if equal(s,"") then
      --Floaty brackets (they haven't been removed yet). Look to parent instead.
      s=parent
    end if
    tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,s)
    ptr+=1
  end while
  return(tree)
end function

---------------------------------------------------------------------------

--A modification of Dijkstra's shunting-yard algorithm
--(This could probably subsume build_ast too)
function convert_operators(NodeList tree)
  NodeList new_tree
  sequence stack   --{int,Node} pairs
  Node opnode
  integer ptr
  integer at
  integer priority
  integer nonoperator_before
  stack={}
  new_tree={}
  ptr=1
  nonoperator_before=false
  while ptr<=length(tree) do
    --for each peer branch on the tree
    at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME)

    if at>0 then
      if nonoperator_before=false then
        --there is no room for the before-operand. Bad!
        src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
      elsif ptr=length(tree) then
        --there is no room for the after-operand. Bad!
        src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
      end if

      priority=operator_list[at][PAIR_NUM]
      --convert the operator to its true function name (which might be the same)
      tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME]
      while length(stack) and priority>=stack[$][1] do
        --pop the operator from the stack and "apply" it to the tree
        opnode=stack[$][2]
        stack=stack[1..$-1]
        opnode[TREE_BRANCHES]=new_tree[$-1..$]
        new_tree=new_tree[1..$-2]&{opnode}
      end while
      stack&={{priority,tree[ptr]}}
      nonoperator_before=false
    else
      if nonoperator_before then
        --Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first
        while length(stack) do
          --pop the operator from the stack and "apply" it to the tree
          opnode=stack[$][2]
          stack=stack[1..$-1]
          opnode[TREE_BRANCHES]=new_tree[$-1..$]
          new_tree=new_tree[1..$-2]&{opnode}
        end while
      end if
      tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES])
      new_tree&={tree[ptr]}
      nonoperator_before=true
    end if

    ptr+=1
  end while

  --copy of above
  while length(stack) do
    --pop the operator from the stack and "apply" it to the tree
    opnode=stack[$][2]
    stack=stack[1..$-1]
    opnode[TREE_BRANCHES]=new_tree[$-1..$]
    new_tree=new_tree[1..$-2]&{opnode}
  end while

  return new_tree
end function

---------------------------------------------------------------------------

--Get the parameters for a certain attribute of a script or function, if it exists
--func_defn: an element of script_list or function_list
--Returns a NodeList, possible empty
function attribute_params(string name, sequence func_defn)
  integer attridx
  attridx=find_in_column(name,func_defn[FUNC_ATTRS],1)
  if attridx=0 then
    return {}
  else
    return func_defn[FUNC_ATTRS][attridx][TREE_BRANCHES]
  end if
end function

---------------------------------------------------------------------------

--Ensure correct number of arguments, add defaults, and translate variables to IDs in assignments
--And also some extra warnings...
function fix_arguments(Node tree, integer kind, integer id, sequence list, VarList vars)
  integer at
  integer argnum,maxargs
  NodeList args, params
  integer fatal
  string message, param
  string funcname
  srcpos funcpos
  funcname=tree[TREE_TRUNK][CMD_TEXT]
  funcpos=tree[TREE_TRUNK][CMD_POS]
  args=tree[TREE_BRANCHES]
  argnum=length(args)
  --Lookup ID because name might be scoped
  at=find_in_column(id,list,PAIR_NUM)
  if equal(list[at][FUNC_ARGS],VAR_ARGS) then
    return(args)
  else
    maxargs=length(list[at][FUNC_ARGS])
  end if
  if maxargs < argnum then
    src_error(sprintf_utf(
      "%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments"
      ,{KIND_LONGNAMES[kind],funcname,maxargs,argnum}
    ),args[maxargs+1][TREE_TRUNK][CMD_POS])
  elsif maxargs > argnum then
    --add defaults if not enough args are present
    if kind=KIND_MATH then
      --special processing for math
      if list[at][PAIR_NUM]<=15 or list[at][PAIR_NUM]>=19 then
        --math shouldnt have defaults
        src_error(sprintf_utf(
          "math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d"
          ,{funcname,argnum,maxargs}
        ),funcpos)
      else
        --variable assignment commands can have defaults
        if argnum = 0 then
          --no defaults for first argument of variable function
          src_error(sprintf_utf(
            "variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1"
            ,{funcname,argnum}
          ),funcpos)
        elsif argnum = 1 then
          --make defaults for second arg of variable function
          if list[at][PAIR_NUM]=16 then
            --setvariable
            args=append(args,{
               {"0",virtual_pos(funcpos)}
              ,{}
            })
          else
            --increment and decrement
            args=append(args,{
               {"1",virtual_pos(funcpos)}
              ,{}
            })
          end if
        end if
      end if
    else
      --normal processing for script and function
      for i=argnum+1 to maxargs do
        if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then
          src_error(sprintf_utf(
            "%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d"
            ,{KIND_LONGNAMES[kind],funcname,i}
          ),funcpos)
        end if
        args=append(args,{
           {sprintf("%d",{list[at][FUNC_ARGS][i]}),virtual_pos(funcpos)}
          ,{}
        })
      end for
    end if
  end if
  --Translate variables in the variable manipulation functions
  if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<=18 then
    args[1][TREE_TRUNK]=convert_variable_to_id(args[1][TREE_TRUNK],vars,funcname)
  end if

  --Obsolescence warnings
  --(This seems as good a spot to do this as any, because we've looked up the function in 'list' already)
  --Syntax: @obsolete([error,] [replacement_cmd,] ["message"])  (arg order can var)
  if and_bits(list[at][FUNC_FLAGS],FLAG_OBSOLETE)!=0 then
    params=attribute_params("@obsolete",list[at])
    message=""
    fatal=false
    for i=1 to length(params) do
      param=params[i][CMD_TEXT]
      if equal(param,"error") then
        fatal=true
      elsif param[1]='"' then
        message&=param[2..$-1]
      else
        message&=sprintf_utf("Use replacement function "&COLYEL&"%s"&COLRED&" instead. ",{param})
      end if
    end for
    if fatal then
      message=sprintf_utf("Function "&COLYEL&"%s"&COLRED&" is obsolete and can no longer be used. %s",{funcname,message})
      src_error(message,funcpos)
    else
      message=sprintf_utf("Function "&COLYEL&"%s"&COLRED&" is obsolete; avoid it. %s",{funcname,message})
      src_warn(message,funcpos)
    end if
  end if

  return(args)
end function

---------------------------------------------------------------------------

--Calls fix_arguments on each non-flow node with arguments
function normalize_arguments(NodeList tree, VarList vars)
  sequence kind_and_id
  integer kind
  atom id
  --Unlike the flow normalization and operator translation we do not insert/delete elements from
  --the current level, so we can use a "for" safely, and dont need a "while"
  for i=1 to length(tree) do
    kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars,false)
    kind=kind_and_id[1]
    id=kind_and_id[2]
    if kind=KIND_SCRIPT then
      tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,script_list,vars)
    elsif kind=KIND_FUNCTION then
      tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,function_list,vars)
    elsif kind=KIND_MATH then
      tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,math_list,vars)
    end if --number, flow, global, local, parens need no argchecking
    if length(tree[i][TREE_BRANCHES]) then
      --if there are sub-arguments, recurse
      tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
    end if
  end for
  return(tree)
end function

---------------------------------------------------------------------------

--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
  integer b1,b2
  b1=and_bits(n,#FF)
  b2=floor(and_bits(n,#FFFF)/256)
  return({b1,b2})
end function

---------------------------------------------------------------------------

function convert_to_bytes(sequence s)
  sequence result
  result={}
  for i=1 to length(s) do
    result&=int_to_bytes(s[i])
  end for
  return(result)
end function

---------------------------------------------------------------------------

--Encode a unicode (UTF32/native Euphoria) string in the format the engine
--requires, which is currently Latin-1
function encode_ohr(string string, integer maxbytes=0)
  for i=1 to length(string) do
    if string[i]>=256 then
      string[i]='?'
    end if
  end for
  if maxbytes and maxbytes<length(string) then
    string=string[1..maxbytes]
  end if
  return string
end function

---------------------------------------------------------------------------

--Returns byte sequence of "32bitstring"-encoded string:
--32 bit length followed one-char-per-byte padded to 4 byte multiple
function encode_32bitstring(string string)
  sequence ret
  integer len
  string=encode_ohr(string)
  len=length(string)
  ret=int_to_bytes(len)&string
  if remainder(len,4) then
    ret&=repeat(0,4-remainder(len,4))
  end if
  return ret
end function

---------------------------------------------------------------------------

--add a string to the table for this script, return its offset
function table_string(integer id)
  integer offset
  offset=length(string_table)/4
  string_table&=encode_32bitstring(string_list[id])
  return(offset)
end function

---------------------------------------------------------------------------

--Used to turn the local var list into a string table.
--vars must exclude nonlocals, and be in order.
function variable_names_to_table(VarList vars)
  string varname
  sequence result
  result={}
  for i=1 to length(vars) do
    --varname=vars[i][CMD_TEXT]
    varname=get_srcpos_text(vars[i][CMD_POS])
    result&=encode_32bitstring(varname)
  end for
  return(result)
end function

---------------------------------------------------------------------------

--returns the offset where this node was either added, or references an existing node
function binary_compile_recurse(Node tree, VarList vars)
  sequence result
  integer kind
  atom id
  integer numargs
  integer at
  string s
  sequence kind_and_id
  sequence value_temp
  string str_temp
  integer recurse_result
  integer can_reference  --whether can return a pointer to an earlier node
  integer nodeoffset
  srcpos cmdpos
  result={}
  can_reference=true
  nodeoffset=length(compiled_data)

  kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars,false)
  kind=kind_and_id[1]
  id=kind_and_id[2]
  s=tree[TREE_TRUNK][CMD_TEXT]
  cmdpos=tree[TREE_TRUNK][CMD_POS]

  if kind=KIND_REFERENCE then
    str_temp = s[2..length(s)]
    --is it a global variable?
    at=find(str_temp,global_list[PAIR_NAME])
    if at then
      --yes, it is a global, compile to global ID
      id=global_list[PAIR_NUM][at]
    else
      --is it a script?
      at=lookup_scoped_script_name(str_temp)
      if at then
        --Yes, it is a script. Compile to a script ID unless it's a reference to a subscript, as
        --I want to eventually make subscript handles closures (without which subscript handles can't work)
        if all_scripts[at][SCRIPT_PARENT_IDX] then
          src_error("References to subscripts haven't been implemented yet. Use a regular script instead if you need its ID.",cmdpos)
        end if
        id=all_scripts[at][SCRIPT_ID]
      else
        --is it a string literal?
        if length(str_temp)>7 and equal("$string",str_temp[1..7]) then
          value_temp=value(str_temp[8..length(str_temp)])
          id=table_string(value_temp[2])
        else
          src_error(sprintf_utf("Reference "&COLYEL&"@%s"&COLRED&" not understood: references must be the name of a global variable or a script",{str_temp}),cmdpos)
        end if
      end if
    end if
    kind=KIND_NUMBER
  end if

  result&=kind
  result&=id
  if kind=KIND_NUMBER then
    --No children and no debug info
  elsif kind=KIND_GLOBAL or kind=KIND_LOCAL or kind=KIND_NONLOCAL then
    --No children, debug info
    if debug_info then
      result&=cmdpos-script_position
    end if
  elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
    --Use id from what_kind_and_id
    if kind=KIND_FUNCTION then
      if max_used_function<id then
        max_used_function=id
      end if
    end if
    numargs=length(tree[TREE_BRANCHES])
    if kind!=KIND_FLOW and numargs>MAX_ARGS then
      src_error(sprintf_utf("Number of arguments being passed (%d) is more than allowed (%d)",{numargs,MAX_ARGS}),cmdpos)
    end if
    result&=numargs
    --Add placeholders for each argoffset
    result&=repeat(0,numargs)

    if debug_info then
      result&=cmdpos-script_position
    end if

    if nodeoffset=0 then
      --Special case: Most nodes are added after their children, but the root
      --node needs to have offset 0, so append it already
      compiled_data&=result
    end if

    for i=1 to numargs do
      --actually evaluate each argument and set the real offsets
      recurse_result=binary_compile_recurse(tree[TREE_BRANCHES][i],vars)
      if recurse_result >= nodeoffset then
        can_reference=false
      end if
      result[3+i]=recurse_result
    end for

    if nodeoffset=0 then
      --Root node: replace the dummy argument offsets, and we're done.
      compiled_data[1..length(result)]=result
      return(nodeoffset)
    end if

    nodeoffset=length(compiled_data)

  else
    src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),cmdpos)
  end if

  --If we've have multiple copies of the exact same node (subtree), deduplicate them.
  --Don't bother with nodes with >4 children: unlikely to repeat.
  --The integer(result[2]) test here is to work around a bug in Euphoria (at least in 4.0.3 32-bit):
  --map:hash throws an error if given an int which doesn't fit in a 31-bit integer
  if not fast_mode and length(result)<=7 and integer(result[2]) then
    if can_reference then  --If false, this node must be unique, so map:get won't find anything.
      at=map:get(node_lookup,result,-1)
      if at>-1 then
        return at
      end if
    end if
    map:put(node_lookup,result,nodeoffset)
  end if

  --append data directly
  compiled_data&=result
  return(nodeoffset)
end function

---------------------------------------------------------------------------

function get_parent_id(Script script)
  integer parent_idx
  parent_idx=script[SCRIPT_PARENT_IDX]
  if parent_idx=0 then
    return(0)
  else
    return(all_scripts[parent_idx][SCRIPT_ID])
  end if
end function

---------------------------------------------------------------------------

function binary_compile(Script script)
  NodeList tree
  sequence result
  integer recurse_ret
  sequence localvars

  string_table={}
  compiled_data={}
  node_lookup=map:new()

  --binary data is in a mix of 16-bit and 32-bit signed words.
  --the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit.

  --output header (0-based offsets given here)
  --0..1: the zero-rooted byte-offset of the first executable code byte
  result=output_word(CODE_START_BYTE_OFFSET)
  --2..3: the number of local variables (including args but not nonlocals)
  result&=output_word(length(script[SCRIPT_VARIABLES]) - script[SCRIPT_NUM_NONLOCALS])
  --4..5: the number of arguments the script takes (also in SCRIPTS.TXT)
  result&=output_word(length(script[SCRIPT_ARG_TOKS]))
  --6..7: the format of the command data (presently used to indicate 32-bit encoding)
  result&=output_word(HSZ_FORMAT_VERSION)
  --8..11: a 32bit offset to the string literal table (in bytes), we don't know it yet
  result&=output_word(0)
  result&=output_word(0)
  --12..13: parent script ID or 0 if none
  result&=output_word(get_parent_id(script))
  --14..15: Nesting depth
  result&=output_word(script[SCRIPT_NEST_DEPTH])
  --16..17: number of nonlocals
  result&=output_word(script[SCRIPT_NUM_NONLOCALS])
  --18..21: the length of the string table in 32-bit words, also unknown
  result&=output_word(0)
  result&=output_word(0)
  --22..23: a bitfield detailing optional features.
  if debug_info then
    --First bit: non-leaf cmd nodes appended with srcpos
    result&=output_word(1)
  else
    result&=output_word(0)
  end if
  --24..27: the offset of the local variable name table, in 32-bit words
  --after the header, or 0 if not present. Currently unknown.
  result&=output_word(0)
  result&=output_word(0)
  --28..31: the srcpos offset of the start of the script
  integer script_start
  script_start=srcpos_offset(script[SCRIPT_TRIGGER_TOK][CMD_POS])
  result&=int_to_bytes(script_start)
  script_position=script_start*power(2,9) --aka encode_srcpos(0,script_start-1,0)

  --what follows is command data in the format [kindID,Value,argcount,argpointerlist]
  --numbers and variables have no argcount or argpointerlist
  --an argpointer is the zero-rooted word-offset of the argument relative
  --to the start of the executable commands. I realise that this format is
  --unnecessarily complicated. I had hoped to get benefits of being able to
  --store frequently reused commands only once and then just point to them,
  --but in actual practice, it isnt worth the trouble, since the only
  --commands that tend to be redundant are the really short ones.
  --the first command is always a "do". there can be only one top-level command
  tree=script[SCRIPT_AST]
  if length(tree)!=1 then
    simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)}))
  end if
  reenter_timing_zone("binary_compile_recurse")
  recurse_ret=binary_compile_recurse(tree[1],script[SCRIPT_VARIABLES])
  if recurse_ret!=0 then
    simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret}))
  end if
  exit_timing_zone()
  result&=convert_to_bytes(compiled_data)
  compiled_data={}
  --append with a table of string literals and give its offset and length
  if length(string_table)>0 then
    if remainder((length(result)-CODE_START_BYTE_OFFSET),4) != 0 then
      simple_error("compiler bug! string table not aligned")
    end if
    if remainder(length(string_table),4) != 0 then
      simple_error("compiler bug! string table not padded")
    end if
    result[9..12]=int_to_bytes(length(result))
    result[19..22]=int_to_bytes(length(string_table)/4)
    result&=string_table
  end if
  if debug_info then
    --append local variable names table
    if remainder(length(result)-CODE_START_BYTE_OFFSET,4) != 0 then
      simple_error("compiler bug! varname table not aligned")
    end if
    localvars=filter_local_variables(script[SCRIPT_VARIABLES])
    if length(localvars) then
      result[25..28]=int_to_bytes((length(result)-CODE_START_BYTE_OFFSET)/4)
      --Could use seek_string_by_id instead of filter_local_variables
      result&=variable_names_to_table(localvars)
    end if
    --Otherwise remains 0
  end if
  return(result)
end function

---------------------------------------------------------------------------

--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(NodeList tree)
  integer i
  NodeList graft
  i=1
  while i<=length(tree) do
    if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
      --found a floaty-bracket
      graft=tree[i][TREE_BRANCHES]
      if length(graft)!=1 then
        src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS])
      end if
      tree[i]=graft[1]
      --delete_element(tree,i)
      --tree=insert_sequence(tree,graft,i)
    else
      if length(tree[i][TREE_BRANCHES]) then
        tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
      end if
      i+=1
    end if
  end while
  return(tree)
end function

---------------------------------------------------------------------------

--parent is CMD_TEXT of parent node
function sanity_check(NodeList tree, VarList vars, string parent)
  string s
  sequence kind_and_id
  integer kind
  atom id
  srcpos pos
  for i=1 to length(tree) do
    s=tree[i][TREE_TRUNK][CMD_TEXT]
    pos=tree[i][TREE_TRUNK][CMD_POS]
    kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars,false)
    kind=kind_and_id[1]
    id=kind_and_id[2]
    if i=1 and equal("if",parent) then
      if kind=KIND_NUMBER then
        if id then
          src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),pos)
        else
          src_warn("Condition is always false",pos)
        end if
      end if
    elsif block_command(parent) then
      if kind=KIND_NUMBER then
        src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
                 ,{id}),pos)
      elsif kind=KIND_GLOBAL then
        src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
                 ,{s}),pos)
      elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then
        src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
                 ,{s}),pos)
      elsif kind=KIND_MATH and (id<=15 or id=19 or id>=22) then
        -- setvariable, increment, decrement, logand, logor have side effects
        src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded"
                 ,{s}),pos)
      end if
    end if
    if kind=KIND_GLOBAL then
      map:put(used_globals,id,true)
    elsif kind=KIND_LOCAL then
      map:put(used_locals,id,true)
    end if
    if length(tree[i][TREE_BRANCHES]) then
      --if there are sub-arguments, recurse
      tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
    end if
  end for
  return(tree)
end function

---------------------------------------------------------------------------

--simulate a result overflowing a signed 32 bit register
function overflow_int32(atom val)
  --maybe check whether val is so large that this is could be inaccurate?
  atom ret
  ret=mod(val,#100000000)
  if ret>2147483647 then
    ret-=#100000000
  end if
  return(ret)
end function

---------------------------------------------------------------------------

function optimized_arg(Node tree, VarList vars)
  sequence kind_and_id
  integer kind
  atom id
  object arg1,arg2
  kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars,false)
  kind=kind_and_id[1]
  id=kind_and_id[2]
  if kind=KIND_NUMBER then
    return(id)
  elsif kind=KIND_MATH and (id<=15 or id>=19) then
    arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
    if equal(arg1,{}) then
      arg1=tree[TREE_BRANCHES][1]
    end if
    if length(tree[TREE_BRANCHES])>=2 then
      arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
    else
      arg2=0
    end if
    if equal(arg2,{}) then
      arg2=tree[TREE_BRANCHES][2]
    end if
    if atom(arg1) and atom(arg2) then
       if id=0 then
         --random
         if arg1=arg2 then
           return(arg1)
         end if
       elsif id=1 then
         --exponent
         if arg2>0 then
           --if too large, Euphoria will signal an overflow, but I'm also worried
           --about overflow_int32 giving inaccurate results
           if log(abs(arg1)+1)+log(abs(arg2)+1)<log(1e14) then
             return(overflow_int32(power(arg1,arg2)))
           end if
         elsif arg2=0 then
           --FB takes 0^0=1 (the most useful definition), Euphoria throws an error
           return(1)
         elsif arg2<0 then
           if arg1=0 then
             src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"0 ^ %d"&COLRED&". This is division by zero.",{arg2}),tree[TREE_TRUNK][CMD_POS])
           elsif arg1=1 or arg1=-1 then
             return(power(arg1,arg2))
           else
             return(0)
           end if
         end if
       elsif id=2 then
         --modulus
         if arg2=0 then
           src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"%d, mod, 0"&COLRED&". This is division by zero.",{arg1}),tree[TREE_TRUNK][CMD_POS])
         end if
         return(floor(remainder(arg1,arg2)))
       elsif id=3 then
         --divide
         if arg2=0 then
           src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"%d / 0"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
         end if
         --Euphoria doesn't have a truncating function?
         if arg1*arg2<0 then
           return(-floor(-arg1/arg2))
         else
           return(floor(arg1/arg2))
         end if
       elsif id=4 then
         --multiply
         return(overflow_int32(arg1*arg2))
       elsif id=5 then
         --subtract
         return(overflow_int32(arg1-arg2))
       elsif id=6 then
         --add
         return(overflow_int32(arg1+arg2))
       elsif id=7 then
         --xor
         return(overflow_int32(xor_bits(arg1,arg2)))
       elsif id=8 then
         --or
         return(overflow_int32(or_bits(arg1,arg2)))
       elsif id=9 then
         --and
         return(overflow_int32(and_bits(arg1,arg2)))
       elsif id=10 then
         --equal
         return(arg1=arg2)
       elsif id=11 then
         --notequal
         return(arg1!=arg2)
       elsif id=12 then
         --lessthan
         return(arg1<arg2)
       elsif id=13 then
         --greaterthan
         return(arg1>arg2)
       elsif id=14 then
         --lessthanorequalto
         return(arg1<=arg2)
       elsif id=15 then
         --greaterthanorequalto
         return(arg1>=arg2)
       elsif id=19 then
         --not
         return(arg1=0)
       elsif id=22 then
         --logxor
         return((arg1=0)!=(arg2=0))
       elsif id=23 then
         --abs
         return(overflow_int32(abs(arg1)))
       elsif id=24 then
         --sign
         return((arg1>0)-(arg1<0))
       elsif id=25 then
         --sqrt
         if arg1<0 then
           src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
         end if
         return(floor(sqrt(arg1)+0.5))
       end if
    end if

    --have to allow for side effects of shortcut evaluating operators by only checking first arg
    if atom(arg1) then
       if id=4 then
         --multiply
         if arg1=0 then
           return 0
         elsif arg1=1 then
           return arg2
         end if
       elsif id=6 then
         --add
         if arg1=0 then
           return arg2
         end if
       elsif id=20 then
         --logand
         if arg1=0 then
           return 0
         end if
       elsif id=21 then
         --logor
         if arg1!=0 then
           return 1
         end if
       end if
    end if

    if atom(arg2) then
       if id=4 then
         --multiply
         if arg2=0 then
           return 0
         elsif arg2=1 then
           return arg1
         end if
       elsif id=5 then
         --subtract
         if arg2=0 then
           return arg1
         end if
       elsif id=6 then
         --add
         if arg2=0 then
           return arg1
         end if
       end if
    end if
  end if
  return({})  --return nonatom
end function

---------------------------------------------------------------------------

--goes through a script simplifying expressions that always have the same value
function optimize_script(NodeList tree, VarList vars)
  object arg
  for i=1 to length(tree) do
    arg=optimized_arg(tree[i],vars)
    if atom(arg) then
      tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
      tree[i][TREE_BRANCHES]={}
    elsif sequence(arg) and length(arg) then
      tree[i]=arg
    end if
    if length(tree[i][TREE_BRANCHES]) then
      --if there are sub-arguments, recurse
      tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
    end if
  end for
  return(tree)
end function

---------------------------------------------------------------------------

--FIXME: if a local is used only in a subscript it is reported as unused
procedure warn_unused_locals(VarList vars)
  for i=1 to length(vars) do
    if vars[i][VAR_FRAME]=0 and not map:has(used_locals,vars[i][VAR_ID]) then
      src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS])
    end if
  end for
end procedure

---------------------------------------------------------------------------

procedure warn_unused_globals()
  integer id
  for i=1 to length(global_list[PAIR_NUM]) do
    id=global_list[PAIR_NUM][i]
    if not map:has(used_globals,id) then
      src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],id}),global_list[GLB_POS][i])
    end if
  end for
end procedure

---------------------------------------------------------------------------

--script_idx is all_scripts index
procedure compile_a_script(integer script_idx)
  Script script
  NodeList script_tree
  VarList local_vars
  TokenList args
  integer num_nonlocals
  integer parent_idx
  integer trigger_idx
  integer trigger

  cur_script_num=script_idx
  script=all_scripts[script_idx]
  trigger_idx=find_in_column(script[SCRIPT_TRIGGER_TOK][CMD_TEXT],trigger_list,PAIR_NAME)
  trigger=trigger_list[trigger_idx][PAIR_NUM]
  current_script=script[SCRIPT_FULL_NAME]
  parent_idx=script[SCRIPT_PARENT_IDX]

  reenter_timing_zone("gather_local_vars")
  if parent_idx then
    local_vars=shift_variables_frame(all_scripts[parent_idx][SCRIPT_VARIABLES])
  else
    local_vars={}
  end if
  num_nonlocals=length(local_vars)
  script&={
     trigger          --trigger type
    ,num_nonlocals
    ,local_vars
  }
  all_scripts[script_idx]=script

  --Get all locals (modifies all_scripts[cur_script_num])
  args=script[SCRIPT_ARG_TOKS] --The arguments are the first several local variables
  for i=1 to length(args) do
    --Checks for name validity
    add_local_variable(args[i])
  end for
  gather_local_vars(script[SCRIPT_BODY_TOKS])
  --script[SCRIPT_VARIABLES] is now outdated
  script=all_scripts[script_idx]
  local_vars=script[SCRIPT_VARIABLES]

  if length(local_vars)-num_nonlocals > MAX_LOCALS then
    simple_error(
      sprintf_utf("Script "&COLYEL&"%s"&COLRED&" has %d arguments and variables but at most %d are supported",
                  {script[SCRIPT_FULL_NAME],length(local_vars)-num_nonlocals,MAX_LOCALS})
    )
  end if
  exit_timing_zone()

  used_locals=map:new()
  reenter_timing_zone("build_ast")
  script_tree=build_ast(script[SCRIPT_BODY_TOKS],local_vars)
  exit_timing_zone()
  reenter_timing_zone("convert_operators")
  script_tree=convert_operators(script_tree)
  exit_timing_zone()
  reenter_timing_zone("expand_macros")
  script_tree=expand_macros(script_tree)
  exit_timing_zone()
  reenter_timing_zone("normalize_flow_control")
  script_tree=normalize_flow_control(script_tree,local_vars,"")
  exit_timing_zone()
  reenter_timing_zone("normalize_arguments")
  script_tree=normalize_arguments(script_tree,local_vars)
  exit_timing_zone()
  reenter_timing_zone("collapse_floaty_brackets")
  script_tree=collapse_floaty_brackets(script_tree)
  exit_timing_zone()
  if not fast_mode then
    reenter_timing_zone("optimize_script")
    script_tree=optimize_script(script_tree,local_vars)
    exit_timing_zone()
    reenter_timing_zone("sanity_check")
    script_tree=sanity_check(script_tree,local_vars,"")
    exit_timing_zone()
    if find('u',optlist) then
      warn_unused_locals(local_vars)
    end if
  end if

  script&={
     script_tree
    ,{}               --compiled data to go into the HSZ lumps
  }
  all_scripts[script_idx]=script
  reenter_timing_zone("binary_compile")
  all_scripts[script_idx][SCRIPT_BINARY]=binary_compile(script)
  exit_timing_zone()

  current_script=""
  cur_script_num=0
end procedure

---------------------------------------------------------------------------

--Parse argument declarations (initially containing default values)
--in a script argument list, returns {arg_names_list, default_vals_list} pair
--where any default values have been removed from the argument names
function process_arglist(TokenList args)
  TokenList arglist
  sequence defaults
  integer i
  integer using_defaults
  arglist={}
  defaults={}
  using_defaults=false
  i=1
  while i<=length(args) do
    if compare("=",args[i][CMD_TEXT])=0 then
      src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS])
    end if
    check_undefined_string(args[i],"argument name")
    if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then
      src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS])
    end if
    if i<length(args) and compare("=",args[i+1][CMD_TEXT])=0 then
      if i+2>length(args) then
        src_error(sprintf_utf("Syntax error: expected default value for script argument "&COLYEL&"%s"&COLRED,{args[i][CMD_TEXT]}),args[i+1][CMD_POS])
      end if
      arglist=append(arglist,args[i])
      defaults&=try_string_to_number({enforce_constants(args[i+2][CMD_TEXT]),args[i+2][CMD_POS]})
      using_defaults=true
      i+=3
    else
      if using_defaults=true then
        --any argument with a default must be followed with arguments with defaults
        src_error(sprintf_utf("Default value missing for argument "&COLYEL&"%s"&COLRED&": the previous argument had a default value, so all following ones must too",{args[i][CMD_TEXT]}),args[i][CMD_POS])
      end if
      arglist=append(arglist,args[i])
      defaults=append(defaults,NO_DEFAULT)
      i+=1
    end if
  end while
  return({arglist,defaults})
end function

---------------------------------------------------------------------------

--Check that every declared script has been defined, with the same number of arguments,
--and each script appears exactly once. Basically, compare script_list and all_scripts
--Also handle argument default values.
--Before this function, scripts only have IDs (stored in script_list) if they are declared in a definescript.
--Afterwards all scripts have IDs assigned (in all_scripts)
procedure check_script_declarations()
  integer at
  sequence args_and_defaults, args, defaults
  sequence scripts_found
  sequence src_position, attributes
  string fullname
  srcpos script_pos
  integer id, parent_id

  scripts_found=repeat(false,length(script_list))

  for i=1 to length(all_scripts) do
    fullname=all_scripts[i][SCRIPT_FULL_NAME]
    parent_id=get_parent_id(all_scripts[i])

    current_script=fullname  --global
    script_pos=all_scripts[i][SCRIPT_POS]
    attributes=all_scripts[i][SCRIPT_ATTRIBUTES]  --Attributes get copied to script_list, where they belong
    --at=find(fullname,column(script_list,PAIR_NAME))
    at=find_in_column(fullname,script_list,PAIR_NAME)

    args_and_defaults=process_arglist(all_scripts[i][SCRIPT_ARG_TOKS])
    args=args_and_defaults[1]
    defaults=args_and_defaults[2]

    if not at then
      --define script optional, add to script list
      id=pick_script_id(parent_id,all_scripts[i][PAIR_NAME])
      script_list=create_function(script_list,id,current_script,defaults,KIND_SCRIPT,script_pos,attributes)
      at=length(script_list)
      scripts_found&={true}
    else
      if scripts_found[at]=true then
        simple_error(sprintf_utf("Compiler bug; two scripts named %s but this error not handled properly",{current_script}))
      end if
      scripts_found[at]=true
      --The srcpos stored for this script will be from definescript. Repoint it at the actual script
      --(doesn't really matter, all_scripts would have ended up with that srcpos anyway)
      script_list[at][FUNC_POS]=script_pos
      script_list[at][FUNC_ATTRS]&=attributes

      if length(args) != length(script_list[at][FUNC_ARGS]) then
        src_error(
          sprintf_utf(
            "Script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its definescript declaration"
            ,{current_script,length(args),length(script_list[at][FUNC_ARGS])}
          )
          ,script_pos
        )
      end if
      for j=1 to length(script_list[at][FUNC_ARGS]) do
        if not equal(defaults[j],NO_DEFAULT) and not equal(defaults[j],script_list[at][FUNC_ARGS][j]) then
          src_error(
            sprintf_utf(
              "Default value for argument "&COLYEL&"%s"&COLRED&" does not match default given in script definition, %d"
              ,{all_scripts[i][SCRIPT_ARG_TOKS][j][CMD_TEXT],script_list[at][FUNC_ARGS][j]}
            )
            ,script_pos
          )
        end if
      end for
    end if
    if length(args)>MAX_ARGS then
      src_error(sprintf_utf("Number of arguments (%d) is more than allowed (%d)",{length(args),MAX_ARGS}),script_pos)
    end if

    all_scripts[i][SCRIPT_ARG_TOKS]=args
    all_scripts[i]&={script_list[at][PAIR_NUM]}  --append SCRIPT_ID
  end for
  current_script=""

  for i=1 to length(scripts_found) do
    if scripts_found[i]=false then
      src_error(
        sprintf_utf("Script "&COLYEL&"%s"&COLRED&" was declared using "&COLYEL&"define script"&COLRED&" but is missing.",{script_list[i][PAIR_NAME]})
        ,script_list[i][FUNC_POS]
      )
    end if
  end for
end procedure

---------------------------------------------------------------------------

procedure compile_each_script()
  sequence count
  enter_timing_zone("Compiling scripts")
  color_print("compiling scripts",{})
  count=repeat(0,length(file_list))

  enter_timing_zone("check_script_declarations")
  check_script_declarations()
  exit_timing_zone()

  for i=1 to length(all_scripts) do
   -- color_print("%s\n",{all_scripts[i][SCRIPT_NAME]})
    reenter_timing_zone("compile_a_script")
    compile_a_script(i)
    exit_timing_zone()
    count[srcpos_file_number(all_scripts[i][SCRIPT_POS])]+=1

    color_print(".",{})
  end for
  color_print("\n",{})
  if not fast_mode and find('u',optlist) then
    warn_unused_globals()
  end if
  for i=1 to length(count) do
    if count[i] then
      wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i][FILE_NAME]})
    end if
  end for
  exit_timing_zone()
end procedure

---------------------------------------------------------------------------

function generate_scripts_dot_txt()
  sequence result
  integer print_scripts
  result=""
  print_scripts = 0
  if find('a',optlist) then
    print_scripts = 2
  elsif find('s',optlist) then
    print_scripts = 1
  end if

  for i=1 to length(script_list) do
    result&=encode_ohr(sprintf_utf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])}))
    for j=1 to length(script_list[i][FUNC_ARGS]) do
      if equal(script_list[i][FUNC_ARGS][j],NO_DEFAULT) then
        result&="0\r\n"
      else
        result&=sprintf("%d\r\n",{script_list[i][FUNC_ARGS][j]})
      end if
    end for
    if (print_scripts=1 and script_list[i][PAIR_NUM]<autonumber_id) or print_scripts=2 then
      wrap_print("%s\r\n",{script_list[i][PAIR_NAME]})
    end if
  end for
  return(result)
end function

---------------------------------------------------------------------------


function generate_scripts_dot_bin()
  sequence result
  sequence encoded_name
  --header size in bytes
  result=output_word(4)
  --record size in bytes
  result&=output_word(44)

  for i=1 to length(all_scripts) do
    result&=output_word(all_scripts[i][SCRIPT_ID])
    result&=output_word(all_scripts[i][SCRIPT_TRIGGER_ID])  --trigger type
    encoded_name=encode_ohr(all_scripts[i][SCRIPT_NAME],36)
    result&=output_word(length(encoded_name))
    result&=encoded_name
    result&=repeat(0,36-length(encoded_name))
    result&=output_word(get_parent_id(all_scripts[i]))
  end for
  return(result)
end function

---------------------------------------------------------------------------

--Returns a list of {id, {parent_id, encode_ohr(script_name)[1..36]}} pairs
function read_scripts_dot_bin(string filename)
  object data
  integer header_size
  integer rec_size
  integer offset
  sequence record, name
  integer id, parent_id
  integer name_len
  sequence ret

  data=read_file(filename)
  if equal(data,-1) then
    simple_error("Could not read "&filename)
  end if

  header_size=read_int16(data,0)
  rec_size=read_int16(data,1)
  if header_size!=4 or rec_size<42 then
    simple_error(filename&" in unrecognised format. Use the latest version of HSpeak")
  end if

  ret={}
  for i=0 to (length(data)-header_size) / rec_size - 1 do
    offset=header_size + i*rec_size
    record=data[offset+1..offset+rec_size]
    id=read_int16(record,0)
    name_len=read_int16(record,2)
    name=record[7..7+name_len-1]
    parent_id=0
    if rec_size>=44 then
      --Scripts compiled by older copies of hspeak didn't include the parent ID
      parent_id=read_int16(record,21)
    end if
    ret&={ {id, {parent_id,name}} }
  end for
  --pretty_print(stdout,ret,{3})
  return ret
end function

---------------------------------------------------------------------------

function generate_commands_dot_bin()
  sequence result
  sequence encoded_names
  integer i, id, name_num
  integer offset
  integer records
  function_list=sort(function_list)
  if length(function_list) then
    records=function_list[$][PAIR_NUM]+1
  else
    records=0
  end if
  if records>2048 then
    simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1}))
    records=2048
  end if
  --header size in bytes (offset to location table)
  result=output_word(6)
  --file format version
  result&=output_word(0)
  --number of records in location table
  result&=output_word(records)

  offset=length(result)+records*2
  id=0
  i=1
  encoded_names={}
  while i<=length(function_list) do
    if function_list[i][PAIR_NUM]<id then
      --skip aliases
      i+=1
    elsif function_list[i][PAIR_NUM]=id then
      result&=output_word(offset)
      encoded_names&={encode_ohr(function_list[i][PAIR_NAME])}
      offset+=4+length(encoded_names[$])
      i+=1
      id+=1
    else
      --no function with this id
      result&=output_word(0)
      id+=1
    end if
  end while
  i=1
  name_num=1
  id=-1
  while i<=length(function_list) and function_list[i][PAIR_NUM]<records do
    if function_list[i][PAIR_NUM]=id then
      --skip aliases
      i+=1
      continue
    end if
    if equal(function_list[i][FUNC_ARGS],VAR_ARGS) then
      result&=output_word(-1)
    else
      result&=output_word(length(function_list[i][FUNC_ARGS]))
    end if
    result&=output_word(length(encoded_names[name_num]))
    result&=encoded_names[name_num]
    name_num+=1
    id=function_list[i][PAIR_NUM]
    i+=1
  end while
  return(result)
end function

---------------------------------------------------------------------------

procedure write_script_files(integer fh)
  string fname
  string temp, ext
  --because script files with identical names in different directories can be
  --included, append a number to lump names which would be duplicate
  integer suffix
  sequence names_used
  names_used={}
  for i=1 to length(file_list) do
    fname=file_only(file_list[i][FILE_NAME])
    ext=extension_only(fname)
    --Lump names can be at most 50 characters, so trim if longer
    if length(fname)>47 then
      temp=without_extension(fname)
      fname=temp[1..46-length(ext)]&"."&ext
    end if
    suffix=count(fname,names_used)
    names_used=append(names_used,fname)
    if suffix>0 then
      temp=without_extension(fname)  --you can't slice a function result directly! :(
      fname=sprintf_utf("%s~%d.%s",{temp[1..$-1],suffix,ext})
    end if
    file_list[i][FILE_LUMP]=fname
    --color_print("writing %d %s\n",{i,fname})
    if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then
      simple_error("unable to write a script source lump")
    end if
  end for
end procedure

---------------------------------------------------------------------------

function generate_srcfiles_dot_txt()
  sequence result
  result=""
  for i=1 to length(file_list) do
    result&=sprintf_utf("file=%s\nlump=%s\noffset=%d\nlength=%d\n",
      {
        absolutize_path(file_list[i][FILE_NAME]),
        file_list[i][FILE_LUMP],
        file_list[i][FILE_OFFSET],
        length(file_list[i][FILE_TEXT])
      })
  end for
  return(result)
end function

---------------------------------------------------------------------------

procedure write_output_file()
  sequence header
  integer fh
  object lh
  enter_timing_zone("Writing output file")
  header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,HSZ_FORMAT_VERSION,max_used_function})
  if length(all_scripts) then
    fh=open(dest_file,"wb")
    dest_file_fh=fh
    if fh!=-1 then
      wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
      --write header and version
      if write_lump(fh,"HS",header)=false then
        simple_error("unable to write header")
      end if
      --write script index (old file)
      if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
        simple_error("unable to write script index")
      end if
      --write script index (new file)
      if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then
        simple_error("unable to write binary script index")
      end if
      --write script commands list
      if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then
        simple_error("unable to write commands listing")
      end if
      --write each script
      for i=1 to length(all_scripts) do
        if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][SCRIPT_ID]}),all_scripts[i][SCRIPT_BINARY])=false then
          simple_error(sprintf_utf("internal problem: unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][SCRIPT_NAME]}))
        end if
      end for
      if debug_info then
        --write copy of scripts
        wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
        lh=begin_lump(fh,"source.lumped")
        if equal(lh,false) then
          simple_error("unable to write scripts source file")
        end if
        write_script_files(fh)
        if end_lump(lh)=false then
          simple_error("unable to finish write of scripts source file")
        end if
        --write source file table (must occur after write_script_files)
        if write_lump(fh,"srcfiles.txt",generate_srcfiles_dot_txt())=false then
          simple_error("unable to write source files listing")
        end if
      end if
      close(fh)
    else
      simple_error(sprintf_utf("attempt to open "&COLYEL&"%s"&COLRED&" failed",{dest_file}))
    end if
  else
    color_print("no scripts to output\n",{})
  end if
  exit_timing_zone()
end procedure

---------------------------------------------------------------------------

init()
load_and_lex_all()
show_source_info()
preliminary_parse_pass()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()

run_time=time()-start_time
color_print("done (%g seconds)\n",{run_time})
if find('t',optlist) then
  print_timing_data()
end if
opt_wait_for_key(was_warnings)
if was_warnings = true then
  abort(2)
end if