-- HamsterSpeak Compiler
--(C) Copyright 2001-2017 James Paige and Hamster Republic Productions
-- Please read LICENSE.txt for GPL License details and disclaimer of liability
-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com
-- This code is written in Euphoria 4.0. You can get the Free Open-Source
-- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend
-- David Cuny's EE editor
---------------------------------------------------------------------------
--Changelog
--3Ta 2017-07-28 Increased MAXGLOBAL to 50000
-- 2017-06-13 0x, 0o and 0b number prefixes
--3T 2017-06-11 Allow subscript vars/args to shadow outer locals
--3Se 2017-05-17 Disallow extra commas
--3Sd 2017-03-17 --incdir option; check ../share/games/ohrrpgce/ instead
--3Sc 2016-04-11 Check /usr/share/games/ohrrpgce/ for include files
--3Sb 2014-11-11 Better 'include' line lexing & garbage detection
--3Sa 2014-08-23 Fix printing of a lexer error message
--3S 2014-07-22 Automatically include plotscr.hsd and ignore
-- double inclusion; added --include and -b
--3Rg 2013-02-23 Fix a subscript related crash
--3Rf 2013-02-17 Fix several 31-bit int overflows
--3Re 2014-02-17 Trivial change to math builtin warnings
----- 2013-11-09 Prevent misuse of betweenable-operators in
-- declarations, and allow constants for global ids
--3Rc 2013-10-11 subscript implementation changes (add nonlocal kind)
--3Rb 2013-10-10 Remove variable() later, for syntax checking
--3Ra 2013-10-09 Fixed \r lineendings (AGAIN) and slow \r\n handling
--3R 2013-10-05 Add subscript
--3Qc 2013-10-04 Disallow flow statements used as expressions
--3Qb 2013-07-24 Require begin after then, do, etc. (else excluded)
--3Qa 2013-04-11 Warn when plotscr.hsd not included
----- 2013-02-28 Allow non-constant expressions in case blocks again
--3Pe 2013-01-03 Faster compile times
--3Pd 2012-10-24 Fix \x escape code
--3Pc 2012-10-09 Change srcpos encoding to remove source size limits
--3Pb 2012-10-08 \x escape code
--3Pa 2012-08-24 --reuse-ids option
--3P 2012-08-18 Change way include is lexed: quotes no longer needed
-- [], {}, . now totally disallowed
-- Add < and > operators
-- Add plotscrversion block and new HS header lump
-- New switch syntax: do after case optional, case(else)
-- Added elseif
--3Of 2012-08-18 Lexer bugfix
--3Oe 2012-04-11 Add \n and \t escape codes
--3Od 2012-03-03 Change directory search order slightly
--3Oc 2012-02-07 Increased MAXGLOBAL to 16383
--3Ob 2012-02-07 Make the max script global number set by a constant
--3Oa 2012-01-20 Fix lexer allowing newlines in strings
--3O 2012-01-19 Unicode support
--3Nd 2011-10-16 Fix commands.bin generation bug
--3Nc 2011-08-28 Additional expression optimisations
--3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow
--3Na 2011-07-25 Add -x option for Hamster Whisper's benefit
--3N 2011-07-12 Add assert
--3Ma 2010-10-21 Correctly strip \r to fix newlines ... again.
--3M 2010-05-02 abs, sign, sqrt; fix error reporting
--3L 2010-04-29 tracevalue support, involving a few internal changes
--3Ke 2010-04-18 Add -t commandline option
--3Kd 2010-04-14 Quite a few bugfixes in optimizer
--3Kc 2010-04-14 Start checking well-formedness of numbers with lexer
--3Kb 2010-04-12 Whole lot of bugfixes in script declarations and
-- checking for unreserved identifier names
--3Ka 2010-04-12 Throw error if source file contains binary
--3K 2010-04-12 Error column reporting (introducing srcpos); lump
-- original source files into source.lumped
--3Ja 2010-04-07 Wrote a (nearly) proper lexer
--3J 2009-12-05 Multiple function names may now alias the same id
--3I 2009-07-27 Export a backup copy of full source to source.txt lump
-- disable with -n option
--3H 2009-06-18 Export function names to commands.bin lump
--3Gk 2009-05-25 Fix short references crash
--3Gj 2009-01-27 Show future-compatability warning when using square
-- brackets [] and curly brackets {} in names
--3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake
--3Gh 2008-07-14 Disable warnings for unused variables by default, and
-- Add -u option to enable warnings for unused variables.
--3Gg 2008-03-01 Raise global variable limits
--3Gf 2008-02-11 Give user another chance on missing Y key :P
--3Ge 2007-07-01 String conversion rewrite to allow more whitespace
--3Gd 2007-07-01 Allow constants for new syntax default args
--3Gc 2007-05-29 Handle empty paratheses correctly, and use
-- lowercase ".hs" extension
--3Gb 2007-05-09 Throw error on missing include file
--3Ga 2007-05-09 String length not limited to 40
--3G 2007-02-09 Offset to string literal tables given as a 32-bit
-- int, script format version++ to 2.
--3F 2006-12-11 Support for variable number of arguments to functions
-- by giving -1 as the number of args in the define block
--3Ec 2006-11-30 while(true) no longer produces a warning
--3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes
--3Ea 2006-09-03 Actually export triggers in scripts.bin
--3E 2006-08-30 "define trigger" support
--3D 2006-08-16 String literal lookup tables added to end of
-- HSZ lumps replacing setstring + appendstring
--3C 2006-08-15 New syntax for giving default script arguments,
-- and for leaving out definescript
--3B 2006-08-13 HSX lumps renamed to HSZ
--3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format
-- version added to HSX header. Current is 1
--2Na 2006-07-07 Minor update to add new logical operators
--2N 2006-07-07 Added not() logic function
--2M 2006-07-06 Set exit code on warnings
--2L 2006-05-13 Added switch statement (+ case keyword)
--2K 2006-05-01 Added @scriptname and @globalvariable syntax to
-- return script or global ID number at compile-time
-- not run-time (for use with "run script by ID" and
-- "read global" and "write global")
--2J 2006-04-10 Added break, continue, exitscript, exitreturning
-- flow statements. Also fixed some return bugs
--2I 2006-04-04 Extended HSX header to include number of arguments
-- to a script, to really fix arguments-overflow-into
-- -locals bug
--2H 2006-03-29 Display better help and wait for keypress when run
-- by double-clicking the icon. Added -k command line
-- option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $=
-- Mention GPL in help text
--2F 2005-07-24 Strings implemented:
-- $id="..." -> setstring
-- $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
-- Added += and -= operators thanks to a patch from
-- The Mad Cacti and Fyrewulff
--2D 2002-08-03 Only a small change, strip out \r from lines of
-- the script as we read them to avoid errors related
-- to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
-- non-printable characters exist in the input file.
-- (naturally the script will not compile right, but
-- at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
-- defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2 First Release
---------------------------------------------------------------------------
without warning --to avoid annoying warnings
without type_check --for a small speed boost
--with profile_time --time profiling
include hsspiffy.e --various routines, sequence manipulation - James Paige
include graphics.e --standard library, needed for color output
include machine.e --needed for int_to_bytes
include std/math.e --needed for mod
include euphoria/unicode.e
include euphoria/ucstypes.e
include euphoria/io_with_unicode.e
---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1
-- increment COMPILER_VERSION only for major language-altering changes
constant COMPILER_VERSION="3"
-- For small changes increment the second letter of COMPILER_SUB_VERSION
-- For large changes increment the first letter and reset the second letter
--*Please make sure this is 2 chararacters long! Append a space if required.*
constant COMPILER_SUB_VERSION="Ta"
constant COPYRIGHT_DATE="2017"
--This is the oldest version of plotscr.hsd which is supported (feel free to
--increment this whenever using an older plotscr.hsd means you're missing out
--on new but nonessential features; there's no harm)
--*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION*
constant MIN_PLOTSCR_VERSION="3P "
--these constants are color-flags. May add more colors, up to #F8FF
--(these codes are at the end of the first private use area)
constant COL_FIRST=#F8E0
constant COLYEL=#F8E0
constant COLRED=#F8E1
constant COLPNK=#F8E2
constant COLWHI=#F8E3
constant COLBWHI=#F8E4
constant COL_LAST=#F8FF
--Euphoria's color constants vary depending on the platform, so we avoid them
constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE}
--These are character classes used in lexer_table
constant LEX_CHAR=0 --Most character which are valid in identifiers. Everything above char 255
constant LEX_SPACE=1 --space and tab
constant LEX_BEGIN=2 --(
constant LEX_END=3 --)
constant LEX_OPERATOR=4 --The first character of an operator
constant LEX_COMMENT=5 --#
constant LEX_STRING=6 --"
constant LEX_COMMA=7 --,
constant LEX_WARN=8 --[]{}.
constant LEX_BINARY=9 --Nonprintable characters
constant LEX_DIGIT=10 --0-9
constant LEX_HEXDIGIT=11 --a-fA-F
constant LEX_OPERATOR2=12 --Only used in lexer_table_key2!!
constant LEX_NEWLINE=13 --\n
constant FILE_NAME=1
constant FILE_TEXT=2
constant FILE_LINE_EXTENTS=3
constant POS_FILENAME=1
constant POS_LINE=2
constant POS_COLUMN=3
constant POS_TEXT=4
constant CMD_TEXT=1
constant CMD_POS=2
constant RESERVE_TOPLEVEL=1 --Tokens that are only allowed at the toplevel, but not including script triggers
constant RESERVE_UNIMPLEMENTED=2
constant RESERVE_FLOW=3 --Flow control keywords
constant RESERVE_FUNCTION=4 --Name of a defined function
constant RESERVE_SCRIPT=5 --etc
constant RESERVE_GLOBAL=6
constant RESERVE_BEGIN=7
constant RESERVE_END=8
constant RESERVE_OPERATOR=9
constant RESERVE_BUILTIN=10
constant RESERVE_CONSTANT=11
constant RESERVE_MACRO=12
constant RESERVE_KEYWORD=13 --Miscellaneous keywords which can only occur inside scripts
constant RESERVE_NAMES={"declaration"
,"unimplemented keyword/operator"
,"flow control"
,"hardcoded function"
,"user script"
,"global variable"
,"bracket "&COLYEL&"("&COLRED
,"bracket "&COLYEL&")"&COLRED
,"operator"
,"builtin command"
,"constant"
,"hardcoded function/macro"
,"keyword"
}
constant PAIR_NUM=1
constant PAIR_NAME=2
constant PAIR_START=1
constant PAIR_END=2
constant OPER_TRUENAME=3
constant OPER_POS=4
constant FUNC_ARGS=3 --Either list of default values (each an integer or NO_DEFAULT), or VAR_ARGS
constant FUNC_POS=4 --srcpos of start of function/script
constant GLB_POS=3 --srcpos of globalvariable definition
constant CONST_VALUE=1
constant CONST_POS=2
constant VAR_FRAME=3 --VarToken index, integer giving frame number of a variable (0 is current script, 1 is parent...)
constant VAR_ID=4 --VarToken index, integer giving ID in frame
constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value
constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args
constant KIND_NUMBER=1
constant KIND_FLOW=2
constant KIND_GLOBAL=3
constant KIND_LOCAL=4
constant KIND_MATH=5
constant KIND_FUNCTION=6
constant KIND_SCRIPT=7
constant KIND_NONLOCAL=8
constant KIND_REFERENCE=9 --converted to KIND_NUMBER in compiled script
constant KIND_OPERATOR=10 --never appears in compiled script
constant KIND_PARENS=11 --never appears in compiled script
constant KIND_MACRO=12 --never appears in compiled script
constant KIND_KEYWORD=13 --never appears in compiled script
constant KIND_LONGNAMES={"number"
,"flow control statement"
,"global variable"
,"local variable"
,"built-in function"
,"hard-coded function"
,"script"
,"non-local variable"
,"reference"
,"untranslated operator"
,"order-of-operations-enforcing parenthesis"
,"hard-coded function/macro"
}
constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant TIMING_DEPTH=1
constant TIMING_DESCRIPTION=2
constant TIMING_TIME=3
constant TIMING_ACTIVE=4
--Version number for the .hs/.hsp format (not including HSZ files!)
constant HSP_FORMAT_VERSION=1
constant CODE_START_BYTE_OFFSET=18
constant HSZ_FORMAT_VERSION=3
constant MAXGLOBAL=50000 --largest ID
constant MAX_LOCALS=100 --max number of locals per script (not including nonlocals)
constant MAX_NEST_DEPTH=4
---------------------------------------------------------------------------
--globals-- --initializations--
string compiler_dir compiler_dir=""
string include_dir include_dir=""
string source_file source_file=""
string dest_file dest_file=""
sequence optlist optlist={}
sequence file_list file_list={} --type {FILE_NAME,FILE_TEXT,FILE_LINE_EXTENTS}
sequence additional_includes
additional_includes={}
integer max_srcpos max_srcpos=0
integer total_lines total_lines=0
sequence cmd cmd={} --TokenList (but type checking is very slow)
sequence reuse_ids reuse_ids={}
sequence constant_list constant_list=alpha_tree_create()
sequence trigger_list trigger_list={} --type {PAIR_NUM, PAIR_NAME}
sequence operator_list operator_list={} --type {PAIR_NUM, PAIR_NAME, OPER_TRUENAME, OPER_POS}
sequence function_list function_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS}
sequence global_list global_list={{},{},{}} --NOTE: three lists {PAIR_NUM, PAIR_NAME, GLB_POS}, not a list of globals
sequence string_list string_list={} --list of strings
sequence script_list script_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS} (PAIR_NAME is the full name)
constant SCRIPT_TRIGGER_TOK=1 --Trigger token (type {CMD_TEXT, CMD_POS}), e.g. "script"
constant SCRIPT_NAME=2 --String, not fully-scoped
constant SCRIPT_POS=3 --(scrpos of script name)
constant SCRIPT_FULL_NAME=4 --Full-scoped script name
constant SCRIPT_ARG_TOKS=5 --List of argument name tokens (type {CMD_TEXT, CMD_POS})
--Initially the tokens may include default values (like "a=4"), which are later removed
constant SCRIPT_BODY_TOKS=6 --List of tokens (type {CMD_TEXT, CMD_POS}) in the body, of the form do,begin,...,end
constant SCRIPT_PARENT_IDX=7 --index in all_scripts, or 0 for none
constant SCRIPT_NEST_DEPTH=8 --Nesting depth. A script is 0, subscript 1, sub-subscript 2...
constant SCRIPT_ID=9 --Script ID. Value not appended until all IDs are assigned in check_script_declarations
constant SCRIPT_TRIGGER_ID=10 --trigger ID. SCRIPT_TRIGGER_ID and later not appended until compile_a_script
constant SCRIPT_NUM_NONLOCALS=11
constant SCRIPT_AST=12
constant SCRIPT_VARIABLES=13 --VarList of locals and nonlocals
constant SCRIPT_BINARY=14
sequence all_scripts all_scripts={} --list of Scripts == {SCRIPT_*} (variable length)
sequence reserved reserved=alpha_tree_create()
atom start_time start_time=time()
atom run_time run_time=0
sequence timing_data timing_data={}
integer timing_depth timing_depth=-1
integer get_cmd_pointer get_cmd_pointer=0
integer autonumber_id autonumber_id=32767
sequence flow_list flow_list={
{0,"do"}
,{1,"begin"}
,{2,"end"}
,{3,"return"}
,{4,"if"}
,{5,"then"}
,{6,"else"}
,{7,"for"}
,{10,"while"}
,{11,"break"}
,{12,"continue"}
,{13,"exitscript"}
,{14,"exitreturning"}
,{15,"switch"}
,{16,"case"} --never appears in compiled script
,{17,"subscript"} --never appears in compiled script
}
--elseif is not a flow type
sequence flow_requiring_brackets --else is excluded, because of case(else)
flow_requiring_brackets={
"do",
"if",
"then",
"elseif",
"for",
"while",
"exitreturning",
"switch",
"case"
}
sequence math_list math_list={ --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS} (FUNC_POS omitted)
{0,"random",{0,1}}
,{1,"exponent",{0,2}}
,{2,"modulus",{0,1}}
,{3,"divide",{0,1}}
,{4,"multiply",{0,0}}
,{5,"subtract",{0,0}}
,{6,"add",{0,0}}
,{7,"xor",{0,0}}
,{8,"or",{0,0}}
,{9,"and",{0,0}}
,{10,"equal",{0,0}}
,{11,"notequal",{0,0}}
,{12,"lessthan",{0,0}}
,{13,"greaterthan",{0,0}}
,{14,"lessthanorequalto",{0,0}}
,{15,"greaterthanorequalto",{0,0}}
,{16,"setvariable",{0,0}}
,{17,"increment",{0,1}}
,{18,"decrement",{0,1}}
,{19,"not",{0}}
,{20,"logand",{0,0}}
,{21,"logor",{0,0}}
,{22,"logxor",{0,0}}
,{23,"abs",{0}}
,{24,"sign",{0}}
,{25,"sqrt",{0}}
}
--update is_simple_math_op on adding new operators
sequence separator_list separator_list={
"+=","-=","$+","$=","$","+","--","/","*","^^","^"
,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||"
}
sequence lexer_table lexer_table={} --maps from ASCII value+1 (note +1!) to LEX_* constant
sequence lexer_table_key2 lexer_table_key2={}
string hex_chars hex_chars="0123456789abcedfABCDEF"
sequence hex_char_values hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F}
sequence compiled_data compiled_data={} --used inside binary_compile only; compiled data for the current script
string current_script current_script="" --name of current script
sequence current_scopes current_scopes={""} --list of possible prefixes to add to (sub)script names, eg "master<-"
integer cur_script_num cur_script_num=0 --index into all_scripts of current script
integer colors_enabled colors_enabled=true
integer simple_colorcodes simple_colorcodes=false
integer error_file error_file=false
sequence used_globals used_globals={} --list of strings
sequence used_locals used_locals={} --list of strings
sequence string_table string_table={} --binary data
string plotscr_version plotscr_version=""
integer max_used_function max_used_function=0 --maximum id of a function used in a script
integer fast_mode fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
integer was_warnings was_warnings=false
---------------------------------------------------------------------------
--types--
--A position in the script source. An integer, encoding an index in file_list
--and a character number that file (where newlines are one character).
--See encode_srcpos and decode_srcpos
type srcpos(object pos)
--need to allow 0 as a dummy value
if atom(pos) and pos=floor(pos) and pos>=0 and pos<=max_srcpos then
return true
else
return false
end if
end type
--Not the same as Euphoria's string type, which is limited to extended ASCII
type string(object str)
if not sequence(str) then
return false
end if
--Too slow
-- for i=1 to length(str) do
-- if not integer(str[i]) or str[i]<0 then
-- return false
-- end if
-- end for
return true
end type
--A CMD_TEXT,CMD_POS pair
type Token(object tok)
if sequence(tok) and length(tok)=2 and string(tok[1]) and srcpos(tok[2]) then
return true
else
return false
end if
end type
--A CMD_TEXT,CMD_POS,VAR_FRAME,VAR_ID tuple describing a local/nonlocal variable
type VarToken(object var)
if length(var)=4 and string(var[1]) and srcpos(var[2]) and integer(var[3]) and integer(var[4]) then
return true
end if
return false
end type
--sequence of Tokens
type TokenList(object list)
if not sequence(list) then
return false
end if
for i=1 to length(list) do
if not Token(list[i]) then
return false
end if
end for
return true
end type
--sequence of VarTokens
type VarList(object vars)
if not sequence(vars) then
return false
end if
for i=1 to length(vars) do
if not VarToken(vars[i]) then
return false
end if
end for
return true
end type
--{Token,children}
type Node(object node)
if sequence(node) and length(node)=2 and Token(node[1]) and sequence(node[2]) then
for i=1 to length(node[2]) do
if not Node(node[2][i]) then
return false
end if
end for
return true
else
return false
end if
end type
--sequence of Nodes
type NodeList(object list)
if not sequence(list) then
return false
end if
for i=1 to length(list) do
if not Node(list[i]) then
return false
end if
end for
return true
end type
--An element of all_scripts
type Script(object script)
if sequence(script) and length(script)>=SCRIPT_NEST_DEPTH and length(script)<=SCRIPT_BINARY then
return true
else
return false
end if
end type
---------------------------------------------------------------------------
--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
atom skip_time
integer key
skip_time=time()
key=wait_key()
skip_time=time()-skip_time
start_time+=skip_time
for i=1 to length(timing_data) do
if timing_data[i][TIMING_ACTIVE] then
timing_data[i][TIMING_TIME]+=skip_time
end if
end for
return(key)
end function
---------------------------------------------------------------------------
--Euphoria's built-in sprintf truncates all string elements to 8 bits,
--so this is an Unicode-enabled replacement. Supports %d, %s, %g, %%
--Beware! This will be way slower than sprintf, but luckily it's not
--needed anywhere where speed matters.
function sprintf_utf(string s, sequence printf_args)
integer seg_start, seg_end
integer arg_ctr
integer code
string ret
if length(printf_args)=0 then
return s
end if
ret=""
arg_ctr=1
seg_start=1
seg_end=find('%',s)
while seg_end do
ret&=s[seg_start..seg_end-1]
code=s[seg_end+1]
if code='s' then
ret&=printf_args[arg_ctr]
arg_ctr+=1
elsif code='d' or code='g' then
ret&=sprintf({'%',code},printf_args[arg_ctr])
arg_ctr+=1
elsif code='%' then
ret&='%'
else
simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]}))
end if
seg_start=seg_end+2
seg_end=find('%',s,seg_start)
end while
if seg_start<=length(s) then
ret&=s[seg_start..$]
end if
if arg_ctr!=length(printf_args)+1 then
simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1}))
end if
return ret
end function
---------------------------------------------------------------------------
--fprintf as UTF8
procedure print_utf(object fh, string s, sequence printf_args)
if length(printf_args) then
s=sprintf_utf(s,printf_args)
end if
s=toUTF(s,utf_32,utf_8)
puts(fh,s)
end procedure
---------------------------------------------------------------------------
--prints a string with printf to stdout converting color codes
procedure color_print(string s, sequence printf_args)
string buffer
s=sprintf_utf(s,printf_args)
if simple_colorcodes then
buffer=s
else
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
print_utf(stdout,buffer,{})
buffer=""
if colors_enabled then
text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1])
end if
else
buffer&=s[i]
end if
end for
end if
if length(buffer) then
print_utf(stdout,buffer,{})
end if
end procedure
---------------------------------------------------------------------------
procedure opt_wait_for_key()
integer key
if not find('k',optlist) then
color_print("[Press Any Key]\n",{})
key=timeless_wait_key()
end if
end procedure
---------------------------------------------------------------------------
procedure enter_timing_zone(string description)
if not find('t',optlist) then
return
end if
timing_depth+=1
timing_data=append(timing_data,{timing_depth,description,-time(),true})
end procedure
---------------------------------------------------------------------------
procedure reenter_timing_zone(string description)
if not find('t',optlist) then
return
end if
for i=1 to length(timing_data) do
if equal(timing_data[i][TIMING_DESCRIPTION],description) then
timing_depth+=1
timing_data[i][TIMING_DEPTH]=timing_depth --could change...
timing_data[i][TIMING_TIME]-=time()
timing_data[i][TIMING_ACTIVE]=true
return
end if
end for
enter_timing_zone(description)
end procedure
---------------------------------------------------------------------------
procedure exit_timing_zone()
if not find('t',optlist) then
return
end if
for i=length(timing_data) to 1 by -1 do
--there can only be one active timing job at each depth at a time
if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then
timing_data[i][TIMING_TIME]+=time()
timing_data[i][TIMING_ACTIVE]=false
exit
end if
end for
timing_depth-=1
end procedure
---------------------------------------------------------------------------
procedure print_timing_data()
string indent
sequence data
string tmp1, tmp2
if run_time=0 then
--so that all percentages show as 0%
run_time=1e100
end if
for i=1 to length(timing_data) do
data=timing_data[i]
indent=repeat(' ',data[TIMING_DEPTH]*4)
tmp1=sprintf("%.2f",{data[TIMING_TIME]})
tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time})
color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%% %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]})
end for
end procedure
---------------------------------------------------------------------------
function html_char_convert(string s)
string buffer
string result
result=""
buffer=""
for i=1 to length(s) do
if s[i]=' ' and i>1 then
if s[i-1]=' ' then
buffer&=" "
else
buffer&=s[i]
end if
elsif s[i]='<' then
buffer&="<"
elsif s[i]='>' then
buffer&=">"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
function error_string_convert(string s)
string buffer
string result
result=""
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
if s[i]=COLYEL then
buffer&=""
elsif s[i]=COLRED then
buffer&=""
elsif s[i]=COLPNK then
buffer&=""
elsif s[i]=COLWHI then
buffer&=""
elsif s[i]=COLBWHI then
buffer&=""
end if
elsif s[i]='\n' then
buffer&="
\n"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
--prints a long string wrapped at 80 columns
procedure wrap_print(string s, sequence arguments)
string outstring
s=sprintf_utf(s,arguments)
while length(s) do
outstring=before_wrap_point(s)
s=after_wrap_point(s)
color_print("%s\n",{outstring})
end while
end procedure
---------------------------------------------------------------------------
procedure error_file_print(string s)
integer fh
if error_file then
fh=open(compiler_dir&"hs_error.htm","a")
if fh!=failure then
print_utf(fh,error_string_convert(s)&"\n",{})
close(fh)
end if
end if
end procedure
---------------------------------------------------------------------------
--prints out warning message in red with word wrap
procedure simple_warn(string s)
sequence pos
if not find('w',optlist) then
--do not warn if -w is set
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("WARNING: %s",{html_char_convert(s)}))
was_warnings = true
end if
end procedure
---------------------------------------------------------------------------
--prints out an error message in red with word wrap, then aborts
procedure simple_error(string s)
sequence pos
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("ERROR: %s",{html_char_convert(s)}))
if end_anchor_kludge then
error_file_print("\n
\n")
end if
opt_wait_for_key()
abort(1)
end procedure
---------------------------------------------------------------------------
--prints out usage info if not enough arguments
procedure check_arg_count(sequence args)
if length(args)=2 then
wrap_print("HamsterSpeak semicompiler v%s%s (C)%s James Paige&Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE})
wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{})
wrap_print(COLYEL&"%s [-abcdfknstwuyz] [long-options] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))})
color_print(" -f fast mode. Disables some optimization\n",{})
color_print(" -k do not wait for a keypress when finished\n",{})
color_print(" -n don't add debug info, or a copy of the source code, to .hs file\n",{})
color_print(" -w suppress minor warnings\n",{})
color_print(" -u show warnings for unused variables\n",{})
color_print(" -y overwrite the destination file without asking\n",{})
color_print(" --incdir \n" &
" also search this directory for include files\n",{})
color_print(" --include \n" &
" include an additional file\n",{})
color_print("\nAdvanced options (not intended for normal use):\n",{})
color_print(" -b don't include plotscr.hsd automatically\n",{})
color_print(" -c colors will be disabled\n",{})
color_print(" -d dump debug report to hs_debug.txt\n",{})
color_print(" -s print the name of each plotscript as it is compiled\n",{})
color_print(" -a same as -s, but printing all scripts\n",{})
color_print(" -t print detailed timing info (for HSpeak developers)\n",{})
color_print(" -z write error messages to hs_error.htm\n",{})
--Undocumented arguments:
-- --unicode-cols
-- used by Hamster Whisper for piped output
-- --reuse-ids scripts.bin
-- Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping
wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
wrap_print("\nThis is a command-line program. You should either run it from the command-line, or drag and drop your script file onto it. You don't ever need to run HSpeak yourself; Custom will do that for you.\n",{})
opt_wait_for_key()
abort(0)
end if
end procedure
---------------------------------------------------------------------------
--Build the lexer tables, which for each code point 0<=i<256, contains the type
--of that character in entry i+1 (have to handle NUL)
--Other characters above code 255 are always LEX_CHAR
procedure init_lexer()
sequence table_insert
--main table
table_insert={" (),\n#\t\"[]{}.",
{LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_NEWLINE,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN}}
--by default each character is either binary, or an allowed identifier
--character. This is for backwards compatibility
--NOTE the +1!
lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1)
for i=1 to length(table_insert[1]) do
lexer_table[table_insert[1][i]+1]=table_insert[2][i]
end for
for i=0 to 9 do
lexer_table['0'+i+1]=LEX_DIGIT
end for
for i=0 to 5 do
lexer_table['a'+i+1]=LEX_HEXDIGIT
lexer_table['A'+i+1]=LEX_HEXDIGIT
end for
--add the one-character prefix of each operator
for i=1 to length(separator_list) do
lexer_table[separator_list[i][1]+1]=LEX_OPERATOR
end for
--keyword suffix parsing table
lexer_table_key2=repeat(LEX_CHAR,256)
lexer_table_key2[' '+1]=LEX_SPACE
lexer_table_key2['\t'+1]=LEX_SPACE
--2nd character of each operator
for i=1 to length(separator_list) do
if length(separator_list[i])=2 then
lexer_table_key2[separator_list[i][2]+1]=LEX_OPERATOR2
end if
end for
--when floating point is added will want a table for number parsing
end procedure
---------------------------------------------------------------------------
--initializes global variables, and generally gets things ready to roll
procedure init()
sequence args
integer index
integer key
integer fh
--Process arguments
args=command_line()
compiler_dir=path_only(args[2])
check_arg_count(args)
optlist={}
index=3
while index<=length(args) do
--These are undocumented
if equal(args[index],"--unicode-cols") then
simple_colorcodes=true
args=delete_element(args,index)
elsif equal(args[index],"--reuse-ids") and index+1<=length(args) then
color_print("Reusing script ID numbers from previous compile\n",{})
reuse_ids=read_scripts_dot_bin(args[index+1])
args=delete_slice(args,index,index+1)
elsif equal(args[index],"--include") and index+1<=length(args) then
additional_includes&={args[index+1]}
args=delete_slice(args,index,index+1)
elsif equal(args[index],"--incdir") and index+1<=length(args) then
include_dir=args[index+1]
args=delete_slice(args,index,index+1)
elsif args[index][1]='-' then
optlist=optlist&hs_lower(args[index][2..length(args[index])])
args=delete_element(args,index)
else
index+=1
end if
end while
check_arg_count(args)
source_file=normalize_filename(args[3])
if length(args)>3 then
dest_file=normalize_filename(args[4])
else
dest_file=normalize_filename(alter_extension(source_file,"hs"))
end if
if find('f',optlist) then
fast_mode=true
color_print("Using fast mode. Some size optimization disabled\n",{})
end if
if find('c',optlist) then
colors_enabled=false
end if
--'x' used by old versions of Hamster Whisper to specify color code format
--However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway
--Replaced by --unicode-cols
--the semi-undocumented command line argument -z writes a file called
--hs_error.htm formatted for HssEd to read
if find('z',optlist) then
error_file=true
if file_exists(compiler_dir&"hs_error.htm") then
fh=open(compiler_dir&"hs_error.htm","w")
if fh!=failure then
puts(fh,"")
close(fh)
end if
end if
end if
wrap_print("HamsterSpeak semicompiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION})
wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
if file_exists(dest_file) then
if find('y',optlist) then
--found the -y command line arg, overwrite automatically
key='y'
else
while true do
--prompt the user to overwrite
wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
key=timeless_wait_key()
color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})
if hs_lower(key)='y' then
exit
elsif hs_lower(key)='n' then
simple_error("output file overwrite cancelled by user")
else
wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)})
end if
end while
end if
end if
--Init global data tables
--why the alpha-tree? because the reserved-word list can get HUGE.
--we want to be able to look up words in it quickly. A btree or some such
--thing would have been even better, but thats alot of trouble :)
reserved=alpha_tree_mass_insert(reserved,{
{"defineconstant",RESERVE_TOPLEVEL}
,{"definetrigger" ,RESERVE_TOPLEVEL}
,{"defineoperator",RESERVE_TOPLEVEL}
,{"globalvariable",RESERVE_TOPLEVEL}
,{"definefunction",RESERVE_TOPLEVEL}
,{"definescript" ,RESERVE_TOPLEVEL}
,{"plotscrversion",RESERVE_TOPLEVEL}
,{"include" ,RESERVE_TOPLEVEL} --should never appear, in theory
,{"do" ,RESERVE_FLOW}
,{"begin" ,RESERVE_BEGIN}
,{"end" ,RESERVE_END}
,{"return" ,RESERVE_FLOW}
,{"if" ,RESERVE_FLOW}
,{"then" ,RESERVE_FLOW}
,{"else" ,RESERVE_FLOW}
,{"elseif" ,RESERVE_FLOW} --not in flow_list... shouldn't that be a problem?
,{"for" ,RESERVE_FLOW}
,{"cfor" ,RESERVE_UNIMPLEMENTED}
,{"foreach" ,RESERVE_UNIMPLEMENTED}
,{"while" ,RESERVE_FLOW}
,{"break" ,RESERVE_FLOW}
,{"continue" ,RESERVE_FLOW}
,{"exitscript" ,RESERVE_FLOW}
,{"exitreturning" ,RESERVE_FLOW}
,{"switch" ,RESERVE_FLOW}
,{"case" ,RESERVE_FLOW}
,{"subscript" ,RESERVE_KEYWORD} --keywords have special case handling
,{"variable" ,RESERVE_KEYWORD}
,{"=" ,RESERVE_UNIMPLEMENTED}
,{"tracevalue" ,RESERVE_MACRO}
,{"assert" ,RESERVE_MACRO}
-- Stray "$"'s are picked up by the lexer (not totally ideal maybe)
})--end mass_insert
for i=1 to length(math_list) do
reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN)
end for
-- Adding the separators (operators) here prevents them from being used before
-- defineoperator is handled (either a block before include,plotscr.hsd, or in
-- e.g. defineconstant)
for i=1 to length(separator_list) do
if alpha_tree_data(reserved,separator_list[i],false)=false then
reserved=alpha_tree_insert(reserved,separator_list[i],RESERVE_OPERATOR)
end if
end for
--Append subscript trigger id (this is needed for scripts.bin)
trigger_list=append(trigger_list,{-1,"subscript"})
--Lexer finite state transition tables
init_lexer()
end procedure
---------------------------------------------------------------------------
--returns {filenumber,file position} pair
function decode_srcpos_file(srcpos pos)
integer val
val=pos
--0 is an invalid srcpos
if val=0 then
simple_error("compiler bug: can't decode uninitialised srcpos")
end if
val-=1
for i=1 to length(file_list) do
--an offset of 0 is reserved, but currently unused
if val=0 then
simple_error("compiler bug: 0 srcpos file offset")
end if
if val<=length(file_list[i][FILE_TEXT]) then
return {i,val}
end if
val-=length(file_list[i][FILE_TEXT])+1
end for
simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos}))
end function
---------------------------------------------------------------------------
function srcpos_file_number(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[1]
end function
---------------------------------------------------------------------------
--Returns 1-based character number in file
function srcpos_point(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[2]
end function
---------------------------------------------------------------------------
--Encode offset (1-based characters from start of file) in a file as a srcpos
function encode_srcpos(integer filenum, integer offset)
srcpos pos
pos=1
for i=1 to filenum-1 do
pos+=length(file_list[i][FILE_TEXT])+1
end for
return pos+offset
end function
---------------------------------------------------------------------------
--possibly slow: prehaps avoid heavy use
--returns {file name, line number, column number, line text}: index with POS_*
--column number returned is 0-based
function decode_srcpos(srcpos pos)
integer point, filenum
integer lineno
string filetext
sequence lines
sequence temp
temp=decode_srcpos_file(pos)
filenum=temp[1]
point=temp[2]
filetext=file_list[filenum][FILE_TEXT]
lines=file_list[filenum][FILE_LINE_EXTENTS]
lineno=length(lines)
for i=1 to length(lines) do
if point<=lines[i][PAIR_END] then
lineno=i
exit
end if
end for
return({file_list[filenum][FILE_NAME],lineno,point-lines[lineno][PAIR_START],filetext[lines[lineno][PAIR_START]..lines[lineno][PAIR_END]]})
end function
---------------------------------------------------------------------------
function form_error_text(string s, srcpos pos)
string line
sequence src_position
string column_display
integer tab_compensate
src_position=decode_srcpos(pos)
--?pos
--pretty_print(1,src_position,{2})
line=substring_replace(src_position[POS_TEXT],"\t"," ")
tab_compensate=3*count('\t',src_position[POS_TEXT][1..src_position[POS_COLUMN]])
column_display=repeat(' ',src_position[POS_COLUMN]+tab_compensate)
error_file_print(sprintf_utf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]}))
if length(current_script) then
return(
sprintf_utf(
"in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
else
return(
sprintf_utf(
"in line %d of "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
end if
--note that the calling procedure must send/deal with closing tags to error_file_print
end function
---------------------------------------------------------------------------
procedure src_warn(string s, srcpos pos)
if not find('w',optlist) then
--do not warn if -w is set
simple_warn(form_error_text(s,pos))
error_file_print("\n
\n")
end if
end procedure
---------------------------------------------------------------------------
procedure src_error(string s, srcpos pos)
end_anchor_kludge=true
simple_error(form_error_text(s,pos))
end procedure
---------------------------------------------------------------------------
--Find newlines in a file (possibly mixed type), returns line extents
function split_lines(string file_text)
integer line_start=1
integer last_13=-1
sequence line_extents = {} -- {start of line, end of line} pairs, both inclusive
for i=1 to length(file_text) do
if file_text[i]=13 then
line_extents=append(line_extents, {line_start,i-1})
last_13=i
line_start=i+1
elsif file_text[i]=10 then
if last_13!=i-1 then -- handle {13,10} "\r\n"
line_extents=append(line_extents, {line_start,i-1})
end if
line_start=i+1
end if
end for
if line_start<=length(file_text) then
line_extents=append(line_extents, {line_start,length(file_text)})
end if
return line_extents
end function
---------------------------------------------------------------------------
--Read a file, adding to file_list.
--pos is used only for location of include statement, may be 0.
procedure load_source(string filename, string reading_how, srcpos pos)
integer fh
string file_text
sequence line_extents -- {start of line, end of line} pairs
fh=open(filename,"r")
if fh!=failure then
wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})
line_extents={}
reenter_timing_zone("read_file")
--Autodetect the encoding
file_text=read_file(fh, TEXT_MODE, UTF)
exit_timing_zone()
close(fh)
line_extents=split_lines(file_text)
total_lines+=length(line_extents)
file_list=append(file_list,{filename,file_text,line_extents})
max_srcpos+=length(file_text)+1
else
if pos then
src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos)
else
simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}))
end if
end if
end procedure
---------------------------------------------------------------------------
--Load and lex, recursively finding includes
procedure load_and_lex_source(string filename, string reading_how, srcpos pos, sequence include_stack)
integer include_num
if find(filename,include_stack) then
src_error(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included recursively",{filename}),pos)
end if
include_stack&={filename}
include_num=find(filename,column(file_list,FILE_NAME))
if include_num then
--Silently ignore this include. Double includes are allowed because of automatic inclusion.
--src_warn(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included multiple times",{include_name}),pos)
return
end if
--Reads one additional file
reenter_timing_zone("load_and_lex_source/reading")
load_source(filename,reading_how,pos)
exit_timing_zone()
--Lex that file, and load and lex all included files
lex_file(length(file_list), include_stack)
end procedure
---------------------------------------------------------------------------
--Search for an include file
function find_include_file(string include_name)
--try source directory
if file_exists(path_only(source_file)&include_name) then
return normalize_filename(path_only(source_file)&include_name)
end if
if file_exists(include_dir&include_name) then
return include_dir&include_name
end if
--try current directory
if file_exists(include_name) then
return include_name
end if
if file_exists(compiler_dir&include_name) then
return normalize_filename(compiler_dir&include_name)
end if
--in case we're installed at $prefix/games, try $prefix/share/games/ohrrpgce/
if file_exists(compiler_dir&"../share/games/ohrrpgce/"&include_name) then
return normalize_filename(compiler_dir&"../share/games/ohrrpgce/"&include_name)
end if
--give up
return include_name
end function
---------------------------------------------------------------------------
procedure show_source_info()
if total_lines then
wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
else
simple_error("no data to compile\n")
end if
end procedure
---------------------------------------------------------------------------
procedure lexer_binary_error(srcpos pos)
--better use src_error, to track down garbage if accidentally creeping in
src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos)
end procedure
---------------------------------------------------------------------------
--reads and processes the escape codes in a string and returns it enclosed in quote marks
function lexer_read_string(string s, integer i, integer stop, srcpos pos)
string string
integer escaping
integer at1, at2
escaping=false
string="\""
while true do
if i>stop then --did not find a closing " so not a valid string
src_error("Expected \" at end of line to end string (multi-line strings are not supported)",pos+stop)
end if
if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then
lexer_binary_error(pos+i)
end if
if escaping then
if s[i]='"' or s[i]='\\' or s[i]='\'' then
--including ' for closer compatibility with Python
string&=s[i]
elsif s[i]='n' then
string&=10
elsif s[i]='t' then
string&=9
elsif s[i]='x' then
--escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes)
at1=0
at2=0
if i+2<=stop then
at1=find(s[i+1],hex_chars)
at2=find(s[i+2],hex_chars)
end if
if at1=0 or at2=0 then
src_error("Expected two hexidecimal characters to follow \\x escape code",pos+i)
end if
string&={16*hex_char_values[at1]+hex_char_values[at2]}
i+=2
else
--invalid sequence
string&='\\'
string&=s[i]
end if
escaping=false
else
if s[i]='"' then
return({i,string&'"'})
elsif s[i]='\\' then
escaping=true
else
string&=s[i]
end if
end if
i+=1
end while
end function
---------------------------------------------------------------------------
--Lex a signed or unsigned integer literal.
--Returns {newi, text} with text is the string representation of an integer.
function lexer_read_number(string s, integer i, integer stop, srcpos pos)
integer state
atom val
integer sign,base
string text
integer starti
starti=i
--Process negative prefix, because it isn't treated as a unary operator
if s[i]='-' then
text="-"
i+=1
sign=-1
else
text=""
sign=1
end if
--Skip whitespace
while i<=stop and (s[i]=' ' or s[i]='\t') do
i+=1
end while
--Check for base prefix 0x 0o or 0b. No separating space allowed!
base=10
if i+2<=stop and s[i]='0' then
if s[i+1]='x' or s[i+1]='X' then
base=16
i+=2
elsif s[i+1]='o' or s[i+1]='O' then
base=8
i+=2
elsif s[i+1]='b' or s[i+1]='B' then
base=2
i+=2
end if
end if
val=0
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR
end if
--color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state})
if state=LEX_DIGIT then --0-9
integer digit
digit=s[i]-'0'
if digit>=base then
src_error(sprintf_utf("Found digit "&COLYEL&"%s"&COLRED&" which is too big, while parsing a base-%d number",{s[i],base}),pos+i)
end if
text&=s[i]
val=val*base+digit
elsif base=16 and state=LEX_HEXDIGIT then --a-fA-F
text&=s[i]
val=val*16+(toLower(s[i])-'a'+10)
elsif state=LEX_SPACE then --yes, allowed in numbers too
elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_OPERATOR or state=LEX_COMMENT or state=LEX_BINARY or state=LEX_NEWLINE then
--backtrack one character
--known bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead
i-=1
exit
else --currently, state=LEX_CHAR or state=LEX_HEXDIGIT or state=LEX_WARN or state=LEX_STRING
if s[i]='.' then
src_error("Floating point values are not yet implemented",pos+i)
end if
src_error(
sprintf_utf("Found unexpected character "&COLYEL&"%s"&COLRED&" while reading a number, "&COLYEL&"%s"&COLRED&". Names such as variables can't start with a digit. Or maybe you forgot a comma or operator?"
,{s[i],text})
,pos+i
)
end if
i+=1
end while
if base!=10 then
--Special behaviour for non-decimals: silently overflow large 32 bit unsigned values to negatives
if val>=#80000000 and val<#100000000 then
val=val-#100000000
end if
--Convert to text
text=sprintf("%d",{sign*val})
end if
if not int32(sign*val) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32 bit signed integer. Integers must be between -2147483648 and 2147483647",{text}),pos+starti)
end if
return({i,text})
end function
---------------------------------------------------------------------------
--Clean the rest of a line for seek_include after 'include' occurs
--s is the whole line, i is end of the 'include', pos is the srcpos at start of line
function lexer_read_include_line(TokenList tokens, string s, integer i, integer stop, srcpos pos)
integer at
sequence temp
string filename_string
if not equal(tokens[1][CMD_TEXT],"include") then
src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS])
end if
filename_string=""
i+=1
s=s[i..stop]
if length(s) then
--Need to increment i while stripping
while s[1]=' ' or s[1]='\t' do
s=s[2..length(s)]
i+=1
end while
--A quote mark in the middle of a filename doesn't need any escaping
if s[1]='"' then
temp=lexer_read_string(s,2,length(s),pos+i-1)
s=s[temp[1]+1..$]
i+=temp[1] --1 past the closing "
filename_string=temp[2] --enclosed in quotes
end if
at=find('#',s)
if at then
s=s[1..at-1]
end if
--Trim either whitespace after "filename", or after non-quoted filename
s=trim_whitespace(s)
if length(filename_string) then
if length(s) then
src_error("Garbage after the end of the filename in this "&COLYEL&"include"&COLRED&" statement", pos+i)
end if
s=filename_string
end if
tokens=append(tokens,{s,pos+i-1})
end if
if length(tokens)=1 then
src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS])
end if
if length(tokens)>2 then
src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS])
end if
return(tokens)
end function
---------------------------------------------------------------------------
-- s is the text for the whole file, the interval i to stop is a single line,
-- and pos is the srcpos for the beginning of the file
function lex_line(sequence s, integer i, integer stop, srcpos pos)
integer state
TokenList tokens
string ident --identifier text being built up
sequence temp
string masked
integer oldi
integer found
integer textstart --the column at which this identifier starts, otherwise 0
string remem_ident
integer remem_textstart
integer nonnumeral --whether we are definitely inside an identifier, ie. a digit doesn't indicate a number
integer backtracked
integer last_state --A LEX_* constant; the last state that was not LEX_SPACE
integer last_state_i --Value at i where last_state was set
ident=""
nonnumeral=false
last_state=LEX_NEWLINE
last_state_i=0
textstart=0
tokens={}
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR
end if
--color_print("lexing i=%d %d=%s state %d nonnum=%d ident=%s\n",{i,s[i],{s[i]},state,nonnumeral,ident})
if state=LEX_CHAR or state=LEX_HEXDIGIT then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
elsif state=LEX_SPACE then
elsif state=LEX_DIGIT then
if nonnumeral then
ident=ident & s[i]
else
if length(ident) then --either ident is "-" or it is ""
i=textstart
ident=""
textstart=0
end if
temp=lexer_read_number(s,i,stop,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
--nonnumeral remains false
end if
elsif state=LEX_WARN then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), pos+i)
else
--We've reached the end of a token... unless it's only the first character
--of a two-character operator.
remem_ident=ident --for backtracking
remem_textstart=textstart
backtracked=false
if length(ident) then
tokens=append(tokens,{ident,pos+textstart})
end if
ident=""
nonnumeral=false
textstart=0
if state=LEX_BEGIN then
tokens=append(tokens,{"begin",pos+i})
elsif state=LEX_END then
if last_state=LEX_COMMA then
--foo(1,). This probably doesn't particularly need to be disallowed
--Could also check for ) but that will be caught later
end if
tokens=append(tokens,{"end",pos+i})
elsif state=LEX_COMMA then
if last_state=LEX_COMMA or last_state=LEX_BEGIN or last_state=LEX_OPERATOR or last_state=LEX_OPERATOR2 then
--foo(, 1) or foo(1, , 2) or ,
src_error(
sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}),
pos+i)
end if
elsif state=LEX_NEWLINE then
elsif state=LEX_COMMENT then
exit
elsif state=LEX_STRING then
temp=lexer_read_string(s,i+1,stop,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
elsif state=LEX_OPERATOR then
--Read the next non-space character. If it's LEX_OPERATOR2 we greedily
--add it, otherwise we do nothing (we only support length 1 and 2
--separators). However, we might have have been too greedy (not a value
--two-char separator), and need to put it back.
textstart=i
masked=s[i..i]
integer lookahead_state
--This while loop is in order to allow whitespace in the middle of separators.
while i length(s)
found=false
for j=1 to length(separator_list) do
if match(separator_list[j],masked)=1 then
tokens=append(tokens,{separator_list[j],pos+textstart})
if length(separator_list[j])=1 then
i=textstart
end if
found=true
exit
end if
end for
if found=false then
--need to backtrack, in particular for -
--color_print("backtracking on '%s'\n", {masked})
backtracked=true
ident=remem_ident & masked[1]
oldi=i+1
i=textstart
if length(remem_ident) then
tokens=tokens[1..$-1]
textstart=remem_textstart
nonnumeral=true
elsif equal(ident,"-") then
--nonnumeral remains false: we've seen exactly "-"
else
--a keyword character followed by garbage
src_error(
sprintf_utf("Expected a two-character operator (like "&COLYEL&":="&COLRED&") but the second character (e.g. "&COLYEL&"="&COLRED&") is missing",{}),
pos+i)
end if
else
textstart=0
if last_state=LEX_COMMA then
-- ,
--(Could check for ( but that will be detected later anyway.)
--Allowed if it's a unary operator. There's only one, since - is not an operator.
if s[i]!='$' then
src_error(
sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}),
pos+last_state_i)
end if
end if
end if
elsif state=LEX_BINARY then
lexer_binary_error(pos+i)
end if
if backtracked=false then
if equal(remem_ident,"include") then
--include is a special case, as normal lexing rules don't apply for rest of line
if length(ident) then
simple_error("compiler bug!\n")
end if
return lexer_read_include_line(tokens,s,i,stop,pos)
end if
end if
--End of a complete token
end if
if state!=LEX_SPACE then
last_state_i=i
last_state=state
end if
i+=1
end while
if length(ident) then
--ought to check whether ident is 'include'...
tokens=append(tokens,{ident,pos+textstart})
end if
return(tokens)
end function
---------------------------------------------------------------------------
--this also checks for rogue strings and $'s
function translate_plotstrings(TokenList tokens)
integer state
integer start_token
integer i
Token string_func
Token string_token
srcpos pos
state=0
i=1
while i<=length(tokens) do
if compare(tokens[i][CMD_TEXT],"$")=0 then
pos=tokens[i][CMD_POS]
if state=0 then
state=1
start_token=i
else
--jump to error throw
state=1
exit
end if
end if
if state=2 then
if tokens[i][CMD_TEXT][1]='"' then
string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1])
string_token={sprintf("@$string%d",{length(string_list)}),pos}
tokens=tokens[1..start_token-1]&{string_func,{"begin",pos}}&tokens[start_token+1..i-2]&{string_token,{"end",pos}}&tokens[i+1..$]
state=0 --may have multiple strings on one line
else --we saw a + (or illegal =) inside the string number expression
state=1
end if
end if
if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then
string_func={"setstringfromtable",pos}
state=2
elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then
string_func={"appendstringfromtable",pos}
state=2
end if
i+=1
end while
if state!=0 then
--we've seen a surplus $, not right
src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos)
end if
return(tokens)
end function
---------------------------------------------------------------------------
--Given a command tree, get the original text from the file
--OK, this function is horribly complicated just so tracevalue can print
--"hero X (me)" instead of "herox(me)". It could be implemented much simpler if
--macros were expanded in the lexer, but then build_ast and
--convert_macros would have to be as well
function tree_original_text(Node tree)
TokenList script_toks
Node subtree
srcpos leftmost, rightmost
integer left_at, right_at
integer filenum
integer left_point, right_point
integer depth
TokenList tokens
string text
script_toks=all_scripts[cur_script_num][SCRIPT_BODY_TOKS]
--find leftmost and rightmost tokens making up the expression, not including brackets
leftmost=tree[TREE_TRUNK][CMD_POS]
subtree=tree
while length(subtree[TREE_BRANCHES]) do
subtree=subtree[TREE_BRANCHES][1]
if subtree[TREE_TRUNK][CMD_POS]32767 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos)
n=32767
elsif n<-32768 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos)
n=-32768
end if
return(n)
end function
---------------------------------------------------------------------------
function try_string_to_number(Token s)
atom result
result=floor(string_to_object(s[CMD_TEXT],0))
if not string_is_int32(s[CMD_TEXT]) then
src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
end if
return(result)
end function
---------------------------------------------------------------------------
function enforce_constants(string s)
--enforces both constants
object v
v=alpha_tree_data(constant_list,s,{})
if length(v) then
v=v[CONST_VALUE]
if int32(v) then
return(sprintf("%d",{v}))
end if
end if
return(s)
end function
---------------------------------------------------------------------------
function get_cmd()
Token result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_no_constants()
Token result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=result[CMD_TEXT]
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_block(integer convert_constants)
Token this
TokenList result
result={}
this=get_cmd()
if compare("begin",this[CMD_TEXT])!=0 then
src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS])
end if
while true do
if convert_constants then
this=get_cmd()
else
this=get_cmd_no_constants()
end if
if compare("begin",this[CMD_TEXT])=0 then
src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS])
elsif compare("end",this[CMD_TEXT])=0 then
exit--break out of the while
else
result=append(result,this)
end if
end while
return(result)
end function
---------------------------------------------------------------------------
procedure parse_constant_block(TokenList block)
atom num
string name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_constant(block[i+1],num)
name=block[i+1][CMD_TEXT]
constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_POS]})
reserved=alpha_tree_insert(reserved,name,RESERVE_CONSTANT)
end for
end procedure
---------------------------------------------------------------------------
procedure parse_trigger_block(TokenList block)
integer num
string name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name but script trigger definition block ended",block[i][CMD_POS])
end if
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
check_undefined_string(block[i+1],"script trigger name")
name=block[i+1][CMD_TEXT]
trigger_list=append(trigger_list,{num,name})
reserved=alpha_tree_insert(reserved,name,RESERVE_TOPLEVEL)
end for
end procedure
---------------------------------------------------------------------------
procedure parse_version_block(TokenList block)
string req_version
if length(block)<2 then
--Don't have a srcpos
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)")
end if
plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time)
req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd
--trim quote marks
if plotscr_version[1]!='"' or req_version[1]!='"' then
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')")
end if
plotscr_version=plotscr_version[2..$-1]
req_version=req_version[2..$-1]
if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
end if
--printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION})
if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
end if
end procedure
---------------------------------------------------------------------------
procedure create_global(atom id, string name, srcpos pos)
integer at
at=find(id,global_list[PAIR_NUM])
if at then
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos)
else
if id>=0 and id<=MAXGLOBAL then
global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
global_list[GLB_POS]=append(global_list[GLB_POS],pos)
reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL)
else
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure parse_global_block(TokenList block)
atom num
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but globalvariable block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_string(block[i+1],"global variable name")
create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS])
end for
end procedure
---------------------------------------------------------------------------
procedure parse_operator_block(TokenList block)
atom num
integer keyword
string name,truename
srcpos pos
for i=1 to length(block) by 3 do
if i+2>length(block) then
src_error("expected name but defineoperator block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
mustnt_be_a_number(block[i+1])
mustnt_be_a_number(block[i+2])
name=block[i+1][CMD_TEXT]
truename=block[i+2][CMD_TEXT]
pos=block[i+2][CMD_POS]
operator_list=append(operator_list,{num,name,truename,pos})
keyword=alpha_tree_data(reserved,name,false)
if keyword then
if keyword!=RESERVE_OPERATOR and keyword!=RESERVE_BUILTIN then
src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" may not be used as an operator",{RESERVE_NAMES[keyword],name}),pos)
end if
else
reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR)
end if
end for
end procedure
---------------------------------------------------------------------------
--arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to
--VAR_ARGS indicating a variable (unlimited) number of arguments.
function create_function(sequence list, integer id, string name, object arglist, integer func_type, srcpos pos)
integer at
sequence encoded
if func_type=RESERVE_SCRIPT then
at=find(id,column(list,PAIR_NUM))
if at then
src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),pos)
end if
if id=0 or id<-1 then
src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos)
elsif id=-1 then
--autonumbered
if length(reuse_ids) then
encoded=encode_ohr(name)
if length(encoded)>36 then
encoded=encoded[1..36]
end if
at=find_in_column(encoded,reuse_ids,PAIR_NAME)
if at then
id=reuse_ids[at][PAIR_NUM]
end if
end if
if id<0 then
id=autonumber_id
loop do
autonumber_id-=1
id=autonumber_id
until find_in_column(id,reuse_ids,PAIR_NUM)=0
end loop
end if
end if
end if
list=append(list,{id,name,arglist,pos})
reserved=alpha_tree_insert(reserved,name,func_type)
return(list)
end function
---------------------------------------------------------------------------
function parse_define_block(TokenList block, sequence list, integer func_type)
atom num
string name
atom args
sequence arglist --sequence of strings
srcpos name_pos
integer i
i=1
while i<=length(block) do
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
if i+1>length(block) then
src_error(sprintf_utf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_POS])
else
i+=1
check_undefined_string(block[i],RESERVE_NAMES[func_type]&" name")
name=block[i][CMD_TEXT]
name_pos=block[i][CMD_POS]
if i+1>length(block) then
src_error("expected argument count but define block ended",block[i][CMD_POS])
else
i+=1
args=try_string_to_number(block[i])
arglist={}
if args<0 then
list=create_function(list,num,name,VAR_ARGS,func_type,name_pos)
else
for j=1 to args do
if i+1>length(block) then
src_error("expected argument default but define block ended",block[i][CMD_POS])
else
i+=1
arglist=append(arglist,try_string_to_number(block[i]))
end if
end for
list=create_function(list,num,name,arglist,func_type,name_pos)
end if
i+=1
end if
end if
end while
return(list)
end function
---------------------------------------------------------------------------
procedure preliminary_parse_pass()
Token this
enter_timing_zone("Preliminary pass")
color_print("preliminary pass\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if equal("defineconstant",this[CMD_TEXT]) then
parse_constant_block(get_cmd_block(false))
elsif equal("definetrigger",this[CMD_TEXT]) then
parse_trigger_block(get_cmd_block(false))
elsif equal("plotscrversion",this[CMD_TEXT]) then
parse_version_block(get_cmd_block(false))
end if
end while
--Check for very old plotscr.hsd
if equal(plotscr_version,"") then
if find("plotscr.hsd",column(file_list,FILE_NAME)) then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE..")
end if
--If plotscr.hsd is not included then we're compiling with -b, so don't complain
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
--Parse a script or subscript
--parent_name is a fully-scoped name of parent script, or ""
--if this script isn't a subscript of another.
--Returns Token with name of parsed script
function parse_script(Token trigger, string parent_name, integer parent_idx)
Token name
string full_name
TokenList arglist
TokenList body
Token this
string err_string
string previous_script
Token subname
sequence src_position
string msg
integer depth
integer nest_depth
integer temp
integer idx
name=get_cmd()
if parent_idx=0 then
full_name=name[CMD_TEXT]
nest_depth=0
else
full_name=sprintf("%s<-%s",{parent_name,name[CMD_TEXT]})
nest_depth=all_scripts[parent_idx][SCRIPT_NEST_DEPTH]+1
if nest_depth>MAX_NEST_DEPTH then
src_error(
sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is nested too deeply (%d layers deep), which is more than is supported. Reorganise your scripts to not use so much nesting."
,{trigger[CMD_TEXT],name[CMD_TEXT],nest_depth})
,name[CMD_POS]
)
end if
end if
previous_script=current_script
current_script=full_name --name[CMD_TEXT]
--Check not already a script
cur_script_num=parent_idx
temp=lookup_scoped_script_name(name[CMD_TEXT])
cur_script_num=0
if temp then
if parent_idx then
msg="There already is a script or subscript named "&COLYEL&"%s"&COLRED&" in this context/scope, defined on line %d of "&COLPNK&"%s"&COLRED
else
msg="Script "&COLYEL&"%s"&COLRED&" appears more than once! It was previously defined on line %d of "&COLPNK&"%s"&COLRED
end if
src_position=decode_srcpos(all_scripts[temp][SCRIPT_POS])
src_error(
sprintf_utf(msg,{name[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]})
,name[CMD_POS]
)
end if
--Check not a reserved non-script name.
--This check is independent of the above 'name already in scope' check.
if not find_in_column(full_name,script_list,PAIR_NAME) then
--Doesn't appear in a definescript block; if it did the name is already reserved.
--Note that we won't add it to script_list yet; that happens later in check_script_declarations
check_undefined_string(name,"user script name")
end if
--Parse arguments
arglist={}
while true do
if get_cmd_pointer>length(cmd) then
src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{trigger[CMD_TEXT],name[CMD_TEXT]}),name[CMD_POS])
end if
this=get_cmd()
if compare("begin",this[CMD_TEXT])=0 then
exit--break the while
end if
--Note that arglist isn't the actual list of arguments, it's just a string of tokens right now (including defaults).
--The actual arglist is built in process_arglist, while compiling each script.
arglist=append(arglist,this)
end while
--Create all_scripts entry now for recursive calls; SCRIPT_BODY_TOKS filled in later
all_scripts=append(all_scripts,{
trigger
,name[CMD_TEXT]
,name[CMD_POS]
,full_name
,arglist
,{}
,parent_idx
,nest_depth
})
idx=length(all_scripts)
--Every script is nested inside a big fat do() block
body={{"do",this[CMD_POS]}}
depth=0
while true do
body=append(body,this)
if compare("end",this[CMD_TEXT])=0 then
depth-=1
if depth=0 then
exit--break while
end if
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
elsif compare("subscript",this[CMD_TEXT])=0 then
--Recurse, so body of subscript doesn't appear in this script.
--The subscript node stays, for syntax checking, but is removed in normalize_flow_control
subname=parse_script(this,full_name,idx)
body&={{"begin",this[CMD_POS]},{"end",this[CMD_POS]}}
else
temp=alpha_tree_data(reserved,this[CMD_TEXT],RESERVE_FLOW)
if temp<=RESERVE_UNIMPLEMENTED then
err_string=sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT]}
)
if temp=RESERVE_TOPLEVEL then
err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]})
end if
src_error(err_string,this[CMD_POS])
end if
end if
if get_cmd_pointer>length(cmd) then
src_error(
sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
,{trigger[CMD_TEXT],name[CMD_TEXT]}
)
,name[CMD_POS]
)
end if
this=get_cmd()
end while
all_scripts[idx][SCRIPT_BODY_TOKS]=body
current_script=previous_script
return name
end function
---------------------------------------------------------------------------
procedure parse_top_level()
Token this
sequence triggers
integer triggeridx
enter_timing_zone("Top level pass")
color_print("parsing top-level\n",{})
triggers=column(trigger_list,PAIR_NAME)
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
triggeridx=find(this[CMD_TEXT],triggers)
if compare("defineconstant",this[CMD_TEXT])=0 then
get_cmd_block(true)
elsif compare("definetrigger",this[CMD_TEXT])=0 then
get_cmd_block(true)
elsif compare("globalvariable",this[CMD_TEXT])=0 then
parse_global_block(get_cmd_block(true))
elsif compare("defineoperator",this[CMD_TEXT])=0 then
parse_operator_block(get_cmd_block(true))
elsif compare("definefunction",this[CMD_TEXT])=0 then
function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION)
elsif compare("definescript",this[CMD_TEXT])=0 then
script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT)
elsif compare("plotscrversion",this[CMD_TEXT])=0 then
--ignore
get_cmd_block(false)
elsif triggeridx>0 then
if trigger_list[triggeridx][PAIR_NUM]<0 then
src_error(
sprintf_utf(
COLYEL&"%s"&COLRED&" can only occur inside another script. Use "&COLYEL&"script"&COLRED&" instead for regular scripts."
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
else
parse_script(this,{},0)
end if
else
check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration")
src_error(
sprintf_utf(
"Expected top-level declaration but found "&COLYEL&"%s"&COLRED
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
end if
end while
cmd={}
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure dump_script_and_function_info(integer fh, sequence list)
sequence this
string id_string
sequence src_position
for i=1 to length(list) do
this=list[i]
if this[PAIR_NUM]>autonumber_id then
id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]})
else
id_string=sprintf("ID=%d",{this[PAIR_NUM]})
end if
src_position=decode_srcpos(this[FUNC_POS])
print_utf(fh,"%s %d\t%s\t%s(",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,id_string
,this[PAIR_NAME]
})
if equal(this[FUNC_ARGS],VAR_ARGS) then
print_utf(fh,"VARIABLE ARGS",{})
else
for j=1 to length(this[FUNC_ARGS]) do
if j>1 then
print_utf(fh,",",{})
end if
if equal(this[FUNC_ARGS][j],NO_DEFAULT) then
print_utf(fh,"REQ",{})
else
print_utf(fh,"%d",{this[FUNC_ARGS][j]})
end if
end for
end if
print_utf(fh,")\n",{})
end for
end procedure
---------------------------------------------------------------------------
function seek_string_by_id(integer id, sequence list, string name)
integer at
at=find_in_column(id,list,PAIR_NUM)
if at then
return(list[at][PAIR_NAME])
else
simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
end if
return("")
end function
---------------------------------------------------------------------------
--Used while dumping script binary
function name_lookup(sequence pair, VarList locals)
integer at
integer frame,id
if pair[1]=KIND_NUMBER then
return(sprintf("%d",{pair[2]}))
elsif pair[1]=KIND_LOCAL or pair[1]=KIND_NONLOCAL then
frame=and_bits(pair[2],#FF00)/#100
id=and_bits(pair[2],#FF)
for i=1 to length(locals) do
if locals[i][VAR_FRAME]=frame and locals[i][VAR_ID]=id then
return locals[i][CMD_TEXT]
end if
end for
simple_error(sprintf_utf("HSpeak bug: decompiler found illegal variable ID "&COLYEL&"%d*256+%d"&COLRED,{frame,id}))
elsif pair[1]=KIND_GLOBAL then
at=find(pair[2],global_list[PAIR_NUM])
if at then
return(global_list[PAIR_NAME][at])
else
simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
end if
elsif pair[1]=KIND_FLOW then
return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
elsif pair[1]=KIND_SCRIPT then
return(seek_string_by_id(pair[2],script_list,"user script"))
elsif pair[1]=KIND_FUNCTION then
return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
elsif pair[1]=KIND_MATH then
return(seek_string_by_id(pair[2],math_list,"built-in function"))
else
simple_error(sprintf_utf("HSpeak bug: decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
end if
end function
---------------------------------------------------------------------------
function binstring_to_int(sequence encoded)
--bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles
atom temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000
if and_bits(temp,#80000000) then
return(temp-#100000000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function read_word(sequence encoded)
--opposite of output_word
integer temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100
if and_bits(temp,#8000) then
return(temp-#10000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function dump_script_binary(sequence bin, integer offset, integer depth, VarList locals)
string result
sequence kind_and_id
integer kind
integer argcount
integer new_offset
result=""
kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])}
kind=kind_and_id[1]
result&=sprintf_utf("%s%s",{
repeat(' ',depth) --indent
,name_lookup(kind_and_id,locals)
})
if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11])
if argcount then
result&="(\n"
for i=0 to argcount-1 do
new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3])
result&=dump_script_binary(bin,new_offset,depth+2,locals)
end for
result&=repeat(' ',depth)&")\n"
else
result&="()\n"
end if
else
result&="\n"
end if
return(result)
end function
---------------------------------------------------------------------------
function dump_script_tree(NodeList tree, integer depth)
string result
result=""
for i=1 to length(tree) do
result&=sprintf_utf("%s%s",{
repeat(' ',depth)--indent
,tree[i][TREE_TRUNK][CMD_TEXT]
})
if length(tree[i][TREE_BRANCHES])>0 then
result&="(\n"
result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
result&=repeat(' ',depth)&")\n"
else
result&="\n"
end if
end for
return(result)
end function
---------------------------------------------------------------------------
function dump_script_strings(sequence bin)
string result
integer table_start
integer offset
integer len
result=""
table_start=binstring_to_int(bin[9..12])
if table_start=0 then
return("")
end if
bin=bin[table_start+1..$]
offset=1
while offset1 then
debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
else
debug_file="hs_debug.txt"
end if
fh=open(debug_file,"w")
if fh!=failure then
wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
-------------------------------------
print_utf(fh,"[Scripts]\n",{})
dump_script_and_function_info(fh,script_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Global Variables]\n",{})
for i=1 to length(global_list[PAIR_NUM]) do
src_position=decode_srcpos(global_list[GLB_POS][i])
print_utf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,global_list[PAIR_NUM][i]
,global_list[PAIR_NAME][i]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Builtin Functions]\n",{})
dump_script_and_function_info(fh,function_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Operators]\n",{})
for i=1 to length(operator_list) do
src_position=decode_srcpos(operator_list[i][OPER_POS])
print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,operator_list[i][PAIR_NAME]
,operator_list[i][OPER_TRUENAME]
,operator_list[i][PAIR_NUM]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Script Dumps]\n",{})
for i=1 to length(all_scripts) do
script=all_scripts[i]
src_position=decode_srcpos(script[SCRIPT_POS])
print_utf(fh,"%s %d\tID=%d\t%s (%s)\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,script[SCRIPT_ID]
,script[SCRIPT_NAME]
,script[SCRIPT_FULL_NAME]
})
idx=script[SCRIPT_PARENT_IDX]
if idx then
src_position=decode_srcpos(script[SCRIPT_POS])
print_utf(fh,"%s %d\tparent=ID %d\t%s (access to %d nonlocals)\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[idx][SCRIPT_ID]
,all_scripts[idx][SCRIPT_NAME]
,script[SCRIPT_NUM_NONLOCALS]
})
end if
src_position=decode_srcpos(script[SCRIPT_TRIGGER_TOK][CMD_POS])
print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,script[SCRIPT_TRIGGER_ID]
,script[SCRIPT_TRIGGER_TOK][CMD_TEXT]
})
locals=script[SCRIPT_VARIABLES]
for j=1 to length(locals) do
src_position=decode_srcpos(locals[j][CMD_POS])
print_utf(fh,"%s %d\tvar(frame %d id %d)=%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,locals[j][VAR_FRAME]
,locals[j][VAR_ID]
,locals[j][CMD_TEXT]
})
end for
print_utf(fh,"%d bytes compiled\n",{length(script[SCRIPT_BINARY])})
-- print_utf(fh,"%s\n\n",{dump_script_tree(script[SCRIPT_AST],0)})
print_utf(fh,"%s",{dump_script_binary(script[SCRIPT_BINARY][CODE_START_BYTE_OFFSET+1..$],0,0,script[SCRIPT_VARIABLES])})
print_utf(fh,"%s\n\n",{dump_script_strings(script[SCRIPT_BINARY])})
end for
print_utf(fh,"\n",{})
-------------------------------------
close(fh)
else
wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
end if
end if
end procedure
---------------------------------------------------------------------------
function get_cmd_depth(integer ptr, TokenList data, integer depth)
TokenList result
Token this
result={}
while true do
--if get_key()=27 then abort(1/0) end if
if ptr>length(data) then
src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
depth-=1
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
end if
if depth=0 then
exit --break out of the while
else
result=append(result,this)
end if
end while
return({ptr,result})
end function
---------------------------------------------------------------------------
--Look up the name of a script in all current scopes, returning all_scripts
--index if found, or 0 if not
function lookup_scoped_script_name(string name)
string s
integer at
integer idx
idx=cur_script_num
while true do
if idx then
s=all_scripts[idx][SCRIPT_FULL_NAME]&"<-"&name
else
s=name
end if
at=find_in_column(s,all_scripts,SCRIPT_FULL_NAME)
if at then
return at
end if
if idx=0 then
return 0
end if
idx=all_scripts[idx][SCRIPT_PARENT_IDX]
end while
end function
---------------------------------------------------------------------------
--identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis
function what_kind_and_id(Token command, VarList local_vars)
integer kind
integer at
atom id
integer keyword
string s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
id=string_to_object(s,{})
elsif length(s)=0 then
kind=KIND_PARENS
id=0
elsif s[1] = '@' then
kind=KIND_REFERENCE
id=0 -- ID always resolves to 0 for references here, since it is too early to know all
-- script IDs (really?). The real work is done in binary_compile_recurse
elsif find_in_column(s,local_vars,CMD_TEXT) then
at=find_in_column(s,local_vars,CMD_TEXT)
if local_vars[at][VAR_FRAME]=0 then
kind=KIND_LOCAL
else
kind=KIND_NONLOCAL
end if
id=local_vars[at][VAR_ID] + 256*local_vars[at][VAR_FRAME]
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])]
elsif keyword=RESERVE_FLOW then
kind=KIND_FLOW
id=flow_list[find_in_column(s,flow_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
id=function_list[find_in_column(s,function_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
id=script_list[find_in_column(s,script_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
id=math_list[find_in_column(s,math_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
id=0
elsif keyword=RESERVE_KEYWORD then
kind=KIND_KEYWORD
id=0
elsif s[1] = '"' then
src_error(
sprintf_utf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(s,15)}),
command[CMD_POS]
)
else
--It may be a subscript, in which case the scoped name needs to be looked up
at=lookup_scoped_script_name(s)
if at then
kind=KIND_SCRIPT
id=all_scripts[at][SCRIPT_ID]
else
src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
end if
return({kind,id})
end function
---------------------------------------------------------------------------
--identify the kind of a text command
function what_kind(Token command, VarList local_vars, integer look_for_operators)
integer at
integer kind
integer keyword
string s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
elsif length(s)=0 then
kind=KIND_PARENS
elsif s[1] = '@' then
kind=KIND_REFERENCE
elsif find_in_column(s,local_vars,CMD_TEXT) then
at=find_in_column(s,local_vars,CMD_TEXT)
if local_vars[at][VAR_FRAME]=0 then
kind=KIND_LOCAL
else
kind=KIND_NONLOCAL
end if
elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then
kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then
kind=KIND_FLOW
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
elsif keyword=RESERVE_KEYWORD then
kind=KIND_KEYWORD
elsif s[1] = '"' then
src_error(
sprintf_utf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(s,15)}),
command[CMD_POS]
)
else
--It may be a subscript, in which case the scoped name needs to be looked up
at=lookup_scoped_script_name(s)
if at then
kind=KIND_SCRIPT
else
src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
end if
return(kind)
end function
---------------------------------------------------------------------------
--Check whether a math operator is not an assignment, a compare operator, or 'random'
function is_simple_math_op(integer id)
return (id >= 1 and id <= 9) or id >= 19
end function
---------------------------------------------------------------------------
--Given name (CMD_TEXT) of a Node, return whether it is a block: all its arguments are
--statements as opposed to expressions (if, for, while, switch, which take a mix need special handling)
function block_command(string name)
return compare("do",name)=0 or compare("then",name)=0 or compare("else",name)=0
end function
---------------------------------------------------------------------------
procedure check_is_expression(Node node, string parent)
if alpha_tree_data(reserved,node[TREE_TRUNK][CMD_TEXT],0)=RESERVE_FLOW then
src_error(
sprintf_utf(COLYEL&"%s"&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&"%s"&COLRED&")"
,{node[TREE_TRUNK][CMD_TEXT],parent})
,node[TREE_TRUNK][CMD_POS])
end if
end procedure
---------------------------------------------------------------------------
--this function not used anywhere
function how_many_args(string name, integer kind)
integer result
integer at
if kind=KIND_PARENS then
result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
elsif kind=KIND_FLOW then
result=-1 --flow supports an unknown number of args
elsif kind=KIND_OPERATOR then
result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
elsif kind=KIND_MATH then
at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME)
result=length(math_list[at][FUNC_ARGS])
elsif kind=KIND_FUNCTION then
at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME)
if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then
result=-1
else
result=length(function_list[at][FUNC_ARGS])
end if
elsif kind=KIND_SCRIPT then
at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME)
result=length(script_list[at][FUNC_ARGS])
else
--numbers, variables, etc do not permit args
result=0
end if
return(result)
end function
---------------------------------------------------------------------------
--Something that potentially takes args
function takes_args(integer kind)
if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO or kind=KIND_KEYWORD then
return(true)
else
return(false)
end if
end function
---------------------------------------------------------------------------
--Get next AST node from a list of tokens by pairing up begin,end
--Returns {new_ptr,{token,children}} where children is a TokenList not a NodeList
function get_script_node(integer ptr, TokenList data, VarList vars)
Token command
sequence ptr_and_block
TokenList block
integer kind
integer followed_by_begin
block={}
command=data[ptr]
ptr+=1
if compare("end",command[CMD_TEXT])=0 then
src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS])
elsif compare("begin",command[CMD_TEXT])=0 then
--floaty brackets for order-of-operations-enforcement
ptr-=1
command[CMD_TEXT]=""
end if
kind=what_kind(command,vars,true)
followed_by_begin=false
if ptr<=length(data) then
--there is room for args
followed_by_begin=equal("begin",data[ptr][CMD_TEXT])
--distinguishing between functions with and without args means wait() would be ok but noop() would not
if takes_args(kind) then
if followed_by_begin then
--yes, it has args
ptr+=1--only increment the pointer when we have args
ptr_and_block=get_cmd_depth(ptr,data,1)
ptr=ptr_and_block[1]
block=ptr_and_block[2]
if length(block)=0 and kind=KIND_PARENS then
src_error("found empty parentheses not associated with a function call",command[CMD_POS])
end if
end if
end if
end if
if kind=KIND_FLOW then
--Require certain flow control types to be followed by parentheses.
--We have to check this here or earlier, because after building the parse tree
--there will be no difference between "()" and omitted parentheses.
--(Note that several later checks in normalize_flow_control are redundant to this)
--FIXME: Ideally we wouldn't actually check anything here, but instead preserve presence
--of empty parentheses to check later (it will matter in future anyway according to typing plan)
--FIXME: excludes else, because of 'case(else)'
if find(command[CMD_TEXT],flow_requiring_brackets) then
if not followed_by_begin then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" must be followed by "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{command[CMD_TEXT]}),command[CMD_POS])
end if
end if
--else has a special check for 'else,if' mistake.
--'not followed_by_begin' excludes 'else(...)if'
if equal("else",command[CMD_TEXT]) and ptr<=length(data) and equal("if",data[ptr][CMD_TEXT]) and not followed_by_begin then
src_error(COLYEL&"else if"&COLRED&" may not be written with a separator in-between",command[CMD_POS])
--temp=get_script_node(ptr, data, vars) --hack to allow if necessary
--temp[2][1][CMD_TEXT]="elseif"
--return temp
end if
end if
return({ptr,{command,block}})
end function
---------------------------------------------------------------------------
--Returns a NodeList
function build_ast(TokenList script_body_toks, VarList vars)
integer ptr
sequence ptr_and_seminode
sequence seminode --Not a Node, has a TokenList instead of NodeList
Node node
NodeList result
result={}
ptr=1
while true do
ptr_and_seminode=get_script_node(ptr,script_body_toks,vars)
ptr=ptr_and_seminode[1]
seminode=ptr_and_seminode[2]
if length(seminode[TREE_BRANCHES])>0 then
--this seminode has arguments that need parsing
node={seminode[TREE_TRUNK],build_ast(seminode[TREE_BRANCHES],vars)}
else
node=seminode
end if
result=append(result,node)
if ptr>length(script_body_toks) then
exit --break out of while when there is no more data
end if
end while
return(result)
end function
---------------------------------------------------------------------------
--When variables are inherited by a subscript all frame numbers need to be incremented
function shift_variables_frame(VarList vars)
for i=1 to length(vars) do
vars[i][VAR_FRAME]+=1
end for
return vars
end function
---------------------------------------------------------------------------
--Returns new VarList of locals.
function add_local_variable(VarList vars, Token var)
integer id
sequence src_position
check_undefined_string(var,"local variable name")
--Search for an existing local with the same name. Ignore non-locals, because they are shadowed
for idx=1 to length(vars) do
if equal(vars[idx][CMD_TEXT],var[CMD_TEXT]) then
if vars[idx][VAR_FRAME]=0 then --is local
src_position=decode_srcpos(vars[idx][CMD_POS])
src_error(
sprintf_utf(
"Local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLPNK&"%s"&COLRED,
{var[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}
)
,var[CMD_POS]
)
end if
end if
end for
--Always add to scope of current script (frame == 0)
id=length(vars)-all_scripts[cur_script_num][SCRIPT_NUM_NONLOCALS]
--Extend the Token into a VarToken, and prefix to the list, so that it takes
--precedence over shadowed variables
return {var&{0,id}}&vars
end function
---------------------------------------------------------------------------
--Find the variable declarations in a script
function gather_local_vars(VarList vars, TokenList data)
Token this
integer ptr
ptr=1
while true do
this=data[ptr]
ptr+=1
if compare("variable",this[CMD_TEXT])=0 then
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("begin",this[CMD_TEXT])=0 then
while true do
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
exit--break the while
end if
vars=add_local_variable(vars,this)
end while
else
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
end if
if ptr>length(data) then
exit --break out of the while
end if
end while
return(vars)
end function
---------------------------------------------------------------------------
--There are only two, hardcoded, macros right now: tracevalue, assert.
function expand_macros(NodeList tree)
NodeList newtree
NodeList newargs
string new_string
Token string_token
sequence src_position
sequence temp
srcpos pos
integer ptr
ptr=1
while ptr<=length(tree) do
--recurse first, because tree[ptr] might be split up (as in 'assert')
if length(tree[ptr][TREE_BRANCHES]) then
tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES])
end if
if equal("tracevalue",tree[ptr][TREE_TRUNK][CMD_TEXT]) then
newargs={}
tree[ptr][TREE_TRUNK][CMD_TEXT]="tracevalueinternal"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
for i=1 to length(tree[ptr][TREE_BRANCHES]) do
new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i])
string_list=append(string_list,new_string)
string_token={sprintf("@$string%d",{length(string_list)}),pos}
newargs&={{string_token,{}},tree[ptr][TREE_BRANCHES][i]}
end for
tree[ptr][TREE_BRANCHES]=newargs
end if
if equal("assert",tree[ptr][TREE_TRUNK][CMD_TEXT]) then
--assert(condition) --> if (not(condition)) then ($assert expression string="condition", assertfailure)
pos=tree[ptr][TREE_TRUNK][CMD_POS]
src_position=decode_srcpos(pos)
if length(tree[ptr][TREE_BRANCHES])!=1 then
src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos)
end if
newtree={{{"if",pos},{}}, {{"then",pos},{}}}
newtree[1][TREE_BRANCHES]={{{"not",pos},{tree[ptr][TREE_BRANCHES][1]}}}
new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])})
string_list=append(string_list,new_string)
string_token={sprintf("@$string%d",{length(string_list)}),pos}
temp={{{"setstringfromtable",pos},{}}, {{"assertfailure",pos},{}}}
temp[1][TREE_BRANCHES]={{{enforce_constants("assertexpressionstring"),pos},{}},{string_token,{}}} --stringnum, string
newtree[2][TREE_BRANCHES]=temp
tree=replace_slice(tree,ptr,ptr,newtree) --tree[ptr..ptr]=newtree
ptr+=1
end if
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--tree[ptr] is a partially constructed if Node (lacking else child), followed either by an else or elseif Node.
--Handle else/elseif, parenting to tree[pt], and return the new tree.
function build_else_node(NodeList tree, integer ptr)
NodeList elseargs
string nextcmd
integer eptr
srcpos pos
nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr+1][TREE_TRUNK][CMD_POS]
if equal("else",nextcmd) then
--found else
tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
tree=delete_element(tree,ptr+1)
else -- equal("elseif",nextcmd)
--Split off the if
elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} }
--Grab the rest of this if,elseif,...,else chain, to put inside the else
eptr=ptr+2
while eptr<=length(tree) do
nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT]
--Stop when we see an else but otherwise we don't check correctness until we recurse
if equal(nextcmd,"else") then
eptr+=1
exit
elsif find(nextcmd,{"elseif","then"}) then
--continue
else
exit
end if
eptr+=1
end while
elseargs&=tree[ptr+2..eptr-1]
tree=tree[1..ptr]&tree[eptr..$]
tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} }
end if
return tree
end function
---------------------------------------------------------------------------
--Build switch AST Nodes, return new tree
function build_switch_node(NodeList tree, VarList vars, integer ptr)
srcpos pos
srcpos argpos
NodeList args
Token thiscmd
NodeList rawargs
NodeList caseargs
Token casecmd
string lastnodename
sequence collected --either a Node or {}
sequence temp
integer have_else
srcpos elsepos
integer expect_case
integer kind
atom id
string else_error_msg
else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
args=tree[ptr][TREE_BRANCHES] --start with the condition, length 1
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one. (The body of the switch should go in its "&COLYEL&"do(...)"&COLRED&" block instead.)"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos)
end if
check_is_expression(args[1],"switch")
if ptr>=length(tree) then
src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos)
end if
thiscmd=tree[ptr+1][TREE_TRUNK]
if equal("do",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS])
end if
--found do, processing starts here (build arguments to switch, after the expression)
rawargs=tree[ptr+1][TREE_BRANCHES] --contents of the 'do'
lastnodename=""
have_else=false
expect_case=false
collected={}
for j=1 to length(rawargs) do
thiscmd=rawargs[j][TREE_TRUNK]
argpos=thiscmd[CMD_POS]
if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"."
,{thiscmd[CMD_TEXT]}),argpos)
end if
--to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible
if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then
--Old syntax else(...) block
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
src_error(else_error_msg,elsepos)
end if
expect_case=false
if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case is new-style syntax and wasn't followed by any statements
--Lets just disallow this to prevent a little confusion
src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "&
"Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions."
,rawargs[j-1][TREE_TRUNK][CMD_POS])
end if
if j!=length(rawargs) then
if length(rawargs[j][TREE_BRANCHES])=0 then
else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?"
end if
src_error(else_error_msg,argpos)
end if
--convert it to a do
args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} }
elsepos=argpos
have_else=true
elsif equal("case",thiscmd[CMD_TEXT]) then
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
--Note we disallow case(else) case(1) but allow case(else,1)
src_error(else_error_msg,elsepos)
end if
expect_case=false
caseargs=rawargs[j][TREE_BRANCHES]
--Call a couple passes early here, so that constant expressions are allowed
caseargs=collapse_floaty_brackets(caseargs)
caseargs=optimize_script(caseargs,vars)
--Check contents are only either integers, or "else", and append to args
for k=1 to length(caseargs) do
casecmd=caseargs[k][TREE_TRUNK]
if equal(casecmd[CMD_TEXT],"else") then
if have_else then
src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS])
end if
have_else=true
elsepos=argpos
--don't emit anything: next do is the 'else' one
else
temp=what_kind_and_id(casecmd,vars)
kind=temp[1]
id=temp[2]
if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED
,{casecmd[CMD_TEXT]}),casecmd[CMD_POS])
end if
args&={caseargs[k]}
end if
end for
else
--Disallow "case() do() garbage"
if expect_case then
src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos)
end if
if length(collected)=0 then
--Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it
if equal("do",thiscmd[CMD_TEXT]) then
args&={rawargs[j]}
expect_case=true
else
collected={{"do",argpos},{rawargs[j]}}
end if
else
--Continue collecting
collected[TREE_BRANCHES]&={rawargs[j]}
end if
end if
lastnodename=thiscmd[CMD_TEXT]
end for
if length(collected) then
args&={collected}
end if
if length(args)>1 and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case (possibly an else) is new-style syntax and wasn't followed by any statements,
--so add the missing do. Don't need to do this, but I'd like to keep things tidy
--(srcpos is of the last case)
args&={ {{"do",argpos},{}} }
end if
if have_else=false then
--Insert a dummy do default block since else has been left out
args&={ {{"do",pos},{}} }
end if
tree[ptr][TREE_BRANCHES]=args
--delete the do block
return delete_element(tree,ptr+1)
end function
---------------------------------------------------------------------------
--Given a variable token for 'for' or 'setvariable', etc. (parent) and list of locals/non-locals 'vars',
--return a an integer ID token, or throw an error if not a variable.
function convert_variable_to_id(Token var, VarList locals, string parent)
sequence kind_and_id
atom id
integer argkind
integer var_at
kind_and_id=what_kind_and_id(var,locals)
argkind=kind_and_id[1]
id=kind_and_id[2]
if argkind=KIND_LOCAL or argkind=KIND_NONLOCAL then
id=-1-id
elsif argkind=KIND_GLOBAL then
--id unmodified
else
--It's not a variable. bad!
src_error(sprintf_utf(
"first argument of "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
,{parent,KIND_LONGNAMES[argkind],var[CMD_TEXT]}
),var[CMD_POS])
end if
return {sprintf("%d",{id}),var[CMD_POS]}
end function
---------------------------------------------------------------------------
--parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements
--Note that several of these checks are now redundant to the check for 'begin' performed in get_script_node
function normalize_flow_control(NodeList tree, VarList vars, string parent)
integer ptr
string s
string nextcmd
NodeList args
Token ctr
srcpos pos
integer argkind
integer var_at
integer keyword
ptr=1
while ptr<=length(tree) do
s=tree[ptr][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr][TREE_TRUNK][CMD_POS]
args=tree[ptr][TREE_BRANCHES]
keyword=alpha_tree_data(reserved,s,0)
if keyword=RESERVE_FLOW then
if equal(s,"then") or equal(s,"else") or equal(s,"do") then
--these are checked for correct parents below.
elsif block_command(parent)=false then
src_error(COLYEL&s&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&parent&COLRED&")",pos)
end if
end if
if compare("if",s)=0 then
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
check_is_expression(args[1],s)
if ptr1 then
src_error(sprintf_utf(
COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
check_is_expression(args[1],s)
if ptr4 then
src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(args)}),args[5][TREE_TRUNK][CMD_POS])
elsif length(args)=3 then
--append default step value
args&={ {{"1",pos},{}} }
end if
ctr=args[1][TREE_TRUNK]
argkind=what_kind(ctr,vars,true)
if argkind=KIND_LOCAL then
used_locals=append(used_locals,ctr[CMD_TEXT])
elsif argkind=KIND_GLOBAL then
--warn, then translate into a numeric reference to a variable
src_warn(sprintf_utf(
"Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
,{ctr[CMD_TEXT]}
),ctr[CMD_POS])
end if
args[1][TREE_TRUNK]=convert_variable_to_id(ctr,vars,"for")
check_is_expression(args[2],s)
check_is_expression(args[3],s)
check_is_expression(args[4],s)
--Handle "do"
if ptr1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have only one."
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit script"&COLRED,{}),pos)
end if
elsif compare("break",s)=0 or compare("continue",s)=0 then
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one."
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
--append default value
tree[ptr][TREE_BRANCHES]={{{"1",pos},{}}}
end if
elsif compare("exitscript",s)=0 then
if length(args)>0 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit returning"&COLRED
,{length(args)}
),args[1][TREE_TRUNK][CMD_POS])
end if
elsif compare("switch",s)=0 then
tree=build_switch_node(tree,vars,ptr)
elsif compare("case",s)=0 then
src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos)
elsif compare("subscript",s)=0 or compare("variable",s)=0 then
--Does nothing; remove. (Kept until now to check they isn't placed in a silly place)
tree=delete_element(tree,ptr)
continue
end if
if equal(s,"") then
--Floaty brackets (they haven't been removed yet). Look to parent instead.
s=parent
end if
tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,s)
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--A modification of Dijkstra's shunting-yard algorithm
--(This could probably subsume build_ast too)
function convert_operators(NodeList tree)
NodeList new_tree
sequence stack --{int,Node} pairs
Node opnode
integer ptr
integer at
integer priority
integer nonoperator_before
stack={}
new_tree={}
ptr=1
nonoperator_before=false
while ptr<=length(tree) do
--for each peer branch on the tree
at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME)
if at>0 then
if nonoperator_before=false then
--there is no room for the before-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
elsif ptr=length(tree) then
--there is no room for the after-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
end if
priority=operator_list[at][PAIR_NUM]
--convert the operator to its true function name (which might be the same)
tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME]
while length(stack) and priority>=stack[$][1] do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
stack&={{priority,tree[ptr]}}
nonoperator_before=false
else
if nonoperator_before then
--Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
end if
tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES])
new_tree&={tree[ptr]}
nonoperator_before=true
end if
ptr+=1
end while
--copy of above
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
return new_tree
end function
---------------------------------------------------------------------------
--Ensure correct number of arguments, add defaults, and translate variables to IDs in assignments
function fix_arguments(Node tree, integer kind, integer id, sequence list, VarList vars)
integer at
integer argnum,maxargs
NodeList args
string funcname
srcpos funcpos
funcname=tree[TREE_TRUNK][CMD_TEXT]
funcpos=tree[TREE_TRUNK][CMD_POS]
args=tree[TREE_BRANCHES]
argnum=length(args)
--Lookup ID because name might be scoped
at=find_in_column(id,list,PAIR_NUM)
if equal(list[at][FUNC_ARGS],VAR_ARGS) then
return(args)
else
maxargs=length(list[at][FUNC_ARGS])
end if
if maxargs < argnum then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments"
,{KIND_LONGNAMES[kind],funcname,maxargs,argnum}
),args[maxargs+1][TREE_TRUNK][CMD_POS])
elsif maxargs > argnum then
--add defaults if not enough args are present
if kind=KIND_MATH then
--special processing for math
if list[at][PAIR_NUM]<=15 or list[at][PAIR_NUM]>=19 then
--math shouldnt have defaults
src_error(sprintf_utf(
"math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d"
,{funcname,argnum,maxargs}
),funcpos)
else
--variable assignment commands can have defaults
if argnum = 0 then
--no defaults for first argument of variable function
src_error(sprintf_utf(
"variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1"
,{funcname,argnum}
),funcpos)
elsif argnum = 1 then
--make defaults for second arg of variable function
if list[at][PAIR_NUM]=16 then
--setvariable
args=append(args,{
{"0",funcpos}
,{}
})
else
--increment and decrement
args=append(args,{
{"1",funcpos}
,{}
})
end if
end if
end if
else
--normal processing for script and function
for i=argnum+1 to maxargs do
if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d"
,{KIND_LONGNAMES[kind],funcname,i}
),funcpos)
end if
args=append(args,{
{sprintf("%d",{list[at][FUNC_ARGS][i]}),funcpos}
,{}
})
end for
end if
end if
--Translate variables in the variable manipulation functions
if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<=18 then
args[1][TREE_TRUNK]=convert_variable_to_id(args[1][TREE_TRUNK],vars,funcname)
end if
return(args)
end function
---------------------------------------------------------------------------
--Calls fix_arguments on each non-flow node with arguments
function normalize_arguments(NodeList tree, VarList vars)
sequence kind_and_id
integer kind
atom id
--Unlike the flow normalization and operator translation we do not insert/delete elements from
--the current level, so we can use a "for" safely, and dont need a "while"
for i=1 to length(tree) do
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_SCRIPT then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,script_list,vars)
elsif kind=KIND_FUNCTION then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,function_list,vars)
elsif kind=KIND_MATH then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,math_list,vars)
end if --number, flow, global, local, parens need no argchecking
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
integer b1,b2
b1=and_bits(n,#FF)
b2=floor(and_bits(n,#FFFF)/256)
return({b1,b2})
end function
---------------------------------------------------------------------------
function convert_to_bytes(sequence s)
sequence result
result={}
for i=1 to length(s) do
result&=int_to_bytes(s[i])
end for
return(result)
end function
---------------------------------------------------------------------------
--Encode a unicode (UTF32/native Euphoria) string in the format the engine
--requires, which is currently Latin-1
function encode_ohr(string string, integer maxbytes=0)
for i=1 to length(string) do
if string[i]>=256 then
string[i]='?'
end if
end for
if maxbytes and maxbytes7 and equal("$string",str_temp[1..7]) then
value_temp=value(str_temp[8..length(str_temp)])
result&=KIND_NUMBER
result&=table_string(value_temp[2])
else
src_error(sprintf_utf("Reference "&COLYEL&"@%s"&COLRED&" not understood: references must be the name of a global variable or a script",{str_temp}),cmdpos)
end if
end if
end if
elsif kind=KIND_GLOBAL then
result&=kind
result&=id
elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then
result&=kind
result&=id
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
--Use id from what_kind_and_id
if kind=KIND_FUNCTION then
if max_used_function= initial_data_len then
can_reference=false
end if
result[3+i]=recurse_result
end for
--Now either remove or replace the temporary stuff
if can_reference then
compiled_data=compiled_data[1..initial_data_len]
else
compiled_data[initial_data_len+1 .. initial_data_len+length(result)]=result
return(initial_data_len)
end if
else
src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),cmdpos)
end if
if not fast_mode and can_reference then
at=match(result,compiled_data)
if at>0 then
if initial_data_len!=length(compiled_data) then
simple_error("compiler bug! Unexpected compiled_data length")
end if
--found existing data exactly like this command, so just return a reference to it
if at=1 then
simple_error("compiler bug! node matches root node")
end if
return(at-1)
end if
end if
--append data directly
compiled_data&=result
return(initial_data_len)
end function
---------------------------------------------------------------------------
function binary_compile(Script script)
NodeList tree
sequence result
integer recurse_ret
integer parent_idx
integer parent_id
parent_idx=script[SCRIPT_PARENT_IDX]
if parent_idx=0 then
parent_id=0
else
parent_id=all_scripts[parent_idx][SCRIPT_ID]
end if
string_table={}
compiled_data={}
--binary data is in a mix of 16-bit and 32-bit signed words.
--the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit.
--output header
--0..1: the zero-rooted byte-offset of the first executable code byte
result=output_word(CODE_START_BYTE_OFFSET)
--2..3: the number of local variables (including args but not nonlocals)
result&=output_word(length(script[SCRIPT_VARIABLES]) - script[SCRIPT_NUM_NONLOCALS])
--4..5: the number of arguments the script takes (also in SCRIPTS.TXT)
result&=output_word(length(script[SCRIPT_ARG_TOKS]))
--6..7: the format of the command data (presently used to indicate 32-bit encoding)
result&=output_word(HSZ_FORMAT_VERSION)
--8..11: a 32bit offset to the string literal table (in bytes), we don't know it yet
result&=output_word(0)
result&=output_word(0)
--12..13: parent script ID or 0 if none
result&=output_word(parent_id)
--14..15: Nesting depth
result&=output_word(script[SCRIPT_NEST_DEPTH])
--16..17: number of nonlocals
result&=output_word(script[SCRIPT_NUM_NONLOCALS])
--what follows is command data in the format [kindID,Value,argcount,argpointerlist]
--numbers and variables have no argcount or argpointerlist
--an argpointer is the zero-rooted word-offset of the argument relative
--to the start of the executable commands. I realise that this format is
--unnecessarily complicated. I had hoped to get benefits of being able to
--store frequently reused commands only once and then just point to them,
--but in actual practice, it isnt worth the trouble, since the only
--commands that tend to be redundant are the really short ones.
--the first command is always a "do". there can be only one top-level command
tree=script[SCRIPT_AST]
if length(tree)!=1 then
simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)}))
end if
reenter_timing_zone("binary_compile_recurse")
recurse_ret=binary_compile_recurse(tree[1],script[SCRIPT_VARIABLES])
if recurse_ret!=0 then
simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret}))
end if
exit_timing_zone()
result&=convert_to_bytes(compiled_data)
compiled_data={}
--append with a table of string literals and give its offset
if length(string_table)>0 then
result[9..12]=int_to_bytes(length(result))
result&=string_table
end if
return(result)
end function
---------------------------------------------------------------------------
--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(NodeList tree)
integer i
NodeList graft
i=1
while i<=length(tree) do
if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
--found a floaty-bracket
graft=tree[i][TREE_BRANCHES]
if length(graft)!=1 then
src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS])
end if
tree[i]=graft[1]
--delete_element(tree,i)
--tree=insert_sequence(tree,graft,i)
else
if length(tree[i][TREE_BRANCHES]) then
tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
end if
i+=1
end if
end while
return(tree)
end function
---------------------------------------------------------------------------
--parent is CMD_TEXT of parent node
function sanity_check(NodeList tree, VarList vars, string parent)
string s
sequence kind_and_id
integer kind
atom id
srcpos pos
for i=1 to length(tree) do
s=tree[i][TREE_TRUNK][CMD_TEXT]
pos=tree[i][TREE_TRUNK][CMD_POS]
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if (compare("if",parent)=0) and i=1 then
if kind=KIND_NUMBER then
if id then
src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),pos)
else
src_warn("Condition is always false",pos)
end if
elsif kind=KIND_FLOW then
src_warn(sprintf_utf("Should not use flow control command "&COLYEL&"%s"&COLRED&" as condition for if",{s}),pos)
end if
elsif block_command(parent) then
if kind=KIND_NUMBER then
src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
,{id}),pos)
elsif kind=KIND_GLOBAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),pos)
elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),pos)
elsif kind=KIND_MATH and (id<=15 or id=19 or id>=22) then
-- setvariable, increment, decrement, logand, logor have side effects
src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded"
,{s}),pos)
end if
end if
if kind=KIND_GLOBAL then
if not find(s,used_globals) then
used_globals=append(used_globals,s)
end if
elsif kind=KIND_LOCAL then
if not find(s,used_locals) then
used_locals=append(used_locals,s)
end if
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--simulate a result overflowing a signed 32 bit register
function overflow_int32(atom val)
--maybe check whether val is so large that this is could be inaccurate?
atom ret
ret=mod(val,#100000000)
if ret>2147483647 then
ret-=#100000000
end if
return(ret)
end function
---------------------------------------------------------------------------
function optimized_arg(Node tree, VarList vars)
sequence kind_and_id
integer kind
atom id
object arg1,arg2
kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_NUMBER then
return(id)
elsif kind=KIND_MATH and (id<=15 or id>=19) then
arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
if equal(arg1,{}) then
arg1=tree[TREE_BRANCHES][1]
end if
if length(tree[TREE_BRANCHES])>=2 then
arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
else
arg2=0
end if
if equal(arg2,{}) then
arg2=tree[TREE_BRANCHES][2]
end if
if atom(arg1) and atom(arg2) then
if id=0 then
--random
if arg1=arg2 then
return(arg1)
end if
elsif id=1 then
--exponent
if arg2>0 then
--if too large, Euphoria will signal an overflow, but I'm also worried
--about overflow_int32 giving inaccurate results
if log(abs(arg1)+1)+log(abs(arg2)+1)arg2)
elsif id=14 then
--lessthanorequalto
return(arg1<=arg2)
elsif id=15 then
--greaterthanorequalto
return(arg1>=arg2)
elsif id=19 then
--not
return(arg1=0)
elsif id=22 then
--logxor
return((arg1=0)!=(arg2=0))
elsif id=23 then
--abs
return(overflow_int32(abs(arg1)))
elsif id=24 then
--sign
return((arg1>0)-(arg1<0))
elsif id=25 then
--sqrt
if arg1<0 then
src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
end if
return(floor(sqrt(arg1)+0.5))
end if
end if
--have to allow for side effects of shortcut evaluating operators by only checking first arg
if atom(arg1) then
if id=4 then
--multiply
if arg1=0 then
return 0
elsif arg1=1 then
return arg2
end if
elsif id=6 then
--add
if arg1=0 then
return arg2
end if
elsif id=20 then
--logand
if arg1=0 then
return 0
end if
elsif id=21 then
--logor
if arg1!=0 then
return 1
end if
end if
end if
if atom(arg2) then
if id=4 then
--multiply
if arg2=0 then
return 0
elsif arg2=1 then
return arg1
end if
elsif id=5 then
--subtract
if arg2=0 then
return arg1
end if
elsif id=6 then
--add
if arg2=0 then
return arg1
end if
end if
end if
end if
return({}) --return nonatom
end function
---------------------------------------------------------------------------
--goes through a script simplifying expressions that always have the same value
function optimize_script(NodeList tree, VarList vars)
object arg
for i=1 to length(tree) do
arg=optimized_arg(tree[i],vars)
if atom(arg) then
tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
tree[i][TREE_BRANCHES]={}
elsif sequence(arg) and length(arg) then
tree[i]=arg
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--FIXME: if a local is used only in a subscript it is reported as unused
procedure warn_unused_locals(VarList vars)
if find('u',optlist) then
for i=1 to length(vars) do
if vars[i][VAR_FRAME]=0 and not find(vars[i][CMD_TEXT],used_locals) then
src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
procedure warn_unused_globals()
integer at
if find('u',optlist) then
for i=1 to length(global_list[PAIR_NUM]) do
at=find(global_list[PAIR_NAME][i],used_globals)
if not at then
src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_POS][i])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
--script_idx is all_scripts index
procedure compile_a_script(integer script_idx)
Script script
NodeList script_tree
VarList local_vars
TokenList args
integer num_nonlocals
integer parent_idx
integer trigger_idx
integer trigger
cur_script_num=script_idx
script=all_scripts[script_idx]
trigger_idx=find_in_column(script[SCRIPT_TRIGGER_TOK][CMD_TEXT],trigger_list,PAIR_NAME)
trigger=trigger_list[trigger_idx][PAIR_NUM]
current_script=script[SCRIPT_FULL_NAME]
parent_idx=script[SCRIPT_PARENT_IDX]
reenter_timing_zone("gather_local_vars")
if parent_idx then
local_vars=shift_variables_frame(all_scripts[parent_idx][SCRIPT_VARIABLES])
else
local_vars={}
end if
num_nonlocals=length(local_vars)
script&={
trigger --trigger type
,num_nonlocals
}
all_scripts[script_idx]=script
args=script[SCRIPT_ARG_TOKS] --The arguments are the first several local variables
for i=1 to length(args) do
--Check for name conflicts
local_vars=add_local_variable(local_vars,args[i])
end for
local_vars=gather_local_vars(local_vars,script[SCRIPT_BODY_TOKS])
if length(local_vars)-num_nonlocals > MAX_LOCALS then
simple_error(
sprintf_utf("Script "&COLYEL&"%s"&COLRED&" has %d arguments and variables but at most %d are supported",
{script[SCRIPT_FULL_NAME],length(local_vars)-num_nonlocals,MAX_LOCALS})
)
end if
exit_timing_zone()
used_locals={}
reenter_timing_zone("build_ast")
script_tree=build_ast(script[SCRIPT_BODY_TOKS],local_vars)
exit_timing_zone()
reenter_timing_zone("convert_operators")
script_tree=convert_operators(script_tree)
exit_timing_zone()
reenter_timing_zone("expand_macros")
script_tree=expand_macros(script_tree)
exit_timing_zone()
reenter_timing_zone("normalize_flow_control")
script_tree=normalize_flow_control(script_tree,local_vars,"")
exit_timing_zone()
reenter_timing_zone("normalize_arguments")
script_tree=normalize_arguments(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("collapse_floaty_brackets")
script_tree=collapse_floaty_brackets(script_tree)
exit_timing_zone()
if not fast_mode then
reenter_timing_zone("optimize_script")
script_tree=optimize_script(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("sanity_check")
script_tree=sanity_check(script_tree,local_vars,"")
exit_timing_zone()
warn_unused_locals(local_vars)
end if
script&={
script_tree
,local_vars
,{} --compiled data to go into the HSZ lumps
}
all_scripts[script_idx]=script
reenter_timing_zone("binary_compile")
all_scripts[script_idx][SCRIPT_BINARY]=binary_compile(script)
exit_timing_zone()
current_script=""
cur_script_num=0
end procedure
---------------------------------------------------------------------------
--Parse argument declarations (initially containing default values)
--in a script argument list, returns {arg_names_list, default_vals_list} pair
--where any default values have been removed from the argument names
function process_arglist(TokenList args)
TokenList arglist
sequence defaults
integer i
integer using_defaults
arglist={}
defaults={}
using_defaults=false
i=1
while i<=length(args) do
if compare("=",args[i][CMD_TEXT])=0 then
src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS])
end if
check_undefined_string(args[i],"argument name")
if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then
src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
if ilength(args) then
src_error(sprintf_utf("Syntax error: expected default value for script argument "&COLYEL&"%s"&COLRED,{args[i][CMD_TEXT]}),args[i+1][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults&=try_string_to_number({enforce_constants(args[i+2][CMD_TEXT]),args[i+2][CMD_POS]})
using_defaults=true
i+=3
else
if using_defaults=true then
--any argument with a default must be followed with arguments with defaults
src_error(sprintf_utf("Default value missing for argument "&COLYEL&"%s"&COLRED&": the previous argument had a default value, so all following ones must too",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults=append(defaults,NO_DEFAULT)
i+=1
end if
end while
return({arglist,defaults})
end function
---------------------------------------------------------------------------
--Check that every declared script has been defined, with the same number of arguments,
--and each script appears exactly once. Basically, compare script_list and all_scripts
--Also handle argument default values.
--Before this function, scripts only have IDs (stored in script_list) if they are declared in a definescript.
--Afterwards all scripts have IDs assigned (in all_scripts)
procedure check_script_declarations()
integer at
sequence args_and_defaults
sequence scripts_found
sequence src_position
string fullname
srcpos script_pos
scripts_found=repeat(false,length(script_list))
for i=1 to length(all_scripts) do
fullname=all_scripts[i][SCRIPT_FULL_NAME]
current_script=fullname --global
script_pos=all_scripts[i][SCRIPT_POS]
at=find(fullname,column(script_list,PAIR_NAME))
at=find_in_column(fullname,script_list,PAIR_NAME)
args_and_defaults=process_arglist(all_scripts[i][SCRIPT_ARG_TOKS])
if not at then
--define script optional, add to script list
script_list=create_function(script_list,-1,current_script,args_and_defaults[2],RESERVE_SCRIPT,script_pos)
at=length(script_list)
scripts_found&={true}
else
if scripts_found[at]=true then
simple_error(sprintf("Compiler bug; two scripts named %s but this error not handled properly",{current_script}))
end if
scripts_found[at]=true
--The srcpos stored for this script will be from definescript. Repoint it at the actual script
--(doesn't really matter, all_scripts would have ended up with that srcpos anyway)
script_list[at][FUNC_POS]=script_pos
if length(all_scripts[i][SCRIPT_ARG_TOKS]) != length(script_list[at][FUNC_ARGS]) then
src_error(
sprintf_utf(
"Script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its declaration"
,{current_script,length(all_scripts[i][SCRIPT_ARG_TOKS]),length(script_list[at][FUNC_ARGS])}
)
,script_pos
)
end if
for j=1 to length(script_list[at][FUNC_ARGS]) do
if not equal(args_and_defaults[2][j],NO_DEFAULT) and not equal(args_and_defaults[2][j],script_list[at][FUNC_ARGS][j]) then
src_warn(
sprintf_utf(
"Default value for argument "&COLYEL&"%s"&COLRED&" does not match default given in script definition, %d"
,{all_scripts[i][SCRIPT_ARG_TOKS][j][CMD_TEXT],script_list[at][FUNC_ARGS][j]}
)
,script_pos
)
end if
end for
end if
all_scripts[i][SCRIPT_ARG_TOKS]=args_and_defaults[1]
all_scripts[i]&={script_list[at][PAIR_NUM]} --append SCRIPT_ID
end for
current_script=""
for i=1 to length(scripts_found) do
if scripts_found[i]=false then
src_error(
sprintf_utf("Script "&COLYEL&"%s"&COLRED&" was declared using "&COLYEL&"define script"&COLRED&" but is missing.",{script_list[i][PAIR_NAME]})
,script_list[i][FUNC_POS]
)
end if
end for
end procedure
---------------------------------------------------------------------------
procedure compile_each_script()
sequence count
enter_timing_zone("Compiling scripts")
color_print("compiling scripts",{})
count=repeat(0,length(file_list))
enter_timing_zone("check_script_declarations")
check_script_declarations()
exit_timing_zone()
for i=1 to length(all_scripts) do
-- color_print("%s\n",{all_scripts[i][SCRIPT_NAME]})
reenter_timing_zone("compile_a_script")
compile_a_script(i)
exit_timing_zone()
count[srcpos_file_number(all_scripts[i][SCRIPT_POS])]+=1
color_print(".",{})
end for
color_print("\n",{})
if not fast_mode then
warn_unused_globals()
end if
for i=1 to length(count) do
if count[i] then
wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i][FILE_NAME]})
end if
end for
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
function generate_scripts_dot_txt()
sequence result
integer print_scripts
result=""
print_scripts = 0
if find('a',optlist) then
print_scripts = 2
elsif find('s',optlist) then
print_scripts = 1
end if
for i=1 to length(script_list) do
result&=encode_ohr(sprintf_utf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])}))
for j=1 to length(script_list[i][FUNC_ARGS]) do
if equal(script_list[i][FUNC_ARGS][j],NO_DEFAULT) then
result&="0\r\n"
else
result&=sprintf("%d\r\n",{script_list[i][FUNC_ARGS][j]})
end if
end for
if (print_scripts=1 and script_list[i][PAIR_NUM]2048 then
simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1}))
records=2048
end if
--header size in bytes (offset to location table)
result=output_word(6)
--file format version
result&=output_word(0)
--number of records in location table
result&=output_word(records)
offset=length(result)+records*2
id=0
i=1
encoded_names={}
while i<=length(function_list) do
if function_list[i][PAIR_NUM]0 then
temp=without_extension(fname) --you can't slice a function result directly! :(
fname=sprintf("%s~%d.%s",{temp[1..$-1],suffix,extension_only(fname)})
end if
--color_print("writing %d %s\n",{i,fname})
if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then
simple_error("unable to write a script source lump")
end if
end for
end procedure
---------------------------------------------------------------------------
procedure write_output_file()
sequence header
integer fh
object lh
enter_timing_zone("Writing output file")
header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,HSZ_FORMAT_VERSION,max_used_function})
if length(all_scripts) then
fh=open(dest_file,"wb")
if fh!=-1 then
wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
--write header and version
if write_lump(fh,"HS",header)=false then
simple_error("unable to write header")
end if
--write script index (old file)
if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
simple_error("unable to write script index")
end if
--write script index (new file)
if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then
simple_error("unable to write binary script index")
end if
--write script commands list
if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then
simple_error("unable to write commands listing")
end if
--write each script
for i=1 to length(all_scripts) do
if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][SCRIPT_ID]}),all_scripts[i][SCRIPT_BINARY])=false then
simple_error(sprintf_utf("internal problem: unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][SCRIPT_NAME]}))
end if
end for
if not find('n',optlist) then --no debug info
--write copy of scripts
wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
lh=begin_lump(fh,"source.lumped")
if equal(lh,false) then
simple_error("unable to write scripts source file")
end if
write_script_files(fh)
if end_lump(lh)=false then
simple_error("unable to finish write of scripts source file")
end if
end if
close(fh)
else
simple_error(sprintf_utf("attempt to open "&COLYEL&"%s"&COLRED&" failed",{dest_file}))
end if
else
color_print("no scripts to output\n",{})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
init()
load_and_lex_all()
show_source_info()
preliminary_parse_pass()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()
run_time=time()-start_time
color_print("done (%g seconds)\n",{run_time})
if find('t',optlist) then
print_timing_data()
end if
opt_wait_for_key()
if was_warnings = true then
abort(2)
end if