-- HamsterSpeak Compiler
--(C) Copyright 2002 James Paige and Hamster Republic Productions
-- Please read LICENSE.txt for GPL License details and disclaimer of liability
-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com
-- This code is written in Euphoria 4.0. You can get the Free Open-Source
-- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend
-- David Cuny's EE editor
---------------------------------------------------------------------------
--Changelog
--3Pe 2013-01-03 Faster compile times
--3Pd 2012-10-24 Fix \x escape code
--3Pc 2012-10-09 Change srcpos encoding to remove source size limits
--3Pb 2012-10-08 \x escape code
--3Pa 2012-08-24 --reuse-ids option
--3P 2012-08-18 Change way include is lexed: quotes no longer needed
-- [], {}, . now totally disallowed
-- Add < and > operators
-- Add plotscrversion block and new HS header lump
-- New switch syntax: do after case optional, case(else)
-- Added elseif
--3Of 2012-08-18 Lexer bugfix
--3Oe 2012-04-11 Add \n and \t escape codes
--3Od 2012-03-03 Change directory search order slightly
--3Oc 2012-02-07 Increased MAXGLOBAL to 16383
--3Ob 2012-02-07 Make the max script global number set by a constant
--3Oa 2012-01-20 Fix lexer allowing newlines in strings
--3O 2012-01-19 Unicode support
--3Nd 2011-10-16 Fix commands.bin generation bug
--3Nc 2011-08-28 Additional expression optimisations
--3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow
--3Na 2011-07-25 Add -x option for Hamster Whisper's benefit
--3N 2011-07-12 Add assert
--3Ma 2010-10-21 Correctly strip \r to fix newlines ... again.
--3M 2010-05-02 abs, sign, sqrt; fix error reporting
--3L 2010-04-29 tracevalue support, involving a few internal changes
--3Ke 2010-04-18 Add -t commandline option
--3Kd 2010-04-14 Quite a few bugfixes in optimizer
--3Kc 2010-04-14 Start checking well-formedness of numbers with lexer
--3Kb 2010-04-12 Whole lot of bugfixes in script declarations and
-- checking for unreserved identifier names
--3Ka 2010-04-12 Throw error if source file contains binary
--3K 2010-04-12 Error column reporting (introducing srcpos); lump
-- original source files into source.lumped
--3Ja 2010-04-07 Wrote a (nearly) proper lexer
--3J 2009-12-05 Multiple function names may now alias the same id
--3I 2009-07-27 Export a backup copy of full source to source.txt lump
-- disable with -n option
--3H 2009-06-18 Export function names to commands.bin lump
--3Gk 2009-05-25 Fix short references crash
--3Gj 2009-01-27 Show future-compatability warning when using square
-- brackets [] and curly brackets {} in names
--3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake
--3Gh 2008-07-14 Disable warnings for unused variables by default, and
-- Add -u option to enable warnings for unused variables.
--3Gg 2008-03-01 Raise global variable limits
--3Gf 2008-02-11 Give user another chance on missing Y key :P
--3Ge 2007-07-01 String conversion rewrite to allow more whitespace
--3Gd 2007-07-01 Allow constants for new syntax default args
--3Gc 2007-05-29 Handle empty paratheses correctly, and use
-- lowercase ".hs" extension
--3Gb 2007-05-09 Throw error on missing include file
--3Ga 2007-05-09 String length not limited to 40
--3G 2007-02-09 Offset to string literal tables given as a 32-bit
-- int, script format version++ to 2.
--3F 2006-12-11 Support for variable number of arguments to functions
-- by giving -1 as the number of args in the define block
--3Ec 2006-11-30 while(true) no longer produces a warning
--3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes
--3Ea 2006-09-03 Actually export triggers in scripts.bin
--3E 2006-08-30 "define trigger" support
--3D 2006-08-16 String literal lookup tables added to end of
-- HSZ lumps replacing setstring + appendstring
--3C 2006-08-15 New syntax for giving default script arguments,
-- and for leaving out definescript
--3B 2006-08-13 HSX lumps renamed to HSZ
--3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format
-- version added to HSX header. Current is 1
--2Na 2006-07-07 Minor update to add new logical operators
--2N 2006-07-07 Added not() logic function
--2M 2006-07-06 Set exit code on warnings
--2L 2006-05-13 Added switch statement (+ case keyword)
--2K 2006-05-01 Added @scriptname and @globalvariable syntax to
-- return script or global ID number at compile-time
-- not run-time (for use with "run script by ID" and
-- "read global" and "write global")
--2J 2006-04-10 Added break, continue, exitscript, exitreturning
-- flow statements. Also fixed some return bugs
--2I 2006-04-04 Extended HSX header to include number of arguments
-- to a script, to really fix arguments-overflow-into
-- -locals bug
--2H 2006-03-29 Display better help and wait for keypress when run
-- by double-clicking the icon. Added -k command line
-- option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $=
-- Mention GPL in help text
--2F 2005-07-24 Strings implemented:
-- $id="..." -> setstring
-- $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
-- Added += and -= operators thanks to a patch from
-- The Mad Cacti and Fyrewulff
--2D 2002-08-03 Only a small change, strip out \r from lines of
-- the script as we read them to avoid errors related
-- to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
-- non-printable characters exist in the input file.
-- (naturally the script will not compile right, but
-- at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
-- defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2 First Release
---------------------------------------------------------------------------
without warning --to avoid annoying warnings
without type_check --for a small speed boost
--with profile_time --time profiling
include hsspiffy.e --various routines, sequence manipulation - James Paige
include graphics.e --standard library, needed for color output
include machine.e --needed for int_to_bytes
include std/math.e --needed for mod
include euphoria/unicode.e
include euphoria/ucstypes.e
include euphoria/io_with_unicode.e
---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1
-- increment COMPILER_VERSION only for major language-altering changes
constant COMPILER_VERSION="3"
-- For small changes increment the second letter of COMPILER_SUB_VERSION
-- For large changes increment the first letter and reset the second letter
--*Please make sure this is 2 chararacters long! Append a space if required.*
constant COMPILER_SUB_VERSION="Pe"
constant COPYRIGHT_DATE="2002"
--This is the oldest version of plotscr.hsd which is supported (feel free to
--increment this whenever using an older plotscr.hsd means you're missing out
--on new but nonessential features; there's no harm)
--*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION*
constant MIN_PLOTSCR_VERSION="3P "
--these constants are color-flags. May add more colors, up to #F8FF
--(these codes are at the end of the first private use area)
constant COL_FIRST=#F8E0
constant COLYEL=#F8E0
constant COLRED=#F8E1
constant COLPNK=#F8E2
constant COLWHI=#F8E3
constant COLBWHI=#F8E4
constant COL_LAST=#F8FF
--Euphoria's color constants vary depending on the platform, so we avoid them
constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE}
constant LEX_CHAR=0
constant LEX_SPACE=1
constant LEX_BEGIN=2
constant LEX_END=3
constant LEX_KEYWORD=4
constant LEX_COMMENT=5
constant LEX_STRING=6
constant LEX_COMMA=7
constant LEX_WARN=8
constant LEX_BINARY=9
constant LEX_DIGIT=10
constant LEX_KEYWORD2=11
constant FILE_NAME=1
constant FILE_TEXT=2
constant FILE_LINE_EXTENTS=3
constant POS_FILENAME=1
constant POS_LINE=2
constant POS_COLUMN=3
constant POS_TEXT=4
constant CMD_TEXT=1
constant CMD_POS=2
constant RESERVE_CORE=1
constant RESERVE_UNIMPLEMENTED=2
constant RESERVE_FLOW=3
constant RESERVE_FUNCTION=4
constant RESERVE_SCRIPT=5
constant RESERVE_GLOBAL=6
constant RESERVE_BEGIN=7
constant RESERVE_END=8
constant RESERVE_OPERATOR=9
constant RESERVE_BUILTIN=10
constant RESERVE_CONSTANT=11
constant RESERVE_MACRO=12
constant RESERVE_NAMES={"declaration"
,"unimplemented keyword/operator"
,"flow control"
,"hardcoded function"
,"user script"
,"global variable"
,"bracket "&COLYEL&"("&COLRED
,"bracket "&COLYEL&")"&COLRED
,"operator"
,"builtin command"
,"constant"
,"hardcoded function/macro"
}
constant PAIR_NUM=1
constant PAIR_NAME=2
constant PAIR_START=1
constant PAIR_END=2
constant OPER_TRUENAME=3
constant OPER_POS=4
constant FUNC_ARGS=3
constant FUNC_POS=4
constant GLB_POS=3
constant CONST_VALUE=1
constant CONST_POS=2
constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value
constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args
constant KIND_NUMBER=1
constant KIND_FLOW=2
constant KIND_GLOBAL=3
constant KIND_LOCAL=4
constant KIND_MATH=5
constant KIND_FUNCTION=6
constant KIND_SCRIPT=7
constant KIND_REFERENCE=8 --converted to KIND_NUMBER in compiled script
constant KIND_OPERATOR=9 --never appears in compiled script
constant KIND_PARENS=10 --never appears in compiled script
constant KIND_MACRO=11 --never appears in compiled script
constant KIND_LONGNAMES={"number"
,"flow control statement"
,"global variable"
,"local variable"
,"built-in function"
,"hard-coded function"
,"script"
,"reference"
,"untranslated operator"
,"order-of-operations-enforcing parenthesis"
,"hard-coded function/macro"
}
constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant TIMING_DEPTH=1
constant TIMING_DESCRIPTION=2
constant TIMING_TIME=3
constant TIMING_ACTIVE=4
--Version number for the .hs/.hsp format (not including HSZ files!)
constant HSP_FORMAT_VERSION=1
constant CODE_START_BYTE_OFFSET=12
constant SCRIPT_FORMAT_VERSION=2
constant MAXGLOBAL=16383
---------------------------------------------------------------------------
--globals-- --initializations--
sequence compiler_dir compiler_dir=""
sequence source_file source_file=""
sequence dest_file dest_file=""
sequence optlist optlist={}
sequence file_list file_list={}
integer max_srcpos max_srcpos=0
integer total_lines total_lines=0
sequence cmd cmd={}
sequence reuse_ids reuse_ids={}
sequence constant_list constant_list=alpha_tree_create()
sequence trigger_list trigger_list={}
sequence operator_list operator_list={}
sequence function_list function_list={}
sequence global_list global_list={{},{},{}}
sequence string_list string_list={}
sequence script_list script_list={}
sequence script_cmd script_cmd={}
sequence reserved reserved=alpha_tree_create()
atom start_time start_time=time()
atom run_time run_time=0
sequence timing_data timing_data={}
integer timing_depth timing_depth=-1
integer get_cmd_pointer get_cmd_pointer=0
integer autonumber_id autonumber_id=32767
sequence flow_list flow_list={
{0,"do"}
,{1,"begin"}
,{2,"end"}
,{3,"return"}
,{4,"if"}
,{5,"then"}
,{6,"else"}
,{7,"for"}
,{10,"while"}
,{11,"break"}
,{12,"continue"}
,{13,"exitscript"}
,{14,"exitreturning"}
,{15,"switch"}
,{16,"case"} --never appears in compiled script
}
sequence math_list math_list={
{0,"random",{0,1}}
,{1,"exponent",{0,2}}
,{2,"modulus",{0,1}}
,{3,"divide",{0,1}}
,{4,"multiply",{0,0}}
,{5,"subtract",{0,0}}
,{6,"add",{0,0}}
,{7,"xor",{0,0}}
,{8,"or",{0,0}}
,{9,"and",{0,0}}
,{10,"equal",{0,0}}
,{11,"notequal",{0,0}}
,{12,"lessthan",{0,0}}
,{13,"greaterthan",{0,0}}
,{14,"lessthanorequalto",{0,0}}
,{15,"greaterthanorequalto",{0,0}}
,{16,"setvariable",{0,0}}
,{17,"increment",{0,1}}
,{18,"decrement",{0,1}}
,{19,"not",{0}}
,{20,"logand",{0,0}}
,{21,"logor",{0,0}}
,{22,"logxor",{0,0}}
,{23,"abs",{0}}
,{24,"sign",{0}}
,{25,"sqrt",{0}}
}
--update is_simple_math_op on adding new operators
sequence separator_list separator_list={
"+=","-=","$+","$=","$","+","--","/","*","^^","^"
,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||"
}
sequence lexer_table lexer_table={}
sequence lexer_table_key2 lexer_table_key2={}
sequence hex_chars hex_chars="0123456789abcedfABCDEF"
sequence hex_char_values hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F}
sequence compiled_data compiled_data={} --used inside binary_compile only; compiled data for the current script
sequence all_scripts all_scripts={}
sequence current_script current_script="" --name of current script
integer cur_script_num cur_script_num=0 --index into script_cmd of current script
integer colors_enabled colors_enabled=true
integer simple_colorcodes simple_colorcodes=false
integer error_file error_file=false
sequence used_globals used_globals={}
sequence used_locals used_locals={}
sequence string_table string_table={}
sequence plotscr_version plotscr_version=""
integer max_used_function max_used_function=0 --maximum id of a function used in a script
integer fast_mode fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
integer was_warnings was_warnings=false
---------------------------------------------------------------------------
--types--
--A position in the script source. An integer, encoding an index in file_list
--and a character number that file (where newlines are one character).
--See encode_srcpos and decode_srcpos
type srcpos(atom pos)
--need to allow 0 as a dummy value
if pos=floor(pos) and pos>=0 and pos<=max_srcpos then
return true
else
return false
end if
end type
---------------------------------------------------------------------------
--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
atom skip_time
integer key
skip_time=time()
key=wait_key()
skip_time=time()-skip_time
start_time+=skip_time
for i=1 to length(timing_data) do
if timing_data[i][TIMING_ACTIVE] then
timing_data[i][TIMING_TIME]+=skip_time
end if
end for
return(key)
end function
---------------------------------------------------------------------------
--Euphoria's built-in sprintf truncates all string elements to 8 bits,
--so this is an Unicode-enabled replacement. Supports %d, %s, %g, %%
--Beware! This will be way slower than sprintf, but luckily it's not
--needed anywhere where speed matters.
function sprintf_utf(sequence s,sequence printf_args)
integer seg_start, seg_end
integer arg_ctr
integer code
sequence ret
if length(printf_args)=0 then
return s
end if
ret=""
arg_ctr=1
seg_start=1
seg_end=find('%',s)
while seg_end do
ret&=s[seg_start..seg_end-1]
code=s[seg_end+1]
if code='s' then
ret&=printf_args[arg_ctr]
arg_ctr+=1
elsif code='d' or code='g' then
ret&=sprintf({'%',code},printf_args[arg_ctr])
arg_ctr+=1
elsif code='%' then
ret&='%'
else
simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]}))
end if
seg_start=seg_end+2
seg_end=find_from('%',s,seg_start)
end while
if seg_start<=length(s) then
ret&=s[seg_start..$]
end if
if arg_ctr!=length(printf_args)+1 then
simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1}))
end if
return ret
end function
---------------------------------------------------------------------------
--fprintf as UTF8
procedure print_utf(object fh,sequence s,sequence printf_args)
if length(printf_args) then
s=sprintf_utf(s,printf_args)
end if
s=toUTF(s,utf_32,utf_8)
puts(fh,s)
end procedure
---------------------------------------------------------------------------
--prints a string with printf to stdout converting color codes
procedure color_print(sequence s,sequence printf_args)
sequence buffer
s=sprintf_utf(s,printf_args)
if simple_colorcodes then
buffer=s
else
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
print_utf(stdout,buffer,{})
buffer=""
if colors_enabled then
text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1])
end if
else
buffer&=s[i]
end if
end for
end if
if length(buffer) then
print_utf(stdout,buffer,{})
end if
end procedure
---------------------------------------------------------------------------
procedure opt_wait_for_key()
integer key
if not find('k',optlist) then
color_print("[Press Any Key]\n",{})
key=timeless_wait_key()
end if
end procedure
---------------------------------------------------------------------------
procedure enter_timing_zone(sequence description)
if not find('t',optlist) then
return
end if
timing_depth+=1
timing_data=append(timing_data,{timing_depth,description,-time(),true})
end procedure
---------------------------------------------------------------------------
procedure reenter_timing_zone(sequence description)
if not find('t',optlist) then
return
end if
for i=1 to length(timing_data) do
if equal(timing_data[i][TIMING_DESCRIPTION],description) then
timing_depth+=1
timing_data[i][TIMING_DEPTH]=timing_depth --could change...
timing_data[i][TIMING_TIME]-=time()
timing_data[i][TIMING_ACTIVE]=true
return
end if
end for
enter_timing_zone(description)
end procedure
---------------------------------------------------------------------------
procedure exit_timing_zone()
if not find('t',optlist) then
return
end if
for i=length(timing_data) to 1 by -1 do
--there can only be one active timing job at each depth at a time
if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then
timing_data[i][TIMING_TIME]+=time()
timing_data[i][TIMING_ACTIVE]=false
exit
end if
end for
timing_depth-=1
end procedure
---------------------------------------------------------------------------
procedure print_timing_data()
sequence indent
sequence data
sequence tmp1, tmp2
if run_time=0 then
--so that all percentages show as 0%
run_time=1e100
end if
for i=1 to length(timing_data) do
data=timing_data[i]
indent=repeat(' ',data[TIMING_DEPTH]*4)
tmp1=sprintf("%.2f",{data[TIMING_TIME]})
tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time})
color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%% %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]})
end for
end procedure
---------------------------------------------------------------------------
function html_char_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]=' ' and i>1 then
if s[i-1]=' ' then
buffer&=" "
else
buffer&=s[i]
end if
elsif s[i]='<' then
buffer&="<"
elsif s[i]='>' then
buffer&=">"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
function error_string_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
if s[i]=COLYEL then
buffer&=""
elsif s[i]=COLRED then
buffer&=""
elsif s[i]=COLPNK then
buffer&=""
elsif s[i]=COLWHI then
buffer&=""
elsif s[i]=COLBWHI then
buffer&=""
end if
elsif s[i]='\n' then
buffer&="
\n"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
--prints a long string wrapped at 80 columns
procedure wrap_print(sequence s,sequence arguments)
sequence outstring
s=sprintf_utf(s,arguments)
while length(s) do
outstring=before_wrap_point(s)
s=after_wrap_point(s)
color_print("%s\n",{outstring})
end while
end procedure
---------------------------------------------------------------------------
procedure error_file_print(sequence s)
integer fh
if error_file then
fh=open(compiler_dir&"hs_error.htm","a")
if fh!=failure then
print_utf(fh,error_string_convert(s)&"\n",{})
close(fh)
end if
end if
end procedure
---------------------------------------------------------------------------
--prints out warning message in red with word wrap
procedure simple_warn(sequence s)
sequence pos
if not find('w',optlist) then
--do not warn if -w is set
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("WARNING: %s",{html_char_convert(s)}))
was_warnings = true
end if
end procedure
---------------------------------------------------------------------------
--prints out an error message in red with word wrap, then aborts
procedure simple_error(sequence s)
sequence pos
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("ERROR: %s",{html_char_convert(s)}))
if end_anchor_kludge then
error_file_print("\n
\n")
end if
opt_wait_for_key()
abort(1)
end procedure
---------------------------------------------------------------------------
--prints out the copyright info, usage info, and command-line options
procedure check_arg_count(sequence args)
if length(args)=2 then
wrap_print("HamsterSpeak semicompiler v%s%s (C)%s James Paige&Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE})
wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{})
wrap_print(COLYEL&"%s [-acdfknstwuyz] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))})
color_print(" -f fast mode. Disables some optimization\n",{})
color_print(" -k do not wait for a keypress when finished\n",{})
color_print(" -n don't add debug info, or a copy of the source code, to .hs file\n",{})
color_print(" -w suppress minor warnings\n",{})
color_print(" -u show warnings for unused variables\n",{})
color_print(" -y overwrite the destination file without asking\n",{})
color_print("\nAdvanced options (not intended for normal use):\n",{})
color_print(" -c colors will be disabled\n",{})
color_print(" -d dump debug report to hs_debug.txt\n",{})
color_print(" -s print the name of each script as it is compiled\n",{})
color_print(" -a same as -s, but including autonumbered scripts\n",{})
color_print(" -t print detailed timing info (for HSpeak developers)\n",{})
color_print(" -z write error messages to hs_error.htm\n",{})
--Undocumented arguments:
-- --unicode-cols
-- used by Hamster Whisper for piped output
-- --reuse-ids scripts.bin
-- Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping
wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
wrap_print("\nThis is a command-line program. You should either run it from the command-line or you should drag and drop your script file onto it.\n",{})
opt_wait_for_key()
abort(0)
end if
end procedure
---------------------------------------------------------------------------
--Build the lexer tables, which for each code point 0<=i<256, contains the type
--of that character in entry i+1 (have to handle NUL)
--Other characters are always LEX_CHAR
procedure init_lexer()
sequence table_insert
--main table
table_insert={" (),\n#\t\"[]{}.",
{LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_COMMA,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN}}
--by default each character is either binary, or an allowed identifier
--character. This is for backwards compatibility
lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1)
for i=1 to length(table_insert[1]) do
lexer_table[table_insert[1][i]+1]=table_insert[2][i]
end for
for i=0 to 9 do
lexer_table['0'+i+1]=LEX_DIGIT
end for
--add the one-character prefix of each operator
for i=1 to length(separator_list) do
lexer_table[separator_list[i][1]+1]=LEX_KEYWORD
end for
--keyword suffix parsing table
lexer_table_key2=repeat(LEX_CHAR,256)
lexer_table_key2[' '+1]=LEX_SPACE
lexer_table_key2['\t'+1]=LEX_SPACE
--2nd character of each operator
for i=1 to length(separator_list) do
if length(separator_list[i])=2 then
lexer_table_key2[separator_list[i][2]+1]=LEX_KEYWORD2
end if
end for
--when floating point is added will want a table for number parsing
end procedure
---------------------------------------------------------------------------
--initializes global variables, and generally gets things ready to roll
procedure init()
sequence args
integer index
integer key
integer fh
args=command_line()
compiler_dir=path_only(args[2])
check_arg_count(args)
optlist={}
index=3
while index<=length(args) do
if equal(args[index],"--unicode-cols") then
simple_colorcodes=true
args=delete_element(args,index)
elsif equal(args[index],"--reuse-ids") then
color_print("Reusing script ID numbers from previous compile\n",{})
reuse_ids=read_scripts_dot_bin(args[index+1])
args=delete_slice(args,index,index+1)
elsif args[index][1]='-' then
optlist=optlist&hs_lower(args[index][2..length(args[index])])
args=delete_element(args,index)
else
index+=1
end if
end while
check_arg_count(args)
source_file=normalize_filename(args[3])
if length(args)>3 then
dest_file=normalize_filename(args[4])
else
dest_file=normalize_filename(alter_extension(source_file,"hs"))
end if
if find('f',optlist) then
fast_mode=true
color_print("Using fast mode. Some size optimization disabled\n",{})
end if
if find('c',optlist) then
colors_enabled=false
end if
--'x' used by old versions of Hamster Whisper to specify color code format
--However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway
--Replaced by --unicode-cols
--the semi-undocumented command line argument -z writes a file called
--hs_error.htm formatted for HssEd to read
if find('z',optlist) then
error_file=true
if file_exists(compiler_dir&"hs_error.htm") then
fh=open(compiler_dir&"hs_error.htm","w")
if fh!=failure then
puts(fh,"")
close(fh)
end if
end if
end if
wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
if file_exists(dest_file) then
if find('y',optlist) then
--found the -y command line arg, overwrite automatically
key='y'
else
while true do
--prompt the user to overwrite
wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
key=timeless_wait_key()
color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})
if hs_lower(key)='y' then
exit
elsif hs_lower(key)='n' then
simple_error("output file overwrite cancelled by user")
else
wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)})
end if
end while
end if
end if
--why the alpha-tree? because the reserved-word list can get HUGE.
--we want to be able to look up words in it quickly. A btree or some such
--thing would have been even better, but thats alot of trouble :)
--FIXME: variable is missing! It doesn't fit in any of the categories
reserved=alpha_tree_mass_insert(reserved,{
{"defineconstant",RESERVE_CORE}
,{"definetrigger" ,RESERVE_CORE}
,{"defineoperator",RESERVE_CORE}
,{"globalvariable",RESERVE_CORE}
,{"definefunction",RESERVE_CORE}
,{"definescript" ,RESERVE_CORE}
,{"plotscrversion",RESERVE_CORE}
,{"include" ,RESERVE_CORE} --should never appear, in theory
,{"do" ,RESERVE_FLOW}
,{"begin" ,RESERVE_BEGIN}
,{"end" ,RESERVE_END}
,{"return" ,RESERVE_FLOW}
,{"if" ,RESERVE_FLOW}
,{"then" ,RESERVE_FLOW}
,{"else" ,RESERVE_FLOW}
,{"elseif" ,RESERVE_FLOW}
,{"for" ,RESERVE_FLOW}
,{"cfor" ,RESERVE_UNIMPLEMENTED}
,{"foreach" ,RESERVE_UNIMPLEMENTED}
,{"while" ,RESERVE_FLOW}
,{"break" ,RESERVE_FLOW}
,{"continue" ,RESERVE_FLOW}
,{"exitscript" ,RESERVE_FLOW}
,{"exitreturning" ,RESERVE_FLOW}
,{"switch" ,RESERVE_FLOW}
,{"case" ,RESERVE_FLOW}
,{"=" ,RESERVE_UNIMPLEMENTED}
,{"tracevalue" ,RESERVE_MACRO}
,{"assert" ,RESERVE_MACRO}
})--end mass_insert
for i=1 to length(math_list) do
reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN)
end for
init_lexer()
end procedure
---------------------------------------------------------------------------
--returns {filenumber,file position} pair
function decode_srcpos_file(srcpos pos)
integer val
val=pos
--0 is an invalid srcpos
if val=0 then
simple_error("compiler bug: can't decode uninitialised srcpos")
end if
val-=1
for i=1 to length(file_list) do
--an offset of 0 is reserved, but currently unused
if val=0 then
simple_error("compiler bug: 0 srcpos file offset")
end if
if val<=length(file_list[i][FILE_TEXT]) then
return {i,val}
end if
val-=length(file_list[i][FILE_TEXT])+1
end for
simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos}))
end function
---------------------------------------------------------------------------
function srcpos_file_number(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[1]
end function
---------------------------------------------------------------------------
--Returns 1-based character number in file
function srcpos_point(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[2]
end function
---------------------------------------------------------------------------
--Encode offset (1-based characters from start of file) in a file as a srcpos
function encode_srcpos(integer filenum, integer offset)
srcpos pos
pos=1
for i=1 to filenum-1 do
pos+=length(file_list[i][FILE_TEXT])+1
end for
return pos+offset
end function
---------------------------------------------------------------------------
--possibly slow: prehaps avoid heavy use
--returns {file name, line number, column number, line text}: index with POS_*
--column number returned is 0-based
function decode_srcpos(srcpos pos)
integer point, filenum
integer lineno
sequence filetext
sequence lines
sequence temp
temp=decode_srcpos_file(pos)
filenum=temp[1]
point=temp[2]
filetext=file_list[filenum][FILE_TEXT]
lines=file_list[filenum][FILE_LINE_EXTENTS]
lineno=length(lines)
for i=1 to length(lines) do
if point<=lines[i][PAIR_END] then
lineno=i
exit
end if
end for
return({file_list[filenum][FILE_NAME],lineno,point-lines[lineno][PAIR_START],filetext[lines[lineno][PAIR_START]..lines[lineno][PAIR_END]]})
end function
---------------------------------------------------------------------------
function form_error_text(sequence s,srcpos pos)
sequence line
sequence src_position
sequence column_display
integer tab_compensate
src_position=decode_srcpos(pos)
--?pos
--pretty_print(1,src_position,{2})
line=substring_replace(src_position[POS_TEXT],"\t"," ")
tab_compensate=3*count('\t',src_position[POS_TEXT][1..src_position[POS_COLUMN]])
column_display=repeat(' ',src_position[POS_COLUMN]+tab_compensate)
error_file_print(sprintf_utf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]}))
if length(current_script) then
return(
sprintf_utf(
"in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
else
return(
sprintf_utf(
"in line %d of "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
end if
--note that the calling procedure must send/deal with closing tags to error_file_print
end function
---------------------------------------------------------------------------
procedure src_warn(sequence s,srcpos pos)
if not find('w',optlist) then
--do not warn if -w is set
simple_warn(form_error_text(s,pos))
error_file_print("\n
\n")
end if
end procedure
---------------------------------------------------------------------------
procedure src_error(sequence s,srcpos pos)
end_anchor_kludge=true
simple_error(form_error_text(s,pos))
end procedure
---------------------------------------------------------------------------
procedure load_source(sequence filename,sequence reading_how,srcpos pos)
integer fh
integer line_start, line_end
sequence file_text
sequence line_extents -- {start of line, end of line} pairs
fh=open(filename,"r")
if fh!=failure then
wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})
line_extents={}
reenter_timing_zone("read_file")
--Autodetect the encoding
file_text=read_file(fh, TEXT_MODE, UTF)
exit_timing_zone()
close(fh)
--read_file already converts lineendings, so this usually won't be needed
file_text=exclude(file_text,"\r")
line_start=1
line_end=find('\n',file_text)
while line_end do
line_extents=append(line_extents,{line_start,line_end-1})
line_start=line_end+1
line_end=find_from('\n',file_text,line_start)
end while
if line_start<=length(file_text) then
line_extents=append(line_extents,{line_start,length(file_text)})
end if
total_lines+=length(line_extents)
file_list=append(file_list,{filename,file_text,line_extents})
max_srcpos+=length(file_text)+1
else
if pos then
src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos)
else
simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}))
end if
end if
end procedure
---------------------------------------------------------------------------
procedure load_include(sequence include_name,srcpos pos)
reenter_timing_zone("load_source/include")
--try source directory
if file_exists(path_only(source_file)&include_name) then
load_source(normalize_filename(path_only(source_file)&include_name),"including",pos)
else
--try current directory
if file_exists(include_name) then
load_source(include_name,"including",pos)
else
--try compiler_directory
if file_exists(compiler_dir&include_name) then
load_source(normalize_filename(compiler_dir&include_name),"including",pos)
else
--give up
load_source(include_name,"including",pos)
end if
end if
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure show_source_info()
if total_lines then
wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
else
simple_error("no data to compile\n")
end if
end procedure
---------------------------------------------------------------------------
procedure lexer_binary_error(srcpos pos)
--better use src_error, to track down garbage if accidentally creeping in
src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos)
end procedure
---------------------------------------------------------------------------
--reads and processes the escape codes in a string and returns it enclosed in quote marks
function lexer_read_string(sequence s,integer i,integer stop,srcpos pos)
sequence string
integer escaping
integer at1, at2
escaping=false
string="\""
while true do
if i>stop then --did not find a closing " so not a valid string
src_error("Expected \" at end of line to end string (multi-line strings are not supported)",pos+stop)
end if
if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then
lexer_binary_error(pos+i)
end if
if escaping then
if s[i]='"' or s[i]='\\' or s[i]='\'' then
--including ' for closer compatibility with Python
string&=s[i]
elsif s[i]='n' then
string&=10
elsif s[i]='t' then
string&=9
elsif s[i]='x' then
--escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes)
at1=0
if i+2<=stop then
at1=find(s[i+1],hex_chars)
at2=find(s[i+2],hex_chars)
end if
if at1=0 or at2=0 then
src_error("Expected two hexidecimal characters to follow \\x escape code",pos+i)
end if
string&={16*hex_char_values[at1]+hex_char_values[at2]}
i+=2
else
--invalid sequence
string&='\\'
string&=s[i]
end if
escaping=false
else
if s[i]='"' then
return({i,string&'"'})
elsif s[i]='\\' then
escaping=true
else
string&=s[i]
end if
end if
i+=1
end while
end function
---------------------------------------------------------------------------
function lexer_read_number(sequence s,integer i,integer stop,srcpos pos)
integer state
atom val
integer sign
sequence text
integer starti
starti=i
if s[i]='-' then
text="-"
i+=1
sign=-1
else
text=""
sign=1
end if
val=0
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR
end if
--color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state})
if state=LEX_DIGIT then
text&=s[i]
val=val*10+s[i]-'0'
elsif state=LEX_SPACE then --yes, allowed in numbers too
elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_KEYWORD or state=LEX_COMMENT or state=LEX_BINARY then
--backtrack one character
--known bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead
i-=1
exit
else --currently, state=LEX_CHAR or state=LEX_WARN or state=LEX_STRING
if s[i]='.' then
src_error("Floating point values are not yet implemented",pos+i)
end if
src_error(
sprintf_utf("Expected "&COLYEL&"%s"&COLRED&" to be followed by "&COLYEL&","&COLRED&" or "&COLYEL&"("&COLRED&" or "&COLYEL&")"&COLRED&" or an operator"
,{text})
,pos+i
)
end if
i+=1
end while
if not int32(sign*val) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32 bit signed integer. Integers must be between -2147483648 and 2147483647",{text}),pos+starti)
end if
return({i,text})
end function
---------------------------------------------------------------------------
--Clean the rest of a line for seek_include after 'include' occurs
function lexer_read_include_line(sequence tokens, sequence s, integer i, srcpos pos)
integer at
sequence temp
if not equal(tokens[1][CMD_TEXT],"include") then
src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS])
end if
i+=1
s=s[i..$]
at=find('\n',s)
if at then
s=s[1..at-1]
end if
at=find('#',s)
if at then
s=s[1..at-1]
end if
if length(s) then
while s[1]=' ' or s[1]='\t' do
s=s[2..length(s)]
i+=1
end while
s=trim_whitespace(s)
if s[1]='"' then
temp=lexer_read_string(s,2,length(s),pos+i-1)
s=temp[2]
--so we ignore everything after the string.... ugh
end if
tokens=append(tokens,{s,pos+i-1})
end if
if length(tokens)=1 then
src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS])
end if
if length(tokens)>2 then
src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS])
end if
return(tokens)
end function
---------------------------------------------------------------------------
-- s is the text for the whole file, the interval i to stop is a single line,
-- and pos is the srcpos for the beginning of the file
function lex_line(sequence s,integer i,integer stop,srcpos pos)
integer state
sequence tokens
sequence ident --identifier text being built up
sequence temp
sequence masked
integer oldi
integer found
integer textstart --the column at which this identifier starts, otherwise 0
sequence remem_ident
integer remem_textstart
integer nonnumeral --whether we are definitely inside an identifier, ie. a digit doesn't indicate a number
integer backtracked
ident=""
nonnumeral=false
textstart=0
tokens={}
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR
end if
--color_print("lexing i=%d %d=%s state %d nonnum=%d ident=%s\n",{i,s[i],{s[i]},state,nonnumeral,ident})
if state=LEX_CHAR then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
elsif state=LEX_SPACE then
elsif state=LEX_DIGIT then
if nonnumeral then
ident=ident & s[i]
else
if length(ident) then --either ident is "-" or it is ""
i=textstart
ident=""
textstart=0
end if
temp=lexer_read_number(s,i,stop,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
--nonnumeral remains false
end if
elsif state=LEX_WARN then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), pos+i)
else
remem_ident=ident --for backtracking
remem_textstart=textstart
backtracked=false
if length(ident) then
tokens=append(tokens,{ident,pos+textstart})
end if
ident=""
nonnumeral=false
textstart=0
if state=LEX_BEGIN then
tokens=append(tokens,{"begin",pos+i})
elsif state=LEX_END then
tokens=append(tokens,{"end",pos+i})
elsif state=LEX_COMMA then
elsif state=LEX_COMMENT then
exit
elsif state=LEX_STRING then
temp=lexer_read_string(s,i+1,stop,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
elsif state=LEX_KEYWORD then
--the following mess is in order to allow whitespace in the middle of separators
textstart=i
masked=s[i..i]
--we only support length 1 and 2 separators
while i length(s)
found=false
for j=1 to length(separator_list) do
if match(separator_list[j],masked)=1 then
tokens=append(tokens,{separator_list[j],pos+textstart})
if length(separator_list[j])=1 then
i=textstart
end if
found=true
exit
end if
end for
if found=false then
--need to backtrack, in particular for -
--color_print("backtracking on '%s'\n", {masked})
backtracked=true
ident=remem_ident & masked[1]
oldi=i+1
i=textstart
if length(remem_ident) then
tokens=tokens[1..$-1]
textstart=remem_textstart
nonnumeral=true
elsif equal(ident,"-") then
--nonnumeral remains false: we've seen exactly "-"
else
--a keyword character followed by garbage
src_error(
sprintf_utf("Expected to see the rest of an operator (like "&COLYEL&":="&COLRED&") but found "&COLYEL&"%s"&COLRED&" instead",{{s[oldi]}}),
pos+oldi)
end if
else
textstart=0
end if
elsif state=LEX_BINARY then
lexer_binary_error(pos+i)
end if
if backtracked=false then
if equal(remem_ident,"include") then
--include is a special case, as normal lexing rules don't apply for rest of line
if length(ident) then
simple_error("compiler bug!\n")
end if
return lexer_read_include_line(tokens,s,i,pos)
end if
end if
end if
i+=1
end while
if length(ident) then
--ought to check whether ident is 'include'...
tokens=append(tokens,{ident,pos+textstart})
end if
return(tokens)
end function
---------------------------------------------------------------------------
--this also checks for rogue strings and $'s
function translate_plotstrings(sequence tokens)
integer state
integer start_token
integer i
sequence string_func
sequence string_token
srcpos pos
state=0
i=1
while i<=length(tokens) do
if compare(tokens[i][CMD_TEXT],"$")=0 then
pos=tokens[i][CMD_POS]
if state=0 then
state=1
start_token=i
else
--jump to error throw
state=1
exit
end if
end if
if state=2 then
if tokens[i][CMD_TEXT][1]='"' then
string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1])
string_token={sprintf("@$string%d",{length(string_list)}),pos}
tokens=tokens[1..start_token-1]&{string_func,{"begin",pos}}&tokens[start_token+1..i-2]&{string_token,{"end",pos}}&tokens[i+1..$]
state=0 --may have multiple strings on one line
else --we saw a + (or illegal =) inside the string number expression
state=1
end if
end if
if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then
string_func={"setstringfromtable",pos}
state=2
elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then
string_func={"appendstringfromtable",pos}
state=2
end if
i+=1
end while
if state!=0 then
--we've seen a surplus $, not right
src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos)
end if
return(tokens)
end function
---------------------------------------------------------------------------
--Given a command tree, get the original text from the file
--OK, this function is horribly complicated just so tracevalue can print
--"hero X (me)" instead of "herox(me)". It could be implemented much simpler if
--macros were expanded in the lexer, but then compile_commands and
--convert_macros would have to be as well
function tree_original_text(sequence tree)
sequence script_toks
sequence subtree
srcpos leftmost, rightmost
integer left_at, right_at
integer filenum
integer left_point, right_point
integer depth
sequence tokens
sequence text
script_toks=script_cmd[cur_script_num][4]
--find leftmost and rightmost tokens making up the expression, not including brackets
leftmost=tree[TREE_TRUNK][CMD_POS]
subtree=tree
while length(subtree[TREE_BRANCHES]) do
subtree=subtree[TREE_BRANCHES][1]
if subtree[TREE_TRUNK][CMD_POS]32767 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos)
n=32767
elsif n<-32768 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos)
n=-32768
end if
return(n)
end function
---------------------------------------------------------------------------
function try_string_to_number(sequence s)
atom result
result=floor(string_to_object(s[CMD_TEXT],0))
if not string_is_int32(s[CMD_TEXT]) then
src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
end if
return(result)
end function
---------------------------------------------------------------------------
function enforce_constants(sequence s)
--enforces both constants
object v
v=alpha_tree_data(constant_list,s,{})
if length(v) then
v=v[CONST_VALUE]
if int32(v) then
return(sprintf("%d",{v}))
end if
end if
return(s)
end function
---------------------------------------------------------------------------
function get_cmd()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_no_constants()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=result[CMD_TEXT]
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_block(integer convert_constants)
sequence this
sequence result
result={}
this=get_cmd()
if compare("begin",this[CMD_TEXT])!=0 then
src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS])
end if
while true do
if convert_constants then
this=get_cmd()
else
this=get_cmd_no_constants()
end if
if compare("begin",this[CMD_TEXT])=0 then
src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS])
elsif compare("end",this[CMD_TEXT])=0 then
exit--break out of the while
else
result=append(result,this)
end if
end while
return(result)
end function
---------------------------------------------------------------------------
procedure parse_constant_block(sequence block)
atom num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_constant(block[i+1],num)
name=block[i+1][CMD_TEXT]
constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_POS]})
reserved=alpha_tree_insert(reserved,name,RESERVE_CONSTANT)
end for
end procedure
---------------------------------------------------------------------------
procedure parse_trigger_block(sequence block)
integer num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name but script trigger definition block ended",block[i][CMD_POS])
end if
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
check_undefined_string(block[i+1],"script trigger name")
name=block[i+1][CMD_TEXT]
trigger_list=append(trigger_list,{num,name})
reserved=alpha_tree_insert(reserved,name,RESERVE_CORE)
end for
end procedure
---------------------------------------------------------------------------
procedure parse_version_block(sequence block)
sequence req_version
if length(block)<2 then
--Don't have a srcpos
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)")
end if
plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time)
req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd
--trim quote marks
if plotscr_version[1]!='"' or req_version[1]!='"' then
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')")
end if
plotscr_version=plotscr_version[2..$-1]
req_version=req_version[2..$-1]
if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the latest version.")
end if
--printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION})
if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the latest version of HSpeak.")
end if
end procedure
---------------------------------------------------------------------------
procedure create_global(integer id,sequence name,srcpos pos)
integer at
at=find(id,global_list[PAIR_NUM])
if at then
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos)
else
if id>=0 and id<=MAXGLOBAL then
global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
global_list[GLB_POS]=append(global_list[GLB_POS],pos)
reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL)
else
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure parse_global_block(sequence block)
integer num
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but globalvariable block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
check_undefined_string(block[i+1],"global variable name")
create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS])
end for
end procedure
---------------------------------------------------------------------------
procedure parse_operator_block(sequence block)
integer num
sequence name,true
for i=1 to length(block) by 3 do
if i+2>length(block) then
src_error("expected name but defineoperator block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
mustnt_be_a_number(block[i+1])
mustnt_be_a_number(block[i+2])
name=block[i+1][CMD_TEXT]
true=block[i+2][CMD_TEXT]
operator_list=append(operator_list,{num,name,true,block[i+2][CMD_POS]})
reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR)
end for
end procedure
---------------------------------------------------------------------------
--arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to
--VAR_ARGS indicating a variable (unlimited) number of arguments.
function create_function(sequence list,integer id,sequence name,object arglist,integer func_type,srcpos pos)
integer at
sequence encoded
if func_type=RESERVE_SCRIPT then
at=find(id,column(list,PAIR_NUM))
if at then
src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),pos)
end if
if id=0 then
src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos)
elsif id<0 then
if length(reuse_ids) then
encoded=encode_ohr(name)
if length(encoded)>36 then
encoded=encoded[1..36]
end if
at=find_in_column(encoded,reuse_ids,PAIR_NAME)
if at then
id=reuse_ids[at][PAIR_NUM]
end if
end if
if id<0 then
id=autonumber_id
loop do
autonumber_id-=1
id=autonumber_id
until find_in_column(id,reuse_ids,PAIR_NUM)=0
end loop
end if
end if
end if
list=append(list,{id,name,arglist,pos})
reserved=alpha_tree_insert(reserved,name,func_type)
return(list)
end function
---------------------------------------------------------------------------
function parse_define_block(sequence block,sequence list,integer func_type)
integer num
sequence name
integer args
sequence arglist
srcpos name_pos
integer i
i=1
while i<=length(block) do
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
if i+1>length(block) then
src_error(sprintf_utf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_POS])
else
i+=1
check_undefined_string(block[i],RESERVE_NAMES[func_type]&" name")
name=block[i][CMD_TEXT]
name_pos=block[i][CMD_POS]
if i+1>length(block) then
src_error("expected argument count but define block ended",block[i][CMD_POS])
else
i+=1
args=try_string_to_number(block[i])
arglist={}
if args<0 then
list=create_function(list,num,name,VAR_ARGS,func_type,name_pos)
else
for j=1 to args do
if i+1>length(block) then
src_error("expected argument default but define block ended",block[i][CMD_POS])
else
i+=1
arglist=append(arglist,try_string_to_number(block[i]))
end if
end for
list=create_function(list,num,name,arglist,func_type,name_pos)
end if
i+=1
end if
end if
end while
return(list)
end function
---------------------------------------------------------------------------
procedure primary_parse_pass()
sequence this
enter_timing_zone("Preliminary pass")
color_print("preliminary pass\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if equal("defineconstant",this[CMD_TEXT]) then
parse_constant_block(get_cmd_block(false))
elsif equal("definetrigger",this[CMD_TEXT]) then
parse_trigger_block(get_cmd_block(false))
elsif equal("plotscrversion",this[CMD_TEXT]) then
parse_version_block(get_cmd_block(false))
end if
end while
if equal(plotscr_version,"") then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the latest version.")
end if
if find_in_column("script",trigger_list,PAIR_NUM)=0 then
trigger_list=append(trigger_list,{0,"script"})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure parse_script(sequence trigger)
sequence name
sequence arglist
sequence s
sequence this
sequence err_string
integer depth
integer temp
name=get_cmd()
if not find_in_column(name[CMD_TEXT],script_list,PAIR_NAME) then
--Doesn't appear in a definescript block; if it did the name is already reserved
--Note that we won't add it to script_list yet; that happens later in check_script_declarations
check_undefined_string(name,"user script name")
end if
current_script=name[CMD_TEXT]
arglist={}
while true do
if get_cmd_pointer>length(cmd) then
src_error(sprintf_utf("script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}),name[CMD_POS])
end if
this=get_cmd()
if compare("begin",this[CMD_TEXT])=0 then
exit--break the while
end if
--Note that arglist isn't the actual list of arguments, it's just a string of tokens right now.
--The actual arglist is built in process_arglist, while compiling each script.
arglist=append(arglist,this)
end while
--every script is nested inside a big fat do() block
s={{"do",this[CMD_POS]}}
depth=0
while true do
s=append(s,this)
if compare("end",this[CMD_TEXT])=0 then
depth-=1
if depth=0 then
exit--break while
end if
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
else
temp=alpha_tree_data(reserved,this[CMD_TEXT],3)
if temp<=RESERVE_UNIMPLEMENTED then
err_string=sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT]}
)
if temp=RESERVE_CORE then
err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]})
end if
src_error(err_string,this[CMD_POS])
end if
end if
if get_cmd_pointer>length(cmd) then
src_error(
sprintf_utf(
"script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
,{name[CMD_TEXT]}
)
,name[CMD_POS]
)
end if
this=get_cmd()
end while
script_cmd=append(script_cmd,{trigger,name,arglist,s})
current_script=""
end procedure
---------------------------------------------------------------------------
procedure parse_top_level()
sequence this
sequence ignore
sequence triggers
enter_timing_zone("Top level pass")
color_print("parsing top-level\n",{})
triggers=column(trigger_list,PAIR_NAME)
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if compare("defineconstant",this[CMD_TEXT])=0 then
ignore=get_cmd_block(true)
elsif compare("definetrigger",this[CMD_TEXT])=0 then
ignore=get_cmd_block(true)
elsif compare("globalvariable",this[CMD_TEXT])=0 then
parse_global_block(get_cmd_block(true))
elsif compare("defineoperator",this[CMD_TEXT])=0 then
parse_operator_block(get_cmd_block(true))
elsif compare("definefunction",this[CMD_TEXT])=0 then
function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION)
elsif compare("definescript",this[CMD_TEXT])=0 then
script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT)
elsif compare("plotscrversion",this[CMD_TEXT])=0 then
--ignore
get_cmd_block(false)
elsif find(this[CMD_TEXT],triggers)>0 then
parse_script(this)
else
check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration")
src_error(
sprintf_utf(
"Expected top-level declaration but found "&COLYEL&"%s"&COLRED
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
end if
end while
cmd={}
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure dump_script_and_function_info(integer fh,sequence list)
sequence this
sequence id_string
sequence src_position
for i=1 to length(list) do
this=list[i]
if this[PAIR_NUM]>autonumber_id then
id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]})
else
id_string=sprintf("ID=%d",{this[PAIR_NUM]})
end if
src_position=decode_srcpos(this[FUNC_POS])
print_utf(fh,"%s %d\t%s\t%s(",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,id_string
,this[PAIR_NAME]
})
if equal(this[FUNC_ARGS],VAR_ARGS) then
print_utf(fh,"VARIABLE ARGS",{})
else
for j=1 to length(this[FUNC_ARGS]) do
if j>1 then
print_utf(fh,",",{})
end if
if equal(this[FUNC_ARGS][j],NO_DEFAULT) then
print_utf(fh,"NONE",{})
else
print_utf(fh,"%d",{this[FUNC_ARGS][j]})
end if
end for
end if
print_utf(fh,")\n",{})
end for
end procedure
---------------------------------------------------------------------------
function seek_string_by_id(integer id,sequence list,sequence name)
integer at
at=find_in_column(id,list,PAIR_NUM)
if at then
return(list[at][PAIR_NAME])
else
simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
end if
return("")
end function
---------------------------------------------------------------------------
function name_lookup(sequence pair)
integer at
if pair[1]=KIND_NUMBER then
return(sprintf("%d",{pair[2]}))
elsif pair[1]=KIND_LOCAL then
return(sprintf("local%d",{pair[2]}))
elsif pair[1]=KIND_GLOBAL then
at=find(pair[2],global_list[PAIR_NUM])
if at then
return(global_list[PAIR_NAME][at])
else
simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
end if
elsif pair[1]=KIND_FLOW then
return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
elsif pair[1]=KIND_SCRIPT then
return(seek_string_by_id(pair[2],script_list,"user script"))
elsif pair[1]=KIND_FUNCTION then
return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
elsif pair[1]=KIND_MATH then
return(seek_string_by_id(pair[2],math_list,"built-in function"))
else
simple_error(sprintf_utf("decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
end if
end function
---------------------------------------------------------------------------
function binstring_to_int(sequence encoded)
--bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles
atom temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000
if and_bits(temp,#80000000) then
return(temp-#100000000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function read_word(sequence encoded)
--opposite of output_word
integer temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100
if and_bits(temp,#8000) then
return(temp-#10000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function dump_script_binary(sequence bin,integer offset,integer depth)
sequence result
sequence kind_and_id
integer kind
integer argcount
integer new_offset
result=""
kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])}
kind=kind_and_id[1]
result&=sprintf_utf("%s%s",{
repeat(' ',depth) --indent
,name_lookup(kind_and_id)
})
if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11])
if argcount then
result&="(\n"
for i=0 to argcount-1 do
new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3])
result&=dump_script_binary(bin,new_offset,depth+2)
end for
result&=repeat(' ',depth)&")\n"
else
result&="()\n"
end if
else
result&="\n"
end if
return(result)
end function
---------------------------------------------------------------------------
function dump_script_tree(sequence tree,integer depth)
sequence result
result=""
for i=1 to length(tree) do
result&=sprintf_utf("%s%s",{
repeat(' ',depth)--indent
,tree[i][TREE_TRUNK][CMD_TEXT]
})
if length(tree[i][TREE_BRANCHES])>0 then
result&="(\n"
result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
result&=repeat(' ',depth)&")\n"
else
result&="\n"
end if
end for
return(result)
end function
---------------------------------------------------------------------------
function dump_script_strings(sequence bin)
sequence result
integer table_start
integer offset
integer len
result=""
table_start=binstring_to_int(bin[9..12])
if table_start=0 then
return("")
end if
bin=bin[table_start+1..$]
offset=1
while offset1 then
debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
else
debug_file="hs_debug.txt"
end if
fh=open(debug_file,"w")
if fh!=failure then
wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
-------------------------------------
print_utf(fh,"[Scripts]\n",{})
dump_script_and_function_info(fh,script_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Global Variables]\n",{})
for i=1 to length(global_list[PAIR_NUM]) do
src_position=decode_srcpos(global_list[GLB_POS][i])
print_utf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,global_list[PAIR_NUM][i]
,global_list[PAIR_NAME][i]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Builtin Functions]\n",{})
dump_script_and_function_info(fh,function_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Operators]\n",{})
for i=1 to length(operator_list) do
src_position=decode_srcpos(operator_list[i][OPER_POS])
print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,operator_list[i][PAIR_NAME]
,operator_list[i][OPER_TRUENAME]
,operator_list[i][PAIR_NUM]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Script Dumps]\n",{})
for i=1 to length(all_scripts) do
src_position=decode_srcpos(all_scripts[i][4])
print_utf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][1]
,all_scripts[i][3]
})
src_position=decode_srcpos(script_cmd[i][1][CMD_POS])
print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][2]
,script_cmd[i][1][CMD_TEXT]
})
for j=1 to length(all_scripts[i][6]) do
src_position=decode_srcpos(all_scripts[i][6][j][CMD_POS])
print_utf(fh,"%s %d\tvar=%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][6][j][CMD_TEXT]
})
end for
print_utf(fh,"%d bytes compiled\n",{length(all_scripts[i][7])})
-- print_utf(fh,"%s\n\n",{dump_script_tree(all_scripts[i][5],0)})
print_utf(fh,"%s",{dump_script_binary(all_scripts[i][7][CODE_START_BYTE_OFFSET+1..$],0,0)})
print_utf(fh,"%s\n\n",{dump_script_strings(all_scripts[i][7])})
end for
print_utf(fh,"\n",{})
-------------------------------------
close(fh)
else
wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
end if
end if
end procedure
---------------------------------------------------------------------------
function get_cmd_depth(integer ptr,sequence data,integer depth)
sequence result
sequence this
result={}
while true do
--if get_key()=27 then abort(1/0) end if
if ptr>length(data) then
src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
depth-=1
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
end if
if depth=0 then
exit --break out of the while
else
result=append(result,this)
end if
end while
return({ptr,result})
end function
---------------------------------------------------------------------------
--identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis
function what_kind_and_id(sequence command,sequence local_vars)
integer kind
atom id
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
id=string_to_object(s,{})
elsif length(s) and s[1] = '@' then
kind=KIND_REFERENCE
id=0 -- ID always resolves to 0 for references here, since it is too early to know all
-- script IDs. The real work is done in binary_compile_recurse
elsif find_in_column(s,local_vars,CMD_TEXT) then
kind=KIND_LOCAL
id=find_in_column(s,local_vars,CMD_TEXT)
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])]
elsif keyword=RESERVE_FLOW then
kind=KIND_FLOW
id=flow_list[find_in_column(s,flow_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
id=function_list[find_in_column(s,function_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
id=script_list[find_in_column(s,script_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
id=math_list[find_in_column(s,math_list,PAIR_NAME)][PAIR_NUM]
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
id=0
elsif length(s) and s[1] = '"' then
src_error(
sprintf_utf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(s,15)}),
command[CMD_POS]
)
else
src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
return({kind,id})
end function
---------------------------------------------------------------------------
--identify the kind of a text command
function what_kind(sequence command,sequence local_vars, integer look_for_operators)
integer kind
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
elsif length(s) and s[1] = '@' then
kind=KIND_REFERENCE
elsif find_in_column(s,local_vars,CMD_TEXT) then
kind=KIND_LOCAL
elsif length(s)=0 then
kind=KIND_PARENS
elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then
kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then
kind=KIND_FLOW
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
elsif length(s) and s[1] = '"' then
src_error(
sprintf_utf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(s,15)}),
command[CMD_POS]
)
else
src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
return(kind)
end function
---------------------------------------------------------------------------
--Check whether a math operator is not an assignment, a compare operator, or 'random'
function is_simple_math_op(integer id)
return (id >= 1 and id <= 9) or id >= 19
end function
---------------------------------------------------------------------------
--this function not used anywhere
function how_many_args(sequence name,integer kind)
integer result
integer at
if kind=KIND_PARENS then
result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
elsif kind=KIND_FLOW then
result=-1 --flow supports an unknown number of args
elsif kind=KIND_OPERATOR then
result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
elsif kind=KIND_MATH then
at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME)
result=length(math_list[at][FUNC_ARGS])
elsif kind=KIND_FUNCTION then
at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME)
if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then
result=-1
else
result=length(function_list[at][FUNC_ARGS])
end if
elsif kind=KIND_SCRIPT then
at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME)
result=length(script_list[at][FUNC_ARGS])
else
--numbers, variables, etc do not permit args
result=0
end if
return(result)
end function
---------------------------------------------------------------------------
function takes_args(integer kind)
if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO then
return(true)
else
return(false)
end if
end function
---------------------------------------------------------------------------
function get_script_cmd(integer ptr,sequence data,sequence vars)
sequence command
sequence this
sequence after
integer kind
after={}
command=data[ptr]
ptr+=1
if compare("end",command[CMD_TEXT])=0 then
src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS])
elsif compare("begin",command[CMD_TEXT])=0 then
--floaty brackets for order-of-operations-enforcement
ptr-=1
command[CMD_TEXT]=""
elsif compare("variable",command[CMD_TEXT])=0 then
--must ignore variable declaration
if ptr<=length(data) then
--there is room for args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
end if
--non-command
return({ptr,{}})
end if
if ptr<=length(data) then
--there is room for args
kind=what_kind(command,vars,true)
--distinguishing between functions with and without args means wait() would be ok but noop() would not
if takes_args(kind) then
this=data[ptr]
if compare("begin",this[CMD_TEXT])=0 then
--yes, it has args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
after=after[2]
if length(after)=0 and kind=KIND_PARENS then
src_error("found empty parentheses not associated with a function call",command[CMD_POS])
end if
end if
end if
end if
return({ptr,{command,after}})
end function
---------------------------------------------------------------------------
function compile_commands(sequence script_data,sequence vars)
integer ptr
sequence this
sequence command
sequence result
result={}
ptr=1
while true do
this=get_script_cmd(ptr,script_data,vars)
ptr=this[1]
command=this[2]
if length(command)>0 then
if length(command[2])>0 then
--this command has arguments that need parsing
command[2]=compile_commands(command[2],vars)
end if
result=append(result,command)
end if
if ptr>length(script_data) then
exit --break out of while when there is no more data
end if
end while
return(result)
end function
---------------------------------------------------------------------------
function gather_local_vars(sequence vars,sequence data)
sequence this
integer at
integer ptr
sequence src_position
ptr=1
while true do
this=data[ptr]
ptr+=1
if compare("variable",this[CMD_TEXT])=0 then
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("begin",this[CMD_TEXT])=0 then
while true do
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
exit--break the while
end if
check_undefined_string(this,"local variable name")
at=find_in_column(this[CMD_TEXT],vars,CMD_TEXT)
if at then
src_position=decode_srcpos(vars[at][CMD_POS])
src_error(
sprintf_utf(
"local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLPNK&"%s"&COLRED,
{this[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}
)
,this[CMD_POS]
)
else
vars=append(vars,this)
end if
end while
else
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
end if
if ptr>length(data) then
exit --break out of the while
end if
end while
return(vars)
end function
---------------------------------------------------------------------------
--there are only two, hardcoded, macros right now: tracevalue, assert
function expand_macros(sequence tree)
sequence newtree
sequence newargs
sequence new_string
sequence string_token
sequence src_position
sequence temp
srcpos pos
integer ptr
ptr=1
while ptr<=length(tree) do
--recurse first, because tree[ptr] might be split up (as in 'assert')
if length(tree[ptr][TREE_BRANCHES]) then
tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES])
end if
if equal("tracevalue",tree[ptr][TREE_TRUNK][CMD_TEXT]) then
newargs={}
tree[ptr][TREE_TRUNK][CMD_TEXT]="tracevalueinternal"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
for i=1 to length(tree[ptr][TREE_BRANCHES]) do
new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i])
string_list=append(string_list,new_string)
string_token={sprintf("@$string%d",{length(string_list)}),pos}
newargs&={{string_token,{}},tree[ptr][TREE_BRANCHES][i]}
end for
tree[ptr][TREE_BRANCHES]=newargs
end if
if equal("assert",tree[ptr][TREE_TRUNK][CMD_TEXT]) then
--assert(condition) --> if (not(condition)) then ($assert expression string="condition", assertfailure)
pos=tree[ptr][TREE_TRUNK][CMD_POS]
src_position=decode_srcpos(pos)
if length(tree[ptr][TREE_BRANCHES])!=1 then
src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos)
end if
newtree={{{"if",pos},{}}, {{"then",pos},{}}}
newtree[1][TREE_BRANCHES]={{{"not",pos},{tree[ptr][TREE_BRANCHES][1]}}}
new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])})
string_list=append(string_list,new_string)
string_token={sprintf("@$string%d",{length(string_list)}),pos}
temp={{{"setstringfromtable",pos},{}}, {{"assertfailure",pos},{}}}
temp[1][TREE_BRANCHES]={{{enforce_constants("assertexpressionstring"),pos},{}},{string_token,{}}} --stringnum, string
newtree[2][TREE_BRANCHES]=temp
tree=replace_slice(tree,ptr,ptr,newtree) --tree[ptr..ptr]=newtree
ptr+=1
end if
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--tree[ptr] is a partially constructed if node (lacking else child), followed either by an else or elseif node.
--Handle else/elseif, parenting to tree[pt], and return the new tree.
function build_else_node(sequence tree, integer ptr)
sequence elseargs
sequence nextcmd
integer eptr
srcpos pos
nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr+1][TREE_TRUNK][CMD_POS]
if equal("else",nextcmd) then
--found else
tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
tree=delete_element(tree,ptr+1)
else -- equal("elseif",nextcmd)
--Split off the if
elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} }
--Grab the rest of this if,elseif,...,else chain, to put inside the else
eptr=ptr+2
while eptr<=length(tree) do
nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT]
--Stop when we see an else but otherwise we don't check correctness until we recurse
if equal(nextcmd,"else") then
eptr+=1
exit
elsif find(nextcmd,{"elseif","then"}) then
--continue
else
exit
end if
eptr+=1
end while
elseargs&=tree[ptr+2..eptr-1]
tree=tree[1..ptr]&tree[eptr..$]
tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} }
end if
return tree
end function
---------------------------------------------------------------------------
--Build switch AST nodes, return new tree
function build_switch_node(sequence tree,sequence vars,integer ptr)
srcpos pos
srcpos argpos
sequence args
sequence thiscmd
sequence rawargs
sequence caseargs
sequence casecmd
sequence lastnodename
sequence collected
sequence temp
integer have_else
srcpos elsepos
integer expect_case
integer kind
integer id
sequence else_error_msg
else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf_utf(
COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos)
end if
if ptr>=length(tree) then
src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos)
end if
thiscmd=tree[ptr+1][TREE_TRUNK]
if equal("do",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS])
end if
--found do, processing starts here (build arguments to switch, after the expression)
args=tree[ptr][TREE_BRANCHES] --start with the condition, length 1
rawargs=tree[ptr+1][TREE_BRANCHES] --contents of the 'do'
lastnodename=""
have_else=false
expect_case=false
collected={}
for j=1 to length(rawargs) do
thiscmd=rawargs[j][TREE_TRUNK]
argpos=thiscmd[CMD_POS]
if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"."
,{thiscmd[CMD_TEXT]}),argpos)
end if
--to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible
if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then
--Old syntax else(...) block
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
src_error(else_error_msg,elsepos)
end if
expect_case=false
if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case is new-style syntax and wasn't followed by any statements
--Lets just disallow this to prevent a little confusion
src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "&
"Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions."
,rawargs[j-1][TREE_TRUNK][CMD_POS])
end if
if j!=length(rawargs) then
if length(rawargs[j][TREE_BRANCHES])=0 then
else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?"
end if
src_error(else_error_msg,argpos)
end if
--convert it to a do
args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} }
elsepos=argpos
have_else=true
elsif equal("case",thiscmd[CMD_TEXT]) then
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
--Note we disallow case(else) case(1) but allow case(else,1)
src_error(else_error_msg,elsepos)
end if
expect_case=false
caseargs=rawargs[j][TREE_BRANCHES]
--Call a couple passes early here, so that constant expressions are allowed
caseargs=collapse_floaty_brackets(caseargs)
caseargs=optimize_script(caseargs,vars)
--Check contents are only either integers, or "else", and append to args
for k=1 to length(caseargs) do
casecmd=caseargs[k][TREE_TRUNK]
if equal(casecmd[CMD_TEXT],"else") then
if have_else then
src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS])
end if
have_else=true
elsepos=argpos
--don't emit anything: next do is the 'else' one
else
temp=what_kind_and_id(casecmd,vars)
kind=temp[1]
id=temp[2]
if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED
,{casecmd[CMD_TEXT]}),casecmd[CMD_POS])
end if
args&={caseargs[k]}
end if
end for
else
--Disallow "case() do() garbage"
if expect_case then
src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos)
end if
if length(collected)=0 then
--Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it
if equal("do",thiscmd[CMD_TEXT]) then
args&={rawargs[j]}
expect_case=true
else
collected={{"do",argpos},{rawargs[j]}}
end if
else
--Continue collecting
collected[TREE_BRANCHES]&={rawargs[j]}
end if
end if
lastnodename=thiscmd[CMD_TEXT]
end for
if length(collected) then
args&={collected}
end if
if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case (possibly an else) is new-style syntax and wasn't followed by any statements,
--so add the missing do. Don't need to do this, but I'd like to keep things tidy
--(srcpos is of the last case)
args&={ {{"do",argpos},{}} }
end if
if have_else=false then
--Insert a dummy do default block since else has been left out
args&={ {{"do",pos},{}} }
end if
tree[ptr][TREE_BRANCHES]=args
--delete the do block
return delete_element(tree,ptr+1)
end function
---------------------------------------------------------------------------
--parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements
function normalize_flow_control(sequence tree,sequence vars,sequence parent)
integer ptr
sequence s
sequence nextcmd
srcpos pos
integer argkind
integer var_at
ptr=1
while ptr<=length(tree) do
s=tree[ptr][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr][TREE_TRUNK][CMD_POS]
if compare("if",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf_utf(
COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
if ptr1 then
src_error(sprintf_utf(
COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
if ptr4 then
src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(tree[ptr][TREE_BRANCHES])}),tree[ptr][TREE_BRANCHES][5][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=3 then
--append default step value
tree[ptr][TREE_BRANCHES]&={ {{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}} }
end if
argkind=what_kind(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK],vars,true)
if argkind=KIND_LOCAL then
--translate into a numeric reference to a variable
used_locals=append(used_locals,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT])
var_at=find_in_column(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],vars,CMD_TEXT)
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--warn, then translate into a numeric reference to a variable
src_warn(sprintf_utf(
"Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
,{tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--only variables allowed as the first argument of a "for"
src_error(
sprintf_utf("first argument of "&COLYEL&"for"&COLRED&" statement must be a variable, not %s "&COLYEL&"%s"&COLRED,{
KIND_LONGNAMES[argkind]
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]
})
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS]
)
end if
if ptr1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have only one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit script"&COLRED,{}),pos)
end if
elsif compare("break",s)=0 or compare("continue",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
--append default value
tree[ptr][TREE_BRANCHES]={{{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}}}
end if
elsif compare("exitscript",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>0 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit returning"&COLRED
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
end if
elsif compare("switch",s)=0 then
tree=build_switch_node(tree,vars,ptr)
elsif compare("case",s)=0 then
src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos)
end if
tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,tree[ptr][TREE_TRUNK][CMD_TEXT])
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--A modification of Dijkstra's shunting-yard algorithm
--(This could probably subsume compile_commands too)
function convert_operators(sequence tree)
sequence new_tree
sequence stack
sequence opnode
integer ptr
integer at
integer priority
integer nonoperator_before
stack={}
new_tree={}
ptr=1
nonoperator_before=false
while ptr<=length(tree) do
--for each peer branch on the tree
at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME)
if at>0 then
if nonoperator_before=false then
--there is no room for the before-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
elsif ptr=length(tree) then
--there is no room for the after-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
end if
priority=operator_list[at][PAIR_NUM]
--convert the operator to its true function name (which might be the same)
tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME]
while length(stack) and priority>=stack[$][1] do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
stack&={{priority,tree[ptr]}}
nonoperator_before=false
else
if nonoperator_before then
--Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
end if
tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES])
new_tree&={tree[ptr]}
nonoperator_before=true
end if
ptr+=1
end while
--copy of above
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
return new_tree
end function
---------------------------------------------------------------------------
function fix_arguments(sequence tree,integer kind,sequence list,sequence vars)
integer at,var_at
integer argkind
integer argnum,maxargs
argnum=length(tree[TREE_BRANCHES])
at=find_in_column(tree[TREE_TRUNK][CMD_TEXT],list,PAIR_NAME)
if equal(list[at][FUNC_ARGS],VAR_ARGS) then
return(tree[TREE_BRANCHES])
else
maxargs=length(list[at][FUNC_ARGS])
end if
if maxargs < argnum then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments"
,{KIND_LONGNAMES[kind],tree[TREE_TRUNK][CMD_TEXT],maxargs,argnum}
),tree[TREE_BRANCHES][maxargs+1][TREE_TRUNK][CMD_POS])
elsif maxargs > argnum then
--add defaults if not enough args are present
if kind=KIND_MATH then
--special processing for math
if list[at][PAIR_NUM]<16 or list[at][PAIR_NUM]>=19 then
--math shouldnt have defaults
src_error(sprintf_utf(
"math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d"
,{tree[TREE_TRUNK][CMD_TEXT],argnum,maxargs}
),tree[TREE_TRUNK][CMD_POS])
else
--variable assignment commands can have defaults
if argnum = 0 then
--no defaults for first argument of variable function
src_error(sprintf_utf(
"variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1"
,{tree[TREE_TRUNK][CMD_TEXT],argnum}
),tree[TREE_TRUNK][CMD_POS])
elsif argnum = 1 then
--make defaults for second arg of variable function
if list[at][PAIR_NUM]=16 then
--setvariable
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"0",tree[TREE_TRUNK][CMD_POS]}
,{}
})
else
--increment and decrement
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"1",tree[TREE_TRUNK][CMD_POS]}
,{}
})
end if
end if
end if
else
--normal processing for script and function
for i=argnum+1 to maxargs do
if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d"
,{KIND_LONGNAMES[kind],tree[TREE_TRUNK][CMD_TEXT],i}
),tree[TREE_TRUNK][CMD_POS])
end if
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{sprintf("%d",{list[at][FUNC_ARGS][i]}),tree[TREE_TRUNK][CMD_POS]}
,{}
})
end for
end if
end if
--this is as good a time as any to make sure that var manipulation functions point to real variables
if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<19 then
argkind=what_kind(tree[TREE_BRANCHES][1][TREE_TRUNK],vars,false)
if argkind=KIND_LOCAL then
--its local. translate it to a numeric reference
var_at=find_in_column(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],vars,CMD_TEXT)
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--its global. translate it to a numeric reference
var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--its not a variable. bad!
src_error(sprintf_utf(
"first argument of variable manipulation function "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
,{tree[TREE_TRUNK][CMD_TEXT],KIND_LONGNAMES[argkind],tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
end if
end if
return(tree[TREE_BRANCHES])
end function
---------------------------------------------------------------------------
function normalize_arguments(sequence tree,sequence vars)
integer kind
--unlike the flow normalization and operator translation we do not insert/delete elements from the current level, so we can use a "for" safely, and dont need a "while"
for i=1 to length(tree) do
kind=what_kind(tree[i][TREE_TRUNK],vars,false)
if kind=KIND_SCRIPT then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,script_list,vars)
elsif kind=KIND_FUNCTION then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,function_list,vars)
elsif kind=KIND_MATH then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,math_list,vars)
end if --number, flow, global, local, parens need no argchecking
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
integer b1,b2
b1=and_bits(n,#FF)
b2=floor(and_bits(n,#FFFF)/256)
return({b1,b2})
end function
---------------------------------------------------------------------------
function convert_to_bytes(sequence s)
sequence result
result={}
for i=1 to length(s) do
result&=int_to_bytes(s[i])
end for
return(result)
end function
---------------------------------------------------------------------------
--Encode a unicode (UTF32/native Euphoria) string in the format the engine
--requires, which is currently Latin-1
function encode_ohr(sequence string, integer maxbytes=0)
for i=1 to length(string) do
if string[i]>=256 then
string[i]='?'
end if
end for
if maxbytes and maxbytes7 and equal("$string",str_temp[1..7]) then
value_temp=value(str_temp[8..length(str_temp)])
result&=KIND_NUMBER
result&=table_string(value_temp[2])
else
src_error(sprintf_utf("reference "&COLYEL&"@%s"&COLRED&" could not be resolved: must be the name of a global variable or a script",{str_temp}),tree[TREE_TRUNK][CMD_POS])
end if
end if
end if
elsif kind=KIND_GLOBAL then
at=find(s,global_list[PAIR_NAME])
result&=kind
result&=global_list[PAIR_NUM][at]
elsif kind=KIND_LOCAL then
at=find_in_column(s,vars,CMD_TEXT)
result&=kind
result&=at-1
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
if kind=KIND_SCRIPT then
at=find_in_column(s,script_list,PAIR_NAME)
id=script_list[at][PAIR_NUM]
elsif kind=KIND_FUNCTION then
at=find_in_column(s,function_list,PAIR_NAME)
id=function_list[at][PAIR_NUM]
if max_used_function= initial_data_len then
can_reference=false
end if
result[3+i]=recurse_result
end for
--Now either remove or replace the temporary stuff
if can_reference then
compiled_data=compiled_data[1..initial_data_len]
else
compiled_data[initial_data_len+1 .. initial_data_len+length(result)]=result
return(initial_data_len)
end if
else
src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),tree[TREE_TRUNK][CMD_POS])
end if
if not fast_mode and can_reference then
at=match(result,compiled_data)
if at>0 then
if initial_data_len!=length(compiled_data) then
simple_error("compiler bug! Unexpected compiled_data length")
end if
--found existing data exactly like this command, so just return a reference to it
if at=1 then
simple_error("compiler bug! node matches root node")
end if
return(at-1)
end if
end if
--append data directly
compiled_data&=result
return(initial_data_len)
end function
---------------------------------------------------------------------------
function binary_compile(integer id,sequence tree,sequence vars)
sequence result
integer at
integer recurse_ret
string_table={}
compiled_data={}
--binary data is in a mix of 16-bit and 32-bit signed words.
--the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit.
--output header
--the first word is the zero-rooted byte-offset of the first executable code byte
result=output_word(CODE_START_BYTE_OFFSET)
--the second word is the number of local variables
result&=output_word(length(vars))
--the third word is the number of arguments the script takes (also in SCRIPTS.TXT)
at=find_in_column(id,script_list,PAIR_NUM)
result&=output_word(length(script_list[at][FUNC_ARGS]))
--the fourth word is the format of the command data (presently used to indicate 32-bit encoding)
result&=output_word(SCRIPT_FORMAT_VERSION)
--the fifth&sixth words are a 32bit pointer to the string literal table (in bytes), we don't know it yet
result&=output_word(0)
result&=output_word(0)
--what follows is command data in the format [kindID,Value,argcount,argpointerlist]
--numbers and variables have no argcount or argpointerlist
--an argpointer is the zero-rooted word-offset of the argument relative
--to the start of the executable commands. I realise that this format is
--unnecessarily complicated. I had hoped to get benefits of being able to
--store frequently reused commands only once and then just point to them,
--but in actual practice, it isnt worth the trouble, since the only
--commands that tend to be redundant are the really short ones.
--the first command is always a "do". there can be only one top-level command
if length(tree)!=1 then
simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)}))
end if
reenter_timing_zone("binary_compile_recurse")
recurse_ret=binary_compile_recurse(tree[1],vars)
if recurse_ret!=0 then
simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret}))
end if
exit_timing_zone()
result&=convert_to_bytes(compiled_data)
compiled_data={}
--append with a table of string literals and give its offset
if length(string_table)>0 then
result[9..12]=int_to_bytes(length(result))
result&=string_table
end if
return(result)
end function
---------------------------------------------------------------------------
--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(sequence tree)
integer i
sequence graft
i=1
while i<=length(tree) do
if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
--found a floaty-bracket
graft=tree[i][TREE_BRANCHES]
if length(graft)!=1 then
src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS])
end if
tree[i]=graft[1]
--delete_element(tree,i)
--tree=insert_sequence(tree,graft,i)
else
if length(tree[i][TREE_BRANCHES]) then
tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
end if
i+=1
end if
end while
return(tree)
end function
---------------------------------------------------------------------------
function sanity_check(sequence tree,sequence vars,sequence parent)
sequence s
sequence kind_and_id
integer kind
atom id
for i=1 to length(tree) do
s=tree[i][TREE_TRUNK][CMD_TEXT]
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if (compare("if",parent)=0) and i=1 then
if kind=KIND_NUMBER then
if id then
src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),tree[i][TREE_TRUNK][CMD_POS])
else
src_warn("Condition is always false",tree[i][TREE_TRUNK][CMD_POS])
end if
elsif kind=KIND_FLOW then
src_warn(sprintf_utf("Should not use flow control command "&COLYEL&"%s"&COLRED&" as condition for if",{s}),tree[i][TREE_TRUNK][CMD_POS])
end if
elsif compare("do",parent)=0 or compare("then",parent)=0 or compare("else",parent)=0 then
if kind=KIND_NUMBER then
src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
,{id}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_GLOBAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_LOCAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{vars[id][CMD_TEXT]}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_MATH and (id<=15 or id>=19) then
src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded"
,{s}),tree[i][TREE_TRUNK][CMD_POS])
end if
end if
if kind=KIND_GLOBAL then
if not find(s,used_globals) then
used_globals=append(used_globals,s)
end if
elsif kind=KIND_LOCAL then
if not find(s,used_locals) then
used_locals=append(used_locals,s)
end if
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--simulate a result overflowing a signed 32 bit register
function overflow_int32(atom val)
--maybe check whether val is so large that this is could be inaccurate?
atom ret
ret=mod(val,#100000000)
if ret>2147483647 then
ret-=#100000000
end if
return(ret)
end function
---------------------------------------------------------------------------
function optimized_arg(sequence tree,sequence vars)
sequence kind_and_id
integer kind
atom id
object arg1,arg2
kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_NUMBER then
return(id)
elsif kind=KIND_MATH and (id<=15 or id>=19) then
arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
if equal(arg1,{}) then
arg1=tree[TREE_BRANCHES][1]
end if
if length(tree[TREE_BRANCHES])>=2 then
arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
else
arg2=0
end if
if equal(arg2,{}) then
arg2=tree[TREE_BRANCHES][2]
end if
if atom(arg1) and atom(arg2) then
if id=0 then
--random
if arg1=arg2 then
return(arg1)
end if
elsif id=1 then
--exponent
if arg2>0 then
--if too large, Euphoria will signal an overflow, but I'm also worried
--about overflow_int32 giving inaccurate results
if log(abs(arg1)+1)+log(abs(arg2)+1)arg2)
elsif id=14 then
--lessthanorequalto
return(arg1<=arg2)
elsif id=15 then
--greaterthanorequalto
return(arg1>=arg2)
elsif id=19 then
--not
return(arg1=0)
elsif id=22 then
--logxor
return((arg1=0)!=(arg2=0))
elsif id=23 then
--abs
return(overflow_int32(abs(arg1)))
elsif id=24 then
--sign
return((arg1>0)-(arg1<0))
elsif id=25 then
--sqrt
if arg1<0 then
src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
end if
return(floor(sqrt(arg1)+0.5))
end if
end if
--have to allow for side effects of shortcut evaluating operators by only checking first arg
if atom(arg1) then
if id=4 then
--multiply
if arg1=0 then
return 0
elsif arg1=1 then
return arg2
end if
elsif id=6 then
--add
if arg1=0 then
return arg2
end if
elsif id=20 then
--logand
if arg1=0 then
return 0
end if
elsif id=21 then
--logor
if arg1!=0 then
return 1
end if
end if
end if
if atom(arg2) then
if id=4 then
--multiply
if arg2=0 then
return 0
elsif arg2=1 then
return arg1
end if
elsif id=5 then
--subtract
if arg2=0 then
return arg1
end if
elsif id=6 then
--add
if arg2=0 then
return arg1
end if
end if
end if
end if
return({}) --return nonatom
end function
---------------------------------------------------------------------------
--goes through a script simplifying expressions that always have the same value
function optimize_script(sequence tree,sequence vars)
object arg
for i=1 to length(tree) do
arg=optimized_arg(tree[i],vars)
if atom(arg) then
tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
tree[i][TREE_BRANCHES]={}
elsif sequence(arg) and length(arg) then
tree[i]=arg
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
procedure warn_unused_locals(sequence vars)
if find('u',optlist) then
for i=1 to length(vars) do
if not find(vars[i][CMD_TEXT],used_locals) then
src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
procedure warn_unused_globals()
integer at
if find('u',optlist) then
for i=1 to length(global_list[PAIR_NUM]) do
at=find(global_list[PAIR_NAME][i],used_globals)
if not at then
src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_POS][i])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
procedure compile_a_script(integer id,sequence trigger_data,sequence name_data,sequence arg_data,sequence script_data)
sequence script_tree
sequence local_vars
sequence binary
integer trigger
trigger=trigger_list[find(trigger_data[CMD_TEXT],column(trigger_list,PAIR_NAME))][PAIR_NUM]
current_script=name_data[CMD_TEXT]
local_vars=arg_data --start with argument names (so we can check for conflicts)
reenter_timing_zone("gather_local_vars")
local_vars=gather_local_vars(local_vars,script_data)
exit_timing_zone()
used_locals={}
reenter_timing_zone("compile_commands")
script_tree=compile_commands(script_data,local_vars)
exit_timing_zone()
reenter_timing_zone("convert_operators")
script_tree=convert_operators(script_tree)
exit_timing_zone()
reenter_timing_zone("expand_macros")
script_tree=expand_macros(script_tree)
exit_timing_zone()
reenter_timing_zone("normalize_flow_control")
script_tree=normalize_flow_control(script_tree,local_vars,"")
exit_timing_zone()
reenter_timing_zone("normalize_arguments")
script_tree=normalize_arguments(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("collapse_floaty_brackets")
script_tree=collapse_floaty_brackets(script_tree)
exit_timing_zone()
if not fast_mode then
reenter_timing_zone("optimize_script")
script_tree=optimize_script(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("sanity_check")
script_tree=sanity_check(script_tree,local_vars,"")
exit_timing_zone()
warn_unused_locals(local_vars)
end if
reenter_timing_zone("binary_compile")
binary=binary_compile(id,script_tree,local_vars)
exit_timing_zone()
all_scripts=append(all_scripts,{
id --id
,trigger --trigger type
,name_data[CMD_TEXT] --name
,name_data[CMD_POS] --source line
,script_tree
,local_vars
,binary --compiled data to go into the HSZ lumps
})
current_script=""
end procedure
---------------------------------------------------------------------------
--Parse argument declarations in a script argument list
function process_arglist(sequence args)
sequence arglist
sequence defaults
integer i
integer using_defaults
arglist={}
defaults={}
using_defaults=false
i=1
while i<=length(args) do
if compare("=",args[i][CMD_TEXT])=0 then
src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS])
end if
check_undefined_string(args[i],"argument name")
if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then
src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
if ilength(args) then
src_error(sprintf_utf("Syntax error: expected default value for script argument "&COLYEL&"%s"&COLRED,{args[i][CMD_TEXT]}),args[i+1][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults&=try_string_to_number({enforce_constants(args[i+2][CMD_TEXT]),args[i+2][CMD_POS]})
using_defaults=true
i+=3
else
if using_defaults=true then
--any argument with a default must be followed with arguments with defaults
src_error(sprintf_utf("Default value missing for argument "&COLYEL&"%s"&COLRED&": the previous argument had a default value, so all following ones must too",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults=append(defaults,NO_DEFAULT)
i+=1
end if
end while
return({arglist,defaults})
end function
---------------------------------------------------------------------------
--Check that every declared script has been defined, with the same number of arguments,
--and each script appears exactly once. Basically, compare script_list and script_cmd
procedure check_script_declarations()
integer at
sequence args_and_defaults
sequence scripts_found
sequence src_position
scripts_found=repeat(false,length(script_list))
for i=1 to length(script_cmd) do
at=find(script_cmd[i][2][CMD_TEXT],column(script_list,PAIR_NAME))
current_script=script_cmd[i][2][CMD_TEXT]
args_and_defaults=process_arglist(script_cmd[i][3])
if not at then
--define script optional, add to script list
script_list=create_function(script_list,-1,script_cmd[i][2][CMD_TEXT],args_and_defaults[2],RESERVE_SCRIPT,script_cmd[i][2][CMD_POS])
scripts_found&={true}
else
if scripts_found[at]=true then
src_position=decode_srcpos(script_list[at][FUNC_POS])
src_error(
sprintf_utf(
"Script "&COLYEL&"%s"&COLRED&" appears more than once! It was previously defined on line %d of "&COLPNK&"%s"&COLRED
,{script_cmd[i][2][CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}
)
,script_cmd[i][2][CMD_POS]
)
end if
scripts_found[at]=true
--The srcpos stored for this script will be from definescript. Repoint it at the actual script
--(doesn't really matter, all_scripts would have ended up with that srcpos anyway)
script_list[at][FUNC_POS]=script_cmd[i][2][CMD_POS]
if length(script_cmd[i][3]) != length(script_list[at][FUNC_ARGS]) then
src_error(
sprintf_utf(
"Script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its declaration"
,{script_cmd[i][2][CMD_TEXT],length(script_cmd[i][3]),length(script_list[at][FUNC_ARGS])}
)
,script_cmd[i][2][CMD_POS]
)
end if
for j=1 to length(script_list[at][FUNC_ARGS]) do
if not equal(args_and_defaults[2][j],NO_DEFAULT) and not equal(args_and_defaults[2][j],script_list[at][FUNC_ARGS][j]) then
src_warn(
sprintf_utf(
"Default value for argument "&COLYEL&"%s"&COLRED&" does not match default given in script definition, %d"
,{script_cmd[i][3][j][CMD_TEXT],script_list[at][FUNC_ARGS][j]}
)
,script_cmd[i][2][CMD_POS]
)
end if
end for
end if
script_cmd[i][3]=args_and_defaults[1]
end for
current_script=""
for i=1 to length(scripts_found) do
if scripts_found[i]=false then
src_error(
sprintf_utf("Script "&COLYEL&"%s"&COLRED&" was declared but is missing",{script_list[i][PAIR_NAME]})
,script_list[i][FUNC_POS]
)
end if
end for
end procedure
---------------------------------------------------------------------------
procedure compile_each_script()
sequence count
integer at
enter_timing_zone("Compiling scripts")
color_print("compiling scripts",{})
count=repeat(0,length(file_list))
enter_timing_zone("check_script_declarations")
check_script_declarations()
exit_timing_zone()
for i=1 to length(script_cmd) do
at=find(script_cmd[i][2][CMD_TEXT],column(script_list,PAIR_NAME))
-- color_print("%s\n",{script_cmd[i][2][CMD_TEXT]})
reenter_timing_zone("compile_a_script")
cur_script_num=i
compile_a_script(
script_list[at][PAIR_NUM] --ID
,script_cmd[i][1] --trigger
,script_cmd[i][2] --Name
,script_cmd[i][3] --argnames
,script_cmd[i][4] --data
)
cur_script_num=0
exit_timing_zone()
count[srcpos_file_number(script_cmd[i][2][CMD_POS])]+=1
color_print(".",{})
end for
color_print("\n",{})
if not fast_mode then
warn_unused_globals()
end if
for i=1 to length(count) do
if count[i] then
wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i][FILE_NAME]})
end if
end for
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
function generate_scripts_dot_txt()
sequence result
integer print_scripts
result=""
print_scripts = 0
if find('a',optlist) then
print_scripts = 2
elsif find('s',optlist) then
print_scripts = 1
end if
for i=1 to length(script_list) do
result&=encode_ohr(sprintf_utf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])}))
for j=1 to length(script_list[i][FUNC_ARGS]) do
if equal(script_list[i][FUNC_ARGS][j],NO_DEFAULT) then
result&="0\r\n"
else
result&=sprintf("%d\r\n",{script_list[i][FUNC_ARGS][j]})
end if
end for
if (print_scripts=1 and script_list[i][PAIR_NUM]2048 then
simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1}))
records=2048
end if
--header size in bytes (offset to location table)
result=output_word(6)
--file format version
result&=output_word(0)
--number of records in location table
result&=output_word(records)
offset=length(result)+records*2
id=0
i=1
encoded_names={}
while i<=length(function_list) do
if function_list[i][PAIR_NUM]0 then
temp=without_extension(fname) --you can't slice a function result directly! :(
fname=sprintf("%s~%d.%s",{temp[1..$-1],suffix,extension_only(fname)})
end if
--color_print("writing %d %s\n",{i,fname})
if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then
simple_error("unable to write a script source lump")
end if
end for
end procedure
---------------------------------------------------------------------------
procedure write_output_file()
sequence header
integer fh
object lh
enter_timing_zone("Writing output file")
header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,SCRIPT_FORMAT_VERSION,max_used_function})
if length(all_scripts) then
fh=open(dest_file,"wb")
if fh!=-1 then
wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
--write header and version
if write_lump(fh,"HS",header)=false then
simple_error("unable to write header")
end if
--write script index (old file)
if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
simple_error("unable to write script index")
end if
--write script index (new file)
if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then
simple_error("unable to write binary script index")
end if
--write script commands list
if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then
simple_error("unable to write commands listing")
end if
--write each script
for i=1 to length(all_scripts) do
if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][1]}),all_scripts[i][7])=false then
simple_error(sprintf_utf("unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][3]}))
end if
end for
if not find('n',optlist) then --no debug info
--write copy of scripts
wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
lh=begin_lump(fh,"source.lumped")
if equal(lh,false) then
simple_error("unable to write scripts source file")
end if
write_script_files(fh)
if end_lump(lh)=false then
simple_error("unable to finish write of scripts source file")
end if
end if
close(fh)
else
simple_error(sprintf_utf("attempt to open"&COLYEL&"%s"&COLRED&" failed",{dest_file}))
end if
else
color_print("no scripts to output\n",{})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
init()
load_and_lex()
show_source_info()
primary_parse_pass()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()
run_time=time()-start_time
color_print("done (%g seconds)\n",{run_time})
if find('t',optlist) then
print_timing_data()
end if
opt_wait_for_key()
if was_warnings = true then
abort(2)
end if