-- HamsterSpeak Compiler
--(C) Copyright 2002 James Paige and Hamster Republic Productions
-- Please read LICENSE.txt for GPL License details and disclaimer of liability
-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com
-- This code is written in Euphoria 3.1. You can get the Free Open-Source
-- Euphoria compiler from http://RapidEuphoria.com . I also highly recommend
-- David Cuny's EE editor which you can download from the same site.
---------------------------------------------------------------------------
--Changelog
--3Ma 2010-10-21 Correctly strip \r to fix newlines ... again.
--3M 2010-05-02 abs, sign, sqrt; fix error reporting
--3L 2010-04-29 tracevalue support, involving a few internal changes
--3Ke 2010-04-18 Add -t commandline option
--3Kd 2010-04-14 Quite a few bugfixes in optimizer
--3Kc 2010-04-14 Start checking well-formedness of numbers with lexer
--3Kb 2010-04-12 Whole lot of bugfixes in script declarations and
-- checking for unreserved identifier names
--3Ka 2010-04-12 Throw error if source file contains binary
--3K 2010-04-12 Error column reporting (introducing srcpos); lump
-- original source files into source.lumped
--3Ja 2010-04-07 Wrote a (nearly) proper lexer
--3J 2009-12-05 Multiple function names may now alias the same id
--3I 2009-07-27 Export a backup copy of full source to source.txt lump
-- disable with -n option
--3H 2009-06-18 Export function names to commands.bin lump
--3Gk 2009-05-25 Fix short references crash
--3Gj 2009-01-27 Show future-compatability warning when using square
-- brackets [] and curly brackets {} in names
--3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake
--3Gh 2008-07-14 Disable warnings for unused variables by default, and
-- Add -u option to enable warnings for unused variables.
--3Gg 2008-03-01 Raise global variable limits
--3Gf 2008-02-11 Give user another chance on missing Y key :P
--3Ge 2007-07-01 String conversion rewrite to allow more whitespace
--3Gd 2007-07-01 Allow constants for new syntax default args
--3Gc 2007-05-29 Handle empty paratheses correctly, and use
-- lowercase ".hs" extension
--3Gb 2007-05-09 Throw error on missing include file
--3Ga 2007-05-09 String length not limited to 40
--3G 2007-02-09 Offset to string literal tables given as a 32-bit
-- int, script format version++ to 2.
--3F 2006-12-11 Support for variable number of arguments to functions
-- by giving -1 as the number of args in the define block
--3Ec 2006-11-30 while(true) no longer produces a warning
--3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes
--3Ea 2006-09-03 Actually export triggers in scripts.bin
--3E 2006-08-30 "define trigger" support
--3D 2006-08-16 String literal lookup tables added to end of
-- HSZ lumps replacing setstring + appendstring
--3C 2006-08-15 New syntax for giving default script arguments,
-- and for leaving out definescript
--3B 2006-08-13 HSX lumps renamed to HSZ
--3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format
-- version added to HSX header. Current is 1
--2Na 2006-07-07 Minor update to add new logical operators
--2N 2006-07-07 Added not() logic function
--2M 2006-07-06 Set exit code on warnings
--2L 2006-05-13 Added switch statement (+ case keyword)
--2K 2006-05-01 Added @scriptname and @globalvariable syntax to
-- return script or global ID number at compile-time
-- not run-time (for use with "run script by ID" and
-- "read global" and "write global")
--2J 2006-04-10 Added break, continue, exitscript, exitreturning
-- flow statements. Also fixed some return bugs
--2I 2006-04-04 Extended HSX header to include number of arguments
-- to a script, to really fix arguments-overflow-into
-- -locals bug
--2H 2006-03-29 Display better help and wait for keypress when run
-- by double-clicking the icon. Added -k command line
-- option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $=
-- Mention GPL in help text
--2F 2005-07-24 Strings implemented:
-- $id="..." -> setstring
-- $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
-- Added += and -= operators thanks to a patch from
-- The Mad Cacti and Fyrewulff
--2D 2002-08-03 Only a small change, strip out \r from lines of
-- the script as we read them to avoid errors related
-- to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
-- non-printable characters exist in the input file.
-- (naturally the script will not compile right, but
-- at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
-- defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2 First Release
---------------------------------------------------------------------------
without warning --to avoid annoying warnings
without type_check --for a small speed boost
--with profile_time --time profiling
include hsspiffy.e --various routines, sequence manipulation - James Paige
include graphics.e --standard library, needed for color output
include machine.e --needed for int_to_bytes
---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1
-- increment COMPILER_VERSION only for major language-altering changes
constant COMPILER_VERSION=3
-- For small changes increment the second letter of COMPILER_SUB_VERSION
-- For large changes increment the first letter and reset the second letter
--*Please make sure this is 2 chararacters long! Append a space if required.*
constant COMPILER_SUB_VERSION="Ma"
constant COPYRIGHT_DATE="2002"
--these constants are color-flags.
constant COLYEL=239 + YELLOW
constant COLRED=239 + RED
constant COLPNK=239 + BRIGHT_RED
constant COLWHI=239 + WHITE
constant COLBWHI=239 + BRIGHT_WHITE
constant LEX_CHAR=0
constant LEX_SPACE=1
constant LEX_BEGIN=2
constant LEX_END=3
constant LEX_KEYWORD=4
constant LEX_COMMENT=5
constant LEX_STRING=6
constant LEX_COMMA=7
constant LEX_WARN=8
constant LEX_BINARY=9
constant LEX_DIGIT=10
constant LEX_KEYWORD2=11
constant FILE_NAME=1
constant FILE_TEXT=2
constant FILE_LINE_EXTENTS=3
constant POS_FILENAME=1
constant POS_LINE=2
constant POS_COLUMN=3
constant POS_TEXT=4
constant CMD_TEXT=1
constant CMD_POS=2
constant RESERVE_CORE=1
constant RESERVE_UNIMPLEMENTED=2
constant RESERVE_FLOW=3
constant RESERVE_FUNCTION=4
constant RESERVE_SCRIPT=5
constant RESERVE_GLOBAL=6
constant RESERVE_BEGIN=7
constant RESERVE_END=8
constant RESERVE_OPERATOR=9
constant RESERVE_BUILTIN=10
constant RESERVE_CONSTANT=11
constant RESERVE_MACRO=12
constant RESERVE_NAMES={"declaration"
,"unimplemented keyword/operator"
,"flow control"
,"hardcoded function"
,"user script"
,"global variable"
,"bracket "&COLYEL&"("&COLRED
,"bracket "&COLYEL&")"&COLRED
,"operator"
,"builtin command"
,"constant"
,"hardcoded function/macro"
}
constant PAIR_NUM=1
constant PAIR_NAME=2
constant PAIR_START=1
constant PAIR_END=2
constant OPER_TRUENAME=3
constant OPER_POS=4
constant FUNC_ARGS=3
constant FUNC_POS=4
constant GLB_POS=3
constant CONST_VALUE=1
constant CONST_POS=2
constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value
constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args
constant KIND_NUMBER=1
constant KIND_FLOW=2
constant KIND_GLOBAL=3
constant KIND_LOCAL=4
constant KIND_MATH=5
constant KIND_FUNCTION=6
constant KIND_SCRIPT=7
constant KIND_REFERENCE=8 --converted to KIND_NUMBER in compiled script
constant KIND_OPERATOR=9 --never appears in compiled script
constant KIND_PARENS=10 --never appears in compiled script
constant KIND_MACRO=11 --never appears in compiled script
constant KIND_LONGNAMES={"number"
,"flow control statement"
,"global variable"
,"local variable"
,"built-in function"
,"hard-coded function"
,"script"
,"reference"
,"untranslated operator"
,"order-of-operations-enforcing parenthesis"
,"hard-coded function/macro"
}
constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant TIMING_DEPTH=1
constant TIMING_DESCRIPTION=2
constant TIMING_TIME=3
constant TIMING_ACTIVE=4
constant CODE_START_BYTE_OFFSET=12
constant SCRIPT_FORMAT_VERSION=2
---------------------------------------------------------------------------
--globals-- --initializations--
sequence compiler_dir compiler_dir=""
sequence source_file source_file=""
sequence dest_file dest_file=""
sequence optlist optlist={}
sequence file_list file_list={}
integer total_lines total_lines=0
sequence cmd cmd={}
sequence constant_list constant_list=alpha_tree_create()
sequence trigger_list trigger_list={}
sequence operator_list operator_list={}
sequence function_list function_list={}
sequence global_list global_list={{},{},{}}
sequence string_list string_list={}
sequence script_list script_list={}
sequence script_cmd script_cmd={}
sequence reserved reserved=alpha_tree_create()
atom start_time start_time=time()
atom run_time run_time=0
sequence timing_data timing_data={}
integer timing_depth timing_depth=-1
integer get_cmd_pointer get_cmd_pointer=0
integer autonumber_id autonumber_id=32767
sequence flow_list flow_list={
{0,"do"}
,{1,"begin"}
,{2,"end"}
,{3,"return"}
,{4,"if"}
,{5,"then"}
,{6,"else"}
,{7,"for"}
,{10,"while"}
,{11,"break"}
,{12,"continue"}
,{13,"exitscript"}
,{14,"exitreturning"}
,{15,"switch"}
,{16,"case"} --never appears in compiled script
}
sequence math_list math_list={
{0,"random",{0,1}}
,{1,"exponent",{0,2}}
,{2,"modulus",{0,1}}
,{3,"divide",{0,1}}
,{4,"multiply",{0,0}}
,{5,"subtract",{0,0}}
,{6,"add",{0,0}}
,{7,"xor",{0,0}}
,{8,"or",{0,0}}
,{9,"and",{0,0}}
,{10,"equal",{0,0}}
,{11,"notequal",{0,0}}
,{12,"lessthan",{0,0}}
,{13,"greaterthan",{0,0}}
,{14,"lessthanorequalto",{0,0}}
,{15,"greaterthanorequalto",{0,0}}
,{16,"setvariable",{0,0}}
,{17,"increment",{0,1}}
,{18,"decrement",{0,1}}
,{19,"not",{0}}
,{20,"logand",{0,0}}
,{21,"logor",{0,0}}
,{22,"logxor",{0,0}}
,{23,"abs",{0}}
,{24,"sign",{0}}
,{25,"sqrt",{0}}
}
sequence separator_list separator_list={
"+=","-=","$+","$=","$","+","--","/","*","^^","^"
,"==","<>",">>","<<","<=",">=",":=","=","&&","||"
}
sequence lexer_table lexer_table={}
sequence lexer_table_key2 lexer_table_key2={}
sequence all_scripts all_scripts={}
sequence current_script current_script="" --name of current script
integer cur_script_num cur_script_num=0 --index into script_cmd of current script
integer colors_enabled colors_enabled=true
integer error_file error_file=false
sequence used_globals used_globals={}
sequence used_locals used_locals={}
sequence string_table string_table={}
integer fast_mode fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
integer was_warnings was_warnings=false
---------------------------------------------------------------------------
--types--
--A position in the script source. A (large) integer, where
--the bottom 22 bits store the character number in the file, 1-based
--(where newlines are one character) and higher bits the file index
--in file_list minus one.
--See decode_srcpos
type srcpos(atom pos)
integer fileno
fileno=floor(pos/power(2,23))+1
--need to allow 0 as a dummy value
if pos=0 or (pos>0 and pos=floor(pos) and fileno<=length(file_list)
and and_bits(pos,power(2,23)-1)<=length(file_list[fileno][FILE_TEXT])) then
--odd, or and and only seem to shortcut when used as if condition, not
--in a general expression
return true
else
return false
end if
end type
---------------------------------------------------------------------------
--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
atom skip_time
integer key
skip_time=time()
key=wait_key()
skip_time=time()-skip_time
start_time+=skip_time
for i=1 to length(timing_data) do
if timing_data[i][TIMING_ACTIVE] then
timing_data[i][TIMING_TIME]+=skip_time
end if
end for
return(key)
end function
---------------------------------------------------------------------------
--prints a string with printf to stdout converting color codes
procedure color_print(sequence s,sequence printf_args)
sequence buffer
s=sprintf(s,printf_args)
buffer=""
for i=1 to length(s) do
if s[i]<=254 and s[i]>=239 then
puts(stdout,buffer)
buffer=""
if colors_enabled then
text_color(s[i]-239)
end if
else
buffer&=s[i]
end if
end for
if length(buffer) then
puts(stdout,buffer)
end if
end procedure
---------------------------------------------------------------------------
procedure opt_wait_for_key()
integer key
if not find('k',optlist) then
color_print("[Press Any Key]\n",{})
key=timeless_wait_key()
end if
end procedure
---------------------------------------------------------------------------
procedure enter_timing_zone(sequence description)
if not find('t',optlist) then
return
end if
timing_depth+=1
timing_data=append(timing_data,{timing_depth,description,-time(),true})
end procedure
---------------------------------------------------------------------------
procedure reenter_timing_zone(sequence description)
if not find('t',optlist) then
return
end if
for i=1 to length(timing_data) do
if equal(timing_data[i][TIMING_DESCRIPTION],description) then
timing_depth+=1
timing_data[i][TIMING_DEPTH]=timing_depth --could change...
timing_data[i][TIMING_TIME]-=time()
timing_data[i][TIMING_ACTIVE]=true
return
end if
end for
enter_timing_zone(description)
end procedure
---------------------------------------------------------------------------
procedure exit_timing_zone()
if not find('t',optlist) then
return
end if
for i=length(timing_data) to 1 by -1 do
--there can only be one active timing job at each depth at a time
if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then
timing_data[i][TIMING_TIME]+=time()
timing_data[i][TIMING_ACTIVE]=false
exit
end if
end for
timing_depth-=1
end procedure
---------------------------------------------------------------------------
procedure print_timing_data()
sequence indent
sequence data
if run_time=0 then
--so that all percentages show as 0%
run_time=1e100
end if
for i=1 to length(timing_data) do
data=timing_data[i]
indent=repeat(' ',data[TIMING_DEPTH]*4)
color_print("%s "&COLBWHI&"%.2f"&COLWHI&"s %4.2g%% %s\n",{indent,data[TIMING_TIME],100*data[TIMING_TIME]/run_time,data[TIMING_DESCRIPTION]})
end for
end procedure
---------------------------------------------------------------------------
function html_char_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]=' ' and i>1 then
if s[i-1]=' ' then
buffer&=" "
else
buffer&=s[i]
end if
elsif s[i]='<' then
buffer&="<"
elsif s[i]='>' then
buffer&=">"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
function error_string_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]<=254 and s[i]>=239 then
if s[i]=COLYEL then
buffer&=""
elsif s[i]=COLRED then
buffer&=""
elsif s[i]=COLPNK then
buffer&=""
elsif s[i]=COLWHI then
buffer&=""
elsif s[i]=COLBWHI then
buffer&=""
end if
elsif s[i]='\n' then
buffer&="
\n"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
--prints a long string wrapped at 80 columns
procedure wrap_print(sequence s,sequence arguments)
sequence outstring
s=sprintf(s,arguments)
while length(s) do
outstring=before_wrap_point(s)
s=after_wrap_point(s)
color_print("%s\n",{outstring})
end while
end procedure
---------------------------------------------------------------------------
procedure error_file_print(sequence s)
integer fh
if error_file then
fh=open(compiler_dir&"hs_error.htm","a")
if fh!=failure then
puts(fh,error_string_convert(s)&"\n")
close(fh)
end if
end if
end procedure
---------------------------------------------------------------------------
--prints out warning message in red with word wrap
procedure simple_warn(sequence s)
sequence pos
if not find('w',optlist) then
--do not warn if -w is set
pos=get_position()
if pos[2]>1 then
printf(stdout,"\n",{})
end if
wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
error_file_print(sprintf("WARNING: %s",{html_char_convert(s)}))
was_warnings = true
end if
end procedure
---------------------------------------------------------------------------
--prints out an error message in red with word wrap, then aborts
procedure simple_error(sequence s)
sequence pos
pos=get_position()
if pos[2]>1 then
printf(stdout,"\n",{})
end if
wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
error_file_print(sprintf("ERROR: %s",{html_char_convert(s)}))
if end_anchor_kludge then
error_file_print("\n
\n")
end if
opt_wait_for_key()
abort(1)
end procedure
---------------------------------------------------------------------------
--prints out the copyright info, usage info, and command-line options
procedure check_arg_count(sequence args)
if length(args)=2 then
wrap_print("HamsterSpeak semicompiler v%d%s (C)%s James Paige&Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE})
wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{})
wrap_print(COLYEL&"%s [-acdfknstwuyz] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))})
color_print(" -c colors will be disabled\n",{})
color_print(" -d dump debug report to hs_debug.txt\n",{})
color_print(" -f fast mode. disables some optimization\n",{})
color_print(" -k do not wait for a keypress when finished\n",{})
color_print(" -n don't add debug info (including copy of the source code)\n",{})
color_print(" -s print the name of each script as it is compiled\n",{})
color_print(" -a same as -s, but including autonumbered scripts\n",{})
color_print(" -t print detailed timing info (for HSpeak developers)\n",{})
color_print(" -w suppress minor warnings\n",{})
color_print(" -u show warnings for unused variables\n",{})
color_print(" -y overwrite the destination file without asking\n",{})
color_print(" -z write error messages to hs_error.htm\n",{})
wrap_print("\nFor more info about Hamsterspeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
wrap_print("\nThis is a command-line program. You should either run it from the command-line (DOS prompt) or you should drag and drop your script file onto it.\n",{})
opt_wait_for_key()
abort(0)
end if
end procedure
---------------------------------------------------------------------------
--build the lexer tables, which for each ASCII character i, contains the type
--of that character in entry i+1 (have to handle NUL)
procedure init_lexer()
sequence table_insert
--main table
table_insert={" (),\n#\t\"[]{}",
{LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_COMMA,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN}}
--by default each character is either binary, or an allowed identifier
--character. This is for backwards compatibility
lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,256-32)
for i=1 to length(table_insert[1]) do
lexer_table[table_insert[1][i]+1]=table_insert[2][i]
end for
for i=0 to 9 do
lexer_table['0'+i+1]=LEX_DIGIT
end for
--add the one-character prefix of each operator
for i=1 to length(separator_list) do
lexer_table[separator_list[i][1]+1]=LEX_KEYWORD
end for
--keyword suffix parsing table
lexer_table_key2=repeat(LEX_CHAR,256)
lexer_table_key2[' '+1]=LEX_SPACE
lexer_table_key2['\t'+1]=LEX_SPACE
--2nd character of each operator
for i=1 to length(separator_list) do
if length(separator_list[i])=2 then
lexer_table_key2[separator_list[i][2]+1]=LEX_KEYWORD2
end if
end for
--when floating point is added will want a table for number parsing
end procedure
---------------------------------------------------------------------------
--initializes global variables, and generally gets things ready to roll
procedure init()
sequence args
integer index
integer key
integer fh
args=command_line()
compiler_dir=path_only(args[2])
check_arg_count(args)
optlist={}
index=3
while index<=length(args) do
if args[index][1]='-' then
optlist=optlist&hs_lower(args[index][2..length(args[index])])
args=delete_element(args,index)
else
index+=1
end if
end while
check_arg_count(args)
source_file=normalize_filename(args[3])
if length(args)>3 then
dest_file=normalize_filename(args[4])
else
dest_file=normalize_filename(alter_extension(source_file,"hs"))
end if
if find('f',optlist) then
fast_mode=true
color_print("Using fast mode. Some size optimization disabled\n",{})
end if
if find('c',optlist) then
colors_enabled=false
end if
--the semi-undocumented command line argument -z writes a file called
--hs_error.htm formatted for HssEd to read
if find('z',optlist) then
error_file=true
if file_exists(compiler_dir&"hs_error.htm") then
fh=open(compiler_dir&"hs_error.htm","w")
if fh!=failure then
puts(fh,"")
close(fh)
end if
end if
end if
wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
if file_exists(dest_file) then
if find('y',optlist) then
--found the -y command line arg, overwrite automatically
key='y'
else
while true do
--prompt the user to overwrite
wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
key=timeless_wait_key()
color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})
if hs_lower(key)='y' then
exit
elsif hs_lower(key)='n' then
simple_error("output file overwrite cancelled by user")
else
wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)})
end if
end while
end if
end if
--why the alpha-tree? because the reserved-word list can get HUGE.
--we want to be able to look up words in it quickly. A btree or some such
--thing would have been even better, but thats alot of trouble :)
--FIXME: variable is missing! It doesn't fit in any of the categories
reserved=alpha_tree_mass_insert(reserved,{
{"defineconstant",RESERVE_CORE}
,{"definetrigger" ,RESERVE_CORE}
,{"defineoperator",RESERVE_CORE}
,{"globalvariable",RESERVE_CORE}
,{"definefunction",RESERVE_CORE}
,{"definescript" ,RESERVE_CORE}
,{"include" ,RESERVE_CORE} --should never appear, in theory
,{"do" ,RESERVE_FLOW}
,{"begin" ,RESERVE_BEGIN}
,{"end" ,RESERVE_END}
,{"return" ,RESERVE_FLOW}
,{"if" ,RESERVE_FLOW}
,{"then" ,RESERVE_FLOW}
,{"else" ,RESERVE_FLOW}
,{"for" ,RESERVE_FLOW}
,{"cfor" ,RESERVE_UNIMPLEMENTED}
,{"foreach" ,RESERVE_UNIMPLEMENTED}
,{"while" ,RESERVE_FLOW}
,{"break" ,RESERVE_FLOW}
,{"continue" ,RESERVE_FLOW}
,{"exitscript" ,RESERVE_FLOW}
,{"exitreturning" ,RESERVE_FLOW}
,{"switch" ,RESERVE_FLOW}
,{"case" ,RESERVE_FLOW}
,{"=" ,RESERVE_UNIMPLEMENTED}
,{"tracevalue" ,RESERVE_MACRO}
})--end mass_insert
for i=1 to length(math_list) do
reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN)
end for
init_lexer()
end procedure
---------------------------------------------------------------------------
function srcpos_file_number(srcpos pos)
return(floor(pos/power(2,23))+1)
end function
---------------------------------------------------------------------------
--returns 1-based character number in file
function srcpos_point(srcpos pos)
return(and_bits(pos,power(2,23)-1))
end function
---------------------------------------------------------------------------
--possibly slow: prehaps avoid heavy use
--returns {file name, line number, column number, line text}: index with POS_*
--column number returned is 0-based
function decode_srcpos(srcpos pos)
integer point, filenum
integer lineno
sequence filetext
sequence lines
point=and_bits(pos,power(2,23)-1)
filenum=floor(pos/power(2,23))+1
filetext=file_list[filenum][FILE_TEXT]
lines=file_list[filenum][FILE_LINE_EXTENTS]
if point>length(filetext) then
simple_error(sprintf("compiler bug: invalid source position %d in file of length %d",{point,length(filetext)}))
end if
lineno=length(lines)
for i=1 to length(lines) do
if point<=lines[i][PAIR_END] then
lineno=i
exit
end if
end for
return({file_list[filenum][FILE_NAME],lineno,point-lines[lineno][PAIR_START],filetext[lines[lineno][PAIR_START]..lines[lineno][PAIR_END]]})
end function
---------------------------------------------------------------------------
function form_error_text(sequence s,srcpos pos)
sequence line
sequence src_position
sequence column_display
integer tab_compensate
src_position=decode_srcpos(pos)
--?pos
--pretty_print(1,src_position,{2})
line=substring_replace(src_position[POS_TEXT],"\t"," ")
tab_compensate=3*count('\t',src_position[POS_TEXT][1..src_position[POS_COLUMN]])
column_display=repeat(' ',src_position[POS_COLUMN]+tab_compensate)
error_file_print(sprintf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]}))
if length(current_script) then
return(
sprintf(
"in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
else
return(
sprintf(
"in line %d of "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s^\n"
&COLRED&"%s\n"
,{src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s}
)
)
end if
--note that the calling procedure must send/deal with closing tags to error_file_print
end function
---------------------------------------------------------------------------
procedure src_warn(sequence s,srcpos pos)
if not find('w',optlist) then
--do not warn if -w is set
simple_warn(form_error_text(s,pos))
error_file_print("\n
\n")
end if
end procedure
---------------------------------------------------------------------------
procedure src_error(sequence s,srcpos pos)
end_anchor_kludge=true
simple_error(form_error_text(s,pos))
end procedure
---------------------------------------------------------------------------
procedure load_source(sequence filename,sequence reading_how,srcpos pos)
integer fh
object line
sequence file_text
sequence line_extents -- {start of line, end of line} pairs
fh=open(filename,"r")
if fh!=failure then
wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})
file_text={}
line_extents={}
line=gets(fh)
while sequence(line) do
line=exclude(line,"\n\r")
line_extents=append(line_extents,{length(file_text)+1,length(file_text)+length(line)})
file_text&=line
file_text&='\n'
line=gets(fh)
end while
close(fh)
total_lines+=length(line_extents)
file_list=append(file_list,{filename,file_text,line_extents})
else
if pos then
src_error(sprintf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos)
else
simple_error(sprintf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}))
end if
end if
end procedure
---------------------------------------------------------------------------
procedure load_include(sequence include_name,srcpos pos)
reenter_timing_zone("load_source/include")
--try current directory
if file_exists(include_name) then
load_source(include_name,"including",pos)
else
--try source directory
if file_exists(path_only(source_file)&include_name) then
load_source(normalize_filename(path_only(source_file)&include_name),"including",pos)
else
--try compiler_directory
if file_exists(compiler_dir&include_name) then
load_source(normalize_filename(compiler_dir&include_name),"including",pos)
else
--give up
load_source(include_name,"including",pos)
end if
end if
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure show_source_info()
if total_lines then
wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
else
simple_error("no data to compile\n")
end if
if length(file_list)>512 then
simple_warn("You have included over 512 files! In-game error messages will only report line numbers for errors in the first 512 files.")
end if
end procedure
---------------------------------------------------------------------------
procedure lexer_binary_error(srcpos pos)
--better use src_error, to track down garbage if accidentally creeping in
src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos)
end procedure
---------------------------------------------------------------------------
--reads and processes the escape codes in a string and returns it enclosed in quote marks
function lexer_read_string(sequence s,integer i,srcpos pos)
sequence string
integer escaping
escaping=false
string="\""
while true do
if i>length(s) then --did not find a closing " so not a valid string
src_error("Expected \" at end of line to end string (multi-line strings are not supported)",pos+length(s))
end if
if escaping then
if s[i]='"' or s[i]='\\' then
string&=s[i]
else
--invalid sequence
string&='\\'
string&=s[i]
end if
escaping=false
else
if s[i]='"' then
return({i,string&'"'})
elsif s[i]='\\' then
escaping=true
else
string&=s[i]
end if
end if
if lexer_table[s[i]+1]=LEX_BINARY then
lexer_binary_error(pos+i)
end if
i+=1
end while
end function
---------------------------------------------------------------------------
function lexer_read_number(sequence s,integer i,srcpos pos)
integer state
atom val
integer sign
sequence text
integer starti
starti=i
if s[i]='-' then
text="-"
i+=1
sign=-1
else
text=""
sign=1
end if
val=0
while i<=length(s) do
state=lexer_table[s[i]+1]
--color_print("lexing %d=%s state %d\n",{s[i],{s[i]},state})
if state=LEX_DIGIT then
text&=s[i]
val=val*10+s[i]-'0'
elsif state=LEX_CHAR or state=LEX_WARN or state=LEX_STRING then
if s[i]='.' then
src_error("Floating point values are not yet implemented",pos+i)
end if
src_error(
sprintf("Expected "&COLYEL&"%s"&COLRED&" to be followed by "&COLYEL&","&COLRED&" or "&COLYEL&"("&COLRED&" or "&COLYEL&")"&COLRED&" or an operator"
,{text})
,pos+i
)
elsif state=LEX_SPACE then --yes, allowed in numbers too
else -- state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_KEYWORD or state=LEX_COMMENT or state=LEX_BINARY
--backtrack one character
--known bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead
i-=1
exit
end if
i+=1
end while
if not int32(sign*val) then
src_error(sprintf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32 bit signed integer. Integers must be between -2147483648 and 2147483647",{text}),pos+starti)
end if
return({i,text})
end function
---------------------------------------------------------------------------
function lex_line(sequence s,integer i,integer stop,srcpos pos)
integer state
sequence tokens
sequence ident --identifier text being built up
sequence temp
sequence masked
integer found
integer textstart --the column at which this identifier starts, otherwise 0
sequence remem_ident
integer remem_textstart
integer nonnumeral --whether we are definitely inside an identifier, ie. a digit doesn't indicate a number
ident=""
nonnumeral=false
textstart=0
tokens={}
while i<=stop do
state=lexer_table[s[i]+1]
--color_print("lexing %d=%s state %d\n",{s[i],{s[i]},state})
if state=LEX_CHAR then
ident=ident & hs_lower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
elsif state=LEX_SPACE then
elsif state=LEX_DIGIT then
if nonnumeral then
ident=ident & s[i]
else
if length(ident) then --either ident is "-" or it is ""
i=textstart
ident=""
textstart=0
end if
temp=lexer_read_number(s,i,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
--nonnumeral remains false
end if
elsif state=LEX_WARN then
ident=ident & hs_lower(s[i])
if textstart=0 then
textstart=i
end if
nonnumeral=true
src_warn(sprintf("FUTURE COMPATIBILITY PROBLEM: "&COLYEL&"%s"&COLRED&" should not be used in names!", {s[i]}), pos+i)
else
remem_ident=ident --for backtracking
remem_textstart=textstart
if length(ident) then
tokens=append(tokens,{ident,pos+textstart})
end if
ident=""
nonnumeral=false
textstart=0
if state=LEX_BEGIN then
tokens=append(tokens,{"begin",pos+i})
elsif state=LEX_END then
tokens=append(tokens,{"end",pos+i})
elsif state=LEX_COMMA then
elsif state=LEX_COMMENT then
exit
elsif state=LEX_STRING then
temp=lexer_read_string(s,i+1,pos)
tokens=append(tokens,{temp[2],pos+i})
i=temp[1]
elsif state=LEX_KEYWORD then
--the following mess is in order to allow whitespace in the middle of separators
textstart=i
masked=s[i..i]
--we only support length 1 and 2 separators
while i length(s)
found=false
for j=1 to length(separator_list) do
if match(separator_list[j],masked)=1 then
tokens=append(tokens,{separator_list[j],pos+textstart})
if length(separator_list[j])=1 then
i=textstart
end if
found=true
exit
end if
end for
if found=false then
--need to backtrack, in particular for -
ident=remem_ident & masked[1]
i=textstart
if length(remem_ident) then
tokens=tokens[1..$-1]
textstart=remem_textstart
nonnumeral=true
else
--nonnumeral remains false: we've seen exactly "-"
end if
else
textstart=0
end if
elsif state=LEX_BINARY then
lexer_binary_error(pos+i)
end if
end if
i+=1
end while
if length(ident) then
tokens=append(tokens,{ident,pos+textstart})
end if
return(tokens)
end function
---------------------------------------------------------------------------
--this also checks for rogue strings and $'s
function translate_plotstrings(sequence tokens)
integer state
integer start_token
integer i
sequence string_func
sequence string_token
srcpos pos
state=0
i=1
while i<=length(tokens) do
if compare(tokens[i][CMD_TEXT],"$")=0 then
pos=tokens[i][CMD_POS]
if state=0 then
state=1
start_token=i
else
--jump to error throw
state=1
exit
end if
end if
if state=2 then
if tokens[i][CMD_TEXT][1]='"' then
string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1])
string_token={sprintf("@$string%d",{length(string_list)}),pos}
tokens=tokens[1..start_token-1]&{string_func,{"begin",pos}}&tokens[start_token+1..i-2]&{string_token,{"end",pos}}&tokens[i+1..$]
state=0 --may have multiple strings on one line
else --we saw a + (or illegal =) inside the string number expression
state=1
end if
else
if tokens[i][CMD_TEXT][1]='"' then
src_error(
sprintf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(tokens[i][CMD_TEXT],15)}),
tokens[i][CMD_POS]
)
end if
end if
if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then
string_func={"setstringfromtable",pos}
state=2
elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then
string_func={"appendstringfromtable",pos}
state=2
end if
i+=1
end while
if state!=0 then
--we've seen a surplus $, not right
src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos)
end if
return(tokens)
end function
---------------------------------------------------------------------------
--Given a command tree, get the original text from the file
--OK, this function is horribly complicated just so tracevalue can print
--"hero X (me)" instead of "herox(me)". It could be implemented much simpler if
--macros were expanded in the lexer, but then compile_commands and
--convert_macros would have to be as well
function tree_original_text(sequence tree)
sequence script_toks
sequence subtree
srcpos leftmost, rightmost
integer left_at, right_at
integer filenum
integer left_point, right_point
integer depth
sequence tokens
sequence text
script_toks=script_cmd[cur_script_num][4]
--find leftmost and rightmost tokens making up the expression, not including brackets
leftmost=tree[TREE_TRUNK][CMD_POS]
subtree=tree
while length(subtree[TREE_BRANCHES]) do
subtree=subtree[TREE_BRANCHES][1]
if subtree[TREE_TRUNK][CMD_POS]2 then
src_error(sprintf("Found garbage "&COLYEL&"%s"&COLRED&" after name of file to include. Try enclosing the filename in \"quote marks\"?",{s[3][CMD_TEXT]}),s[3][CMD_POS])
end if
if s[at+1][CMD_TEXT][1]='"' then --each element of s guaranteed nonzero length
--is a string
return(s[at+1][CMD_TEXT][2..$-1])
end if
--can we throw a warning if quote marks should have been used?
return(s[at+1][CMD_TEXT])
end if
return("")
end function
---------------------------------------------------------------------------
procedure read_commands(integer file_num,sequence include_stack)
sequence broken
sequence include_name
sequence lines
integer include_num
srcpos pos
pos=(file_num-1)*power(2,23)
include_stack=append(include_stack,file_list[file_num][FILE_NAME])
lines=file_list[file_num][FILE_LINE_EXTENTS]
for i=1 to length(lines) do
broken=lex_line(file_list[file_num][FILE_TEXT],lines[i][PAIR_START],lines[i][PAIR_END],pos)
include_name=seek_include(broken)
if length(include_name) then
if find(include_name,include_stack) then
src_error(sprintf("File "&COLPNK&"%s"&COLRED&" is being included recursively",{include_name}),broken[1][CMD_POS])
end if
include_num=find(include_name,column(file_list,FILE_NAME))
if include_num then
src_warn(sprintf("File "&COLPNK&"%s"&COLRED&" is being included multiple times",{include_name}),broken[1][CMD_POS])
else
load_include(include_name,broken[2][CMD_POS])
include_num=length(file_list)
end if
read_commands(include_num,include_stack)
else
--this is probably not the best place to do this translation, but for now we simply emulate old string translation
--this also checks for stray strings and $'s
cmd&=translate_plotstrings(broken)
end if
end for
end procedure
---------------------------------------------------------------------------
procedure load_and_lex()
enter_timing_zone("Loading & lexing files")
--load the first file
reenter_timing_zone("load_source/include")
load_source(source_file,"reading",0)
exit_timing_zone()
--lex while loading all included files
read_commands(1,{})
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
--Warning: you normally want to use check_undefined_string instead of this
procedure check_for_reserved(sequence s,srcpos pos,sequence expect)
if alpha_tree_seek(reserved,s) then
if compare("top-level declaration",expect)=0 then
src_error(
sprintf(
"Expected %s, but found %s "&COLYEL&"%s"&COLRED&". Perhaps there is an extra "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" earlier in the file"
,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}
)
,pos
)
elsif compare("user script name",expect)=0 then
src_error(sprintf("Expected %s, but found %s "&COLYEL&"%s"&COLRED,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}),pos)
else
src_error(sprintf("Expected %s, but found %s "&COLYEL&"%s"&COLRED,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}),pos)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure mustnt_be_a_number(sequence s)
--note that identifiers are allowed to start with -
if length(exclude(s[CMD_TEXT],"-0123456789"))=0 and count('-',s[CMD_TEXT])!=length(s[CMD_TEXT]) then
src_error(sprintf("Expected a name, but found a number "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
end if
end procedure
---------------------------------------------------------------------------
procedure check_undefined_constant(sequence s,atom value)
sequence const_data
sequence src_position
sequence err_string
sequence whats_happening
if alpha_tree_seek(constant_list,s[CMD_TEXT]) then
--constant is already defined
const_data=alpha_tree_data(constant_list,s[CMD_TEXT],0)
src_position=decode_srcpos(const_data[CONST_POS])
if const_data[CONST_VALUE]=value then
whats_happening="will be ignored because"
else
whats_happening="is being redefined;"
end if
err_string=sprintf("Constant "&COLYEL&"%s"&COLRED&" %s it is already defined in line %d of "&COLPNK&"%s"&COLRED&" with the value "&COLYEL&"%d"&COLRED
,{
s[CMD_TEXT]
,whats_happening
,src_position[POS_LINE]
,src_position[POS_FILENAME]
,const_data[CONST_VALUE]
}
)
if const_data[CONST_VALUE]=value then
src_warn(err_string,s[CMD_POS])
else
src_error(err_string,s[CMD_POS])
end if
--if it's in constant_list, then it's also in reserved.
else
check_for_reserved(s[CMD_TEXT],s[CMD_POS],"constant name")
end if
mustnt_be_a_number(s)
end procedure
---------------------------------------------------------------------------
procedure check_undefined_string(sequence s,sequence seeking)
check_for_reserved(s[CMD_TEXT],s[CMD_POS],seeking)
mustnt_be_a_number(s)
end procedure
---------------------------------------------------------------------------
function force_16_bit(atom n,srcpos pos)
if n>32767 then
src_warn(sprintf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos)
n=32767
elsif n<-32768 then
src_warn(sprintf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos)
n=-32768
end if
return(n)
end function
---------------------------------------------------------------------------
function try_string_to_number(sequence s)
atom result
result=floor(string_to_object(s[CMD_TEXT],0))
if not string_is_int32(s[CMD_TEXT]) then
src_error(sprintf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
end if
return(result)
end function
---------------------------------------------------------------------------
function enforce_constants(sequence s)
--enforces both constants
object v
v=alpha_tree_data(constant_list,s,{})
if length(v) then
v=v[CONST_VALUE]
if int32(v) then
return(sprintf("%d",{v}))
end if
end if
return(s)
end function
---------------------------------------------------------------------------
function get_cmd()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_no_constants()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=result[CMD_TEXT]
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_block(integer convert_constants)
sequence this
sequence result
result={}
this=get_cmd()
if compare("begin",this[CMD_TEXT])!=0 then
src_error(sprintf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS])
end if
while true do
if convert_constants then
this=get_cmd()
else
this=get_cmd_no_constants()
end if
if compare("begin",this[CMD_TEXT])=0 then
src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS])
elsif compare("end",this[CMD_TEXT])=0 then
exit--break out of the while
else
result=append(result,this)
end if
end while
return(result)
end function
---------------------------------------------------------------------------
procedure parse_constant_block(sequence block)
atom num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_constant(block[i+1],num)
name=block[i+1][CMD_TEXT]
constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_POS]})
reserved=alpha_tree_insert(reserved,name,RESERVE_CONSTANT)
end for
end procedure
---------------------------------------------------------------------------
procedure parse_trigger_block(sequence block)
integer num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name but script trigger definition block ended",block[i][CMD_POS])
end if
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
check_undefined_string(block[i+1],"script trigger name")
name=block[i+1][CMD_TEXT]
trigger_list=append(trigger_list,{num,name})
reserved=alpha_tree_insert(reserved,name,RESERVE_CORE)
end for
end procedure
---------------------------------------------------------------------------
procedure create_global(integer id,sequence name,srcpos pos)
integer at
at=find(id,global_list[PAIR_NUM])
if at then
src_error(sprintf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos)
else
if id>=0 and id<=4095 then
global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
global_list[GLB_POS]=append(global_list[GLB_POS],pos)
reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL)
else
src_error(sprintf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to 4095",{id}),pos)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure parse_global_block(sequence block)
integer num
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but globalvariable block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
check_undefined_string(block[i+1],"global variable name")
create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS])
end for
end procedure
---------------------------------------------------------------------------
procedure parse_operator_block(sequence block)
integer num
sequence name,true
for i=1 to length(block) by 3 do
if i+2>length(block) then
src_error("expected name but defineoperator block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
mustnt_be_a_number(block[i+1])
mustnt_be_a_number(block[i+2])
name=block[i+1][CMD_TEXT]
true=block[i+2][CMD_TEXT]
operator_list=append(operator_list,{num,name,true,block[i+2][CMD_POS]})
reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR)
end for
end procedure
---------------------------------------------------------------------------
--arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to
--VAR_ARGS indicating a variable (unlimited) number of arguments.
function create_function(sequence list,integer id,sequence name,object arglist,integer func_type,srcpos pos)
integer at
if func_type=RESERVE_SCRIPT then
at=find(id,column(list,PAIR_NUM))
if at then
src_error(sprintf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),pos)
end if
if id=0 then
src_error(sprintf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos)
elsif id<0 then
id=autonumber_id
autonumber_id-=1
end if
end if
list=append(list,{id,name,arglist,pos})
reserved=alpha_tree_insert(reserved,name,func_type)
return(list)
end function
---------------------------------------------------------------------------
function parse_define_block(sequence block,sequence list,integer func_type)
integer num
sequence name
integer args
sequence arglist
srcpos name_pos
integer i
i=1
while i<=length(block) do
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
if i+1>length(block) then
src_error(sprintf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_POS])
else
i+=1
check_undefined_string(block[i],RESERVE_NAMES[func_type]&" name")
name=block[i][CMD_TEXT]
name_pos=block[i][CMD_POS]
if i+1>length(block) then
src_error("expected argument count but define block ended",block[i][CMD_POS])
else
i+=1
args=try_string_to_number(block[i])
arglist={}
if args<0 then
list=create_function(list,num,name,VAR_ARGS,func_type,name_pos)
else
for j=1 to args do
if i+1>length(block) then
src_error("expected argument default but define block ended",block[i][CMD_POS])
else
i+=1
arglist=append(arglist,try_string_to_number(block[i]))
end if
end for
list=create_function(list,num,name,arglist,func_type,name_pos)
end if
i+=1
end if
end if
end while
return(list)
end function
---------------------------------------------------------------------------
procedure primary_parse_pass()
sequence this
enter_timing_zone("Preliminary pass")
color_print("preliminary pass\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if compare("defineconstant",this[CMD_TEXT])=0 then
parse_constant_block(get_cmd_block(false))
end if
if compare("definetrigger",this[CMD_TEXT])=0 then
parse_trigger_block(get_cmd_block(false))
end if
end while
if find("script",column(trigger_list,PAIR_NUM))=0 then
trigger_list=append(trigger_list,{0,"script"})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure parse_script(sequence trigger)
sequence name
sequence arglist
sequence s
sequence this
sequence err_string
integer depth
integer temp
name=get_cmd()
if not find(name[CMD_TEXT],column(script_list,PAIR_NAME)) then
--Doesn't appear in a definescript block; if it did the name is already reserved
--Note that we won't add it to script_list yet; that happens later in check_script_declarations
check_undefined_string(name,"user script name")
end if
current_script=name[CMD_TEXT]
arglist={}
while true do
if get_cmd_pointer>length(cmd) then
src_error(sprintf("script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}),name[CMD_POS])
end if
this=get_cmd()
if compare("begin",this[CMD_TEXT])=0 then
exit--break the while
end if
--Note that arglist isn't the actual list of arguments, it's just a string of tokens right now.
--The actual arglist is built in process_arglist, while compiling each script.
arglist=append(arglist,this)
end while
--every script is nested inside a big fat do() block
s={{"do",this[CMD_POS]}}
depth=0
while true do
s=append(s,this)
if compare("end",this[CMD_TEXT])=0 then
depth-=1
if depth=0 then
exit--break while
end if
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
else
temp=alpha_tree_data(reserved,this[CMD_TEXT],3)
if temp<=RESERVE_UNIMPLEMENTED then
err_string=sprintf(
"%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT]}
)
if temp=RESERVE_CORE then
err_string&=sprintf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]})
end if
src_error(err_string,this[CMD_POS])
end if
end if
if get_cmd_pointer>length(cmd) then
src_error(
sprintf(
"script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
,{name[CMD_TEXT]}
)
,name[CMD_POS]
)
end if
this=get_cmd()
end while
script_cmd=append(script_cmd,{trigger,name,arglist,s})
current_script=""
end procedure
---------------------------------------------------------------------------
procedure parse_top_level()
sequence this
sequence ignore
sequence triggers
enter_timing_zone("Top level pass")
color_print("parsing top-level\n",{})
triggers=column(trigger_list,PAIR_NAME)
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if compare("defineconstant",this[CMD_TEXT])=0 then
ignore=get_cmd_block(true)
elsif compare("definetrigger",this[CMD_TEXT])=0 then
ignore=get_cmd_block(true)
elsif compare("globalvariable",this[CMD_TEXT])=0 then
parse_global_block(get_cmd_block(true))
elsif compare("defineoperator",this[CMD_TEXT])=0 then
parse_operator_block(get_cmd_block(true))
elsif compare("definefunction",this[CMD_TEXT])=0 then
function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION)
elsif compare("definescript",this[CMD_TEXT])=0 then
script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT)
elsif find(this[CMD_TEXT],triggers)>0 then
parse_script(this)
else
check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration")
src_error(
sprintf(
"Expected top-level declaration but found "&COLYEL&"%s"&COLRED
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
end if
end while
cmd={}
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure dump_script_and_function_info(integer fh,sequence list)
sequence this
sequence id_string
sequence src_position
for i=1 to length(list) do
this=list[i]
if this[PAIR_NUM]>autonumber_id then
id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]})
else
id_string=sprintf("ID=%d",{this[PAIR_NUM]})
end if
src_position=decode_srcpos(this[FUNC_POS])
printf(fh,"%s %d\t%s\t%s(",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,id_string
,this[PAIR_NAME]
})
if equal(this[FUNC_ARGS],VAR_ARGS) then
printf(fh,"VARIABLE ARGS",{})
else
for j=1 to length(this[FUNC_ARGS]) do
if j>1 then
printf(fh,",",{})
end if
if equal(this[FUNC_ARGS][j],NO_DEFAULT) then
printf(fh,"NONE",{})
else
printf(fh,"%d",{this[FUNC_ARGS][j]})
end if
end for
end if
printf(fh,")\n",{})
end for
end procedure
---------------------------------------------------------------------------
function seek_string_by_id(integer id,sequence list,sequence name)
integer at
at=find(id,column(list,PAIR_NUM))
if at then
return(list[at][PAIR_NAME])
else
simple_error(sprintf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
end if
return("")
end function
---------------------------------------------------------------------------
function name_lookup(sequence pair)
integer at
if pair[1]=KIND_NUMBER then
return(sprintf("%d",{pair[2]}))
elsif pair[1]=KIND_LOCAL then
return(sprintf("local%d",{pair[2]}))
elsif pair[1]=KIND_GLOBAL then
at=find(pair[2],global_list[PAIR_NUM])
if at then
return(global_list[PAIR_NAME][at])
else
simple_error(sprintf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
end if
elsif pair[1]=KIND_FLOW then
return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
elsif pair[1]=KIND_SCRIPT then
return(seek_string_by_id(pair[2],script_list,"user script"))
elsif pair[1]=KIND_FUNCTION then
return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
elsif pair[1]=KIND_MATH then
return(seek_string_by_id(pair[2],math_list,"built-in function"))
else
simple_error(sprintf("decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
end if
end function
---------------------------------------------------------------------------
function binstring_to_int(sequence encoded)
--bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles
atom temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000
if and_bits(temp,#80000000) then
return(temp-#100000000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function dump_script_binary(sequence bin,integer offset,integer depth)
sequence result
sequence kind_and_id
integer kind
integer argcount
integer new_offset
result=""
kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])}
kind=kind_and_id[1]
result&=sprintf("%s%s",{
repeat(' ',depth)--indent
,name_lookup(kind_and_id)
})
if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11])
if argcount then
result&="(\n"
for i=0 to argcount-1 do
new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3])
result&=dump_script_binary(bin,new_offset,depth+2)
end for
result&=repeat(' ',depth)&")\n"
else
result&="()\n"
end if
else
result&="\n"
end if
return(result)
end function
---------------------------------------------------------------------------
function dump_script_tree(sequence tree,integer depth)
sequence result
result=""
for i=1 to length(tree) do
result&=sprintf("%s%s",{
repeat(' ',depth)--indent
,tree[i][TREE_TRUNK][CMD_TEXT]
})
if length(tree[i][TREE_BRANCHES])>0 then
result&="(\n"
result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
result&=repeat(' ',depth)&")\n"
else
result&="\n"
end if
end for
return(result)
end function
---------------------------------------------------------------------------
function dump_script_strings(sequence bin)
sequence result
integer table_start
integer offset
integer len
result=""
table_start=binstring_to_int(bin[9..12])
if table_start=0 then
return("")
end if
bin=bin[table_start+1..$]
offset=1
while offset1 then
debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
else
debug_file="hs_debug.txt"
end if
fh=open(debug_file,"w")
if fh!=failure then
wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
-------------------------------------
printf(fh,"[Scripts]\n",{})
dump_script_and_function_info(fh,script_list)
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Global Variables]\n",{})
for i=1 to length(global_list[PAIR_NUM]) do
src_position=decode_srcpos(global_list[GLB_POS][i])
printf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,global_list[PAIR_NUM][i]
,global_list[PAIR_NAME][i]
})
end for
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Builtin Functions]\n",{})
dump_script_and_function_info(fh,function_list)
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Operators]\n",{})
for i=1 to length(operator_list) do
src_position=decode_srcpos(operator_list[i][OPER_POS])
printf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,operator_list[i][PAIR_NAME]
,operator_list[i][OPER_TRUENAME]
,operator_list[i][PAIR_NUM]
})
end for
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Script Dumps]\n",{})
for i=1 to length(all_scripts) do
src_position=decode_srcpos(all_scripts[i][4])
printf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][1]
,all_scripts[i][3]
})
src_position=decode_srcpos(script_cmd[i][1][CMD_POS])
printf(fh,"%s %d\tTrigger=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][2]
,script_cmd[i][1][CMD_TEXT]
})
for j=1 to length(all_scripts[i][6]) do
src_position=decode_srcpos(all_scripts[i][6][j][CMD_POS])
printf(fh,"%s %d\tvar=%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[i][6][j][CMD_TEXT]
})
end for
printf(fh,"%d bytes compiled\n",{length(all_scripts[i][7])})
-- printf(fh,"%s\n\n",{dump_script_tree(all_scripts[i][5],0)})
printf(fh,"%s",{dump_script_binary(all_scripts[i][7][CODE_START_BYTE_OFFSET+1..$],0,0)})
printf(fh,"%s\n\n",{dump_script_strings(all_scripts[i][7])})
end for
printf(fh,"\n",{})
-------------------------------------
close(fh)
else
wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
end if
end if
end procedure
---------------------------------------------------------------------------
function get_cmd_depth(integer ptr,sequence data,integer depth)
sequence result
sequence this
result={}
while true do
--if get_key()=27 then abort(1/0) end if
if ptr>length(data) then
src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
depth-=1
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
end if
if depth=0 then
exit --break out of the while
else
result=append(result,this)
end if
end while
return({ptr,result})
end function
---------------------------------------------------------------------------
--identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis
function what_kind_and_id(sequence command,sequence local_vars)
integer kind
atom id
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
id=string_to_object(s,{})
elsif length(s) and s[1] = '@' then
kind=KIND_REFERENCE
id=0 -- ID always resolves to 0 for references here, since it is too early to know all
-- script IDs. The real work is done in binary_compile_recurse
elsif find(s,column(local_vars,CMD_TEXT)) then
kind=KIND_LOCAL
id=find(s,column(local_vars,CMD_TEXT))
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])]
elsif keyword=RESERVE_FLOW then
kind=KIND_FLOW
id=flow_list[find(s,column(flow_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
id=function_list[find(s,column(function_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
id=script_list[find(s,column(script_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
id=math_list[find(s,column(math_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
id=0
else
src_error(sprintf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
return({kind,id})
end function
---------------------------------------------------------------------------
--identify the kind of a text command
function what_kind(sequence command,sequence local_vars, integer look_for_operators)
integer kind
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_int32(s) then
kind=KIND_NUMBER
elsif length(s) and s[1] = '@' then
kind=KIND_REFERENCE
elsif find(s,column(local_vars,CMD_TEXT)) then
kind=KIND_LOCAL
elsif length(s)=0 then
kind=KIND_PARENS
elsif look_for_operators and find(s,column(operator_list,PAIR_NAME)) then
kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then
kind=KIND_FLOW
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
elsif keyword=RESERVE_MACRO then
kind=KIND_MACRO
else
src_error(sprintf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
return(kind)
end function
---------------------------------------------------------------------------
--this function not used anywhere
function how_many_args(sequence name,integer kind)
integer result
integer at
if kind=KIND_PARENS then
result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
elsif kind=KIND_FLOW then
result=-1 --flow supports an unknown number of args
elsif kind=KIND_OPERATOR then
result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
elsif kind=KIND_MATH then
at=find(name[CMD_TEXT],column(math_list,PAIR_NAME))
result=length(math_list[at][FUNC_ARGS])
elsif kind=KIND_FUNCTION then
at=find(name[CMD_TEXT],column(function_list,PAIR_NAME))
if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then
result=-1
else
result=length(function_list[at][FUNC_ARGS])
end if
elsif kind=KIND_SCRIPT then
at=find(name[CMD_TEXT],column(script_list,PAIR_NAME))
result=length(script_list[at][FUNC_ARGS])
else
--numbers, variables, etc do not permit args
result=0
end if
return(result)
end function
---------------------------------------------------------------------------
function takes_args(integer kind)
if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO then
return(true)
else
return(false)
end if
end function
---------------------------------------------------------------------------
function get_script_cmd(integer ptr,sequence data,sequence vars)
sequence command
sequence this
sequence after
integer kind
after={}
command=data[ptr]
ptr+=1
if compare("end",command[CMD_TEXT])=0 then
src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS])
elsif compare("begin",command[CMD_TEXT])=0 then
--floaty brackets for order-of-operations-enforcement
ptr-=1
command[CMD_TEXT]=""
elsif compare("variable",command[CMD_TEXT])=0 then
--must ignore variable declaration
if ptr<=length(data) then
--there is room for args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
end if
--non-command
return({ptr,{}})
end if
if ptr<=length(data) then
--there is room for args
kind=what_kind(command,vars,true)
--distinguishing between functions with and without args means wait() would be ok but noop() would not
if takes_args(kind) then
this=data[ptr]
if compare("begin",this[CMD_TEXT])=0 then
--yes, it has args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
after=after[2]
if length(after)=0 and kind=KIND_PARENS then
src_error("found empty parentheses not associated with a function call",command[CMD_POS])
end if
end if
end if
end if
return({ptr,{command,after}})
end function
---------------------------------------------------------------------------
function compile_commands(sequence script_data,sequence vars)
integer ptr
sequence this
sequence command
sequence result
result={}
ptr=1
while true do
this=get_script_cmd(ptr,script_data,vars)
ptr=this[1]
command=this[2]
if length(command)>0 then
if length(command[2])>0 then
--this command has arguments that need parsing
command[2]=compile_commands(command[2],vars)
end if
result=append(result,command)
end if
if ptr>length(script_data) then
exit --break out of while when there is no more data
end if
end while
return(result)
end function
---------------------------------------------------------------------------
function gather_local_vars(sequence vars,sequence data)
sequence this
integer at
integer ptr
sequence src_position
ptr=1
while true do
this=data[ptr]
ptr+=1
if compare("variable",this[CMD_TEXT])=0 then
if ptr>length(data) then
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("begin",this[CMD_TEXT])=0 then
while true do
if ptr>length(data) then
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
exit--break the while
end if
check_undefined_string(this,"local variable name")
at=find(this[CMD_TEXT],column(vars,CMD_TEXT))
if at then
src_position=decode_srcpos(vars[at][CMD_POS])
src_error(
sprintf(
"local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLPNK&"%s"&COLRED,
{this[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}
)
,this[CMD_POS]
)
else
vars=append(vars,this)
end if
end while
else
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
end if
if ptr>length(data) then
exit --break out of the while
end if
end while
return(vars)
end function
---------------------------------------------------------------------------
--there is only one, hardcoded, macro right now: tracevalue
function expand_macros(sequence tree)
sequence newargs
sequence string_token
srcpos pos
for ptr=1 to length(tree) do
if equal("tracevalue",tree[ptr][TREE_TRUNK][CMD_TEXT]) then
newargs={}
tree[ptr][TREE_TRUNK][CMD_TEXT]="tracevalueinternal"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
for i=1 to length(tree[ptr][TREE_BRANCHES]) do
string_list=append(string_list,tree_original_text(tree[ptr][TREE_BRANCHES][i]))
string_token={sprintf("@$string%d",{length(string_list)}),pos}
newargs&={{string_token,{}},tree[ptr][TREE_BRANCHES][i]}
end for
tree[ptr][TREE_BRANCHES]=newargs
end if
if length(tree[ptr][TREE_BRANCHES]) then
tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements
function normalize_flow_control(sequence tree,sequence vars,sequence parent)
integer ptr
sequence s
srcpos pos
srcpos argpos
integer argkind
integer var_at
ptr=1
while ptr<=length(tree) do
s=tree[ptr][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr][TREE_TRUNK][CMD_POS]
if compare("if",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf(
COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"and"&COLRED&" and "&COLYEL&"or"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
if ptr1 then
src_error(sprintf(
COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"and"&COLRED&" and "&COLYEL&"or"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
if ptr4 then
src_error(sprintf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(tree[ptr][TREE_BRANCHES])}),tree[ptr][TREE_BRANCHES][5][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=3 then
--append default step value
tree[ptr][TREE_BRANCHES]=append(tree[ptr][TREE_BRANCHES],{{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}})
end if
argkind=what_kind(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK],vars,true)
if argkind=KIND_LOCAL then
--translate into a numeric reference to a variable
used_locals=append(used_locals,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT])
var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],column(vars,CMD_TEXT))
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--warn, then translate into a numeric reference to a variable
src_warn(sprintf(
"Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
,{tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--only variables allowed as the first argument of a "for"
src_error(
sprintf("first argument of "&COLYEL&"for"&COLRED&" statement must be a variable, not %s "&COLYEL&"%s"&COLRED,{
KIND_LONGNAMES[argkind]
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]
})
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS]
)
end if
if ptr1 then
src_error(sprintf(
COLYEL&s&COLRED&" statement has %d arguments. It should have only one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit script"&COLRED,{}),pos)
end if
elsif compare("break",s)=0 or compare("continue",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf(
COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
--append default value
tree[ptr][TREE_BRANCHES]={{{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}}}
end if
elsif compare("exitscript",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>0 then
src_error(sprintf(
COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit returning"&COLRED
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
end if
elsif compare("switch",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf(
COLYEL&s&COLRED&" statement has %d expressions. It should have only one."
,{length(tree[ptr][TREE_BRANCHES])}
),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS])
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(
COLYEL&s&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED
,{length(tree[ptr][TREE_BRANCHES])}
),pos)
end if
if ptr argnum then
--add defaults if not enough args are present
if kind=KIND_MATH then
--special processing for math
if list[at][PAIR_NUM]<16 or list[at][PAIR_NUM]>=19 then
--math shouldnt have defaults
src_error(sprintf(
"math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d"
,{tree[TREE_TRUNK][CMD_TEXT],argnum,maxargs}
),tree[TREE_TRUNK][CMD_POS])
else
--variable assignment commands can have defaults
if argnum = 0 then
--no defaults for first argument of variable function
src_error(sprintf(
"variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1"
,{tree[TREE_TRUNK][CMD_TEXT],argnum}
),tree[TREE_TRUNK][CMD_POS])
elsif argnum = 1 then
--make defaults for second arg of variable function
if list[at][PAIR_NUM]=16 then
--setvariable
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"0",tree[TREE_TRUNK][CMD_POS]}
,{}
})
else
--increment and decrement
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"1",tree[TREE_TRUNK][CMD_POS]}
,{}
})
end if
end if
end if
else
--normal processing for script and function
for i=argnum+1 to maxargs do
if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then
src_error(sprintf(
"%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d"
,{KIND_LONGNAMES[kind],tree[TREE_TRUNK][CMD_TEXT],i}
),tree[TREE_TRUNK][CMD_POS])
end if
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{sprintf("%d",{list[at][FUNC_ARGS][i]}),tree[TREE_TRUNK][CMD_POS]}
,{}
})
end for
end if
end if
--this is as good a time as any to make sure that var manipulation functions point to real variables
if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<19 then
argkind=what_kind(tree[TREE_BRANCHES][1][TREE_TRUNK],vars,false)
if argkind=KIND_LOCAL then
--its local. translate it to a numeric reference
var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],column(vars,CMD_TEXT))
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--its global. translate it to a numeric reference
var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--its not a variable. bad!
src_error(sprintf(
"first argument of variable manipulation function "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
,{tree[TREE_TRUNK][CMD_TEXT],KIND_LONGNAMES[argkind],tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_POS])
end if
end if
return(tree[TREE_BRANCHES])
end function
---------------------------------------------------------------------------
function normalize_arguments(sequence tree,sequence vars)
integer kind
--unlike the flow normalization and operator translation we do not insert/delete elements from the current level, so we can use a "for" safely, and dont need a "while"
for i=1 to length(tree) do
kind=what_kind(tree[i][TREE_TRUNK],vars,false)
if kind=KIND_SCRIPT then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,script_list,vars)
elsif kind=KIND_FUNCTION then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,function_list,vars)
elsif kind=KIND_MATH then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,math_list,vars)
end if --number, flow, global, local, parens need no argchecking
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
integer b1,b2
b1=and_bits(n,#FF)
b2=floor(and_bits(n,#FFFF)/256)
return({b1,b2})
end function
---------------------------------------------------------------------------
function convert_to_bytes(sequence s)
sequence result
result={}
for i=1 to length(s) do
result&=int_to_bytes(s[i])
end for
return(result)
end function
---------------------------------------------------------------------------
--add a string to the table for this script, return its offset
function table_string(integer id)
integer len
integer offset
offset=length(string_table)/4
len=length(string_list[id])
string_table&=int_to_bytes(len)
string_table&=string_list[id]
if remainder(len,4) then
string_table&=repeat(0,4-remainder(len,4))
end if
return(offset)
end function
---------------------------------------------------------------------------
--return value is a sequence when new command data was appended, and an integer when an offset is returned
function binary_compile_recurse(sequence tree,sequence vars,sequence done_code)
sequence result
integer kind,id
integer at
sequence s
integer offset
sequence value_temp, str_temp
object sub_result
sequence done_code_plus_result
result={}
kind=what_kind(tree[TREE_TRUNK],vars,false)
s=tree[TREE_TRUNK][CMD_TEXT]
if kind=KIND_NUMBER then
value_temp=value(s)
result&=kind
result&=value_temp[2]
elsif kind=KIND_REFERENCE then
str_temp = s[2..length(s)]
--is it a global variable?
at=find(str_temp,global_list[PAIR_NAME])
if at then
--yes, it is a global, compile to global ID
result&=KIND_NUMBER
result&=global_list[PAIR_NUM][at]
else
--is it a script?
at=find(str_temp,column(script_list,PAIR_NAME))
if at then
--yes, it is a script. Compile to a script ID
result&=KIND_NUMBER
result&=script_list[at][PAIR_NUM]
else
--is it a string literal?
if length(str_temp)>7 and equal("$string",str_temp[1..7]) then
value_temp=value(str_temp[8..length(str_temp)])
result&=KIND_NUMBER
result&=table_string(value_temp[2])
else
src_error(sprintf("reference "&COLYEL&"@%s"&COLRED&" could not be resolved: must be the name of a global variable or a script",{str_temp}),tree[TREE_TRUNK][CMD_POS])
end if
end if
end if
elsif kind=KIND_GLOBAL then
at=find(s,global_list[PAIR_NAME])
result&=kind
result&=global_list[PAIR_NUM][at]
elsif kind=KIND_LOCAL then
at=find(s,column(vars,CMD_TEXT))
result&=kind
result&=at-1
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
if kind=KIND_SCRIPT then
at=find(s,column(script_list,PAIR_NAME))
id=script_list[at][PAIR_NUM]
elsif kind=KIND_FUNCTION then
at=find(s,column(function_list,PAIR_NAME))
id=function_list[at][PAIR_NUM]
elsif kind=KIND_FLOW then
at=find(s,column(flow_list,PAIR_NAME))
id=flow_list[at][PAIR_NUM]
elsif kind=KIND_MATH then
at=find(s,column(math_list,PAIR_NAME))
id=math_list[at][PAIR_NUM]
end if
result&=kind
result&=id
result&=length(tree[TREE_BRANCHES])
for i=1 to length(tree[TREE_BRANCHES]) do
--add placeholders for each argoffset
result&=1234.5678 --this nonsense value will never be matched when looking for duplicate data
end for
for i=1 to length(tree[TREE_BRANCHES]) do
--actually evaluate each argument and set the real offsets
done_code_plus_result=done_code&result --prefabricating this is faster, since we use it twice
offset=length(done_code_plus_result) --offset is in double words
sub_result=binary_compile_recurse(tree[TREE_BRANCHES][i],vars,done_code_plus_result)
if sequence(sub_result) then
--if new data was added, append it
result&=sub_result
else
--if a matching reference was available use it
offset=sub_result
end if
result[3+i]=offset
end for
else
src_error(sprintf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),tree[TREE_TRUNK][CMD_POS])
end if
if not fast_mode then
at=match(result,done_code)
if at>0 then
--found existing data exactly like this command, so just return a reference to it
return(at-1)
end if
end if
--return the data for this command to be appended
return(result)
end function
---------------------------------------------------------------------------
function binary_compile(integer id,sequence tree,sequence vars)
sequence result
sequence compiled_data
integer at
string_table={}
--binary data is in a mix of 16-bit and 32-bit signed words.
--the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit.
--output header
--the first word is the zero-rooted byte-offset of the first executable code byte
result=output_word(CODE_START_BYTE_OFFSET)
--the second word is the number of local variables
result&=output_word(length(vars))
--the third word is the number of arguments the script takes (also in SCRIPTS.TXT)
at=find(id,column(script_list,PAIR_NUM))
result&=output_word(length(script_list[at][FUNC_ARGS]))
--the fourth word is the format of the command data (presently used to indicate 32-bit encoding)
result&=output_word(SCRIPT_FORMAT_VERSION)
--the fifth&sixth words are a 32bit pointer to the string literal table (in bytes), we don't know it yet
result&=output_word(0)
result&=output_word(0)
--what follows is command data in the format [kindID,Value,argcount,argpointerlist]
--numbers and variables have no argcount or argpointerlist
--an argpointer is the zero-rooted word-offset of the argument relative
--to the start of the executable commands. I realise that this format is
--unnecessarily complicated. I had hoped to get benefits of being able to
--store frequently reused commands only once and then just point to them,
--but in actual practice, it isnt worth the trouble, since the only
--commands that tend to be redundant are the really short ones.
--the first command is always a "do". there can be only one top-level command
if length(tree)!=1 then
simple_error(sprintf("compiler bug! script tree has %s root nodes",{length(tree)}))
end if
reenter_timing_zone("binary_compile_recurse")
compiled_data=binary_compile_recurse(tree[1],vars,"")
exit_timing_zone()
result&=convert_to_bytes(compiled_data)
--append with a table of string literals and give its offset
if length(string_table)>0 then
result[9..12]=int_to_bytes(length(result))
result&=string_table
end if
return(result)
end function
---------------------------------------------------------------------------
--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(sequence tree)
integer i
sequence graft
i=1
while i<=length(tree) do
if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
--found a floaty-bracket
graft=tree[i][TREE_BRANCHES]
tree=delete_element(tree,i)
tree=insert_sequence(tree,graft,i)
else
if length(tree[i][TREE_BRANCHES]) then
tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
end if
i+=1
end if
end while
return(tree)
end function
---------------------------------------------------------------------------
function sanity_check(sequence tree,sequence vars,sequence parent)
sequence s
sequence kind_and_id
integer kind
atom id
for i=1 to length(tree) do
s=tree[i][TREE_TRUNK][CMD_TEXT]
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if (compare("if",parent)=0) and i=1 then
if kind=KIND_NUMBER then
if id then
src_warn(sprintf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),tree[i][TREE_TRUNK][CMD_POS])
else
src_warn("Condition is always false",tree[i][TREE_TRUNK][CMD_POS])
end if
elsif kind=KIND_FLOW then
src_warn(sprintf("Should not use flow control command "&COLYEL&"%s"&COLRED&" as condition for if",{s}),tree[i][TREE_TRUNK][CMD_POS])
end if
elsif compare("do",parent)=0 or compare("then",parent)=0 or compare("else",parent)=0 then
if kind=KIND_NUMBER then
src_warn(sprintf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
,{id}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_GLOBAL then
src_warn(sprintf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_LOCAL then
src_warn(sprintf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{vars[id][CMD_TEXT]}),tree[i][TREE_TRUNK][CMD_POS])
elsif kind=KIND_MATH and (id<=15 or id>=19) then
src_warn(sprintf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded"
,{s}),tree[i][TREE_TRUNK][CMD_POS])
end if
end if
if kind=KIND_GLOBAL then
if not find(s,used_globals) then
used_globals=append(used_globals,s)
end if
elsif kind=KIND_LOCAL then
if not find(s,used_locals) then
used_locals=append(used_locals,s)
end if
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--simulate a result overflowing a signed 32 bit register
function overflow_int32(atom val)
--maybe check whether val is so large that this is could be inaccurate?
return(and_bits(remainder(val,#80000000),#FFFFFFFF))
end function
---------------------------------------------------------------------------
function optimized_arg(sequence tree,sequence vars)
sequence kind_and_id
integer kind
atom id
object arg1,arg2
kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_NUMBER then
return(id)
elsif kind=KIND_MATH and (id<=15 or id>=19) then
arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
if length(tree[TREE_BRANCHES])>=2 then
arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
else
arg2=0
end if
if atom(arg1) and atom(arg2) then
if id=0 then
--random
if arg1=arg2 then
return(arg1)
end if
elsif id=1 then
--exponent
if arg2>0 then
--if too large, Euphoria will signal an overflow, but I'm also worried
--about overflow_int32 giving inaccurate results
if log(abs(arg1)+1)+log(abs(arg2)+1)arg2)
elsif id=14 then
--lessthanorequalto
return(arg1<=arg2)
elsif id=15 then
--greaterthanorequalto
return(arg1>=arg2)
elsif id=19 then
--not
return(arg1=0)
elsif id=22 then
--logxor
return((arg1=0)!=(arg2=0))
elsif id=23 then
--abs
return(abs(arg1))
elsif id=24 then
--sign
return((arg1>0)-(arg1<0))
elsif id=25 then
--sqrt
if arg1<0 then
src_error(sprintf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
end if
return(floor(sqrt(arg1)+0.5))
end if
end if
--have to allow for side effects of shortcut evaluating expressions by only checking first arg
if atom(arg1) then
if id=20 then
--logand
if arg1=0 then
return 0
end if
elsif id=21 then
--logor
if arg1!=0 then
return 1
end if
end if
end if
end if
return({}) --return nonatom
end function
---------------------------------------------------------------------------
--goes through a script simplifying expressions that always have the same value
function optimize_script(sequence tree,sequence vars)
object arg
for i=1 to length(tree) do
arg=optimized_arg(tree[i],vars)
if atom(arg) then
tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
tree[i][TREE_BRANCHES]={}
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
procedure warn_unused_locals(sequence vars)
if find('u',optlist) then
for i=1 to length(vars) do
if not find(vars[i][CMD_TEXT],used_locals) then
src_warn(sprintf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
procedure warn_unused_globals()
integer at
if find('u',optlist) then
for i=1 to length(global_list[PAIR_NUM]) do
at=find(global_list[PAIR_NAME][i],used_globals)
if not at then
src_warn(sprintf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_POS][i])
end if
end for
end if
end procedure
---------------------------------------------------------------------------
procedure compile_a_script(integer id,sequence trigger_data,sequence name_data,sequence arg_data,sequence script_data)
sequence script_tree
sequence local_vars
sequence binary
integer trigger
trigger=trigger_list[find(trigger_data[CMD_TEXT],column(trigger_list,PAIR_NAME))][PAIR_NUM]
current_script=name_data[CMD_TEXT]
local_vars=arg_data --start with argument names (so we can check for conflicts)
reenter_timing_zone("gather_local_vars")
local_vars=gather_local_vars(local_vars,script_data)
exit_timing_zone()
used_locals={}
reenter_timing_zone("compile_commands")
script_tree=compile_commands(script_data,local_vars)
exit_timing_zone()
reenter_timing_zone("convert_operators")
script_tree=convert_operators(script_tree)
exit_timing_zone()
reenter_timing_zone("expand_macros")
script_tree=expand_macros(script_tree)
exit_timing_zone()
reenter_timing_zone("normalize_flow_control")
script_tree=normalize_flow_control(script_tree,local_vars,"")
exit_timing_zone()
reenter_timing_zone("normalize_arguments")
script_tree=normalize_arguments(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("collapse_floaty_brackets")
script_tree=collapse_floaty_brackets(script_tree)
exit_timing_zone()
if not fast_mode then
reenter_timing_zone("optimize_script")
script_tree=optimize_script(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("sanity_check")
script_tree=sanity_check(script_tree,local_vars,"")
exit_timing_zone()
warn_unused_locals(local_vars)
end if
reenter_timing_zone("binary_compile")
binary=binary_compile(id,script_tree,local_vars)
exit_timing_zone()
all_scripts=append(all_scripts,{
id --id
,trigger --trigger type
,name_data[CMD_TEXT] --name
,name_data[CMD_POS] --source line
,script_tree
,local_vars
,binary --compiled data to go into the HSZ lumps
})
current_script=""
end procedure
---------------------------------------------------------------------------
--Parse argument declarations in a script argument list
function process_arglist(sequence args)
sequence arglist
sequence defaults
integer i
integer using_defaults
arglist={}
defaults={}
using_defaults=false
i=1
while i<=length(args) do
if compare("=",args[i][CMD_TEXT])=0 then
src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS])
end if
check_undefined_string(args[i],"argument name")
if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then
src_error(sprintf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
if ilength(args) then
src_error(sprintf("Syntax error: expected default value for script argument "&COLYEL&"%s"&COLRED,{args[i][CMD_TEXT]}),args[i+1][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults&=try_string_to_number({enforce_constants(args[i+2][CMD_TEXT]),args[i+2][CMD_POS]})
using_defaults=true
i+=3
else
if using_defaults=true then
--any argument with a default must be followed with arguments with defaults
src_error(sprintf("Default value missing for argument "&COLYEL&"%s"&COLRED&": the previous argument had a default value, so all following ones must too",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults=append(defaults,NO_DEFAULT)
i+=1
end if
end while
return({arglist,defaults})
end function
---------------------------------------------------------------------------
--Check that every declared script has been defined, with the same number of arguments,
--and each script appears exactly once. Basically, compare script_list and script_cmd
procedure check_script_declarations()
integer at
sequence args_and_defaults
sequence scripts_found
sequence src_position
scripts_found=repeat(false,length(script_list))
for i=1 to length(script_cmd) do
at=find(script_cmd[i][2][CMD_TEXT],column(script_list,PAIR_NAME))
current_script=script_cmd[i][2][CMD_TEXT]
args_and_defaults=process_arglist(script_cmd[i][3])
if not at then
--define script optional, add to script list
script_list=create_function(script_list,-1,script_cmd[i][2][CMD_TEXT],args_and_defaults[2],RESERVE_SCRIPT,script_cmd[i][2][CMD_POS])
scripts_found&={true}
else
if scripts_found[at]=true then
src_position=decode_srcpos(script_list[at][FUNC_POS])
src_error(
sprintf(
"Script "&COLYEL&"%s"&COLRED&" appears more than once! It was previously defined on line %d of "&COLPNK&"%s"&COLRED
,{script_cmd[i][2][CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}
)
,script_cmd[i][2][CMD_POS]
)
end if
scripts_found[at]=true
--The srcpos stored for this script will be from definescript. Repoint it at the actual script
--(doesn't really matter, all_scripts would have ended up with that srcpos anyway)
script_list[at][FUNC_POS]=script_cmd[i][2][CMD_POS]
if length(script_cmd[i][3]) != length(script_list[at][FUNC_ARGS]) then
src_error(
sprintf(
"Script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its declaration"
,{script_cmd[i][2][CMD_TEXT],length(script_cmd[i][3]),length(script_list[at][FUNC_ARGS])}
)
,script_cmd[i][2][CMD_POS]
)
end if
for j=1 to length(script_list[at][FUNC_ARGS]) do
if not equal(args_and_defaults[2][j],NO_DEFAULT) and not equal(args_and_defaults[2][j],script_list[at][FUNC_ARGS][j]) then
src_warn(
sprintf(
"Default value for argument "&COLYEL&"%s"&COLRED&" does not match default given in script definition, %d"
,{script_cmd[i][3][j][CMD_TEXT],script_list[at][FUNC_ARGS][j]}
)
,script_cmd[i][2][CMD_POS]
)
end if
end for
end if
script_cmd[i][3]=args_and_defaults[1]
end for
current_script=""
for i=1 to length(scripts_found) do
if scripts_found[i]=false then
src_error(
sprintf("Script "&COLYEL&"%s"&COLRED&" was declared but is missing",{script_list[i][PAIR_NAME]})
,script_list[i][FUNC_POS]
)
end if
end for
end procedure
---------------------------------------------------------------------------
procedure compile_each_script()
sequence count
integer at
enter_timing_zone("Compiling scripts")
color_print("compiling scripts",{})
count=repeat(0,length(file_list))
enter_timing_zone("check_script_declarations")
check_script_declarations()
exit_timing_zone()
for i=1 to length(script_cmd) do
at=find(script_cmd[i][2][CMD_TEXT],column(script_list,PAIR_NAME))
-- color_print("%s\n",{script_cmd[i][2][CMD_TEXT]})
reenter_timing_zone("compile_a_script")
cur_script_num=i
compile_a_script(
script_list[at][PAIR_NUM] --ID
,script_cmd[i][1] --trigger
,script_cmd[i][2] --Name
,script_cmd[i][3] --argnames
,script_cmd[i][4] --data
)
cur_script_num=0
exit_timing_zone()
count[srcpos_file_number(script_cmd[i][2][CMD_POS])]+=1
color_print(".",{})
end for
color_print("\n",{})
if not fast_mode then
warn_unused_globals()
end if
for i=1 to length(count) do
if count[i] then
wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i][FILE_NAME]})
end if
end for
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
function generate_scripts_dot_txt()
sequence result
integer print_scripts
result=""
print_scripts = 0
if find('a',optlist) then
print_scripts = 2
elsif find('s',optlist) then
print_scripts = 1
end if
for i=1 to length(script_list) do
result&=sprintf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])})
for j=1 to length(script_list[i][FUNC_ARGS]) do
if equal(script_list[i][FUNC_ARGS][j],NO_DEFAULT) then
result&="0\r\n"
else
result&=sprintf("%d\r\n",{script_list[i][FUNC_ARGS][j]})
end if
end for
if print_scripts and ((script_list[i][PAIR_NUM]2048 then
simple_warn(sprintf("Maximum function id was %d, but will not export function names above id 2047",{records-1}))
records=2048
end if
--header size in bytes (offset to location table)
result=output_word(6)
--file format version
result&=output_word(0)
--number of records in location table
result&=output_word(records)
offset=length(result)+records*2
i=1
for id=0 to records-1 do
if function_list[i][PAIR_NUM]=id then
result&=output_word(offset)
offset+=4+length(function_list[i][PAIR_NAME])
i+=1
else
result&=output_word(0)
end if
end for
i=1
while i<=length(function_list) and function_list[i][PAIR_NUM]0 then
temp=without_extension(fname) --you can't slice a function result directly! :(
fname=sprintf("%s~%d.%s",{temp[1..$-1],suffix,extension_only(fname)})
end if
--color_print("writing %d %s\n",{i,fname})
if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then
simple_error("unable to write a script source lump")
end if
end for
end procedure
---------------------------------------------------------------------------
procedure write_output_file()
integer fh
object lh
enter_timing_zone("Writing output file")
if length(all_scripts) then
fh=open(dest_file,"wb")
if fh!=-1 then
wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
--write header and version
if write_lump(fh,"HS","HamsterSpeak"&output_word(COMPILER_VERSION)&COMPILER_SUB_VERSION)=false then
simple_error("unable to write header")
end if
--write script index (old file)
if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
simple_error("unable to write script index")
end if
--write script index (new file)
if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then
simple_error("unable to write binary script index")
end if
--write script commands list
if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then
simple_error("unable to write commands listing")
end if
--write each script
for i=1 to length(all_scripts) do
if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][1]}),all_scripts[i][7])=false then
simple_error(sprintf("unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][3]}))
end if
end for
if not find('n',optlist) then --no debug info
--write copy of scripts
wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
lh=begin_lump(fh,"source.lumped")
if equal(lh,false) then
simple_error("unable to write scripts source file")
end if
write_script_files(fh)
if end_lump(lh)=false then
simple_error("unable to finish write of scripts source file")
end if
end if
close(fh)
else
simple_error(sprintf("attempt to open"&COLYEL&"%s"&COLRED&" failed",{dest_file}))
end if
else
color_print("no scripts to output\n",{})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
init()
load_and_lex()
show_source_info()
primary_parse_pass()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()
run_time=time()-start_time
color_print("done (%g seconds)\n",{run_time})
if find('t',optlist) then
print_timing_data()
end if
opt_wait_for_key()
if was_warnings = true then
abort(2)
end if