-- HamsterSpeak Compiler v.2E
--(C) Copyright 2002 James Paige and Hamster Republic Productions
-- Please read LICENSE.txt for GPL License details and disclaimer of liability
-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com
-- This code is written in Euphoria 2.2. You can get the public-domain
-- version from http://RapidEuphoria.com . I also highly reccomend
-- David Cuny's EE editor which you can download from the same site.
---------------------------------------------------------------------------
--Changelog
--2H 2006-03-29 Display better help and wait for keypress when run
-- by double-clicking the icon. Added -k command line
-- option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $= by TeeEmCee
-- Mention GPL in help text
--2F 2005-07-24 Strings implemented by TeeEmCee:
-- $id="..." -> setstring
-- $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
-- Added += and -= operators thanks to a patch from
-- The Mad Cacti and Fyrewulff
--2D 2002-08-03 Only a small change, strip out \r from lines of
-- the script as we read them to avoid errors related
-- to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
-- non-printable characters exist in the input file.
-- (naturally the script will not compile right, but
-- at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
-- defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2 First Release
---------------------------------------------------------------------------
without warning --to avoid annoying warnings
without type_check --for a very small speed boost
--with profile_time --time profiling
include hsspiffy.e --various routines, sequence manipulation - James Paige
include graphics.e --standard library, needed for color output
---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1
constant COMPILER_VERSION=2
constant COMPILER_SUB_VERSION='H'
constant COPYRIGHT_DATE="2002"
--these constants are color-flags.
constant COLYEL=239 + YELLOW
constant COLRED=239 + RED
constant COLPNK=239 + BRIGHT_RED
constant COLWHI=239 + WHITE
constant COLBWHI=239 + BRIGHT_WHITE
constant SRC_TEXT=1
constant SRC_LINE=2
constant SRC_FILE=3
constant CMD_TEXT=1
constant CMD_LINE=2
constant RESERVE_CORE=1
constant RESERVE_UNIMPLEMENTED=2
constant RESERVE_FLOW=3
constant RESERVE_FUNCTION=4
constant RESERVE_SCRIPT=5
constant RESERVE_GLOBAL=6
constant RESERVE_BEGIN=7
constant RESERVE_END=8
constant RESERVE_OPERATOR=9
constant RESERVE_BUILTIN=10
constant RESERVE_NAMES={"declaration"
,"unimplimented keyword"
,"flow control"
,"hardcoded function"
,"user script"
,"global variable"
,"bracket "&COLYEL&"("&COLRED
,"bracket "&COLYEL&")"&COLRED
,"operator"
,"builtin command"
}
constant PAIR_NUM=1
constant PAIR_NAME=2
constant OPER_TRUENAME=3
constant OPER_LINE=4
constant FUNC_ARGS=3
constant FUNC_LINE=4
constant GLB_LINE=3
constant KIND_NUMBER=1
constant KIND_FLOW=2
constant KIND_GLOBAL=3
constant KIND_LOCAL=4
constant KIND_MATH=5
constant KIND_FUNCTION=6
constant KIND_SCRIPT=7
--constant KIND_VAR_REF=8
constant KIND_OPERATOR=9 --never appears in compiled script
constant KIND_PARENS=10 --never appears in compiled script
constant KIND_LONGNAMES={"number"
,"flow control statement"
,"global variable"
,"local variable"
,"built-in function"
,"hard-coded function"
,"script"
,"variable reference"
,"untranslated operator"
,"order-of-operations-enforcing parenthesis"
}
constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant CODE_START_BYTE_OFFSET=4
---------------------------------------------------------------------------
--globals-- --initializations--
sequence compiler_dir compiler_dir=""
sequence source_file source_file=""
sequence dest_file dest_file=""
sequence optlist optlist={}
sequence source source={}
sequence file_list file_list={}
integer total_lines total_lines=0
sequence cmd cmd={}
sequence constant_list constant_list=alpha_tree_create()
sequence operator_list operator_list={}
sequence function_list function_list={}
sequence global_list global_list={{},{},{}}
sequence script_list script_list={}
sequence script_cmd script_cmd={}
sequence reserved reserved=alpha_tree_create()
atom start_time start_time=time()
integer get_cmd_pointer get_cmd_pointer=0
integer autonumber_id autonumber_id=32767
sequence flow_list flow_list={
{0,"do"}
,{1,"begin"}
,{2,"end"}
,{3,"return"}
,{4,"if"}
,{5,"then"}
,{6,"else"}
,{7,"for"}
,{10,"while"}
}
sequence math_list math_list={
{0,"random",{0,1}}
,{1,"exponent",{0,2}}
,{2,"modulus",{0,1}}
,{3,"divide",{0,1}}
,{4,"multiply",{0,0}}
,{5,"subtract",{0,0}}
,{6,"add",{0,0}}
,{7,"xor",{0,0}}
,{8,"or",{0,0}}
,{9,"and",{0,0}}
,{10,"equal",{0,0}}
,{11,"notequal",{0,0}}
,{12,"lessthan",{0,0}}
,{13,"greaterthan",{0,0}}
,{14,"lessthanorequalto",{0,0}}
,{15,"greaterthanorequalto",{0,0}}
,{16,"setvariable",{0,0}}
,{17,"increment",{0,1}}
,{18,"decrement",{0,1}}
}
sequence all_scripts all_scripts={}
sequence current_script current_script=""
integer colors_enabled colors_enabled=true
integer error_file error_file=false
sequence used_globals used_globals={}
sequence used_locals used_locals={}
integer fast_mode fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
---------------------------------------------------------------------------
--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
atom skip_time
integer key
skip_time=time()
key=wait_key()
start_time+=time()-skip_time
return(key)
end function
---------------------------------------------------------------------------
--prints a string with printf to stdout converting color codes
procedure color_print(sequence s,sequence printf_args)
sequence buffer
s=sprintf(s,printf_args)
buffer=""
for i=1 to length(s) do
if s[i]<=254 and s[i]>=239 then
puts(stdout,buffer)
buffer=""
if colors_enabled then
text_color(s[i]-239)
end if
else
buffer&=s[i]
end if
end for
if length(buffer) then
puts(stdout,buffer)
end if
end procedure
---------------------------------------------------------------------------
procedure opt_wait_for_key()
integer key
if not find('k',optlist) then
color_print("[Press Any Key]",{})
key=timeless_wait_key()
end if
end procedure
---------------------------------------------------------------------------
function html_char_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]=' ' and i>1 then
if s[i-1]=' ' then
buffer&=" "
else
buffer&=s[i]
end if
elsif s[i]='<' then
buffer&="<"
elsif s[i]='>' then
buffer&=">"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
function error_string_convert(sequence s)
sequence buffer
sequence result
result=""
buffer=""
for i=1 to length(s) do
if s[i]<=254 and s[i]>=239 then
if s[i]=COLYEL then
buffer&=""
elsif s[i]=COLRED then
buffer&=""
elsif s[i]=COLPNK then
buffer&=""
elsif s[i]=COLWHI then
buffer&=""
elsif s[i]=COLBWHI then
buffer&=""
end if
elsif s[i]='\n' then
buffer&="
\n"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
--prints a long string wrapped at 80 columns
procedure wrap_print(sequence s,sequence arguments)
sequence outstring
s=sprintf(s,arguments)
while length(s) do
outstring=before_wrap_point(s)
s=after_wrap_point(s)
color_print("%s\n",{outstring})
end while
end procedure
---------------------------------------------------------------------------
procedure error_file_print(sequence s)
integer fh
if error_file then
fh=open(compiler_dir&"hs_error.htm","a")
if fh!=failure then
puts(fh,error_string_convert(s)&"\n")
close(fh)
end if
end if
end procedure
---------------------------------------------------------------------------
--prints out warning message in red with word wrap
procedure simple_warn(sequence s)
sequence pos
if not find('w',optlist) then
--do not warn if -w is set
pos=get_position()
if pos[2]>1 then
printf(stdout,"\n",{})
end if
wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
error_file_print(sprintf("WARNING: %s",{html_char_convert(s)}))
end if
end procedure
---------------------------------------------------------------------------
--prints out an error message in red with word wrap, then aborts
procedure simple_error(sequence s)
sequence pos
pos=get_position()
if pos[2]>1 then
printf(stdout,"\n",{})
end if
wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
error_file_print(sprintf("ERROR: %s",{html_char_convert(s)}))
if end_anchor_kludge then
error_file_print("\n
\n")
end if
opt_wait_for_key()
abort(1)
end procedure
---------------------------------------------------------------------------
--prints out the copyright info, usage info, and command-line options
procedure check_arg_count(sequence args)
if length(args)=2 then
wrap_print("HamsterSpeak semicompiler v%d%s (C)%s James Paige, Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE})
wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{})
wrap_print(COLYEL&"%s [-cdefwy] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))})
color_print(" -c colors will be disabled\n",{})
color_print(" -d dump debug report to hs_debug.txt\n",{})
color_print(" -f fast mode. disables some optimization\n",{})
color_print(" -k do not wait for a keypress when finished\n",{})
color_print(" -w suppress minor warnings\n",{})
color_print(" -y overwrite the destination file without asking\n",{})
color_print(" -z write error messages to hs_error.htm\n",{})
wrap_print("\nFor more info about Hamsterspeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
wrap_print("\nThis is a command-line program. You should either run it from the command-line (DOS prompt) or you should drag and drop your script file onto it.\n",{})
opt_wait_for_key()
abort(0)
end if
end procedure
---------------------------------------------------------------------------
--initializes global variables, and generaly gets things ready to roll
procedure init()
sequence args
integer index
integer key
integer fh
args=command_line()
compiler_dir=path_only(args[2])
check_arg_count(args)
optlist={}
index=3
while index<=length(args) do
if args[index][1]='-' then
optlist=optlist&hs_lower(args[index][2..length(args[index])])
args=delete_element(args,index)
else
index+=1
end if
end while
check_arg_count(args)
source_file=normalize_filename(args[3])
if length(args)>3 then
dest_file=normalize_filename(args[4])
else
dest_file=normalize_filename(alter_extension(source_file,"HS"))
end if
if find('f',optlist) then
fast_mode=true
color_print("using fast mode. some size optimization disabled\n",{})
end if
if find('c',optlist) then
colors_enabled=false
end if
--the semi-undocumented command line argument -z writes a file called
--hs_error.htm formatted for HssEd to read
if find('z',optlist) then
error_file=true
if file_exists(compiler_dir&"hs_error.htm") then
fh=open(compiler_dir&"hs_error.htm","w")
if fh!=failure then
puts(fh,"")
close(fh)
end if
end if
end if
wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
if file_exists(dest_file) then
if find('y',optlist) then
--found the -y command line arg, overwrite automatically
key='y'
else
--prompt the user to overwrite
wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
key=timeless_wait_key()
color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})
end if
if hs_lower(key)='y' then
elsif hs_lower(key)='n' then
simple_error("output file overwrite canceled by user")
else
simple_error(sprintf(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)}))
end if
end if
--why the alpha-tree? because the reserved-word list can get HUGE.
--we want to be able to look up words in it quickly. A btree or some such
--thing would have been even better, but thats alot of trouble :)
reserved=alpha_tree_mass_insert(reserved,{
{"defineconstant",RESERVE_CORE}
,{"defineconstant",RESERVE_CORE}
,{"defineoperator",RESERVE_CORE}
,{"globalvariable",RESERVE_CORE}
,{"definefunction",RESERVE_CORE}
,{"definescript" ,RESERVE_CORE}
,{"script" ,RESERVE_CORE}
,{"do" ,RESERVE_FLOW}
,{"begin" ,RESERVE_BEGIN}
,{"end" ,RESERVE_END}
,{"return" ,RESERVE_FLOW}
,{"if" ,RESERVE_FLOW}
,{"then" ,RESERVE_FLOW}
,{"else" ,RESERVE_FLOW}
,{"for" ,RESERVE_FLOW}
,{"cfor" ,RESERVE_UNIMPLEMENTED}
,{"foreach" ,RESERVE_UNIMPLEMENTED}
,{"while" ,RESERVE_FLOW}
,{"break" ,RESERVE_UNIMPLEMENTED}
})--end mass_insert
for i=1 to length(math_list) do
reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN)
end for
end procedure
---------------------------------------------------------------------------
function strip_comments(sequence s)
integer at
at=find('#',s)
if at then
s=s[1..at-1]
end if
return s
end function
---------------------------------------------------------------------------
function seek_include(sequence s)
integer at
s=strip_comments(s)
at=match("include",hs_lower(s))
if at then
s=s[at+7..length(s)]
at=match(",",s)
if at then
s=s[at+1..length(s)]
return(trim_whitespace(s))
end if
end if
return("")
end function
---------------------------------------------------------------------------
procedure load_source(sequence filename,sequence reading_how)
integer fh
object line
integer index
integer filename_index
sequence include_name
fh=open(filename,"r")
if fh!=failure then
wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})
file_list=append(file_list,filename)
filename_index=length(file_list)
index=1
line=gets(fh)
while sequence(line) do
line=exclude(line,"\n\r")
include_name=seek_include(line)
if length(include_name) then
--try current directory
if file_exists(include_name) then
load_source(include_name,"including")
else
--try source directory
if file_exists(path_only(source_file)&include_name) then
load_source(normalize_filename(path_only(source_file)&include_name),"including")
else
--try compiler_directory
if file_exists(compiler_dir&include_name) then
load_source(normalize_filename(compiler_dir&include_name),"including")
else
--give up
load_source(include_name,"including")
end if
end if
end if
else
--stores the original source line, the index in the file, and the index of the filename
source=append(source,{line,index,filename_index})
end if
index+=1
line=gets(fh)
end while
close(fh)
total_lines+=index
else
wrap_print("file "&COLBWHI&"%s"&COLWHI&" not found\n",{filename})
end if
end procedure
---------------------------------------------------------------------------
procedure show_source_info()
if total_lines then
wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
else
simple_error("no data to compile\n")
end if
end procedure
---------------------------------------------------------------------------
function convert_strings(sequence s)
integer start
integer stringstart
integer ptr
integer at
integer at2
integer switch
sequence output
sequence result
start=1
result=""
while startstart then
result=result&s[start..stringstart-1]
end if
start=stringstart
at=match("=\"",s[start..length(s)])
at2=match("+\"",s[start..length(s)])
if at=0 and at2=0 then
exit
end if
if at!=0 and atlength(s) then --did not find a closing " so not a valid string (previous matches presumably a coincidence)
result=result&s[stringstart..at+1]
start=at+2 --skip the $..=" and try again
exit
end if
if switch then
if s[ptr]='"' then
output=output&sprintf(",%d",{'"'})
elsif s[ptr]='\\' then
output=output&sprintf(",%d",{'\\'})
else
output=output&sprintf(",%d,%d",{'\\',s[ptr]}) --invalid sequence
end if
switch=false
else
if s[ptr]='"' then
result=result&output&")"
start=ptr+1
exit
elsif s[ptr]='\\' then
switch=true
else
output=output&sprintf(",%d",{s[ptr]})
end if
end if
ptr+=1
end while
end while
return(result&s[start..length(s)])
end function
---------------------------------------------------------------------------
--function smush_line(sequence s)
-- sequence sep
-- s=hs_lower(exclude(strip_comments(s)," \t\n"))
-- s=substring_replace(s,"(",",begin,")
-- s=substring_replace(s,")",",end,")
-- sep={"+","--","/","*","^","==","<>",">>","<<","<=",">=",":="}
-- for i=1 to length(sep) do
-- s=substring_replace(s,sep[i],","&sep[i]&",")
-- end for
-- return(s)
--end function
function smush_line(sequence s)
integer at
integer start
sequence sep
sequence masked
s=convert_strings(s)
s=hs_lower(exclude(strip_comments(s)," \t\n"))
s=substring_replace(s,"(",",begin,")
s=substring_replace(s,")",",end,")
masked=s
sep={"+=","-=","$+","$=","+","--","/","*","^^","^","==","<>",">>","<<","<=",">=",":=","&&","||"}
for i=1 to length(sep) do
at=match(sep[i],masked)
start=1
while at>=start do
s=s[1..at-1] & "," & sep[i] & "," & s[at+length(sep[i])..length(s)]
masked=masked[1..at-1] & repeat(0,length(sep[i])+2) & masked[at+length(sep[i])..length(masked)]
start=at+length(sep[i])+2
at=match(sep[i],masked)
end while
end for
return(s)
end function
---------------------------------------------------------------------------
procedure split_commands()
sequence line,broken
color_print("splitting commands\n",{})
for i=1 to length(source) do
line=smush_line(source[i][SRC_TEXT])
broken=explode(line,",")
for j=1 to length(broken) do
--text,origin
if length(broken[j]) then
cmd=append(cmd,{broken[j],i})
end if
end for
end for
end procedure
---------------------------------------------------------------------------
procedure src_warn(sequence s,integer line)
if not find('w',optlist) then
--do not warn if -w is set
error_file_print(sprintf("\n",{file_list[source[line][SRC_FILE]],source[line][SRC_LINE]}))
if length(current_script) then
simple_warn(
sprintf(
"in line %d of script "&COLYEL&"%s"&COLRED&" in "&COLPNK&"%s"&COLRED&"\n"&COLBWHI&"%s"&COLRED&"\n%s\n"
,{source[line][SRC_LINE],current_script,file_list[source[line][SRC_FILE]],source[line][SRC_TEXT],s}
)
)
else
simple_warn(sprintf("in line %d of "&COLPNK&"%s"&COLRED&"\n"&COLBWHI&"%s"&COLRED&"\n%s\n",{source[line][SRC_LINE],file_list[source[line][SRC_FILE]],source[line][SRC_TEXT],s}))
end if
error_file_print("\n
\n")
end if
end procedure
---------------------------------------------------------------------------
procedure src_error(sequence s,integer line)
error_file_print(sprintf("\n",{file_list[source[line][SRC_FILE]],source[line][SRC_LINE]}))
end_anchor_kludge=true
if length(current_script) then
simple_error(
sprintf(
"in line %d of script "&COLYEL&"%s"&COLRED&" in "&COLPNK&"%s"&COLRED&"\n"&COLBWHI&"%s"&COLRED&"\n%s\n"
,{source[line][SRC_LINE],current_script,file_list[source[line][SRC_FILE]],source[line][SRC_TEXT],s}
)
)
else
simple_error(sprintf("in line %d of "&COLPNK&"%s"&COLRED&"\n"&COLBWHI&"%s"&COLRED&"\n%s\n",{source[line][SRC_LINE],file_list[source[line][SRC_FILE]],source[line][SRC_TEXT],s}))
end if
end procedure
---------------------------------------------------------------------------
procedure check_for_reserved(sequence s,integer line,sequence expect)
if alpha_tree_seek(reserved,s) then
if compare("top-level declaration",expect)=0 then
src_error(
sprintf(
"Expected %s, but found %s "&COLYEL&"%s"&COLRED&". Perhaps there is an extra "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" earlier in the file"
,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}
)
,line
)
elsif compare("user script name",expect)=0 then
if alpha_tree_data(reserved,s,0)=RESERVE_FUNCTION then
src_warn(sprintf("%s "&COLYEL&"%s"&COLRED&" is already reserved as a %s",{expect,s,RESERVE_NAMES[alpha_tree_data(reserved,s,0)]}),line)
else
src_error(sprintf("Expected %s, but found %s "&COLYEL&"%s"&COLRED,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}),line)
end if
else
src_error(sprintf("Expected %s, but found %s "&COLYEL&"%s"&COLRED,{expect,RESERVE_NAMES[alpha_tree_data(reserved,s,0)],s}),line)
end if
end if
end procedure
---------------------------------------------------------------------------
function musnt_be_a_number(sequence s)
if length(exclude(s[CMD_TEXT],"-0123456789"))=0 and compare(s[CMD_TEXT],"--")!=0 then
src_error(sprintf("Expected a name, but found a number "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_LINE])
end if
return(s[CMD_TEXT])
end function
---------------------------------------------------------------------------
function try_undefined_constant(sequence s)
sequence const_data
check_for_reserved(s[CMD_TEXT],s[CMD_LINE],"constant name")
if alpha_tree_seek(constant_list,s[CMD_TEXT]) then
--constant is already defined
const_data=alpha_tree_data(constant_list,s[CMD_TEXT],0)
src_warn(sprintf("constant "&COLYEL&"%s"&COLRED&" will be ignored because it is already defined in line "&COLYEL&"%d"&COLRED&" of "&COLBWHI&"%s"&COLRED&" with the value "&COLYEL&"%d"&COLRED
,{
s[CMD_TEXT]
,source[const_data[2]][SRC_LINE]
,file_list[source[const_data[2]][SRC_FILE]]
,const_data[1]
}
),s[CMD_LINE])
end if
return musnt_be_a_number(s)
end function
---------------------------------------------------------------------------
function try_undefined_string(sequence s,sequence seeking)
check_for_reserved(s[CMD_TEXT],s[CMD_LINE],seeking)
return musnt_be_a_number(s)
end function
---------------------------------------------------------------------------
function force_16_bit(integer n,integer line)
if n>32767 then
src_warn(sprintf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),line)
n=32767
elsif n<-32768 then
src_warn(sprintf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),line)
n=-32768
end if
return(n)
end function
---------------------------------------------------------------------------
function try_string_to_number(sequence s)
integer result
result=floor(string_to_object(s[CMD_TEXT],0))
if not string_is_an_integer(s[CMD_TEXT]) then
src_error(sprintf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_LINE])
end if
result=force_16_bit(result,s[CMD_LINE])
return(result)
end function
---------------------------------------------------------------------------
function enforce_constants(sequence s)
--enforces both constants
object v
v=alpha_tree_data(constant_list,s,{})
if length(v) then
v=v[1]--first element is number (second is line)
if integer(v) then
return(sprintf("%d",{v}))
end if
end if
return(s)
end function
---------------------------------------------------------------------------
function get_cmd()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_LINE])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_no_constants()
sequence result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_LINE])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=result[CMD_TEXT]
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_block(integer convert_constants)
sequence this
sequence result
result={}
this=get_cmd()
if compare("begin",this[CMD_TEXT])!=0 then
src_error(sprintf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_LINE])
end if
while true do
if convert_constants then
this=get_cmd()
else
this=get_cmd_no_constants()
end if
if compare("begin",this[CMD_TEXT])=0 then
src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_LINE])
elsif compare("end",this[CMD_TEXT])=0 then
exit--break out of the while
else
result=append(result,this)
end if
end while
return(result)
end function
---------------------------------------------------------------------------
procedure parse_constant_block(sequence block)
integer num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but constant block ended",block[i][CMD_LINE])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_LINE]})
-- name=try_undefined_constant({enforce_constants(block[i+1][CMD_TEXT]),block[i+1][CMD_LINE]})
name=try_undefined_constant({block[i+1][CMD_TEXT],block[i+1][CMD_LINE]})
constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_LINE]})
end for
end procedure
---------------------------------------------------------------------------
procedure create_global(integer id,sequence name,integer line)
integer at
at=find(id,global_list[PAIR_NUM])
if at then
src_error(sprintf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),line)
else
if id>=0 then
global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
global_list[GLB_LINE]=append(global_list[GLB_LINE],line)
reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL)
else
src_error(sprintf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted",{id}),line)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure parse_global_block(sequence block)
integer num
sequence name
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but globalvariable block ended",block[i][CMD_LINE])
end if
num=try_string_to_number(block[i])
name=try_undefined_string(block[i+1],"global variable name")
create_global(num,name,block[i][CMD_LINE])
end for
end procedure
---------------------------------------------------------------------------
procedure parse_operator_block(sequence block)
integer num
sequence name,true
for i=1 to length(block) by 3 do
if i+2>length(block) then
src_error("expected name but defineoperator block ended",block[i][CMD_LINE])
end if
num=try_string_to_number(block[i])
name=musnt_be_a_number(block[i+1])
true=musnt_be_a_number(block[i+2])
operator_list=append(operator_list,{num,name,true,block[i+2][CMD_LINE]})
reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR)
end for
end procedure
---------------------------------------------------------------------------
function create_function(sequence list,integer id,sequence name,sequence arglist,integer func_type,integer line)
integer at
at=find(id,column(list,PAIR_NUM))
if at then
src_error(sprintf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),line)
else
if id=0 and func_type=RESERVE_SCRIPT then
src_error(sprintf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),line)
elsif id<0 then
id=autonumber_id
autonumber_id-=1
end if
list=append(list,{id,name,arglist,line})
reserved=alpha_tree_insert(reserved,name,func_type)
end if
return(list)
end function
---------------------------------------------------------------------------
function parse_define_block(sequence block,sequence list,integer func_type)
integer num
sequence name
integer args
sequence arglist
integer name_line
integer i
i=1
while i<=length(block) do
num=try_string_to_number(block[i])
if i+1>length(block) then
src_error(sprintf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_LINE])
else
i+=1
name=try_undefined_string(block[i],RESERVE_NAMES[func_type]&" name")
name_line=block[i][CMD_LINE]
if i+1>length(block) then
src_error("expected argument count but define block ended",block[i][CMD_LINE])
else
i+=1
args=try_string_to_number(block[i])
arglist={}
for j=1 to args do
if i+1>length(block) then
src_error("expected argument default but define block ended",block[i][CMD_LINE])
else
i+=1
arglist=append(arglist,try_string_to_number(block[i]))
end if
end for
list=create_function(list,num,name,arglist,func_type,name_line)
i+=1
end if
end if
end while
return(list)
end function
---------------------------------------------------------------------------
procedure parse_for_constants()
sequence this
color_print("parsing constants\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if compare("defineconstant",this[CMD_TEXT])=0 then
parse_constant_block(get_cmd_block(false))
end if
end while
end procedure
---------------------------------------------------------------------------
procedure parse_script()
sequence name
sequence arglist
sequence s
sequence this
integer depth
name=get_cmd()
current_script=name[CMD_TEXT]
arglist={}
while true do
if get_cmd_pointer>length(cmd) then
src_error(sprintf("script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}),name[CMD_LINE])
end if
this=get_cmd()
if compare("begin",this[CMD_TEXT])=0 then
exit--break the while
end if
arglist=append(arglist,{try_undefined_string(this,"argument name"),this[CMD_LINE]})
end while
--every script is nested inside a big fat do() block
s={{"do",this[CMD_LINE]}}
depth=0
while true do
s=append(s,this)
if compare("end",this[CMD_TEXT])=0 then
depth-=1
if depth=0 then
exit--break while
end if
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
else
if alpha_tree_data(reserved,this[CMD_TEXT],3)<=RESERVE_UNIMPLEMENTED then
src_error(
sprintf(
"%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script. Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED
,{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT],name[CMD_TEXT]}
)
,this[CMD_LINE]
)
end if
end if
if get_cmd_pointer>length(cmd) then
src_error(
sprintf(
"script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
,{name[CMD_TEXT]}
)
,name[CMD_LINE]
)
end if
this=get_cmd()
end while
script_cmd=append(script_cmd,{name,arglist,s})
current_script=""
end procedure
---------------------------------------------------------------------------
procedure parse_top_level()
sequence this
sequence ignore
color_print("parsing top-level\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if compare("defineconstant",this[CMD_TEXT])=0 then
ignore=get_cmd_block(true)
elsif compare("globalvariable",this[CMD_TEXT])=0 then
parse_global_block(get_cmd_block(true))
elsif compare("defineoperator",this[CMD_TEXT])=0 then
parse_operator_block(get_cmd_block(true))
elsif compare("definefunction",this[CMD_TEXT])=0 then
function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION)
elsif compare("definescript",this[CMD_TEXT])=0 then
script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT)
elsif compare("script",this[CMD_TEXT])=0 then
parse_script()
else
if get_cmd_pointer>length(cmd) then
--file ends while looking for top-level declaration
exit
else
check_for_reserved(this[CMD_TEXT],cmd[get_cmd_pointer][CMD_LINE],"top-level declaration")
src_error(
sprintf(
"Expected top-level declaration but found "&COLYEL&"%s"&COLRED
,{this[CMD_TEXT]}
)
,this[CMD_LINE]
)
end if
end if
end while
cmd={}
end procedure
---------------------------------------------------------------------------
procedure dump_script_and_function_info(integer fh,sequence list)
sequence this
sequence id_string
for i=1 to length(list) do
this=list[i]
if this[1]>autonumber_id then
id_string=sprintf("AUTONUMBER=%d",{this[1]})
else
id_string=sprintf("ID=%d",{this[1]})
end if
printf(fh,"%s %d\t%s\t%s(",{
file_list[source[this[FUNC_LINE]][SRC_FILE]]
,source[this[FUNC_LINE]][SRC_LINE]
,id_string
,this[2]
})
for j=1 to length(this[FUNC_ARGS]) do
if j>1 then
printf(fh,",",{})
end if
printf(fh,"%d",{this[FUNC_ARGS][j]})
end for
printf(fh,")\n",{})
end for
end procedure
---------------------------------------------------------------------------
function seek_string_by_id(integer id,sequence list,sequence name)
integer at
at=find(id,column(list,PAIR_NUM))
if at then
return(list[at][PAIR_NAME])
else
simple_error(sprintf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
end if
return("")
end function
---------------------------------------------------------------------------
function name_lookup(sequence pair)
integer at
if pair[1]=KIND_NUMBER then
return(sprintf("%d",{pair[2]}))
elsif pair[1]=KIND_LOCAL then
return(sprintf("local%d",{pair[2]}))
elsif pair[1]=KIND_GLOBAL then
at=find(pair[2],global_list[PAIR_NUM])
if at then
return(global_list[PAIR_NAME][at])
else
simple_error(sprintf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
end if
elsif pair[1]=KIND_FLOW then
return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
elsif pair[1]=KIND_SCRIPT then
return(seek_string_by_id(pair[2],script_list,"user script"))
elsif pair[1]=KIND_FUNCTION then
return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
elsif pair[1]=KIND_MATH then
return(seek_string_by_id(pair[2],math_list,"built-in function"))
else
simple_error(sprintf("decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
end if
end function
---------------------------------------------------------------------------
function bytepair_to_word(sequence data)
integer result
result=data[1]+data[2]*#100
if result>32767 then
--convert negatives
result=or_bits(result,#FFFF0000)
end if
return(result)
end function
---------------------------------------------------------------------------
function get_kind_and_id(sequence data)
return({bytepair_to_word(data[1..2]),bytepair_to_word(data[3..4])})
end function
---------------------------------------------------------------------------
function dump_script_binary(sequence bin,integer offset,integer depth)
sequence result
sequence kind_and_id
integer kind
integer argcount
integer new_offset
result=""
kind_and_id=get_kind_and_id(bin[1+offset*2..1+offset*2+3])
kind=kind_and_id[1]
result&=sprintf("%s%s",{
repeat(' ',depth)--indent
,name_lookup(kind_and_id)
})
if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
argcount=bytepair_to_word(bin[1+offset*2+4..1+offset*2+5])
if argcount then
result&="(\n"
for i=1 to argcount do
new_offset=bytepair_to_word(bin[1+offset*2+6+(i-1)*2..1+offset*2+6+(i-1)*2+1])
result&=dump_script_binary(bin,new_offset,depth+2)
end for
result&=repeat(' ',depth)&")\n"
else
result&="()\n"
end if
else
result&="\n"
end if
return(result)
end function
---------------------------------------------------------------------------
function dump_script_tree(sequence tree,integer depth)
sequence result
result=""
for i=1 to length(tree) do
result&=sprintf("%s%s",{
repeat(' ',depth)--indent
,tree[i][TREE_TRUNK][CMD_TEXT]
})
if length(tree[i][TREE_BRANCHES])>0 then
result&="(\n"
result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
result&=repeat(' ',depth)&")\n"
else
result&="\n"
end if
end for
return(result)
end function
---------------------------------------------------------------------------
procedure dump_debug_report()
integer fh
sequence debug_file
--only do this if the -d debug option was on the command line
if find('d',optlist) then
if length(path_only(dest_file))>1 then
debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
else
debug_file="hs_debug.txt"
end if
fh=open(debug_file,"w")
if fh!=failure then
wrap_print("Writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
-------------------------------------
printf(fh,"[Scripts]\n",{})
dump_script_and_function_info(fh,script_list)
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Global Variables]\n",{})
for i=1 to length(global_list[PAIR_NUM]) do
printf(fh,"%s %d\tID=%d\t%s\n",{
file_list[source[global_list[GLB_LINE][i]][SRC_FILE]]
,source[global_list[GLB_LINE][i]][SRC_LINE]
,global_list[PAIR_NUM][i]
,global_list[PAIR_NAME][i]
})
end for
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Builtin Functions]\n",{})
dump_script_and_function_info(fh,function_list)
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Operators]\n",{})
for i=1 to length(operator_list) do
printf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
file_list[source[operator_list[i][OPER_LINE]][SRC_FILE]]
,source[operator_list[i][OPER_LINE]][SRC_LINE]
,operator_list[i][PAIR_NAME]
,operator_list[i][OPER_TRUENAME]
,operator_list[i][PAIR_NUM]
})
end for
printf(fh,"\n",{})
-------------------------------------
printf(fh,"[Script Dumps]\n",{})
for i=1 to length(all_scripts) do
printf(fh,"%s %d\tID=%d\t%s\n",{
file_list[source[all_scripts[i][3]][SRC_FILE]]
,source[all_scripts[i][3]][SRC_LINE]
,all_scripts[i][1]
,all_scripts[i][2]
})
for j=1 to length(all_scripts[i][5]) do
printf(fh,"%s %d\tvar=%s\n",{
file_list[source[all_scripts[i][5][j][CMD_LINE]][SRC_FILE]]
,source[all_scripts[i][5][j][CMD_LINE]][SRC_LINE]
,all_scripts[i][5][j][CMD_TEXT]
})
end for
printf(fh,"%d bytes compiled\n",{length(all_scripts[i][6])})
-- printf(fh,"%s\n\n",{dump_script_tree(all_scripts[i][4],0)})
printf(fh,"%s\n\n",{dump_script_binary(all_scripts[i][6][CODE_START_BYTE_OFFSET+1..length(all_scripts[i][6])],0,0)})
end for
printf(fh,"\n",{})
-------------------------------------
close(fh)
else
wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
end if
end if
end procedure
---------------------------------------------------------------------------
function get_cmd_depth(integer ptr,sequence data,integer depth)
sequence result
sequence this
result={}
while true do
--if get_key()=27 then abort(1/0) end if
if ptr>length(data) then
src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_LINE])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
depth-=1
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
end if
if depth=0 then
exit --break out of the while
else
result=append(result,this)
end if
end while
return({ptr,result})
end function
---------------------------------------------------------------------------
--identify the kind and id of a text command. DOes not support untranslaed operators or floaty parethesis
function what_kind_and_id(sequence command,sequence local_vars)
integer kind,id
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_an_integer(s) then
kind=KIND_NUMBER
id=string_to_object(s,{})
elsif find(s,column(local_vars,CMD_TEXT)) then
kind=KIND_LOCAL
id=find(s,column(local_vars,CMD_TEXT))
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])]
elsif keyword=RESERVE_FLOW then
kind=KIND_FLOW
id=flow_list[find(s,column(flow_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
id=function_list[find(s,column(function_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
id=script_list[find(s,column(script_list,PAIR_NAME))][PAIR_NUM]
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
id=math_list[find(s,column(math_list,PAIR_NAME))][PAIR_NUM]
else
src_error(sprintf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_LINE])
end if
return({kind,id})
end function
---------------------------------------------------------------------------
--identify the kind of a text command
function what_kind(sequence command,sequence local_vars, integer look_for_operators)
integer kind
integer keyword
sequence s
s=command[CMD_TEXT]
keyword=alpha_tree_data(reserved,s,0)
if string_is_an_integer(s) then
kind=KIND_NUMBER
elsif find(s,column(local_vars,CMD_TEXT)) then
kind=KIND_LOCAL
elsif length(s)=0 then
kind=KIND_PARENS
elsif look_for_operators and find(s,column(operator_list,PAIR_NAME)) then
kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
elsif keyword=RESERVE_GLOBAL then
kind=KIND_GLOBAL
elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then
kind=KIND_FLOW
elsif keyword=RESERVE_FUNCTION then
kind=KIND_FUNCTION
elsif keyword=RESERVE_SCRIPT then
kind=KIND_SCRIPT
elsif keyword=RESERVE_BUILTIN then
kind=KIND_MATH
else
src_error(sprintf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_LINE])
end if
return(kind)
end function
---------------------------------------------------------------------------
function how_many_args(sequence name,integer kind)
integer result
integer at
if kind=KIND_PARENS then
result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
elsif kind=KIND_FLOW then
result=-1 --flow supports an unknown number of args
elsif kind=KIND_OPERATOR then
result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
elsif kind=KIND_MATH then
at=find(name[CMD_TEXT],column(math_list,PAIR_NAME))
result=length(math_list[at][FUNC_ARGS])
elsif kind=KIND_FUNCTION then
at=find(name[CMD_TEXT],column(function_list,PAIR_NAME))
result=length(function_list[at][FUNC_ARGS])
elsif kind=KIND_SCRIPT then
at=find(name[CMD_TEXT],column(script_list,PAIR_NAME))
result=length(script_list[at][FUNC_ARGS])
else
--numbers, variables, etc do not permit args
result=0
end if
return(result)
end function
---------------------------------------------------------------------------
function get_script_cmd(integer ptr,sequence data,sequence vars)
sequence command
sequence this
sequence after
integer kind
integer argcount
after={}
command=data[ptr]
ptr+=1
if compare("end",command[CMD_TEXT])=0 then
src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_LINE])
elsif compare("begin",command[CMD_TEXT])=0 then
--floaty brackets for order-of-operations-enforcement
ptr-=1
command[CMD_TEXT]=""
elsif compare("variable",command[CMD_TEXT])=0 then
--must ignore variable declaration
if ptr<=length(data) then
--there is room for args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
end if
--recursing seems the easyest way to skip a command
return(get_script_cmd(ptr,data,vars))
end if
if ptr<=length(data) then
--there is room for args
kind=what_kind(command,vars,true)
argcount=how_many_args(command,kind)
if argcount=0 then
--no arguments are allowed
else
this=data[ptr]
if compare("begin",this[CMD_TEXT])=0 then
--yes, it has args
ptr+=1--only increment the pointer when we have args
after=get_cmd_depth(ptr,data,1)
ptr=after[1] --this is a hack, because we cannot say {n,n}=func()
after=after[2]
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION then
--has no args, but thats okay
else
--has no args, but requires them!
src_error(sprintf(
"expected "&COLYEL&"()"&COLRED&" or "&COLYEL&"begin,end"&COLRED&" for %s "&COLYEL&"%s"&COLRED&" but found "&COLYEL&"%s"&COLRED
,{KIND_LONGNAMES[kind],command[CMD_TEXT],this[CMD_TEXT]}),command[CMD_LINE]
)
end if
end if
end if
return({ptr,{command,after}})
end function
---------------------------------------------------------------------------
function compile_commands(sequence script_data,sequence vars)
integer ptr
sequence this
sequence command
sequence result
result={}
ptr=1
while true do
this=get_script_cmd(ptr,script_data,vars)
ptr=this[1]
command=this[2]
if length(command[2])>0 then
--this command has arguments that need parsing
command[2]=compile_commands(command[2],vars)
end if
result=append(result,command)
if ptr>length(script_data) then
exit --break out of while when there is no more data
end if
end while
return(result)
end function
---------------------------------------------------------------------------
function gather_local_vars(sequence vars,sequence data)
sequence this
integer at
integer ptr
ptr=1
while true do
this=data[ptr]
ptr+=1
if compare("variable",this[CMD_TEXT])=0 then
if ptr>length(data) then
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_LINE])
end if
this=data[ptr]
ptr+=1
if compare("begin",this[CMD_TEXT])=0 then
while true do
if ptr>length(data) then
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_LINE])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
exit--break the while
end if
check_for_reserved(this[CMD_TEXT],this[CMD_LINE],"local variable name")
at=find(this[CMD_TEXT],column(vars,CMD_TEXT))
if at then
src_error(
sprintf(
"local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLBWHI&"%s"&COLRED,{this[CMD_TEXT],source[vars[at][CMD_LINE]][SRC_LINE]
,file_list[source[vars[at][CMD_LINE]][SRC_FILE]]}
)
,this[CMD_LINE]
)
else
vars=append(vars,this)
end if
end while
else
src_error(sprintf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_LINE])
end if
end if
if ptr>length(data) then
exit --break out of the while
end if
end while
return(vars)
end function
---------------------------------------------------------------------------
--parse the script tree and make if absorb then and else, for and while absorb do
function normalize_flow_control(sequence tree,sequence vars)
integer ptr
sequence s
integer line
integer argkind
integer var_at
ptr=1
while ptr<=length(tree) do
s=tree[ptr][TREE_TRUNK][CMD_TEXT]
line=tree[ptr][TREE_TRUNK][CMD_LINE]
if compare("if",s)=0 then
if length(tree[ptr][TREE_BRANCHES])>1 then
src_error(sprintf(
COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"and"&COLRED&" and "&COLYEL&"or"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),line)
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),line)
end if
if ptr1 then
src_error(sprintf(
COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"and"&COLRED&" and "&COLYEL&"or"&COLRED&" for complex conditions"
,{length(tree[ptr][TREE_BRANCHES])}
),line)
elsif length(tree[ptr][TREE_BRANCHES])=0 then
src_error(sprintf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),line)
end if
if ptr4 then
src_error(sprintf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(tree[ptr][TREE_BRANCHES])}),line)
elsif length(tree[ptr][TREE_BRANCHES])=3 then
--append default step value
tree[ptr][TREE_BRANCHES]=append(tree[ptr][TREE_BRANCHES],{{"1",tree[ptr][TREE_TRUNK][CMD_LINE]},{}})
end if
argkind=what_kind(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK],vars,true)
if argkind=KIND_LOCAL then
--translate into a numeric reference to a variable
used_locals=append(used_locals,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT])
var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],column(vars,CMD_TEXT))
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--warn, then translate into a numeric reference to a variable
src_warn(sprintf(
"Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
,{tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_LINE])
var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--only variables allowed as the first argument of a "for"
src_error(
sprintf("first argument of "&COLYEL&"for"&COLRED&" statement must be a variable, not %s "&COLYEL&"%s"&COLRED,{
KIND_LONGNAMES[argkind]
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]
})
,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_LINE]
)
end if
if ptr length(tree[TREE_BRANCHES]) then
--add defaults if not enough args are present
if kind=KIND_MATH then
--special processing for math
if list[at][PAIR_NUM]<16 then
--math shouldnt have defaults
src_error(sprintf(
"math function "&COLYEL&"%s"&COLRED&" has %d arguments it should always have 2"
,{tree[TREE_TRUNK][CMD_TEXT],length(tree[TREE_BRANCHES])}
),tree[TREE_TRUNK][CMD_LINE])
else
--variable stuff can have a defaults
if length(tree[TREE_BRANCHES]) = 0 then
--no defaults for first argument of variable function
src_error(sprintf(
"variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments it needs at least 1"
,{tree[TREE_TRUNK][CMD_TEXT],length(tree[TREE_BRANCHES])}
),tree[TREE_TRUNK][CMD_LINE])
elsif length(tree[TREE_BRANCHES]) = 1 then
--make defaults for second arg of variable function
if list[at][PAIR_NUM]=16 then
--setvariable
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"0",tree[TREE_TRUNK][CMD_LINE]}
,{}
})
else
--increment and decrement
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{"1",tree[TREE_TRUNK][CMD_LINE]}
,{}
})
end if
end if
end if
else
--normal processing for script and function
for i=length(tree[TREE_BRANCHES])+1 to length(list[at][FUNC_ARGS]) do
tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{
{sprintf("%d",{list[at][FUNC_ARGS][i]}),tree[TREE_TRUNK][CMD_LINE]}
,{}
})
end for
end if
end if
--this is as good a time as any to make sure that var maipulation functions point to real variables
if kind=KIND_MATH and list[at][PAIR_NUM]>=16 then
argkind=what_kind(tree[TREE_BRANCHES][1][TREE_TRUNK],vars,false)
if argkind=KIND_LOCAL then
--its local. translate it to a numeric reference
var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],column(vars,CMD_TEXT))
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at})
elsif argkind=KIND_GLOBAL then
--its global. translate it to a numeric reference
var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME])
tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]})
else
--its not a variable. bad!
src_error(sprintf(
"first argument of variable manipulation function "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
,{tree[TREE_TRUNK][CMD_TEXT],KIND_LONGNAMES[argkind],tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]}
),tree[TREE_TRUNK][CMD_LINE])
end if
end if
return(tree[TREE_BRANCHES])
end function
---------------------------------------------------------------------------
function normalize_arguments(sequence tree,sequence vars)
integer kind
--unlike the flow normalization and operator translation we do not insert/delete elements from the current level, so we can use a "for" safely, and dont need a "while"
for i=1 to length(tree) do
kind=what_kind(tree[i][TREE_TRUNK],vars,false)
if kind=KIND_SCRIPT then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,script_list,vars)
elsif kind=KIND_FUNCTION then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,function_list,vars)
elsif kind=KIND_MATH then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,math_list,vars)
end if --number, flow, global, local, parens need no argchecking
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
integer b1,b2
b1=and_bits(n,#FF)
b2=floor(and_bits(n,#FFFF)/256)
return({b1,b2})
end function
---------------------------------------------------------------------------
--looks for a matching block of code that we can refer to instead of rewriting the code
--somewhat time consuming
function seek_appropriate_reference(sequence result,sequence done_code)
for i=1 to length(done_code)-(length(result)-1) by 2 do
if compare(result,done_code[i..i+(length(result)-1)])=0 then
return((i-1)/2)
end if
end for
return(-1)
end function
---------------------------------------------------------------------------
--return value is a sequence when new command data was appended, and an integer when an offset is returned
function binary_compile_recurse(sequence tree,sequence vars,sequence done_code)
sequence result
integer kind,id
integer at
sequence s
integer offset
sequence value_temp
object sub_result
sequence done_code_plus_result
result={}
kind=what_kind(tree[TREE_TRUNK],vars,false)
s=tree[TREE_TRUNK][CMD_TEXT]
if kind=KIND_NUMBER then
value_temp=value(s)
result&=output_word(kind)
result&=output_word(force_16_bit(value_temp[2],tree[TREE_TRUNK][CMD_LINE]))
elsif kind=KIND_GLOBAL then
at=find(s,global_list[PAIR_NAME])
result&=output_word(kind)
result&=output_word(global_list[PAIR_NUM][at])
elsif kind=KIND_LOCAL then
at=find(s,column(vars,CMD_TEXT))
result&=output_word(kind)
result&=output_word(at-1)
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
if kind=KIND_SCRIPT then
at=find(s,column(script_list,PAIR_NAME))
id=script_list[at][PAIR_NUM]
elsif kind=KIND_FUNCTION then
at=find(s,column(function_list,PAIR_NAME))
id=function_list[at][PAIR_NUM]
elsif kind=KIND_FLOW then
at=find(s,column(flow_list,PAIR_NAME))
id=flow_list[at][PAIR_NUM]
elsif kind=KIND_MATH then
at=find(s,column(math_list,PAIR_NAME))
id=math_list[at][PAIR_NUM]
end if
result&=output_word(kind)
result&=output_word(id)
result&=output_word(length(tree[TREE_BRANCHES]))
for i=1 to length(tree[TREE_BRANCHES]) do
--add placeholders for each argoffset
result&={#FFFFFF,#FFFFFF} --these values are waaaay out if range for a 16-bit number, so it will never be matched by seek_appropriate_reference
end for
for i=1 to length(tree[TREE_BRANCHES]) do
--actually evaluate each argument and set the real offsets
done_code_plus_result=done_code&result --prefabricating this is faster, since we use it twice
offset=floor(length(done_code_plus_result)/2)
sub_result=binary_compile_recurse(tree[TREE_BRANCHES][i],vars,done_code_plus_result)
if sequence(sub_result) then
--if new data was added, append it
result&=sub_result
else
--if a matching reference was available use it
offset=sub_result
end if
result[7+(i-1)*2..8+(i-1)*2]=output_word(offset)
end for
else
src_error(sprintf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),tree[TREE_TRUNK][CMD_LINE])
end if
if not fast_mode then
at=seek_appropriate_reference(result,done_code)
if at>=0 then
--found existing data exactly like this command, so just return a reference to it
return(at)
end if
end if
--return the data for this command to be appended
return(result)
end function
---------------------------------------------------------------------------
function binary_compile(sequence tree,sequence vars)
sequence result
--binary data is all in 16-bit signed words.
--the first word is the zero-rooted byte-offset of the first executable code byte
--in retrospect, word-offset would have been more appropriate, since everything is word-alinged, but hey! gotta be backwards compatable!
result=output_word(CODE_START_BYTE_OFFSET)
--the second word is the number of local variables
result&=output_word(length(vars))
--what follows is command data in the format [kindID,Value,argcount,argpointerlist]
--numbers and variables have no argcount or argpointerlist
--an argpointer is the zero-rooted word-offset of the argument relative
--to the start of the executable commands. I realise that this format is
--unnecisaraly complicated. I had hoped to get benefits of being able to
--store frequently reused commands only once and then just point to them,
--but in actual practice, it isnt worth the trouble, since the only
--commands that tend to be redundant are the really short ones.
--the first command is always a "do". there can be only one top-level command
if length(tree)!=1 then
simple_error(sprintf("compiler bug! script tree has %s root nodes",{length(tree)}))
end if
result&=binary_compile_recurse(tree[1],vars,"")
return(result)
end function
---------------------------------------------------------------------------
--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(sequence tree)
integer i
sequence graft
i=1
while i<=length(tree) do
if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
--found a floaty-bracket
graft=tree[i][TREE_BRANCHES]
tree=delete_element(tree,i)
tree=insert_sequence(tree,graft,i)
else
if length(tree[i][TREE_BRANCHES]) then
tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
end if
i+=1
end if
end while
return(tree)
end function
---------------------------------------------------------------------------
function sanity_check(sequence tree,sequence vars,sequence parent)
sequence s
sequence kind_and_id
integer kind,id
for i=1 to length(tree) do
s=tree[i][TREE_TRUNK][CMD_TEXT]
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if (compare("if",parent)=0 or compare("while",parent)=0) and i=1 then
if kind=KIND_NUMBER then
if id then
src_warn(sprintf("condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),tree[i][TREE_TRUNK][CMD_LINE])
else
src_warn("condition is always false",tree[i][TREE_TRUNK][CMD_LINE])
end if
elsif kind=KIND_FLOW then
src_warn(sprintf("should not use flow control command "&COLYEL&"%s"&COLRED&" as a condition",{s}),tree[i][TREE_TRUNK][CMD_LINE])
end if
elsif compare("do",parent)=0 or compare("then",parent)=0 or compare("else",parent)=0 then
if kind=KIND_NUMBER then
src_warn(sprintf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
,{id}),tree[i][TREE_TRUNK][CMD_LINE])
elsif kind=KIND_GLOBAL then
src_warn(sprintf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),tree[i][TREE_TRUNK][CMD_LINE])
elsif kind=KIND_LOCAL then
src_warn(sprintf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{vars[id][CMD_TEXT]}),tree[i][TREE_TRUNK][CMD_LINE])
elsif kind=KIND_MATH and id<=15 then
src_warn(sprintf("built-in function "&COLYEL&"%s"&COLRED&" is returning a value that isnt being discarded"
,{s}),tree[i][TREE_TRUNK][CMD_LINE])
end if
end if
if kind=KIND_GLOBAL then
if not find(s,used_globals) then
used_globals=append(used_globals,s)
end if
elsif kind=KIND_LOCAL then
if not find(s,used_locals) then
used_locals=append(used_locals,s)
end if
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
function optimized_arg(sequence tree,sequence vars)
sequence kind_and_id
integer kind,id
object arg1,arg2
kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_NUMBER then
return(id)
elsif kind=KIND_MATH and id<=15 then
arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
if integer(arg1) and integer(arg2) then
if id=0 then
--random
if arg1=arg2 then
return(arg1)
end if
elsif id=1 then
--exponent
return(power(arg1,arg2))
elsif id=2 then
--modulus
return(floor(remainder(arg1,arg2)))
elsif id=3 then
--divide
return(floor(arg1/arg2))
elsif id=4 then
--multiply
return(arg1*arg2)
elsif id=5 then
--subtract
return(arg1-arg2)
elsif id=6 then
--add
return(arg1+arg2)
elsif id=7 then
--xor
return(xor_bits(arg1,arg2))
elsif id=8 then
--or
return(or_bits(arg1,arg2))
elsif id=9 then
--and
return(and_bits(arg1,arg2))
elsif id=10 then
--equal
return(abs(arg1=arg2)*-1)
elsif id=11 then
--notequal
return(abs(arg1!=arg2)*-1)
elsif id=12 then
--lessthan
return(abs(arg1arg2)*-1)
elsif id=14 then
--lessthanorequalto
return(abs(arg1<=arg2)*-1)
elsif id=15 then
--greaterthanorequalto
return(abs(arg1>=arg2)*-1)
end if
end if
end if
return({})
end function
---------------------------------------------------------------------------
--goes through a script simplifying expressions that always have the same value
function optimize_script(sequence tree,sequence vars)
object arg
for i=1 to length(tree) do
arg=optimized_arg(tree[i],vars)
if integer(arg) then
tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
tree[i][TREE_BRANCHES]={}
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
procedure warn_unused_locals(sequence vars)
for i=1 to length(vars) do
if not find(vars[i][CMD_TEXT],used_locals) then
src_warn(sprintf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_LINE])
end if
end for
end procedure
---------------------------------------------------------------------------
procedure warn_unused_globals()
integer at
for i=1 to length(global_list[PAIR_NUM]) do
at=find(global_list[PAIR_NAME][i],used_globals)
if not at then
src_warn(sprintf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_LINE][i])
end if
end for
end procedure
---------------------------------------------------------------------------
procedure compile_a_script(integer id,sequence name_data,sequence arg_data,sequence script_data)
sequence script_tree
sequence local_vars
sequence binary
current_script=name_data[CMD_TEXT]
local_vars=arg_data --start with argument names (so we can check for conflicts)
local_vars=gather_local_vars(local_vars,script_data)
used_locals={}
script_tree=compile_commands(script_data,local_vars)
script_tree=convert_operators(script_tree)
script_tree=normalize_flow_control(script_tree,local_vars)
script_tree=normalize_arguments(script_tree,local_vars)
script_tree=collapse_floaty_brackets(script_tree)
if not fast_mode then
script_tree=optimize_script(script_tree,local_vars)
script_tree=sanity_check(script_tree,local_vars,"")
warn_unused_locals(local_vars)
end if
binary=binary_compile(script_tree,local_vars)
all_scripts=append(all_scripts,{
id --id
,name_data[CMD_TEXT] --name
,name_data[CMD_LINE] --source line
,script_tree
,local_vars
,binary --compiled data to go into the HSX lumps
})
current_script=""
end procedure
---------------------------------------------------------------------------
procedure compile_each_script()
integer at
sequence count
count=repeat(0,length(file_list))
color_print("compiling scripts",{})
for i=1 to length(script_cmd) do
at=find(script_cmd[i][1][CMD_TEXT],column(script_list,PAIR_NAME))
if at then
--color_print("%s\n",{script_cmd[i][1][CMD_TEXT]})
if length(script_cmd[i][2]) != length(script_list[at][FUNC_ARGS]) then
src_error(
sprintf(
"script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its declaration"
,{script_cmd[i][1][CMD_TEXT],length(script_cmd[i][2]),length(script_list[at][FUNC_ARGS])}
)
,script_cmd[i][1][CMD_LINE]
)
else
compile_a_script(
script_list[at][PAIR_NUM] --ID
,script_cmd[i][1] --Name
,script_cmd[i][2] --argnames
,script_cmd[i][3] --data
)
count[source[script_cmd[i][1][CMD_LINE]][SRC_FILE]]+=1
end if
else
src_error(sprintf("script "&COLYEL&"%s"&COLRED&" is not defined",{script_cmd[i][1][CMD_TEXT]}),script_cmd[i][1][CMD_LINE])
end if
color_print(".",{})
end for
color_print("\n",{})
if not fast_mode then
warn_unused_globals()
end if
for i=1 to length(count) do
if count[i] then
wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i]})
end if
end for
end procedure
---------------------------------------------------------------------------
function generate_scripts_dot_txt()
sequence result
result=""
for i=1 to length(script_list) do
result&=sprintf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])})
for j=1 to length(script_list[i][FUNC_ARGS]) do
result&=sprintf("%d\r\n",{j})
end for
end for
return(result)
end function
---------------------------------------------------------------------------
procedure write_output_file()
integer fh
if length(all_scripts) then
fh=open(dest_file,"wb")
if fh!=-1 then
wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
--write header and version
if write_lump(fh,"HS","HamsterSpeak"&output_word(COMPILER_VERSION))=false then
simple_error("unable to write header")
end if
--write script index
if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
simple_error("unable to write script index")
end if
--write each script
for i=1 to length(all_scripts) do
if write_lump(fh,sprintf("%d.hsx",{all_scripts[i][1]}),all_scripts[i][6])=false then
simple_error(sprintf("unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][2]}))
end if
end for
close(fh)
else
simple_error(sprintf("attempt to open"&COLYEL&"%s"&COLRED&" failed",{dest_file}))
end if
else
color_print("no scripts to output\n",{})
end if
end procedure
---------------------------------------------------------------------------
init()
load_source(source_file,"reading")
show_source_info()
split_commands()
parse_for_constants()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()
color_print("done (%g seconds)\n",{time()-start_time})
opt_wait_for_key()