-- HamsterSpeak Compiler --(C) Copyright 2001-2017 James Paige and Hamster Republic Productions -- Please read LICENSE.txt for GPL License details and disclaimer of liability -- This is a compiler for HamsterSpeak scripts used for plotscripting in -- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com -- This code is written in Euphoria 4.0. You can get the Free Open-Source -- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend -- David Cuny's EE editor --------------------------------------------------------------------------- --Changelog --3Ta 2017-07-28 Increased MAXGLOBAL to 50000 -- 2017-06-13 0x, 0o and 0b number prefixes --3T 2017-06-11 Allow subscript vars/args to shadow outer locals --3Se 2017-05-17 Disallow extra commas --3Sd 2017-03-17 --incdir option; check ../share/games/ohrrpgce/ instead --3Sc 2016-04-11 Check /usr/share/games/ohrrpgce/ for include files --3Sb 2014-11-11 Better 'include' line lexing & garbage detection --3Sa 2014-08-23 Fix printing of a lexer error message --3S 2014-07-22 Automatically include plotscr.hsd and ignore -- double inclusion; added --include and -b --3Rg 2013-02-23 Fix a subscript related crash --3Rf 2013-02-17 Fix several 31-bit int overflows --3Re 2014-02-17 Trivial change to math builtin warnings ----- 2013-11-09 Prevent misuse of betweenable-operators in -- declarations, and allow constants for global ids --3Rc 2013-10-11 subscript implementation changes (add nonlocal kind) --3Rb 2013-10-10 Remove variable() later, for syntax checking --3Ra 2013-10-09 Fixed \r lineendings (AGAIN) and slow \r\n handling --3R 2013-10-05 Add subscript --3Qc 2013-10-04 Disallow flow statements used as expressions --3Qb 2013-07-24 Require begin after then, do, etc. (else excluded) --3Qa 2013-04-11 Warn when plotscr.hsd not included ----- 2013-02-28 Allow non-constant expressions in case blocks again --3Pe 2013-01-03 Faster compile times --3Pd 2012-10-24 Fix \x escape code --3Pc 2012-10-09 Change srcpos encoding to remove source size limits --3Pb 2012-10-08 \x escape code --3Pa 2012-08-24 --reuse-ids option --3P 2012-08-18 Change way include is lexed: quotes no longer needed -- [], {}, . now totally disallowed -- Add < and > operators -- Add plotscrversion block and new HS header lump -- New switch syntax: do after case optional, case(else) -- Added elseif --3Of 2012-08-18 Lexer bugfix --3Oe 2012-04-11 Add \n and \t escape codes --3Od 2012-03-03 Change directory search order slightly --3Oc 2012-02-07 Increased MAXGLOBAL to 16383 --3Ob 2012-02-07 Make the max script global number set by a constant --3Oa 2012-01-20 Fix lexer allowing newlines in strings --3O 2012-01-19 Unicode support --3Nd 2011-10-16 Fix commands.bin generation bug --3Nc 2011-08-28 Additional expression optimisations --3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow --3Na 2011-07-25 Add -x option for Hamster Whisper's benefit --3N 2011-07-12 Add assert --3Ma 2010-10-21 Correctly strip \r to fix newlines ... again. --3M 2010-05-02 abs, sign, sqrt; fix error reporting --3L 2010-04-29 tracevalue support, involving a few internal changes --3Ke 2010-04-18 Add -t commandline option --3Kd 2010-04-14 Quite a few bugfixes in optimizer --3Kc 2010-04-14 Start checking well-formedness of numbers with lexer --3Kb 2010-04-12 Whole lot of bugfixes in script declarations and -- checking for unreserved identifier names --3Ka 2010-04-12 Throw error if source file contains binary --3K 2010-04-12 Error column reporting (introducing srcpos); lump -- original source files into source.lumped --3Ja 2010-04-07 Wrote a (nearly) proper lexer --3J 2009-12-05 Multiple function names may now alias the same id --3I 2009-07-27 Export a backup copy of full source to source.txt lump -- disable with -n option --3H 2009-06-18 Export function names to commands.bin lump --3Gk 2009-05-25 Fix short references crash --3Gj 2009-01-27 Show future-compatability warning when using square -- brackets [] and curly brackets {} in names --3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake --3Gh 2008-07-14 Disable warnings for unused variables by default, and -- Add -u option to enable warnings for unused variables. --3Gg 2008-03-01 Raise global variable limits --3Gf 2008-02-11 Give user another chance on missing Y key :P --3Ge 2007-07-01 String conversion rewrite to allow more whitespace --3Gd 2007-07-01 Allow constants for new syntax default args --3Gc 2007-05-29 Handle empty paratheses correctly, and use -- lowercase ".hs" extension --3Gb 2007-05-09 Throw error on missing include file --3Ga 2007-05-09 String length not limited to 40 --3G 2007-02-09 Offset to string literal tables given as a 32-bit -- int, script format version++ to 2. --3F 2006-12-11 Support for variable number of arguments to functions -- by giving -1 as the number of args in the define block --3Ec 2006-11-30 while(true) no longer produces a warning --3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes --3Ea 2006-09-03 Actually export triggers in scripts.bin --3E 2006-08-30 "define trigger" support --3D 2006-08-16 String literal lookup tables added to end of -- HSZ lumps replacing setstring + appendstring --3C 2006-08-15 New syntax for giving default script arguments, -- and for leaving out definescript --3B 2006-08-13 HSX lumps renamed to HSZ --3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format -- version added to HSX header. Current is 1 --2Na 2006-07-07 Minor update to add new logical operators --2N 2006-07-07 Added not() logic function --2M 2006-07-06 Set exit code on warnings --2L 2006-05-13 Added switch statement (+ case keyword) --2K 2006-05-01 Added @scriptname and @globalvariable syntax to -- return script or global ID number at compile-time -- not run-time (for use with "run script by ID" and -- "read global" and "write global") --2J 2006-04-10 Added break, continue, exitscript, exitreturning -- flow statements. Also fixed some return bugs --2I 2006-04-04 Extended HSX header to include number of arguments -- to a script, to really fix arguments-overflow-into -- -locals bug --2H 2006-03-29 Display better help and wait for keypress when run -- by double-clicking the icon. Added -k command line -- option to skip waiting for keypress --2G 2005-10-03 Additional operators $+ and $= -- Mention GPL in help text --2F 2005-07-24 Strings implemented: -- $id="..." -> setstring -- $id+"..." -> appendstring --2E 2005-02-15 Changed license to GPL -- Added += and -= operators thanks to a patch from -- The Mad Cacti and Fyrewulff --2D 2002-08-03 Only a small change, strip out \r from lines of -- the script as we read them to avoid errors related -- to busted newlines --2C 2002-03-05 Fixed some bugs that could cause crashes when -- non-printable characters exist in the input file. -- (naturally the script will not compile right, but -- at least it will not crash) --2B 2001-06-06 Added := as a commaless separater so it can be -- defined as an operator --2A 2001-05-04 Fixed -w command line option when used with -z --2 First Release --------------------------------------------------------------------------- without warning --to avoid annoying warnings without type_check --for a small speed boost --with profile_time --time profiling include hsspiffy.e --various routines, sequence manipulation - James Paige include graphics.e --standard library, needed for color output include machine.e --needed for int_to_bytes include std/math.e --needed for mod include euphoria/unicode.e include euphoria/ucstypes.e include euphoria/io_with_unicode.e --------------------------------------------------------------------------- --constants-- constant false=0 constant true=1 -- increment COMPILER_VERSION only for major language-altering changes constant COMPILER_VERSION="3" -- For small changes increment the second letter of COMPILER_SUB_VERSION -- For large changes increment the first letter and reset the second letter --*Please make sure this is 2 chararacters long! Append a space if required.* constant COMPILER_SUB_VERSION="Ta" constant COPYRIGHT_DATE="2017" --This is the oldest version of plotscr.hsd which is supported (feel free to --increment this whenever using an older plotscr.hsd means you're missing out --on new but nonessential features; there's no harm) --*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION* constant MIN_PLOTSCR_VERSION="3P " --these constants are color-flags. May add more colors, up to #F8FF --(these codes are at the end of the first private use area) constant COL_FIRST=#F8E0 constant COLYEL=#F8E0 constant COLRED=#F8E1 constant COLPNK=#F8E2 constant COLWHI=#F8E3 constant COLBWHI=#F8E4 constant COL_LAST=#F8FF --Euphoria's color constants vary depending on the platform, so we avoid them constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE} --These are character classes used in lexer_table constant LEX_CHAR=0 --Most character which are valid in identifiers. Everything above char 255 constant LEX_SPACE=1 --space and tab constant LEX_BEGIN=2 --( constant LEX_END=3 --) constant LEX_OPERATOR=4 --The first character of an operator constant LEX_COMMENT=5 --# constant LEX_STRING=6 --" constant LEX_COMMA=7 --, constant LEX_WARN=8 --[]{}. constant LEX_BINARY=9 --Nonprintable characters constant LEX_DIGIT=10 --0-9 constant LEX_HEXDIGIT=11 --a-fA-F constant LEX_OPERATOR2=12 --Only used in lexer_table_key2!! constant LEX_NEWLINE=13 --\n constant FILE_NAME=1 constant FILE_TEXT=2 constant FILE_LINE_EXTENTS=3 constant POS_FILENAME=1 constant POS_LINE=2 constant POS_COLUMN=3 constant POS_TEXT=4 constant CMD_TEXT=1 constant CMD_POS=2 constant RESERVE_TOPLEVEL=1 --Tokens that are only allowed at the toplevel, but not including script triggers constant RESERVE_UNIMPLEMENTED=2 constant RESERVE_FLOW=3 --Flow control keywords constant RESERVE_FUNCTION=4 --Name of a defined function constant RESERVE_SCRIPT=5 --etc constant RESERVE_GLOBAL=6 constant RESERVE_BEGIN=7 constant RESERVE_END=8 constant RESERVE_OPERATOR=9 constant RESERVE_BUILTIN=10 constant RESERVE_CONSTANT=11 constant RESERVE_MACRO=12 constant RESERVE_KEYWORD=13 --Miscellaneous keywords which can only occur inside scripts constant RESERVE_NAMES={"declaration" ,"unimplemented keyword/operator" ,"flow control" ,"hardcoded function" ,"user script" ,"global variable" ,"bracket "&COLYEL&"("&COLRED ,"bracket "&COLYEL&")"&COLRED ,"operator" ,"builtin command" ,"constant" ,"hardcoded function/macro" ,"keyword" } constant PAIR_NUM=1 constant PAIR_NAME=2 constant PAIR_START=1 constant PAIR_END=2 constant OPER_TRUENAME=3 constant OPER_POS=4 constant FUNC_ARGS=3 --Either list of default values (each an integer or NO_DEFAULT), or VAR_ARGS constant FUNC_POS=4 --srcpos of start of function/script constant GLB_POS=3 --srcpos of globalvariable definition constant CONST_VALUE=1 constant CONST_POS=2 constant VAR_FRAME=3 --VarToken index, integer giving frame number of a variable (0 is current script, 1 is parent...) constant VAR_ID=4 --VarToken index, integer giving ID in frame constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args constant KIND_NUMBER=1 constant KIND_FLOW=2 constant KIND_GLOBAL=3 constant KIND_LOCAL=4 constant KIND_MATH=5 constant KIND_FUNCTION=6 constant KIND_SCRIPT=7 constant KIND_NONLOCAL=8 constant KIND_REFERENCE=9 --converted to KIND_NUMBER in compiled script constant KIND_OPERATOR=10 --never appears in compiled script constant KIND_PARENS=11 --never appears in compiled script constant KIND_MACRO=12 --never appears in compiled script constant KIND_KEYWORD=13 --never appears in compiled script constant KIND_LONGNAMES={"number" ,"flow control statement" ,"global variable" ,"local variable" ,"built-in function" ,"hard-coded function" ,"script" ,"non-local variable" ,"reference" ,"untranslated operator" ,"order-of-operations-enforcing parenthesis" ,"hard-coded function/macro" } constant TREE_TRUNK=1 constant TREE_BRANCHES=2 constant TIMING_DEPTH=1 constant TIMING_DESCRIPTION=2 constant TIMING_TIME=3 constant TIMING_ACTIVE=4 --Version number for the .hs/.hsp format (not including HSZ files!) constant HSP_FORMAT_VERSION=1 constant CODE_START_BYTE_OFFSET=18 constant HSZ_FORMAT_VERSION=3 constant MAXGLOBAL=50000 --largest ID constant MAX_LOCALS=100 --max number of locals per script (not including nonlocals) constant MAX_NEST_DEPTH=4 --------------------------------------------------------------------------- --globals-- --initializations-- string compiler_dir compiler_dir="" string include_dir include_dir="" string source_file source_file="" string dest_file dest_file="" sequence optlist optlist={} sequence file_list file_list={} --type {FILE_NAME,FILE_TEXT,FILE_LINE_EXTENTS} sequence additional_includes additional_includes={} integer max_srcpos max_srcpos=0 integer total_lines total_lines=0 sequence cmd cmd={} --TokenList (but type checking is very slow) sequence reuse_ids reuse_ids={} sequence constant_list constant_list=alpha_tree_create() sequence trigger_list trigger_list={} --type {PAIR_NUM, PAIR_NAME} sequence operator_list operator_list={} --type {PAIR_NUM, PAIR_NAME, OPER_TRUENAME, OPER_POS} sequence function_list function_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS} sequence global_list global_list={{},{},{}} --NOTE: three lists {PAIR_NUM, PAIR_NAME, GLB_POS}, not a list of globals sequence string_list string_list={} --list of strings sequence script_list script_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS} (PAIR_NAME is the full name) constant SCRIPT_TRIGGER_TOK=1 --Trigger token (type {CMD_TEXT, CMD_POS}), e.g. "script" constant SCRIPT_NAME=2 --String, not fully-scoped constant SCRIPT_POS=3 --(scrpos of script name) constant SCRIPT_FULL_NAME=4 --Full-scoped script name constant SCRIPT_ARG_TOKS=5 --List of argument name tokens (type {CMD_TEXT, CMD_POS}) --Initially the tokens may include default values (like "a=4"), which are later removed constant SCRIPT_BODY_TOKS=6 --List of tokens (type {CMD_TEXT, CMD_POS}) in the body, of the form do,begin,...,end constant SCRIPT_PARENT_IDX=7 --index in all_scripts, or 0 for none constant SCRIPT_NEST_DEPTH=8 --Nesting depth. A script is 0, subscript 1, sub-subscript 2... constant SCRIPT_ID=9 --Script ID. Value not appended until all IDs are assigned in check_script_declarations constant SCRIPT_TRIGGER_ID=10 --trigger ID. SCRIPT_TRIGGER_ID and later not appended until compile_a_script constant SCRIPT_NUM_NONLOCALS=11 constant SCRIPT_AST=12 constant SCRIPT_VARIABLES=13 --VarList of locals and nonlocals constant SCRIPT_BINARY=14 sequence all_scripts all_scripts={} --list of Scripts == {SCRIPT_*} (variable length) sequence reserved reserved=alpha_tree_create() atom start_time start_time=time() atom run_time run_time=0 sequence timing_data timing_data={} integer timing_depth timing_depth=-1 integer get_cmd_pointer get_cmd_pointer=0 integer autonumber_id autonumber_id=32767 sequence flow_list flow_list={ {0,"do"} ,{1,"begin"} ,{2,"end"} ,{3,"return"} ,{4,"if"} ,{5,"then"} ,{6,"else"} ,{7,"for"} ,{10,"while"} ,{11,"break"} ,{12,"continue"} ,{13,"exitscript"} ,{14,"exitreturning"} ,{15,"switch"} ,{16,"case"} --never appears in compiled script ,{17,"subscript"} --never appears in compiled script } --elseif is not a flow type sequence flow_requiring_brackets --else is excluded, because of case(else) flow_requiring_brackets={ "do", "if", "then", "elseif", "for", "while", "exitreturning", "switch", "case" } sequence math_list math_list={ --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS} (FUNC_POS omitted) {0,"random",{0,1}} ,{1,"exponent",{0,2}} ,{2,"modulus",{0,1}} ,{3,"divide",{0,1}} ,{4,"multiply",{0,0}} ,{5,"subtract",{0,0}} ,{6,"add",{0,0}} ,{7,"xor",{0,0}} ,{8,"or",{0,0}} ,{9,"and",{0,0}} ,{10,"equal",{0,0}} ,{11,"notequal",{0,0}} ,{12,"lessthan",{0,0}} ,{13,"greaterthan",{0,0}} ,{14,"lessthanorequalto",{0,0}} ,{15,"greaterthanorequalto",{0,0}} ,{16,"setvariable",{0,0}} ,{17,"increment",{0,1}} ,{18,"decrement",{0,1}} ,{19,"not",{0}} ,{20,"logand",{0,0}} ,{21,"logor",{0,0}} ,{22,"logxor",{0,0}} ,{23,"abs",{0}} ,{24,"sign",{0}} ,{25,"sqrt",{0}} } --update is_simple_math_op on adding new operators sequence separator_list separator_list={ "+=","-=","$+","$=","$","+","--","/","*","^^","^" ,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||" } sequence lexer_table lexer_table={} --maps from ASCII value+1 (note +1!) to LEX_* constant sequence lexer_table_key2 lexer_table_key2={} string hex_chars hex_chars="0123456789abcedfABCDEF" sequence hex_char_values hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F} sequence compiled_data compiled_data={} --used inside binary_compile only; compiled data for the current script string current_script current_script="" --name of current script sequence current_scopes current_scopes={""} --list of possible prefixes to add to (sub)script names, eg "master<-" integer cur_script_num cur_script_num=0 --index into all_scripts of current script integer colors_enabled colors_enabled=true integer simple_colorcodes simple_colorcodes=false integer error_file error_file=false sequence used_globals used_globals={} --list of strings sequence used_locals used_locals={} --list of strings sequence string_table string_table={} --binary data string plotscr_version plotscr_version="" integer max_used_function max_used_function=0 --maximum id of a function used in a script integer fast_mode fast_mode=false integer end_anchor_kludge end_anchor_kludge=false integer was_warnings was_warnings=false --------------------------------------------------------------------------- --types-- --A position in the script source. An integer, encoding an index in file_list --and a character number that file (where newlines are one character). --See encode_srcpos and decode_srcpos type srcpos(object pos) --need to allow 0 as a dummy value if atom(pos) and pos=floor(pos) and pos>=0 and pos<=max_srcpos then return true else return false end if end type --Not the same as Euphoria's string type, which is limited to extended ASCII type string(object str) if not sequence(str) then return false end if --Too slow -- for i=1 to length(str) do -- if not integer(str[i]) or str[i]<0 then -- return false -- end if -- end for return true end type --A CMD_TEXT,CMD_POS pair type Token(object tok) if sequence(tok) and length(tok)=2 and string(tok[1]) and srcpos(tok[2]) then return true else return false end if end type --A CMD_TEXT,CMD_POS,VAR_FRAME,VAR_ID tuple describing a local/nonlocal variable type VarToken(object var) if length(var)=4 and string(var[1]) and srcpos(var[2]) and integer(var[3]) and integer(var[4]) then return true end if return false end type --sequence of Tokens type TokenList(object list) if not sequence(list) then return false end if for i=1 to length(list) do if not Token(list[i]) then return false end if end for return true end type --sequence of VarTokens type VarList(object vars) if not sequence(vars) then return false end if for i=1 to length(vars) do if not VarToken(vars[i]) then return false end if end for return true end type --{Token,children} type Node(object node) if sequence(node) and length(node)=2 and Token(node[1]) and sequence(node[2]) then for i=1 to length(node[2]) do if not Node(node[2][i]) then return false end if end for return true else return false end if end type --sequence of Nodes type NodeList(object list) if not sequence(list) then return false end if for i=1 to length(list) do if not Node(list[i]) then return false end if end for return true end type --An element of all_scripts type Script(object script) if sequence(script) and length(script)>=SCRIPT_NEST_DEPTH and length(script)<=SCRIPT_BINARY then return true else return false end if end type --------------------------------------------------------------------------- --time spent waiting for a user-keypress shouldnt count function timeless_wait_key() atom skip_time integer key skip_time=time() key=wait_key() skip_time=time()-skip_time start_time+=skip_time for i=1 to length(timing_data) do if timing_data[i][TIMING_ACTIVE] then timing_data[i][TIMING_TIME]+=skip_time end if end for return(key) end function --------------------------------------------------------------------------- --Euphoria's built-in sprintf truncates all string elements to 8 bits, --so this is an Unicode-enabled replacement. Supports %d, %s, %g, %% --Beware! This will be way slower than sprintf, but luckily it's not --needed anywhere where speed matters. function sprintf_utf(string s, sequence printf_args) integer seg_start, seg_end integer arg_ctr integer code string ret if length(printf_args)=0 then return s end if ret="" arg_ctr=1 seg_start=1 seg_end=find('%',s) while seg_end do ret&=s[seg_start..seg_end-1] code=s[seg_end+1] if code='s' then ret&=printf_args[arg_ctr] arg_ctr+=1 elsif code='d' or code='g' then ret&=sprintf({'%',code},printf_args[arg_ctr]) arg_ctr+=1 elsif code='%' then ret&='%' else simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]})) end if seg_start=seg_end+2 seg_end=find('%',s,seg_start) end while if seg_start<=length(s) then ret&=s[seg_start..$] end if if arg_ctr!=length(printf_args)+1 then simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1})) end if return ret end function --------------------------------------------------------------------------- --fprintf as UTF8 procedure print_utf(object fh, string s, sequence printf_args) if length(printf_args) then s=sprintf_utf(s,printf_args) end if s=toUTF(s,utf_32,utf_8) puts(fh,s) end procedure --------------------------------------------------------------------------- --prints a string with printf to stdout converting color codes procedure color_print(string s, sequence printf_args) string buffer s=sprintf_utf(s,printf_args) if simple_colorcodes then buffer=s else buffer="" for i=1 to length(s) do if s[i]>=COL_FIRST and s[i]<=COL_LAST then print_utf(stdout,buffer,{}) buffer="" if colors_enabled then text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1]) end if else buffer&=s[i] end if end for end if if length(buffer) then print_utf(stdout,buffer,{}) end if end procedure --------------------------------------------------------------------------- procedure opt_wait_for_key() integer key if not find('k',optlist) then color_print("[Press Any Key]\n",{}) key=timeless_wait_key() end if end procedure --------------------------------------------------------------------------- procedure enter_timing_zone(string description) if not find('t',optlist) then return end if timing_depth+=1 timing_data=append(timing_data,{timing_depth,description,-time(),true}) end procedure --------------------------------------------------------------------------- procedure reenter_timing_zone(string description) if not find('t',optlist) then return end if for i=1 to length(timing_data) do if equal(timing_data[i][TIMING_DESCRIPTION],description) then timing_depth+=1 timing_data[i][TIMING_DEPTH]=timing_depth --could change... timing_data[i][TIMING_TIME]-=time() timing_data[i][TIMING_ACTIVE]=true return end if end for enter_timing_zone(description) end procedure --------------------------------------------------------------------------- procedure exit_timing_zone() if not find('t',optlist) then return end if for i=length(timing_data) to 1 by -1 do --there can only be one active timing job at each depth at a time if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then timing_data[i][TIMING_TIME]+=time() timing_data[i][TIMING_ACTIVE]=false exit end if end for timing_depth-=1 end procedure --------------------------------------------------------------------------- procedure print_timing_data() string indent sequence data string tmp1, tmp2 if run_time=0 then --so that all percentages show as 0% run_time=1e100 end if for i=1 to length(timing_data) do data=timing_data[i] indent=repeat(' ',data[TIMING_DEPTH]*4) tmp1=sprintf("%.2f",{data[TIMING_TIME]}) tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time}) color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%% %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]}) end for end procedure --------------------------------------------------------------------------- function html_char_convert(string s) string buffer string result result="" buffer="" for i=1 to length(s) do if s[i]=' ' and i>1 then if s[i-1]=' ' then buffer&=" " else buffer&=s[i] end if elsif s[i]='<' then buffer&="<" elsif s[i]='>' then buffer&=">" else buffer&=s[i] end if end for if length(buffer) then result&=buffer end if return(result) end function --------------------------------------------------------------------------- function error_string_convert(string s) string buffer string result result="" buffer="" for i=1 to length(s) do if s[i]>=COL_FIRST and s[i]<=COL_LAST then if s[i]=COLYEL then buffer&="" elsif s[i]=COLRED then buffer&="" elsif s[i]=COLPNK then buffer&="" elsif s[i]=COLWHI then buffer&="" elsif s[i]=COLBWHI then buffer&="" end if elsif s[i]='\n' then buffer&="
\n" else buffer&=s[i] end if end for if length(buffer) then result&=buffer end if return(result) end function --------------------------------------------------------------------------- --prints a long string wrapped at 80 columns procedure wrap_print(string s, sequence arguments) string outstring s=sprintf_utf(s,arguments) while length(s) do outstring=before_wrap_point(s) s=after_wrap_point(s) color_print("%s\n",{outstring}) end while end procedure --------------------------------------------------------------------------- procedure error_file_print(string s) integer fh if error_file then fh=open(compiler_dir&"hs_error.htm","a") if fh!=failure then print_utf(fh,error_string_convert(s)&"\n",{}) close(fh) end if end if end procedure --------------------------------------------------------------------------- --prints out warning message in red with word wrap procedure simple_warn(string s) sequence pos if not find('w',optlist) then --do not warn if -w is set pos=get_position() if pos[2]>1 then print_utf(stdout,"\n",{}) end if wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s}) error_file_print(sprintf_utf("WARNING: %s",{html_char_convert(s)})) was_warnings = true end if end procedure --------------------------------------------------------------------------- --prints out an error message in red with word wrap, then aborts procedure simple_error(string s) sequence pos pos=get_position() if pos[2]>1 then print_utf(stdout,"\n",{}) end if wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s}) error_file_print(sprintf_utf("ERROR: %s",{html_char_convert(s)})) if end_anchor_kludge then error_file_print("\n
\n") end if opt_wait_for_key() abort(1) end procedure --------------------------------------------------------------------------- --prints out usage info if not enough arguments procedure check_arg_count(sequence args) if length(args)=2 then wrap_print("HamsterSpeak semicompiler v%s%s (C)%s James Paige&Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE}) wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{}) wrap_print(COLYEL&"%s [-abcdfknstwuyz] [long-options] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))}) color_print(" -f fast mode. Disables some optimization\n",{}) color_print(" -k do not wait for a keypress when finished\n",{}) color_print(" -n don't add debug info, or a copy of the source code, to .hs file\n",{}) color_print(" -w suppress minor warnings\n",{}) color_print(" -u show warnings for unused variables\n",{}) color_print(" -y overwrite the destination file without asking\n",{}) color_print(" --incdir \n" & " also search this directory for include files\n",{}) color_print(" --include \n" & " include an additional file\n",{}) color_print("\nAdvanced options (not intended for normal use):\n",{}) color_print(" -b don't include plotscr.hsd automatically\n",{}) color_print(" -c colors will be disabled\n",{}) color_print(" -d dump debug report to hs_debug.txt\n",{}) color_print(" -s print the name of each plotscript as it is compiled\n",{}) color_print(" -a same as -s, but printing all scripts\n",{}) color_print(" -t print detailed timing info (for HSpeak developers)\n",{}) color_print(" -z write error messages to hs_error.htm\n",{}) --Undocumented arguments: -- --unicode-cols -- used by Hamster Whisper for piped output -- --reuse-ids scripts.bin -- Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{}) wrap_print("\nThis is a command-line program. You should either run it from the command-line, or drag and drop your script file onto it. You don't ever need to run HSpeak yourself; Custom will do that for you.\n",{}) opt_wait_for_key() abort(0) end if end procedure --------------------------------------------------------------------------- --Build the lexer tables, which for each code point 0<=i<256, contains the type --of that character in entry i+1 (have to handle NUL) --Other characters above code 255 are always LEX_CHAR procedure init_lexer() sequence table_insert --main table table_insert={" (),\n#\t\"[]{}.", {LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_NEWLINE,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN}} --by default each character is either binary, or an allowed identifier --character. This is for backwards compatibility --NOTE the +1! lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1) for i=1 to length(table_insert[1]) do lexer_table[table_insert[1][i]+1]=table_insert[2][i] end for for i=0 to 9 do lexer_table['0'+i+1]=LEX_DIGIT end for for i=0 to 5 do lexer_table['a'+i+1]=LEX_HEXDIGIT lexer_table['A'+i+1]=LEX_HEXDIGIT end for --add the one-character prefix of each operator for i=1 to length(separator_list) do lexer_table[separator_list[i][1]+1]=LEX_OPERATOR end for --keyword suffix parsing table lexer_table_key2=repeat(LEX_CHAR,256) lexer_table_key2[' '+1]=LEX_SPACE lexer_table_key2['\t'+1]=LEX_SPACE --2nd character of each operator for i=1 to length(separator_list) do if length(separator_list[i])=2 then lexer_table_key2[separator_list[i][2]+1]=LEX_OPERATOR2 end if end for --when floating point is added will want a table for number parsing end procedure --------------------------------------------------------------------------- --initializes global variables, and generally gets things ready to roll procedure init() sequence args integer index integer key integer fh --Process arguments args=command_line() compiler_dir=path_only(args[2]) check_arg_count(args) optlist={} index=3 while index<=length(args) do --These are undocumented if equal(args[index],"--unicode-cols") then simple_colorcodes=true args=delete_element(args,index) elsif equal(args[index],"--reuse-ids") and index+1<=length(args) then color_print("Reusing script ID numbers from previous compile\n",{}) reuse_ids=read_scripts_dot_bin(args[index+1]) args=delete_slice(args,index,index+1) elsif equal(args[index],"--include") and index+1<=length(args) then additional_includes&={args[index+1]} args=delete_slice(args,index,index+1) elsif equal(args[index],"--incdir") and index+1<=length(args) then include_dir=args[index+1] args=delete_slice(args,index,index+1) elsif args[index][1]='-' then optlist=optlist&hs_lower(args[index][2..length(args[index])]) args=delete_element(args,index) else index+=1 end if end while check_arg_count(args) source_file=normalize_filename(args[3]) if length(args)>3 then dest_file=normalize_filename(args[4]) else dest_file=normalize_filename(alter_extension(source_file,"hs")) end if if find('f',optlist) then fast_mode=true color_print("Using fast mode. Some size optimization disabled\n",{}) end if if find('c',optlist) then colors_enabled=false end if --'x' used by old versions of Hamster Whisper to specify color code format --However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway --Replaced by --unicode-cols --the semi-undocumented command line argument -z writes a file called --hs_error.htm formatted for HssEd to read if find('z',optlist) then error_file=true if file_exists(compiler_dir&"hs_error.htm") then fh=open(compiler_dir&"hs_error.htm","w") if fh!=failure then puts(fh,"") close(fh) end if end if end if wrap_print("HamsterSpeak semicompiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION}) wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file}) if file_exists(dest_file) then if find('y',optlist) then --found the -y command line arg, overwrite automatically key='y' else while true do --prompt the user to overwrite wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file}) key=timeless_wait_key() color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key}) if hs_lower(key)='y' then exit elsif hs_lower(key)='n' then simple_error("output file overwrite cancelled by user") else wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)}) end if end while end if end if --Init global data tables --why the alpha-tree? because the reserved-word list can get HUGE. --we want to be able to look up words in it quickly. A btree or some such --thing would have been even better, but thats alot of trouble :) reserved=alpha_tree_mass_insert(reserved,{ {"defineconstant",RESERVE_TOPLEVEL} ,{"definetrigger" ,RESERVE_TOPLEVEL} ,{"defineoperator",RESERVE_TOPLEVEL} ,{"globalvariable",RESERVE_TOPLEVEL} ,{"definefunction",RESERVE_TOPLEVEL} ,{"definescript" ,RESERVE_TOPLEVEL} ,{"plotscrversion",RESERVE_TOPLEVEL} ,{"include" ,RESERVE_TOPLEVEL} --should never appear, in theory ,{"do" ,RESERVE_FLOW} ,{"begin" ,RESERVE_BEGIN} ,{"end" ,RESERVE_END} ,{"return" ,RESERVE_FLOW} ,{"if" ,RESERVE_FLOW} ,{"then" ,RESERVE_FLOW} ,{"else" ,RESERVE_FLOW} ,{"elseif" ,RESERVE_FLOW} --not in flow_list... shouldn't that be a problem? ,{"for" ,RESERVE_FLOW} ,{"cfor" ,RESERVE_UNIMPLEMENTED} ,{"foreach" ,RESERVE_UNIMPLEMENTED} ,{"while" ,RESERVE_FLOW} ,{"break" ,RESERVE_FLOW} ,{"continue" ,RESERVE_FLOW} ,{"exitscript" ,RESERVE_FLOW} ,{"exitreturning" ,RESERVE_FLOW} ,{"switch" ,RESERVE_FLOW} ,{"case" ,RESERVE_FLOW} ,{"subscript" ,RESERVE_KEYWORD} --keywords have special case handling ,{"variable" ,RESERVE_KEYWORD} ,{"=" ,RESERVE_UNIMPLEMENTED} ,{"tracevalue" ,RESERVE_MACRO} ,{"assert" ,RESERVE_MACRO} -- Stray "$"'s are picked up by the lexer (not totally ideal maybe) })--end mass_insert for i=1 to length(math_list) do reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN) end for -- Adding the separators (operators) here prevents them from being used before -- defineoperator is handled (either a block before include,plotscr.hsd, or in -- e.g. defineconstant) for i=1 to length(separator_list) do if alpha_tree_data(reserved,separator_list[i],false)=false then reserved=alpha_tree_insert(reserved,separator_list[i],RESERVE_OPERATOR) end if end for --Append subscript trigger id (this is needed for scripts.bin) trigger_list=append(trigger_list,{-1,"subscript"}) --Lexer finite state transition tables init_lexer() end procedure --------------------------------------------------------------------------- --returns {filenumber,file position} pair function decode_srcpos_file(srcpos pos) integer val val=pos --0 is an invalid srcpos if val=0 then simple_error("compiler bug: can't decode uninitialised srcpos") end if val-=1 for i=1 to length(file_list) do --an offset of 0 is reserved, but currently unused if val=0 then simple_error("compiler bug: 0 srcpos file offset") end if if val<=length(file_list[i][FILE_TEXT]) then return {i,val} end if val-=length(file_list[i][FILE_TEXT])+1 end for simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos})) end function --------------------------------------------------------------------------- function srcpos_file_number(srcpos pos) sequence temp temp=decode_srcpos_file(pos) return temp[1] end function --------------------------------------------------------------------------- --Returns 1-based character number in file function srcpos_point(srcpos pos) sequence temp temp=decode_srcpos_file(pos) return temp[2] end function --------------------------------------------------------------------------- --Encode offset (1-based characters from start of file) in a file as a srcpos function encode_srcpos(integer filenum, integer offset) srcpos pos pos=1 for i=1 to filenum-1 do pos+=length(file_list[i][FILE_TEXT])+1 end for return pos+offset end function --------------------------------------------------------------------------- --possibly slow: prehaps avoid heavy use --returns {file name, line number, column number, line text}: index with POS_* --column number returned is 0-based function decode_srcpos(srcpos pos) integer point, filenum integer lineno string filetext sequence lines sequence temp temp=decode_srcpos_file(pos) filenum=temp[1] point=temp[2] filetext=file_list[filenum][FILE_TEXT] lines=file_list[filenum][FILE_LINE_EXTENTS] lineno=length(lines) for i=1 to length(lines) do if point<=lines[i][PAIR_END] then lineno=i exit end if end for return({file_list[filenum][FILE_NAME],lineno,point-lines[lineno][PAIR_START],filetext[lines[lineno][PAIR_START]..lines[lineno][PAIR_END]]}) end function --------------------------------------------------------------------------- function form_error_text(string s, srcpos pos) string line sequence src_position string column_display integer tab_compensate src_position=decode_srcpos(pos) --?pos --pretty_print(1,src_position,{2}) line=substring_replace(src_position[POS_TEXT],"\t"," ") tab_compensate=3*count('\t',src_position[POS_TEXT][1..src_position[POS_COLUMN]]) column_display=repeat(' ',src_position[POS_COLUMN]+tab_compensate) error_file_print(sprintf_utf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]})) if length(current_script) then return( sprintf_utf( "in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n" &COLBWHI&"%s\n" &"%s^\n" &COLRED&"%s\n" ,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s} ) ) else return( sprintf_utf( "in line %d of "&COLPNK&"%s"&COLRED&"\n" &COLBWHI&"%s\n" &"%s^\n" &COLRED&"%s\n" ,{src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s} ) ) end if --note that the calling procedure must send/deal with closing tags to error_file_print end function --------------------------------------------------------------------------- procedure src_warn(string s, srcpos pos) if not find('w',optlist) then --do not warn if -w is set simple_warn(form_error_text(s,pos)) error_file_print("\n
\n") end if end procedure --------------------------------------------------------------------------- procedure src_error(string s, srcpos pos) end_anchor_kludge=true simple_error(form_error_text(s,pos)) end procedure --------------------------------------------------------------------------- --Find newlines in a file (possibly mixed type), returns line extents function split_lines(string file_text) integer line_start=1 integer last_13=-1 sequence line_extents = {} -- {start of line, end of line} pairs, both inclusive for i=1 to length(file_text) do if file_text[i]=13 then line_extents=append(line_extents, {line_start,i-1}) last_13=i line_start=i+1 elsif file_text[i]=10 then if last_13!=i-1 then -- handle {13,10} "\r\n" line_extents=append(line_extents, {line_start,i-1}) end if line_start=i+1 end if end for if line_start<=length(file_text) then line_extents=append(line_extents, {line_start,length(file_text)}) end if return line_extents end function --------------------------------------------------------------------------- --Read a file, adding to file_list. --pos is used only for location of include statement, may be 0. procedure load_source(string filename, string reading_how, srcpos pos) integer fh string file_text sequence line_extents -- {start of line, end of line} pairs fh=open(filename,"r") if fh!=failure then wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename}) line_extents={} reenter_timing_zone("read_file") --Autodetect the encoding file_text=read_file(fh, TEXT_MODE, UTF) exit_timing_zone() close(fh) line_extents=split_lines(file_text) total_lines+=length(line_extents) file_list=append(file_list,{filename,file_text,line_extents}) max_srcpos+=length(file_text)+1 else if pos then src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos) else simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename})) end if end if end procedure --------------------------------------------------------------------------- --Load and lex, recursively finding includes procedure load_and_lex_source(string filename, string reading_how, srcpos pos, sequence include_stack) integer include_num if find(filename,include_stack) then src_error(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included recursively",{filename}),pos) end if include_stack&={filename} include_num=find(filename,column(file_list,FILE_NAME)) if include_num then --Silently ignore this include. Double includes are allowed because of automatic inclusion. --src_warn(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included multiple times",{include_name}),pos) return end if --Reads one additional file reenter_timing_zone("load_and_lex_source/reading") load_source(filename,reading_how,pos) exit_timing_zone() --Lex that file, and load and lex all included files lex_file(length(file_list), include_stack) end procedure --------------------------------------------------------------------------- --Search for an include file function find_include_file(string include_name) --try source directory if file_exists(path_only(source_file)&include_name) then return normalize_filename(path_only(source_file)&include_name) end if if file_exists(include_dir&include_name) then return include_dir&include_name end if --try current directory if file_exists(include_name) then return include_name end if if file_exists(compiler_dir&include_name) then return normalize_filename(compiler_dir&include_name) end if --in case we're installed at $prefix/games, try $prefix/share/games/ohrrpgce/ if file_exists(compiler_dir&"../share/games/ohrrpgce/"&include_name) then return normalize_filename(compiler_dir&"../share/games/ohrrpgce/"&include_name) end if --give up return include_name end function --------------------------------------------------------------------------- procedure show_source_info() if total_lines then wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)}) else simple_error("no data to compile\n") end if end procedure --------------------------------------------------------------------------- procedure lexer_binary_error(srcpos pos) --better use src_error, to track down garbage if accidentally creeping in src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos) end procedure --------------------------------------------------------------------------- --reads and processes the escape codes in a string and returns it enclosed in quote marks function lexer_read_string(string s, integer i, integer stop, srcpos pos) string string integer escaping integer at1, at2 escaping=false string="\"" while true do if i>stop then --did not find a closing " so not a valid string src_error("Expected \" at end of line to end string (multi-line strings are not supported)",pos+stop) end if if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then lexer_binary_error(pos+i) end if if escaping then if s[i]='"' or s[i]='\\' or s[i]='\'' then --including ' for closer compatibility with Python string&=s[i] elsif s[i]='n' then string&=10 elsif s[i]='t' then string&=9 elsif s[i]='x' then --escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes) at1=0 at2=0 if i+2<=stop then at1=find(s[i+1],hex_chars) at2=find(s[i+2],hex_chars) end if if at1=0 or at2=0 then src_error("Expected two hexidecimal characters to follow \\x escape code",pos+i) end if string&={16*hex_char_values[at1]+hex_char_values[at2]} i+=2 else --invalid sequence string&='\\' string&=s[i] end if escaping=false else if s[i]='"' then return({i,string&'"'}) elsif s[i]='\\' then escaping=true else string&=s[i] end if end if i+=1 end while end function --------------------------------------------------------------------------- --Lex a signed or unsigned integer literal. --Returns {newi, text} with text is the string representation of an integer. function lexer_read_number(string s, integer i, integer stop, srcpos pos) integer state atom val integer sign,base string text integer starti starti=i --Process negative prefix, because it isn't treated as a unary operator if s[i]='-' then text="-" i+=1 sign=-1 else text="" sign=1 end if --Skip whitespace while i<=stop and (s[i]=' ' or s[i]='\t') do i+=1 end while --Check for base prefix 0x 0o or 0b. No separating space allowed! base=10 if i+2<=stop and s[i]='0' then if s[i+1]='x' or s[i+1]='X' then base=16 i+=2 elsif s[i+1]='o' or s[i+1]='O' then base=8 i+=2 elsif s[i+1]='b' or s[i+1]='B' then base=2 i+=2 end if end if val=0 while i<=stop do if s[i]<256 then state=lexer_table[s[i]+1] else state=LEX_CHAR end if --color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state}) if state=LEX_DIGIT then --0-9 integer digit digit=s[i]-'0' if digit>=base then src_error(sprintf_utf("Found digit "&COLYEL&"%s"&COLRED&" which is too big, while parsing a base-%d number",{s[i],base}),pos+i) end if text&=s[i] val=val*base+digit elsif base=16 and state=LEX_HEXDIGIT then --a-fA-F text&=s[i] val=val*16+(toLower(s[i])-'a'+10) elsif state=LEX_SPACE then --yes, allowed in numbers too elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_OPERATOR or state=LEX_COMMENT or state=LEX_BINARY or state=LEX_NEWLINE then --backtrack one character --known bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead i-=1 exit else --currently, state=LEX_CHAR or state=LEX_HEXDIGIT or state=LEX_WARN or state=LEX_STRING if s[i]='.' then src_error("Floating point values are not yet implemented",pos+i) end if src_error( sprintf_utf("Found unexpected character "&COLYEL&"%s"&COLRED&" while reading a number, "&COLYEL&"%s"&COLRED&". Names such as variables can't start with a digit. Or maybe you forgot a comma or operator?" ,{s[i],text}) ,pos+i ) end if i+=1 end while if base!=10 then --Special behaviour for non-decimals: silently overflow large 32 bit unsigned values to negatives if val>=#80000000 and val<#100000000 then val=val-#100000000 end if --Convert to text text=sprintf("%d",{sign*val}) end if if not int32(sign*val) then src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32 bit signed integer. Integers must be between -2147483648 and 2147483647",{text}),pos+starti) end if return({i,text}) end function --------------------------------------------------------------------------- --Clean the rest of a line for seek_include after 'include' occurs --s is the whole line, i is end of the 'include', pos is the srcpos at start of line function lexer_read_include_line(TokenList tokens, string s, integer i, integer stop, srcpos pos) integer at sequence temp string filename_string if not equal(tokens[1][CMD_TEXT],"include") then src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS]) end if filename_string="" i+=1 s=s[i..stop] if length(s) then --Need to increment i while stripping while s[1]=' ' or s[1]='\t' do s=s[2..length(s)] i+=1 end while --A quote mark in the middle of a filename doesn't need any escaping if s[1]='"' then temp=lexer_read_string(s,2,length(s),pos+i-1) s=s[temp[1]+1..$] i+=temp[1] --1 past the closing " filename_string=temp[2] --enclosed in quotes end if at=find('#',s) if at then s=s[1..at-1] end if --Trim either whitespace after "filename", or after non-quoted filename s=trim_whitespace(s) if length(filename_string) then if length(s) then src_error("Garbage after the end of the filename in this "&COLYEL&"include"&COLRED&" statement", pos+i) end if s=filename_string end if tokens=append(tokens,{s,pos+i-1}) end if if length(tokens)=1 then src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS]) end if if length(tokens)>2 then src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS]) end if return(tokens) end function --------------------------------------------------------------------------- -- s is the text for the whole file, the interval i to stop is a single line, -- and pos is the srcpos for the beginning of the file function lex_line(sequence s, integer i, integer stop, srcpos pos) integer state TokenList tokens string ident --identifier text being built up sequence temp string masked integer oldi integer found integer textstart --the column at which this identifier starts, otherwise 0 string remem_ident integer remem_textstart integer nonnumeral --whether we are definitely inside an identifier, ie. a digit doesn't indicate a number integer backtracked integer last_state --A LEX_* constant; the last state that was not LEX_SPACE integer last_state_i --Value at i where last_state was set ident="" nonnumeral=false last_state=LEX_NEWLINE last_state_i=0 textstart=0 tokens={} while i<=stop do if s[i]<256 then state=lexer_table[s[i]+1] else state=LEX_CHAR end if --color_print("lexing i=%d %d=%s state %d nonnum=%d ident=%s\n",{i,s[i],{s[i]},state,nonnumeral,ident}) if state=LEX_CHAR or state=LEX_HEXDIGIT then ident=ident & toLower(s[i]) if textstart=0 then textstart=i end if nonnumeral=true elsif state=LEX_SPACE then elsif state=LEX_DIGIT then if nonnumeral then ident=ident & s[i] else if length(ident) then --either ident is "-" or it is "" i=textstart ident="" textstart=0 end if temp=lexer_read_number(s,i,stop,pos) tokens=append(tokens,{temp[2],pos+i}) i=temp[1] --nonnumeral remains false end if elsif state=LEX_WARN then ident=ident & toLower(s[i]) if textstart=0 then textstart=i end if nonnumeral=true src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), pos+i) else --We've reached the end of a token... unless it's only the first character --of a two-character operator. remem_ident=ident --for backtracking remem_textstart=textstart backtracked=false if length(ident) then tokens=append(tokens,{ident,pos+textstart}) end if ident="" nonnumeral=false textstart=0 if state=LEX_BEGIN then tokens=append(tokens,{"begin",pos+i}) elsif state=LEX_END then if last_state=LEX_COMMA then --foo(1,). This probably doesn't particularly need to be disallowed --Could also check for ) but that will be caught later end if tokens=append(tokens,{"end",pos+i}) elsif state=LEX_COMMA then if last_state=LEX_COMMA or last_state=LEX_BEGIN or last_state=LEX_OPERATOR or last_state=LEX_OPERATOR2 then --foo(, 1) or foo(1, , 2) or , src_error( sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}), pos+i) end if elsif state=LEX_NEWLINE then elsif state=LEX_COMMENT then exit elsif state=LEX_STRING then temp=lexer_read_string(s,i+1,stop,pos) tokens=append(tokens,{temp[2],pos+i}) i=temp[1] elsif state=LEX_OPERATOR then --Read the next non-space character. If it's LEX_OPERATOR2 we greedily --add it, otherwise we do nothing (we only support length 1 and 2 --separators). However, we might have have been too greedy (not a value --two-char separator), and need to put it back. textstart=i masked=s[i..i] integer lookahead_state --This while loop is in order to allow whitespace in the middle of separators. while i length(s) found=false for j=1 to length(separator_list) do if match(separator_list[j],masked)=1 then tokens=append(tokens,{separator_list[j],pos+textstart}) if length(separator_list[j])=1 then i=textstart end if found=true exit end if end for if found=false then --need to backtrack, in particular for - --color_print("backtracking on '%s'\n", {masked}) backtracked=true ident=remem_ident & masked[1] oldi=i+1 i=textstart if length(remem_ident) then tokens=tokens[1..$-1] textstart=remem_textstart nonnumeral=true elsif equal(ident,"-") then --nonnumeral remains false: we've seen exactly "-" else --a keyword character followed by garbage src_error( sprintf_utf("Expected a two-character operator (like "&COLYEL&":="&COLRED&") but the second character (e.g. "&COLYEL&"="&COLRED&") is missing",{}), pos+i) end if else textstart=0 if last_state=LEX_COMMA then -- , --(Could check for ( but that will be detected later anyway.) --Allowed if it's a unary operator. There's only one, since - is not an operator. if s[i]!='$' then src_error( sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}), pos+last_state_i) end if end if end if elsif state=LEX_BINARY then lexer_binary_error(pos+i) end if if backtracked=false then if equal(remem_ident,"include") then --include is a special case, as normal lexing rules don't apply for rest of line if length(ident) then simple_error("compiler bug!\n") end if return lexer_read_include_line(tokens,s,i,stop,pos) end if end if --End of a complete token end if if state!=LEX_SPACE then last_state_i=i last_state=state end if i+=1 end while if length(ident) then --ought to check whether ident is 'include'... tokens=append(tokens,{ident,pos+textstart}) end if return(tokens) end function --------------------------------------------------------------------------- --this also checks for rogue strings and $'s function translate_plotstrings(TokenList tokens) integer state integer start_token integer i Token string_func Token string_token srcpos pos state=0 i=1 while i<=length(tokens) do if compare(tokens[i][CMD_TEXT],"$")=0 then pos=tokens[i][CMD_POS] if state=0 then state=1 start_token=i else --jump to error throw state=1 exit end if end if if state=2 then if tokens[i][CMD_TEXT][1]='"' then string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1]) string_token={sprintf("@$string%d",{length(string_list)}),pos} tokens=tokens[1..start_token-1]&{string_func,{"begin",pos}}&tokens[start_token+1..i-2]&{string_token,{"end",pos}}&tokens[i+1..$] state=0 --may have multiple strings on one line else --we saw a + (or illegal =) inside the string number expression state=1 end if end if if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then string_func={"setstringfromtable",pos} state=2 elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then string_func={"appendstringfromtable",pos} state=2 end if i+=1 end while if state!=0 then --we've seen a surplus $, not right src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos) end if return(tokens) end function --------------------------------------------------------------------------- --Given a command tree, get the original text from the file --OK, this function is horribly complicated just so tracevalue can print --"hero X (me)" instead of "herox(me)". It could be implemented much simpler if --macros were expanded in the lexer, but then build_ast and --convert_macros would have to be as well function tree_original_text(Node tree) TokenList script_toks Node subtree srcpos leftmost, rightmost integer left_at, right_at integer filenum integer left_point, right_point integer depth TokenList tokens string text script_toks=all_scripts[cur_script_num][SCRIPT_BODY_TOKS] --find leftmost and rightmost tokens making up the expression, not including brackets leftmost=tree[TREE_TRUNK][CMD_POS] subtree=tree while length(subtree[TREE_BRANCHES]) do subtree=subtree[TREE_BRANCHES][1] if subtree[TREE_TRUNK][CMD_POS]32767 then src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos) n=32767 elsif n<-32768 then src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos) n=-32768 end if return(n) end function --------------------------------------------------------------------------- function try_string_to_number(Token s) atom result result=floor(string_to_object(s[CMD_TEXT],0)) if not string_is_int32(s[CMD_TEXT]) then src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS]) end if return(result) end function --------------------------------------------------------------------------- function enforce_constants(string s) --enforces both constants object v v=alpha_tree_data(constant_list,s,{}) if length(v) then v=v[CONST_VALUE] if int32(v) then return(sprintf("%d",{v})) end if end if return(s) end function --------------------------------------------------------------------------- function get_cmd() Token result if get_cmd_pointer>length(cmd) then src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS]) end if result=cmd[get_cmd_pointer] get_cmd_pointer+=1 result[CMD_TEXT]=enforce_constants(result[CMD_TEXT]) return(result) end function --------------------------------------------------------------------------- function get_cmd_no_constants() Token result if get_cmd_pointer>length(cmd) then src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS]) end if result=cmd[get_cmd_pointer] get_cmd_pointer+=1 result[CMD_TEXT]=result[CMD_TEXT] return(result) end function --------------------------------------------------------------------------- function get_cmd_block(integer convert_constants) Token this TokenList result result={} this=get_cmd() if compare("begin",this[CMD_TEXT])!=0 then src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS]) end if while true do if convert_constants then this=get_cmd() else this=get_cmd_no_constants() end if if compare("begin",this[CMD_TEXT])=0 then src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS]) elsif compare("end",this[CMD_TEXT])=0 then exit--break out of the while else result=append(result,this) end if end while return(result) end function --------------------------------------------------------------------------- procedure parse_constant_block(TokenList block) atom num string name for i=1 to length(block) by 2 do if i+1>length(block) then src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS]) end if num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]}) check_undefined_constant(block[i+1],num) name=block[i+1][CMD_TEXT] constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_POS]}) reserved=alpha_tree_insert(reserved,name,RESERVE_CONSTANT) end for end procedure --------------------------------------------------------------------------- procedure parse_trigger_block(TokenList block) integer num string name for i=1 to length(block) by 2 do if i+1>length(block) then src_error("Expected name but script trigger definition block ended",block[i][CMD_POS]) end if num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS]) check_undefined_string(block[i+1],"script trigger name") name=block[i+1][CMD_TEXT] trigger_list=append(trigger_list,{num,name}) reserved=alpha_tree_insert(reserved,name,RESERVE_TOPLEVEL) end for end procedure --------------------------------------------------------------------------- procedure parse_version_block(TokenList block) string req_version if length(block)<2 then --Don't have a srcpos simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)") end if plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time) req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd --trim quote marks if plotscr_version[1]!='"' or req_version[1]!='"' then simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')") end if plotscr_version=plotscr_version[2..$-1] req_version=req_version[2..$-1] if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.") end if --printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION}) if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.") end if end procedure --------------------------------------------------------------------------- procedure create_global(atom id, string name, srcpos pos) integer at at=find(id,global_list[PAIR_NUM]) if at then src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos) else if id>=0 and id<=MAXGLOBAL then global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id) global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name) global_list[GLB_POS]=append(global_list[GLB_POS],pos) reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL) else src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos) end if end if end procedure --------------------------------------------------------------------------- procedure parse_global_block(TokenList block) atom num for i=1 to length(block) by 2 do if i+1>length(block) then src_error("expected name but globalvariable block ended",block[i][CMD_POS]) end if num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]}) check_undefined_string(block[i+1],"global variable name") create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS]) end for end procedure --------------------------------------------------------------------------- procedure parse_operator_block(TokenList block) atom num integer keyword string name,truename srcpos pos for i=1 to length(block) by 3 do if i+2>length(block) then src_error("expected name but defineoperator block ended",block[i][CMD_POS]) end if num=try_string_to_number(block[i]) mustnt_be_a_number(block[i+1]) mustnt_be_a_number(block[i+2]) name=block[i+1][CMD_TEXT] truename=block[i+2][CMD_TEXT] pos=block[i+2][CMD_POS] operator_list=append(operator_list,{num,name,truename,pos}) keyword=alpha_tree_data(reserved,name,false) if keyword then if keyword!=RESERVE_OPERATOR and keyword!=RESERVE_BUILTIN then src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" may not be used as an operator",{RESERVE_NAMES[keyword],name}),pos) end if else reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR) end if end for end procedure --------------------------------------------------------------------------- --arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to --VAR_ARGS indicating a variable (unlimited) number of arguments. function create_function(sequence list, integer id, string name, object arglist, integer func_type, srcpos pos) integer at sequence encoded if func_type=RESERVE_SCRIPT then at=find(id,column(list,PAIR_NUM)) if at then src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),pos) end if if id=0 or id<-1 then src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos) elsif id=-1 then --autonumbered if length(reuse_ids) then encoded=encode_ohr(name) if length(encoded)>36 then encoded=encoded[1..36] end if at=find_in_column(encoded,reuse_ids,PAIR_NAME) if at then id=reuse_ids[at][PAIR_NUM] end if end if if id<0 then id=autonumber_id loop do autonumber_id-=1 id=autonumber_id until find_in_column(id,reuse_ids,PAIR_NUM)=0 end loop end if end if end if list=append(list,{id,name,arglist,pos}) reserved=alpha_tree_insert(reserved,name,func_type) return(list) end function --------------------------------------------------------------------------- function parse_define_block(TokenList block, sequence list, integer func_type) atom num string name atom args sequence arglist --sequence of strings srcpos name_pos integer i i=1 while i<=length(block) do num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS]) if i+1>length(block) then src_error(sprintf_utf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_POS]) else i+=1 check_undefined_string(block[i],RESERVE_NAMES[func_type]&" name") name=block[i][CMD_TEXT] name_pos=block[i][CMD_POS] if i+1>length(block) then src_error("expected argument count but define block ended",block[i][CMD_POS]) else i+=1 args=try_string_to_number(block[i]) arglist={} if args<0 then list=create_function(list,num,name,VAR_ARGS,func_type,name_pos) else for j=1 to args do if i+1>length(block) then src_error("expected argument default but define block ended",block[i][CMD_POS]) else i+=1 arglist=append(arglist,try_string_to_number(block[i])) end if end for list=create_function(list,num,name,arglist,func_type,name_pos) end if i+=1 end if end if end while return(list) end function --------------------------------------------------------------------------- procedure preliminary_parse_pass() Token this enter_timing_zone("Preliminary pass") color_print("preliminary pass\n",{}) get_cmd_pointer=1 while get_cmd_pointer<=length(cmd) do --read a top-level command this=get_cmd() if equal("defineconstant",this[CMD_TEXT]) then parse_constant_block(get_cmd_block(false)) elsif equal("definetrigger",this[CMD_TEXT]) then parse_trigger_block(get_cmd_block(false)) elsif equal("plotscrversion",this[CMD_TEXT]) then parse_version_block(get_cmd_block(false)) end if end while --Check for very old plotscr.hsd if equal(plotscr_version,"") then if find("plotscr.hsd",column(file_list,FILE_NAME)) then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE..") end if --If plotscr.hsd is not included then we're compiling with -b, so don't complain end if exit_timing_zone() end procedure --------------------------------------------------------------------------- --Parse a script or subscript --parent_name is a fully-scoped name of parent script, or "" --if this script isn't a subscript of another. --Returns Token with name of parsed script function parse_script(Token trigger, string parent_name, integer parent_idx) Token name string full_name TokenList arglist TokenList body Token this string err_string string previous_script Token subname sequence src_position string msg integer depth integer nest_depth integer temp integer idx name=get_cmd() if parent_idx=0 then full_name=name[CMD_TEXT] nest_depth=0 else full_name=sprintf("%s<-%s",{parent_name,name[CMD_TEXT]}) nest_depth=all_scripts[parent_idx][SCRIPT_NEST_DEPTH]+1 if nest_depth>MAX_NEST_DEPTH then src_error( sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is nested too deeply (%d layers deep), which is more than is supported. Reorganise your scripts to not use so much nesting." ,{trigger[CMD_TEXT],name[CMD_TEXT],nest_depth}) ,name[CMD_POS] ) end if end if previous_script=current_script current_script=full_name --name[CMD_TEXT] --Check not already a script cur_script_num=parent_idx temp=lookup_scoped_script_name(name[CMD_TEXT]) cur_script_num=0 if temp then if parent_idx then msg="There already is a script or subscript named "&COLYEL&"%s"&COLRED&" in this context/scope, defined on line %d of "&COLPNK&"%s"&COLRED else msg="Script "&COLYEL&"%s"&COLRED&" appears more than once! It was previously defined on line %d of "&COLPNK&"%s"&COLRED end if src_position=decode_srcpos(all_scripts[temp][SCRIPT_POS]) src_error( sprintf_utf(msg,{name[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]}) ,name[CMD_POS] ) end if --Check not a reserved non-script name. --This check is independent of the above 'name already in scope' check. if not find_in_column(full_name,script_list,PAIR_NAME) then --Doesn't appear in a definescript block; if it did the name is already reserved. --Note that we won't add it to script_list yet; that happens later in check_script_declarations check_undefined_string(name,"user script name") end if --Parse arguments arglist={} while true do if get_cmd_pointer>length(cmd) then src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{trigger[CMD_TEXT],name[CMD_TEXT]}),name[CMD_POS]) end if this=get_cmd() if compare("begin",this[CMD_TEXT])=0 then exit--break the while end if --Note that arglist isn't the actual list of arguments, it's just a string of tokens right now (including defaults). --The actual arglist is built in process_arglist, while compiling each script. arglist=append(arglist,this) end while --Create all_scripts entry now for recursive calls; SCRIPT_BODY_TOKS filled in later all_scripts=append(all_scripts,{ trigger ,name[CMD_TEXT] ,name[CMD_POS] ,full_name ,arglist ,{} ,parent_idx ,nest_depth }) idx=length(all_scripts) --Every script is nested inside a big fat do() block body={{"do",this[CMD_POS]}} depth=0 while true do body=append(body,this) if compare("end",this[CMD_TEXT])=0 then depth-=1 if depth=0 then exit--break while end if elsif compare("begin",this[CMD_TEXT])=0 then depth+=1 elsif compare("subscript",this[CMD_TEXT])=0 then --Recurse, so body of subscript doesn't appear in this script. --The subscript node stays, for syntax checking, but is removed in normalize_flow_control subname=parse_script(this,full_name,idx) body&={{"begin",this[CMD_POS]},{"end",this[CMD_POS]}} else temp=alpha_tree_data(reserved,this[CMD_TEXT],RESERVE_FLOW) if temp<=RESERVE_UNIMPLEMENTED then err_string=sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT]} ) if temp=RESERVE_TOPLEVEL then err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}) end if src_error(err_string,this[CMD_POS]) end if end if if get_cmd_pointer>length(cmd) then src_error( sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED ,{trigger[CMD_TEXT],name[CMD_TEXT]} ) ,name[CMD_POS] ) end if this=get_cmd() end while all_scripts[idx][SCRIPT_BODY_TOKS]=body current_script=previous_script return name end function --------------------------------------------------------------------------- procedure parse_top_level() Token this sequence triggers integer triggeridx enter_timing_zone("Top level pass") color_print("parsing top-level\n",{}) triggers=column(trigger_list,PAIR_NAME) get_cmd_pointer=1 while get_cmd_pointer<=length(cmd) do --read a top-level command this=get_cmd() triggeridx=find(this[CMD_TEXT],triggers) if compare("defineconstant",this[CMD_TEXT])=0 then get_cmd_block(true) elsif compare("definetrigger",this[CMD_TEXT])=0 then get_cmd_block(true) elsif compare("globalvariable",this[CMD_TEXT])=0 then parse_global_block(get_cmd_block(true)) elsif compare("defineoperator",this[CMD_TEXT])=0 then parse_operator_block(get_cmd_block(true)) elsif compare("definefunction",this[CMD_TEXT])=0 then function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION) elsif compare("definescript",this[CMD_TEXT])=0 then script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT) elsif compare("plotscrversion",this[CMD_TEXT])=0 then --ignore get_cmd_block(false) elsif triggeridx>0 then if trigger_list[triggeridx][PAIR_NUM]<0 then src_error( sprintf_utf( COLYEL&"%s"&COLRED&" can only occur inside another script. Use "&COLYEL&"script"&COLRED&" instead for regular scripts." ,{this[CMD_TEXT]} ) ,this[CMD_POS] ) else parse_script(this,{},0) end if else check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration") src_error( sprintf_utf( "Expected top-level declaration but found "&COLYEL&"%s"&COLRED ,{this[CMD_TEXT]} ) ,this[CMD_POS] ) end if end while cmd={} exit_timing_zone() end procedure --------------------------------------------------------------------------- procedure dump_script_and_function_info(integer fh, sequence list) sequence this string id_string sequence src_position for i=1 to length(list) do this=list[i] if this[PAIR_NUM]>autonumber_id then id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]}) else id_string=sprintf("ID=%d",{this[PAIR_NUM]}) end if src_position=decode_srcpos(this[FUNC_POS]) print_utf(fh,"%s %d\t%s\t%s(",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,id_string ,this[PAIR_NAME] }) if equal(this[FUNC_ARGS],VAR_ARGS) then print_utf(fh,"VARIABLE ARGS",{}) else for j=1 to length(this[FUNC_ARGS]) do if j>1 then print_utf(fh,",",{}) end if if equal(this[FUNC_ARGS][j],NO_DEFAULT) then print_utf(fh,"REQ",{}) else print_utf(fh,"%d",{this[FUNC_ARGS][j]}) end if end for end if print_utf(fh,")\n",{}) end for end procedure --------------------------------------------------------------------------- function seek_string_by_id(integer id, sequence list, string name) integer at at=find_in_column(id,list,PAIR_NUM) if at then return(list[at][PAIR_NAME]) else simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id})) end if return("") end function --------------------------------------------------------------------------- --Used while dumping script binary function name_lookup(sequence pair, VarList locals) integer at integer frame,id if pair[1]=KIND_NUMBER then return(sprintf("%d",{pair[2]})) elsif pair[1]=KIND_LOCAL or pair[1]=KIND_NONLOCAL then frame=and_bits(pair[2],#FF00)/#100 id=and_bits(pair[2],#FF) for i=1 to length(locals) do if locals[i][VAR_FRAME]=frame and locals[i][VAR_ID]=id then return locals[i][CMD_TEXT] end if end for simple_error(sprintf_utf("HSpeak bug: decompiler found illegal variable ID "&COLYEL&"%d*256+%d"&COLRED,{frame,id})) elsif pair[1]=KIND_GLOBAL then at=find(pair[2],global_list[PAIR_NUM]) if at then return(global_list[PAIR_NAME][at]) else simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]})) end if elsif pair[1]=KIND_FLOW then return(seek_string_by_id(pair[2],flow_list,"flow control structure")) elsif pair[1]=KIND_SCRIPT then return(seek_string_by_id(pair[2],script_list,"user script")) elsif pair[1]=KIND_FUNCTION then return(seek_string_by_id(pair[2],function_list,"hardcoded function")) elsif pair[1]=KIND_MATH then return(seek_string_by_id(pair[2],math_list,"built-in function")) else simple_error(sprintf_utf("HSpeak bug: decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]})) end if end function --------------------------------------------------------------------------- function binstring_to_int(sequence encoded) --bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles atom temp temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000 if and_bits(temp,#80000000) then return(temp-#100000000) end if return(temp) end function --------------------------------------------------------------------------- function read_word(sequence encoded) --opposite of output_word integer temp temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100 if and_bits(temp,#8000) then return(temp-#10000) end if return(temp) end function --------------------------------------------------------------------------- function dump_script_binary(sequence bin, integer offset, integer depth, VarList locals) string result sequence kind_and_id integer kind integer argcount integer new_offset result="" kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])} kind=kind_and_id[1] result&=sprintf_utf("%s%s",{ repeat(' ',depth) --indent ,name_lookup(kind_and_id,locals) }) if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11]) if argcount then result&="(\n" for i=0 to argcount-1 do new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3]) result&=dump_script_binary(bin,new_offset,depth+2,locals) end for result&=repeat(' ',depth)&")\n" else result&="()\n" end if else result&="\n" end if return(result) end function --------------------------------------------------------------------------- function dump_script_tree(NodeList tree, integer depth) string result result="" for i=1 to length(tree) do result&=sprintf_utf("%s%s",{ repeat(' ',depth)--indent ,tree[i][TREE_TRUNK][CMD_TEXT] }) if length(tree[i][TREE_BRANCHES])>0 then result&="(\n" result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2) result&=repeat(' ',depth)&")\n" else result&="\n" end if end for return(result) end function --------------------------------------------------------------------------- function dump_script_strings(sequence bin) string result integer table_start integer offset integer len result="" table_start=binstring_to_int(bin[9..12]) if table_start=0 then return("") end if bin=bin[table_start+1..$] offset=1 while offset1 then debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt") else debug_file="hs_debug.txt" end if fh=open(debug_file,"w") if fh!=failure then wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file}) ------------------------------------- print_utf(fh,"[Scripts]\n",{}) dump_script_and_function_info(fh,script_list) print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Global Variables]\n",{}) for i=1 to length(global_list[PAIR_NUM]) do src_position=decode_srcpos(global_list[GLB_POS][i]) print_utf(fh,"%s %d\tID=%d\t%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,global_list[PAIR_NUM][i] ,global_list[PAIR_NAME][i] }) end for print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Builtin Functions]\n",{}) dump_script_and_function_info(fh,function_list) print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Operators]\n",{}) for i=1 to length(operator_list) do src_position=decode_srcpos(operator_list[i][OPER_POS]) print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,operator_list[i][PAIR_NAME] ,operator_list[i][OPER_TRUENAME] ,operator_list[i][PAIR_NUM] }) end for print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Script Dumps]\n",{}) for i=1 to length(all_scripts) do script=all_scripts[i] src_position=decode_srcpos(script[SCRIPT_POS]) print_utf(fh,"%s %d\tID=%d\t%s (%s)\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,script[SCRIPT_ID] ,script[SCRIPT_NAME] ,script[SCRIPT_FULL_NAME] }) idx=script[SCRIPT_PARENT_IDX] if idx then src_position=decode_srcpos(script[SCRIPT_POS]) print_utf(fh,"%s %d\tparent=ID %d\t%s (access to %d nonlocals)\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,all_scripts[idx][SCRIPT_ID] ,all_scripts[idx][SCRIPT_NAME] ,script[SCRIPT_NUM_NONLOCALS] }) end if src_position=decode_srcpos(script[SCRIPT_TRIGGER_TOK][CMD_POS]) print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,script[SCRIPT_TRIGGER_ID] ,script[SCRIPT_TRIGGER_TOK][CMD_TEXT] }) locals=script[SCRIPT_VARIABLES] for j=1 to length(locals) do src_position=decode_srcpos(locals[j][CMD_POS]) print_utf(fh,"%s %d\tvar(frame %d id %d)=%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,locals[j][VAR_FRAME] ,locals[j][VAR_ID] ,locals[j][CMD_TEXT] }) end for print_utf(fh,"%d bytes compiled\n",{length(script[SCRIPT_BINARY])}) -- print_utf(fh,"%s\n\n",{dump_script_tree(script[SCRIPT_AST],0)}) print_utf(fh,"%s",{dump_script_binary(script[SCRIPT_BINARY][CODE_START_BYTE_OFFSET+1..$],0,0,script[SCRIPT_VARIABLES])}) print_utf(fh,"%s\n\n",{dump_script_strings(script[SCRIPT_BINARY])}) end for print_utf(fh,"\n",{}) ------------------------------------- close(fh) else wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file}) end if end if end procedure --------------------------------------------------------------------------- function get_cmd_depth(integer ptr, TokenList data, integer depth) TokenList result Token this result={} while true do --if get_key()=27 then abort(1/0) end if if ptr>length(data) then src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS]) end if this=data[ptr] ptr+=1 if compare("end",this[CMD_TEXT])=0 then depth-=1 elsif compare("begin",this[CMD_TEXT])=0 then depth+=1 end if if depth=0 then exit --break out of the while else result=append(result,this) end if end while return({ptr,result}) end function --------------------------------------------------------------------------- --Look up the name of a script in all current scopes, returning all_scripts --index if found, or 0 if not function lookup_scoped_script_name(string name) string s integer at integer idx idx=cur_script_num while true do if idx then s=all_scripts[idx][SCRIPT_FULL_NAME]&"<-"&name else s=name end if at=find_in_column(s,all_scripts,SCRIPT_FULL_NAME) if at then return at end if if idx=0 then return 0 end if idx=all_scripts[idx][SCRIPT_PARENT_IDX] end while end function --------------------------------------------------------------------------- --identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis function what_kind_and_id(Token command, VarList local_vars) integer kind integer at atom id integer keyword string s s=command[CMD_TEXT] keyword=alpha_tree_data(reserved,s,0) if string_is_int32(s) then kind=KIND_NUMBER id=string_to_object(s,{}) elsif length(s)=0 then kind=KIND_PARENS id=0 elsif s[1] = '@' then kind=KIND_REFERENCE id=0 -- ID always resolves to 0 for references here, since it is too early to know all -- script IDs (really?). The real work is done in binary_compile_recurse elsif find_in_column(s,local_vars,CMD_TEXT) then at=find_in_column(s,local_vars,CMD_TEXT) if local_vars[at][VAR_FRAME]=0 then kind=KIND_LOCAL else kind=KIND_NONLOCAL end if id=local_vars[at][VAR_ID] + 256*local_vars[at][VAR_FRAME] elsif keyword=RESERVE_GLOBAL then kind=KIND_GLOBAL id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])] elsif keyword=RESERVE_FLOW then kind=KIND_FLOW id=flow_list[find_in_column(s,flow_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_FUNCTION then kind=KIND_FUNCTION id=function_list[find_in_column(s,function_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_SCRIPT then kind=KIND_SCRIPT id=script_list[find_in_column(s,script_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_BUILTIN then kind=KIND_MATH id=math_list[find_in_column(s,math_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_MACRO then kind=KIND_MACRO id=0 elsif keyword=RESERVE_KEYWORD then kind=KIND_KEYWORD id=0 elsif s[1] = '"' then src_error( sprintf_utf( "The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct", {shorten_string(s,15)}), command[CMD_POS] ) else --It may be a subscript, in which case the scoped name needs to be looked up at=lookup_scoped_script_name(s) if at then kind=KIND_SCRIPT id=all_scripts[at][SCRIPT_ID] else src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS]) end if end if return({kind,id}) end function --------------------------------------------------------------------------- --identify the kind of a text command function what_kind(Token command, VarList local_vars, integer look_for_operators) integer at integer kind integer keyword string s s=command[CMD_TEXT] keyword=alpha_tree_data(reserved,s,0) if string_is_int32(s) then kind=KIND_NUMBER elsif length(s)=0 then kind=KIND_PARENS elsif s[1] = '@' then kind=KIND_REFERENCE elsif find_in_column(s,local_vars,CMD_TEXT) then at=find_in_column(s,local_vars,CMD_TEXT) if local_vars[at][VAR_FRAME]=0 then kind=KIND_LOCAL else kind=KIND_NONLOCAL end if elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name elsif keyword=RESERVE_GLOBAL then kind=KIND_GLOBAL elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then kind=KIND_FLOW elsif keyword=RESERVE_FUNCTION then kind=KIND_FUNCTION elsif keyword=RESERVE_SCRIPT then kind=KIND_SCRIPT elsif keyword=RESERVE_BUILTIN then kind=KIND_MATH elsif keyword=RESERVE_MACRO then kind=KIND_MACRO elsif keyword=RESERVE_KEYWORD then kind=KIND_KEYWORD elsif s[1] = '"' then src_error( sprintf_utf( "The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct", {shorten_string(s,15)}), command[CMD_POS] ) else --It may be a subscript, in which case the scoped name needs to be looked up at=lookup_scoped_script_name(s) if at then kind=KIND_SCRIPT else src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS]) end if end if return(kind) end function --------------------------------------------------------------------------- --Check whether a math operator is not an assignment, a compare operator, or 'random' function is_simple_math_op(integer id) return (id >= 1 and id <= 9) or id >= 19 end function --------------------------------------------------------------------------- --Given name (CMD_TEXT) of a Node, return whether it is a block: all its arguments are --statements as opposed to expressions (if, for, while, switch, which take a mix need special handling) function block_command(string name) return compare("do",name)=0 or compare("then",name)=0 or compare("else",name)=0 end function --------------------------------------------------------------------------- procedure check_is_expression(Node node, string parent) if alpha_tree_data(reserved,node[TREE_TRUNK][CMD_TEXT],0)=RESERVE_FLOW then src_error( sprintf_utf(COLYEL&"%s"&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&"%s"&COLRED&")" ,{node[TREE_TRUNK][CMD_TEXT],parent}) ,node[TREE_TRUNK][CMD_POS]) end if end procedure --------------------------------------------------------------------------- --this function not used anywhere function how_many_args(string name, integer kind) integer result integer at if kind=KIND_PARENS then result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P elsif kind=KIND_FLOW then result=-1 --flow supports an unknown number of args elsif kind=KIND_OPERATOR then result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions elsif kind=KIND_MATH then at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME) result=length(math_list[at][FUNC_ARGS]) elsif kind=KIND_FUNCTION then at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME) if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then result=-1 else result=length(function_list[at][FUNC_ARGS]) end if elsif kind=KIND_SCRIPT then at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME) result=length(script_list[at][FUNC_ARGS]) else --numbers, variables, etc do not permit args result=0 end if return(result) end function --------------------------------------------------------------------------- --Something that potentially takes args function takes_args(integer kind) if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO or kind=KIND_KEYWORD then return(true) else return(false) end if end function --------------------------------------------------------------------------- --Get next AST node from a list of tokens by pairing up begin,end --Returns {new_ptr,{token,children}} where children is a TokenList not a NodeList function get_script_node(integer ptr, TokenList data, VarList vars) Token command sequence ptr_and_block TokenList block integer kind integer followed_by_begin block={} command=data[ptr] ptr+=1 if compare("end",command[CMD_TEXT])=0 then src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS]) elsif compare("begin",command[CMD_TEXT])=0 then --floaty brackets for order-of-operations-enforcement ptr-=1 command[CMD_TEXT]="" end if kind=what_kind(command,vars,true) followed_by_begin=false if ptr<=length(data) then --there is room for args followed_by_begin=equal("begin",data[ptr][CMD_TEXT]) --distinguishing between functions with and without args means wait() would be ok but noop() would not if takes_args(kind) then if followed_by_begin then --yes, it has args ptr+=1--only increment the pointer when we have args ptr_and_block=get_cmd_depth(ptr,data,1) ptr=ptr_and_block[1] block=ptr_and_block[2] if length(block)=0 and kind=KIND_PARENS then src_error("found empty parentheses not associated with a function call",command[CMD_POS]) end if end if end if end if if kind=KIND_FLOW then --Require certain flow control types to be followed by parentheses. --We have to check this here or earlier, because after building the parse tree --there will be no difference between "()" and omitted parentheses. --(Note that several later checks in normalize_flow_control are redundant to this) --FIXME: Ideally we wouldn't actually check anything here, but instead preserve presence --of empty parentheses to check later (it will matter in future anyway according to typing plan) --FIXME: excludes else, because of 'case(else)' if find(command[CMD_TEXT],flow_requiring_brackets) then if not followed_by_begin then src_error(sprintf_utf(COLYEL&"%s"&COLRED&" must be followed by "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{command[CMD_TEXT]}),command[CMD_POS]) end if end if --else has a special check for 'else,if' mistake. --'not followed_by_begin' excludes 'else(...)if' if equal("else",command[CMD_TEXT]) and ptr<=length(data) and equal("if",data[ptr][CMD_TEXT]) and not followed_by_begin then src_error(COLYEL&"else if"&COLRED&" may not be written with a separator in-between",command[CMD_POS]) --temp=get_script_node(ptr, data, vars) --hack to allow if necessary --temp[2][1][CMD_TEXT]="elseif" --return temp end if end if return({ptr,{command,block}}) end function --------------------------------------------------------------------------- --Returns a NodeList function build_ast(TokenList script_body_toks, VarList vars) integer ptr sequence ptr_and_seminode sequence seminode --Not a Node, has a TokenList instead of NodeList Node node NodeList result result={} ptr=1 while true do ptr_and_seminode=get_script_node(ptr,script_body_toks,vars) ptr=ptr_and_seminode[1] seminode=ptr_and_seminode[2] if length(seminode[TREE_BRANCHES])>0 then --this seminode has arguments that need parsing node={seminode[TREE_TRUNK],build_ast(seminode[TREE_BRANCHES],vars)} else node=seminode end if result=append(result,node) if ptr>length(script_body_toks) then exit --break out of while when there is no more data end if end while return(result) end function --------------------------------------------------------------------------- --When variables are inherited by a subscript all frame numbers need to be incremented function shift_variables_frame(VarList vars) for i=1 to length(vars) do vars[i][VAR_FRAME]+=1 end for return vars end function --------------------------------------------------------------------------- --Returns new VarList of locals. function add_local_variable(VarList vars, Token var) integer id sequence src_position check_undefined_string(var,"local variable name") --Search for an existing local with the same name. Ignore non-locals, because they are shadowed for idx=1 to length(vars) do if equal(vars[idx][CMD_TEXT],var[CMD_TEXT]) then if vars[idx][VAR_FRAME]=0 then --is local src_position=decode_srcpos(vars[idx][CMD_POS]) src_error( sprintf_utf( "Local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLPNK&"%s"&COLRED, {var[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]} ) ,var[CMD_POS] ) end if end if end for --Always add to scope of current script (frame == 0) id=length(vars)-all_scripts[cur_script_num][SCRIPT_NUM_NONLOCALS] --Extend the Token into a VarToken, and prefix to the list, so that it takes --precedence over shadowed variables return {var&{0,id}}&vars end function --------------------------------------------------------------------------- --Find the variable declarations in a script function gather_local_vars(VarList vars, TokenList data) Token this integer ptr ptr=1 while true do this=data[ptr] ptr+=1 if compare("variable",this[CMD_TEXT])=0 then if ptr>length(data) then src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if this=data[ptr] ptr+=1 if compare("begin",this[CMD_TEXT])=0 then while true do if ptr>length(data) then src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if this=data[ptr] ptr+=1 if compare("end",this[CMD_TEXT])=0 then exit--break the while end if vars=add_local_variable(vars,this) end while else src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if end if if ptr>length(data) then exit --break out of the while end if end while return(vars) end function --------------------------------------------------------------------------- --There are only two, hardcoded, macros right now: tracevalue, assert. function expand_macros(NodeList tree) NodeList newtree NodeList newargs string new_string Token string_token sequence src_position sequence temp srcpos pos integer ptr ptr=1 while ptr<=length(tree) do --recurse first, because tree[ptr] might be split up (as in 'assert') if length(tree[ptr][TREE_BRANCHES]) then tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES]) end if if equal("tracevalue",tree[ptr][TREE_TRUNK][CMD_TEXT]) then newargs={} tree[ptr][TREE_TRUNK][CMD_TEXT]="tracevalueinternal" pos=tree[ptr][TREE_TRUNK][CMD_POS] for i=1 to length(tree[ptr][TREE_BRANCHES]) do new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i]) string_list=append(string_list,new_string) string_token={sprintf("@$string%d",{length(string_list)}),pos} newargs&={{string_token,{}},tree[ptr][TREE_BRANCHES][i]} end for tree[ptr][TREE_BRANCHES]=newargs end if if equal("assert",tree[ptr][TREE_TRUNK][CMD_TEXT]) then --assert(condition) --> if (not(condition)) then ($assert expression string="condition", assertfailure) pos=tree[ptr][TREE_TRUNK][CMD_POS] src_position=decode_srcpos(pos) if length(tree[ptr][TREE_BRANCHES])!=1 then src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos) end if newtree={{{"if",pos},{}}, {{"then",pos},{}}} newtree[1][TREE_BRANCHES]={{{"not",pos},{tree[ptr][TREE_BRANCHES][1]}}} new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])}) string_list=append(string_list,new_string) string_token={sprintf("@$string%d",{length(string_list)}),pos} temp={{{"setstringfromtable",pos},{}}, {{"assertfailure",pos},{}}} temp[1][TREE_BRANCHES]={{{enforce_constants("assertexpressionstring"),pos},{}},{string_token,{}}} --stringnum, string newtree[2][TREE_BRANCHES]=temp tree=replace_slice(tree,ptr,ptr,newtree) --tree[ptr..ptr]=newtree ptr+=1 end if ptr+=1 end while return(tree) end function --------------------------------------------------------------------------- --tree[ptr] is a partially constructed if Node (lacking else child), followed either by an else or elseif Node. --Handle else/elseif, parenting to tree[pt], and return the new tree. function build_else_node(NodeList tree, integer ptr) NodeList elseargs string nextcmd integer eptr srcpos pos nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT] pos=tree[ptr+1][TREE_TRUNK][CMD_POS] if equal("else",nextcmd) then --found else tree[ptr][TREE_BRANCHES]&={tree[ptr+1]} tree=delete_element(tree,ptr+1) else -- equal("elseif",nextcmd) --Split off the if elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} } --Grab the rest of this if,elseif,...,else chain, to put inside the else eptr=ptr+2 while eptr<=length(tree) do nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT] --Stop when we see an else but otherwise we don't check correctness until we recurse if equal(nextcmd,"else") then eptr+=1 exit elsif find(nextcmd,{"elseif","then"}) then --continue else exit end if eptr+=1 end while elseargs&=tree[ptr+2..eptr-1] tree=tree[1..ptr]&tree[eptr..$] tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} } end if return tree end function --------------------------------------------------------------------------- --Build switch AST Nodes, return new tree function build_switch_node(NodeList tree, VarList vars, integer ptr) srcpos pos srcpos argpos NodeList args Token thiscmd NodeList rawargs NodeList caseargs Token casecmd string lastnodename sequence collected --either a Node or {} sequence temp integer have_else srcpos elsepos integer expect_case integer kind atom id string else_error_msg else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block" pos=tree[ptr][TREE_TRUNK][CMD_POS] args=tree[ptr][TREE_BRANCHES] --start with the condition, length 1 if length(args)>1 then src_error(sprintf_utf( COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one. (The body of the switch should go in its "&COLYEL&"do(...)"&COLRED&" block instead.)" ,{length(args)} ),args[2][TREE_TRUNK][CMD_POS]) elsif length(args)=0 then src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos) end if check_is_expression(args[1],"switch") if ptr>=length(tree) then src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos) end if thiscmd=tree[ptr+1][TREE_TRUNK] if equal("do",thiscmd[CMD_TEXT])=0 then src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS]) end if --found do, processing starts here (build arguments to switch, after the expression) rawargs=tree[ptr+1][TREE_BRANCHES] --contents of the 'do' lastnodename="" have_else=false expect_case=false collected={} for j=1 to length(rawargs) do thiscmd=rawargs[j][TREE_TRUNK] argpos=thiscmd[CMD_POS] if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"." ,{thiscmd[CMD_TEXT]}),argpos) end if --to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then --Old syntax else(...) block if length(collected) then args&={collected} collected={} end if if have_else then src_error(else_error_msg,elsepos) end if expect_case=false if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then --Last case is new-style syntax and wasn't followed by any statements --Lets just disallow this to prevent a little confusion src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "& "Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions." ,rawargs[j-1][TREE_TRUNK][CMD_POS]) end if if j!=length(rawargs) then if length(rawargs[j][TREE_BRANCHES])=0 then else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?" end if src_error(else_error_msg,argpos) end if --convert it to a do args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} } elsepos=argpos have_else=true elsif equal("case",thiscmd[CMD_TEXT]) then if length(collected) then args&={collected} collected={} end if if have_else then --Note we disallow case(else) case(1) but allow case(else,1) src_error(else_error_msg,elsepos) end if expect_case=false caseargs=rawargs[j][TREE_BRANCHES] --Call a couple passes early here, so that constant expressions are allowed caseargs=collapse_floaty_brackets(caseargs) caseargs=optimize_script(caseargs,vars) --Check contents are only either integers, or "else", and append to args for k=1 to length(caseargs) do casecmd=caseargs[k][TREE_TRUNK] if equal(casecmd[CMD_TEXT],"else") then if have_else then src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS]) end if have_else=true elsepos=argpos --don't emit anything: next do is the 'else' one else temp=what_kind_and_id(casecmd,vars) kind=temp[1] id=temp[2] if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED ,{casecmd[CMD_TEXT]}),casecmd[CMD_POS]) end if args&={caseargs[k]} end if end for else --Disallow "case() do() garbage" if expect_case then src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos) end if if length(collected)=0 then --Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it if equal("do",thiscmd[CMD_TEXT]) then args&={rawargs[j]} expect_case=true else collected={{"do",argpos},{rawargs[j]}} end if else --Continue collecting collected[TREE_BRANCHES]&={rawargs[j]} end if end if lastnodename=thiscmd[CMD_TEXT] end for if length(collected) then args&={collected} end if if length(args)>1 and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then --Last case (possibly an else) is new-style syntax and wasn't followed by any statements, --so add the missing do. Don't need to do this, but I'd like to keep things tidy --(srcpos is of the last case) args&={ {{"do",argpos},{}} } end if if have_else=false then --Insert a dummy do default block since else has been left out args&={ {{"do",pos},{}} } end if tree[ptr][TREE_BRANCHES]=args --delete the do block return delete_element(tree,ptr+1) end function --------------------------------------------------------------------------- --Given a variable token for 'for' or 'setvariable', etc. (parent) and list of locals/non-locals 'vars', --return a an integer ID token, or throw an error if not a variable. function convert_variable_to_id(Token var, VarList locals, string parent) sequence kind_and_id atom id integer argkind integer var_at kind_and_id=what_kind_and_id(var,locals) argkind=kind_and_id[1] id=kind_and_id[2] if argkind=KIND_LOCAL or argkind=KIND_NONLOCAL then id=-1-id elsif argkind=KIND_GLOBAL then --id unmodified else --It's not a variable. bad! src_error(sprintf_utf( "first argument of "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED ,{parent,KIND_LONGNAMES[argkind],var[CMD_TEXT]} ),var[CMD_POS]) end if return {sprintf("%d",{id}),var[CMD_POS]} end function --------------------------------------------------------------------------- --parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements --Note that several of these checks are now redundant to the check for 'begin' performed in get_script_node function normalize_flow_control(NodeList tree, VarList vars, string parent) integer ptr string s string nextcmd NodeList args Token ctr srcpos pos integer argkind integer var_at integer keyword ptr=1 while ptr<=length(tree) do s=tree[ptr][TREE_TRUNK][CMD_TEXT] pos=tree[ptr][TREE_TRUNK][CMD_POS] args=tree[ptr][TREE_BRANCHES] keyword=alpha_tree_data(reserved,s,0) if keyword=RESERVE_FLOW then if equal(s,"then") or equal(s,"else") or equal(s,"do") then --these are checked for correct parents below. elsif block_command(parent)=false then src_error(COLYEL&s&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&parent&COLRED&")",pos) end if end if if compare("if",s)=0 then if length(args)>1 then src_error(sprintf_utf( COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions" ,{length(args)} ),args[2][TREE_TRUNK][CMD_POS]) elsif length(args)=0 then src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos) end if check_is_expression(args[1],s) if ptr1 then src_error(sprintf_utf( COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions" ,{length(args)} ),args[2][TREE_TRUNK][CMD_POS]) elsif length(args)=0 then src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos) end if check_is_expression(args[1],s) if ptr4 then src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(args)}),args[5][TREE_TRUNK][CMD_POS]) elsif length(args)=3 then --append default step value args&={ {{"1",pos},{}} } end if ctr=args[1][TREE_TRUNK] argkind=what_kind(ctr,vars,true) if argkind=KIND_LOCAL then used_locals=append(used_locals,ctr[CMD_TEXT]) elsif argkind=KIND_GLOBAL then --warn, then translate into a numeric reference to a variable src_warn(sprintf_utf( "Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop" ,{ctr[CMD_TEXT]} ),ctr[CMD_POS]) end if args[1][TREE_TRUNK]=convert_variable_to_id(ctr,vars,"for") check_is_expression(args[2],s) check_is_expression(args[3],s) check_is_expression(args[4],s) --Handle "do" if ptr1 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have only one." ,{length(args)} ),args[2][TREE_TRUNK][CMD_POS]) elsif length(args)=0 then src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit script"&COLRED,{}),pos) end if elsif compare("break",s)=0 or compare("continue",s)=0 then if length(args)>1 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one." ,{length(args)} ),args[2][TREE_TRUNK][CMD_POS]) elsif length(args)=0 then --append default value tree[ptr][TREE_BRANCHES]={{{"1",pos},{}}} end if elsif compare("exitscript",s)=0 then if length(args)>0 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit returning"&COLRED ,{length(args)} ),args[1][TREE_TRUNK][CMD_POS]) end if elsif compare("switch",s)=0 then tree=build_switch_node(tree,vars,ptr) elsif compare("case",s)=0 then src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos) elsif compare("subscript",s)=0 or compare("variable",s)=0 then --Does nothing; remove. (Kept until now to check they isn't placed in a silly place) tree=delete_element(tree,ptr) continue end if if equal(s,"") then --Floaty brackets (they haven't been removed yet). Look to parent instead. s=parent end if tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,s) ptr+=1 end while return(tree) end function --------------------------------------------------------------------------- --A modification of Dijkstra's shunting-yard algorithm --(This could probably subsume build_ast too) function convert_operators(NodeList tree) NodeList new_tree sequence stack --{int,Node} pairs Node opnode integer ptr integer at integer priority integer nonoperator_before stack={} new_tree={} ptr=1 nonoperator_before=false while ptr<=length(tree) do --for each peer branch on the tree at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME) if at>0 then if nonoperator_before=false then --there is no room for the before-operand. Bad! src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS]) elsif ptr=length(tree) then --there is no room for the after-operand. Bad! src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS]) end if priority=operator_list[at][PAIR_NUM] --convert the operator to its true function name (which might be the same) tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME] while length(stack) and priority>=stack[$][1] do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while stack&={{priority,tree[ptr]}} nonoperator_before=false else if nonoperator_before then --Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first while length(stack) do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while end if tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES]) new_tree&={tree[ptr]} nonoperator_before=true end if ptr+=1 end while --copy of above while length(stack) do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while return new_tree end function --------------------------------------------------------------------------- --Ensure correct number of arguments, add defaults, and translate variables to IDs in assignments function fix_arguments(Node tree, integer kind, integer id, sequence list, VarList vars) integer at integer argnum,maxargs NodeList args string funcname srcpos funcpos funcname=tree[TREE_TRUNK][CMD_TEXT] funcpos=tree[TREE_TRUNK][CMD_POS] args=tree[TREE_BRANCHES] argnum=length(args) --Lookup ID because name might be scoped at=find_in_column(id,list,PAIR_NUM) if equal(list[at][FUNC_ARGS],VAR_ARGS) then return(args) else maxargs=length(list[at][FUNC_ARGS]) end if if maxargs < argnum then src_error(sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments" ,{KIND_LONGNAMES[kind],funcname,maxargs,argnum} ),args[maxargs+1][TREE_TRUNK][CMD_POS]) elsif maxargs > argnum then --add defaults if not enough args are present if kind=KIND_MATH then --special processing for math if list[at][PAIR_NUM]<=15 or list[at][PAIR_NUM]>=19 then --math shouldnt have defaults src_error(sprintf_utf( "math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d" ,{funcname,argnum,maxargs} ),funcpos) else --variable assignment commands can have defaults if argnum = 0 then --no defaults for first argument of variable function src_error(sprintf_utf( "variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1" ,{funcname,argnum} ),funcpos) elsif argnum = 1 then --make defaults for second arg of variable function if list[at][PAIR_NUM]=16 then --setvariable args=append(args,{ {"0",funcpos} ,{} }) else --increment and decrement args=append(args,{ {"1",funcpos} ,{} }) end if end if end if else --normal processing for script and function for i=argnum+1 to maxargs do if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then src_error(sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d" ,{KIND_LONGNAMES[kind],funcname,i} ),funcpos) end if args=append(args,{ {sprintf("%d",{list[at][FUNC_ARGS][i]}),funcpos} ,{} }) end for end if end if --Translate variables in the variable manipulation functions if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<=18 then args[1][TREE_TRUNK]=convert_variable_to_id(args[1][TREE_TRUNK],vars,funcname) end if return(args) end function --------------------------------------------------------------------------- --Calls fix_arguments on each non-flow node with arguments function normalize_arguments(NodeList tree, VarList vars) sequence kind_and_id integer kind atom id --Unlike the flow normalization and operator translation we do not insert/delete elements from --the current level, so we can use a "for" safely, and dont need a "while" for i=1 to length(tree) do kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars) kind=kind_and_id[1] id=kind_and_id[2] if kind=KIND_SCRIPT then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,script_list,vars) elsif kind=KIND_FUNCTION then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,function_list,vars) elsif kind=KIND_MATH then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,math_list,vars) end if --number, flow, global, local, parens need no argchecking if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars) end if end for return(tree) end function --------------------------------------------------------------------------- --returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order function output_word(integer n) integer b1,b2 b1=and_bits(n,#FF) b2=floor(and_bits(n,#FFFF)/256) return({b1,b2}) end function --------------------------------------------------------------------------- function convert_to_bytes(sequence s) sequence result result={} for i=1 to length(s) do result&=int_to_bytes(s[i]) end for return(result) end function --------------------------------------------------------------------------- --Encode a unicode (UTF32/native Euphoria) string in the format the engine --requires, which is currently Latin-1 function encode_ohr(string string, integer maxbytes=0) for i=1 to length(string) do if string[i]>=256 then string[i]='?' end if end for if maxbytes and maxbytes7 and equal("$string",str_temp[1..7]) then value_temp=value(str_temp[8..length(str_temp)]) result&=KIND_NUMBER result&=table_string(value_temp[2]) else src_error(sprintf_utf("Reference "&COLYEL&"@%s"&COLRED&" not understood: references must be the name of a global variable or a script",{str_temp}),cmdpos) end if end if end if elsif kind=KIND_GLOBAL then result&=kind result&=id elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then result&=kind result&=id elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then --Use id from what_kind_and_id if kind=KIND_FUNCTION then if max_used_function= initial_data_len then can_reference=false end if result[3+i]=recurse_result end for --Now either remove or replace the temporary stuff if can_reference then compiled_data=compiled_data[1..initial_data_len] else compiled_data[initial_data_len+1 .. initial_data_len+length(result)]=result return(initial_data_len) end if else src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),cmdpos) end if if not fast_mode and can_reference then at=match(result,compiled_data) if at>0 then if initial_data_len!=length(compiled_data) then simple_error("compiler bug! Unexpected compiled_data length") end if --found existing data exactly like this command, so just return a reference to it if at=1 then simple_error("compiler bug! node matches root node") end if return(at-1) end if end if --append data directly compiled_data&=result return(initial_data_len) end function --------------------------------------------------------------------------- function binary_compile(Script script) NodeList tree sequence result integer recurse_ret integer parent_idx integer parent_id parent_idx=script[SCRIPT_PARENT_IDX] if parent_idx=0 then parent_id=0 else parent_id=all_scripts[parent_idx][SCRIPT_ID] end if string_table={} compiled_data={} --binary data is in a mix of 16-bit and 32-bit signed words. --the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit. --output header --0..1: the zero-rooted byte-offset of the first executable code byte result=output_word(CODE_START_BYTE_OFFSET) --2..3: the number of local variables (including args but not nonlocals) result&=output_word(length(script[SCRIPT_VARIABLES]) - script[SCRIPT_NUM_NONLOCALS]) --4..5: the number of arguments the script takes (also in SCRIPTS.TXT) result&=output_word(length(script[SCRIPT_ARG_TOKS])) --6..7: the format of the command data (presently used to indicate 32-bit encoding) result&=output_word(HSZ_FORMAT_VERSION) --8..11: a 32bit offset to the string literal table (in bytes), we don't know it yet result&=output_word(0) result&=output_word(0) --12..13: parent script ID or 0 if none result&=output_word(parent_id) --14..15: Nesting depth result&=output_word(script[SCRIPT_NEST_DEPTH]) --16..17: number of nonlocals result&=output_word(script[SCRIPT_NUM_NONLOCALS]) --what follows is command data in the format [kindID,Value,argcount,argpointerlist] --numbers and variables have no argcount or argpointerlist --an argpointer is the zero-rooted word-offset of the argument relative --to the start of the executable commands. I realise that this format is --unnecessarily complicated. I had hoped to get benefits of being able to --store frequently reused commands only once and then just point to them, --but in actual practice, it isnt worth the trouble, since the only --commands that tend to be redundant are the really short ones. --the first command is always a "do". there can be only one top-level command tree=script[SCRIPT_AST] if length(tree)!=1 then simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)})) end if reenter_timing_zone("binary_compile_recurse") recurse_ret=binary_compile_recurse(tree[1],script[SCRIPT_VARIABLES]) if recurse_ret!=0 then simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret})) end if exit_timing_zone() result&=convert_to_bytes(compiled_data) compiled_data={} --append with a table of string literals and give its offset if length(string_table)>0 then result[9..12]=int_to_bytes(length(result)) result&=string_table end if return(result) end function --------------------------------------------------------------------------- --floaty brackets are un-needed after the operators have been translated. function collapse_floaty_brackets(NodeList tree) integer i NodeList graft i=1 while i<=length(tree) do if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then --found a floaty-bracket graft=tree[i][TREE_BRANCHES] if length(graft)!=1 then src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS]) end if tree[i]=graft[1] --delete_element(tree,i) --tree=insert_sequence(tree,graft,i) else if length(tree[i][TREE_BRANCHES]) then tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES]) end if i+=1 end if end while return(tree) end function --------------------------------------------------------------------------- --parent is CMD_TEXT of parent node function sanity_check(NodeList tree, VarList vars, string parent) string s sequence kind_and_id integer kind atom id srcpos pos for i=1 to length(tree) do s=tree[i][TREE_TRUNK][CMD_TEXT] pos=tree[i][TREE_TRUNK][CMD_POS] kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars) kind=kind_and_id[1] id=kind_and_id[2] if (compare("if",parent)=0) and i=1 then if kind=KIND_NUMBER then if id then src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),pos) else src_warn("Condition is always false",pos) end if elsif kind=KIND_FLOW then src_warn(sprintf_utf("Should not use flow control command "&COLYEL&"%s"&COLRED&" as condition for if",{s}),pos) end if elsif block_command(parent) then if kind=KIND_NUMBER then src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here." ,{id}),pos) elsif kind=KIND_GLOBAL then src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here." ,{s}),pos) elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here." ,{s}),pos) elsif kind=KIND_MATH and (id<=15 or id=19 or id>=22) then -- setvariable, increment, decrement, logand, logor have side effects src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded" ,{s}),pos) end if end if if kind=KIND_GLOBAL then if not find(s,used_globals) then used_globals=append(used_globals,s) end if elsif kind=KIND_LOCAL then if not find(s,used_locals) then used_locals=append(used_locals,s) end if end if if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT]) end if end for return(tree) end function --------------------------------------------------------------------------- --simulate a result overflowing a signed 32 bit register function overflow_int32(atom val) --maybe check whether val is so large that this is could be inaccurate? atom ret ret=mod(val,#100000000) if ret>2147483647 then ret-=#100000000 end if return(ret) end function --------------------------------------------------------------------------- function optimized_arg(Node tree, VarList vars) sequence kind_and_id integer kind atom id object arg1,arg2 kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars) kind=kind_and_id[1] id=kind_and_id[2] if kind=KIND_NUMBER then return(id) elsif kind=KIND_MATH and (id<=15 or id>=19) then arg1=optimized_arg(tree[TREE_BRANCHES][1],vars) if equal(arg1,{}) then arg1=tree[TREE_BRANCHES][1] end if if length(tree[TREE_BRANCHES])>=2 then arg2=optimized_arg(tree[TREE_BRANCHES][2],vars) else arg2=0 end if if equal(arg2,{}) then arg2=tree[TREE_BRANCHES][2] end if if atom(arg1) and atom(arg2) then if id=0 then --random if arg1=arg2 then return(arg1) end if elsif id=1 then --exponent if arg2>0 then --if too large, Euphoria will signal an overflow, but I'm also worried --about overflow_int32 giving inaccurate results if log(abs(arg1)+1)+log(abs(arg2)+1)arg2) elsif id=14 then --lessthanorequalto return(arg1<=arg2) elsif id=15 then --greaterthanorequalto return(arg1>=arg2) elsif id=19 then --not return(arg1=0) elsif id=22 then --logxor return((arg1=0)!=(arg2=0)) elsif id=23 then --abs return(overflow_int32(abs(arg1))) elsif id=24 then --sign return((arg1>0)-(arg1<0)) elsif id=25 then --sqrt if arg1<0 then src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS]) end if return(floor(sqrt(arg1)+0.5)) end if end if --have to allow for side effects of shortcut evaluating operators by only checking first arg if atom(arg1) then if id=4 then --multiply if arg1=0 then return 0 elsif arg1=1 then return arg2 end if elsif id=6 then --add if arg1=0 then return arg2 end if elsif id=20 then --logand if arg1=0 then return 0 end if elsif id=21 then --logor if arg1!=0 then return 1 end if end if end if if atom(arg2) then if id=4 then --multiply if arg2=0 then return 0 elsif arg2=1 then return arg1 end if elsif id=5 then --subtract if arg2=0 then return arg1 end if elsif id=6 then --add if arg2=0 then return arg1 end if end if end if end if return({}) --return nonatom end function --------------------------------------------------------------------------- --goes through a script simplifying expressions that always have the same value function optimize_script(NodeList tree, VarList vars) object arg for i=1 to length(tree) do arg=optimized_arg(tree[i],vars) if atom(arg) then tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg) tree[i][TREE_BRANCHES]={} elsif sequence(arg) and length(arg) then tree[i]=arg end if if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars) end if end for return(tree) end function --------------------------------------------------------------------------- --FIXME: if a local is used only in a subscript it is reported as unused procedure warn_unused_locals(VarList vars) if find('u',optlist) then for i=1 to length(vars) do if vars[i][VAR_FRAME]=0 and not find(vars[i][CMD_TEXT],used_locals) then src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS]) end if end for end if end procedure --------------------------------------------------------------------------- procedure warn_unused_globals() integer at if find('u',optlist) then for i=1 to length(global_list[PAIR_NUM]) do at=find(global_list[PAIR_NAME][i],used_globals) if not at then src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_POS][i]) end if end for end if end procedure --------------------------------------------------------------------------- --script_idx is all_scripts index procedure compile_a_script(integer script_idx) Script script NodeList script_tree VarList local_vars TokenList args integer num_nonlocals integer parent_idx integer trigger_idx integer trigger cur_script_num=script_idx script=all_scripts[script_idx] trigger_idx=find_in_column(script[SCRIPT_TRIGGER_TOK][CMD_TEXT],trigger_list,PAIR_NAME) trigger=trigger_list[trigger_idx][PAIR_NUM] current_script=script[SCRIPT_FULL_NAME] parent_idx=script[SCRIPT_PARENT_IDX] reenter_timing_zone("gather_local_vars") if parent_idx then local_vars=shift_variables_frame(all_scripts[parent_idx][SCRIPT_VARIABLES]) else local_vars={} end if num_nonlocals=length(local_vars) script&={ trigger --trigger type ,num_nonlocals } all_scripts[script_idx]=script args=script[SCRIPT_ARG_TOKS] --The arguments are the first several local variables for i=1 to length(args) do --Check for name conflicts local_vars=add_local_variable(local_vars,args[i]) end for local_vars=gather_local_vars(local_vars,script[SCRIPT_BODY_TOKS]) if length(local_vars)-num_nonlocals > MAX_LOCALS then simple_error( sprintf_utf("Script "&COLYEL&"%s"&COLRED&" has %d arguments and variables but at most %d are supported", {script[SCRIPT_FULL_NAME],length(local_vars)-num_nonlocals,MAX_LOCALS}) ) end if exit_timing_zone() used_locals={} reenter_timing_zone("build_ast") script_tree=build_ast(script[SCRIPT_BODY_TOKS],local_vars) exit_timing_zone() reenter_timing_zone("convert_operators") script_tree=convert_operators(script_tree) exit_timing_zone() reenter_timing_zone("expand_macros") script_tree=expand_macros(script_tree) exit_timing_zone() reenter_timing_zone("normalize_flow_control") script_tree=normalize_flow_control(script_tree,local_vars,"") exit_timing_zone() reenter_timing_zone("normalize_arguments") script_tree=normalize_arguments(script_tree,local_vars) exit_timing_zone() reenter_timing_zone("collapse_floaty_brackets") script_tree=collapse_floaty_brackets(script_tree) exit_timing_zone() if not fast_mode then reenter_timing_zone("optimize_script") script_tree=optimize_script(script_tree,local_vars) exit_timing_zone() reenter_timing_zone("sanity_check") script_tree=sanity_check(script_tree,local_vars,"") exit_timing_zone() warn_unused_locals(local_vars) end if script&={ script_tree ,local_vars ,{} --compiled data to go into the HSZ lumps } all_scripts[script_idx]=script reenter_timing_zone("binary_compile") all_scripts[script_idx][SCRIPT_BINARY]=binary_compile(script) exit_timing_zone() current_script="" cur_script_num=0 end procedure --------------------------------------------------------------------------- --Parse argument declarations (initially containing default values) --in a script argument list, returns {arg_names_list, default_vals_list} pair --where any default values have been removed from the argument names function process_arglist(TokenList args) TokenList arglist sequence defaults integer i integer using_defaults arglist={} defaults={} using_defaults=false i=1 while i<=length(args) do if compare("=",args[i][CMD_TEXT])=0 then src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS]) end if check_undefined_string(args[i],"argument name") if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS]) end if if i2048 then simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1})) records=2048 end if --header size in bytes (offset to location table) result=output_word(6) --file format version result&=output_word(0) --number of records in location table result&=output_word(records) offset=length(result)+records*2 id=0 i=1 encoded_names={} while i<=length(function_list) do if function_list[i][PAIR_NUM]0 then temp=without_extension(fname) --you can't slice a function result directly! :( fname=sprintf("%s~%d.%s",{temp[1..$-1],suffix,extension_only(fname)}) end if --color_print("writing %d %s\n",{i,fname}) if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then simple_error("unable to write a script source lump") end if end for end procedure --------------------------------------------------------------------------- procedure write_output_file() sequence header integer fh object lh enter_timing_zone("Writing output file") header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,HSZ_FORMAT_VERSION,max_used_function}) if length(all_scripts) then fh=open(dest_file,"wb") if fh!=-1 then wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file}) --write header and version if write_lump(fh,"HS",header)=false then simple_error("unable to write header") end if --write script index (old file) if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then simple_error("unable to write script index") end if --write script index (new file) if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then simple_error("unable to write binary script index") end if --write script commands list if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then simple_error("unable to write commands listing") end if --write each script for i=1 to length(all_scripts) do if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][SCRIPT_ID]}),all_scripts[i][SCRIPT_BINARY])=false then simple_error(sprintf_utf("internal problem: unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][SCRIPT_NAME]})) end if end for if not find('n',optlist) then --no debug info --write copy of scripts wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file}) lh=begin_lump(fh,"source.lumped") if equal(lh,false) then simple_error("unable to write scripts source file") end if write_script_files(fh) if end_lump(lh)=false then simple_error("unable to finish write of scripts source file") end if end if close(fh) else simple_error(sprintf_utf("attempt to open "&COLYEL&"%s"&COLRED&" failed",{dest_file})) end if else color_print("no scripts to output\n",{}) end if exit_timing_zone() end procedure --------------------------------------------------------------------------- init() load_and_lex_all() show_source_info() preliminary_parse_pass() parse_top_level() compile_each_script() dump_debug_report() write_output_file() run_time=time()-start_time color_print("done (%g seconds)\n",{run_time}) if find('t',optlist) then print_timing_data() end if opt_wait_for_key() if was_warnings = true then abort(2) end if