-- HamsterSpeak Compiler --(C) Copyright 2002 James Paige and Hamster Republic Productions -- Please read LICENSE.txt for GPL License details and disclaimer of liability -- This is a compiler for HamsterSpeak scripts used for plotscripting in -- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com -- This code is written in Euphoria 4.0. You can get the Free Open-Source -- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend -- David Cuny's EE editor --------------------------------------------------------------------------- --Changelog --3Pe 2013-01-03 Faster compile times --3Pd 2012-10-24 Fix \x escape code --3Pc 2012-10-09 Change srcpos encoding to remove source size limits --3Pb 2012-10-08 \x escape code --3Pa 2012-08-24 --reuse-ids option --3P 2012-08-18 Change way include is lexed: quotes no longer needed -- [], {}, . now totally disallowed -- Add < and > operators -- Add plotscrversion block and new HS header lump -- New switch syntax: do after case optional, case(else) -- Added elseif --3Of 2012-08-18 Lexer bugfix --3Oe 2012-04-11 Add \n and \t escape codes --3Od 2012-03-03 Change directory search order slightly --3Oc 2012-02-07 Increased MAXGLOBAL to 16383 --3Ob 2012-02-07 Make the max script global number set by a constant --3Oa 2012-01-20 Fix lexer allowing newlines in strings --3O 2012-01-19 Unicode support --3Nd 2011-10-16 Fix commands.bin generation bug --3Nc 2011-08-28 Additional expression optimisations --3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow --3Na 2011-07-25 Add -x option for Hamster Whisper's benefit --3N 2011-07-12 Add assert --3Ma 2010-10-21 Correctly strip \r to fix newlines ... again. --3M 2010-05-02 abs, sign, sqrt; fix error reporting --3L 2010-04-29 tracevalue support, involving a few internal changes --3Ke 2010-04-18 Add -t commandline option --3Kd 2010-04-14 Quite a few bugfixes in optimizer --3Kc 2010-04-14 Start checking well-formedness of numbers with lexer --3Kb 2010-04-12 Whole lot of bugfixes in script declarations and -- checking for unreserved identifier names --3Ka 2010-04-12 Throw error if source file contains binary --3K 2010-04-12 Error column reporting (introducing srcpos); lump -- original source files into source.lumped --3Ja 2010-04-07 Wrote a (nearly) proper lexer --3J 2009-12-05 Multiple function names may now alias the same id --3I 2009-07-27 Export a backup copy of full source to source.txt lump -- disable with -n option --3H 2009-06-18 Export function names to commands.bin lump --3Gk 2009-05-25 Fix short references crash --3Gj 2009-01-27 Show future-compatability warning when using square -- brackets [] and curly brackets {} in names --3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake --3Gh 2008-07-14 Disable warnings for unused variables by default, and -- Add -u option to enable warnings for unused variables. --3Gg 2008-03-01 Raise global variable limits --3Gf 2008-02-11 Give user another chance on missing Y key :P --3Ge 2007-07-01 String conversion rewrite to allow more whitespace --3Gd 2007-07-01 Allow constants for new syntax default args --3Gc 2007-05-29 Handle empty paratheses correctly, and use -- lowercase ".hs" extension --3Gb 2007-05-09 Throw error on missing include file --3Ga 2007-05-09 String length not limited to 40 --3G 2007-02-09 Offset to string literal tables given as a 32-bit -- int, script format version++ to 2. --3F 2006-12-11 Support for variable number of arguments to functions -- by giving -1 as the number of args in the define block --3Ec 2006-11-30 while(true) no longer produces a warning --3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes --3Ea 2006-09-03 Actually export triggers in scripts.bin --3E 2006-08-30 "define trigger" support --3D 2006-08-16 String literal lookup tables added to end of -- HSZ lumps replacing setstring + appendstring --3C 2006-08-15 New syntax for giving default script arguments, -- and for leaving out definescript --3B 2006-08-13 HSX lumps renamed to HSZ --3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format -- version added to HSX header. Current is 1 --2Na 2006-07-07 Minor update to add new logical operators --2N 2006-07-07 Added not() logic function --2M 2006-07-06 Set exit code on warnings --2L 2006-05-13 Added switch statement (+ case keyword) --2K 2006-05-01 Added @scriptname and @globalvariable syntax to -- return script or global ID number at compile-time -- not run-time (for use with "run script by ID" and -- "read global" and "write global") --2J 2006-04-10 Added break, continue, exitscript, exitreturning -- flow statements. Also fixed some return bugs --2I 2006-04-04 Extended HSX header to include number of arguments -- to a script, to really fix arguments-overflow-into -- -locals bug --2H 2006-03-29 Display better help and wait for keypress when run -- by double-clicking the icon. Added -k command line -- option to skip waiting for keypress --2G 2005-10-03 Additional operators $+ and $= -- Mention GPL in help text --2F 2005-07-24 Strings implemented: -- $id="..." -> setstring -- $id+"..." -> appendstring --2E 2005-02-15 Changed license to GPL -- Added += and -= operators thanks to a patch from -- The Mad Cacti and Fyrewulff --2D 2002-08-03 Only a small change, strip out \r from lines of -- the script as we read them to avoid errors related -- to busted newlines --2C 2002-03-05 Fixed some bugs that could cause crashes when -- non-printable characters exist in the input file. -- (naturally the script will not compile right, but -- at least it will not crash) --2B 2001-06-06 Added := as a commaless separater so it can be -- defined as an operator --2A 2001-05-04 Fixed -w command line option when used with -z --2 First Release --------------------------------------------------------------------------- without warning --to avoid annoying warnings without type_check --for a small speed boost --with profile_time --time profiling include hsspiffy.e --various routines, sequence manipulation - James Paige include graphics.e --standard library, needed for color output include machine.e --needed for int_to_bytes include std/math.e --needed for mod include euphoria/unicode.e include euphoria/ucstypes.e include euphoria/io_with_unicode.e --------------------------------------------------------------------------- --constants-- constant false=0 constant true=1 -- increment COMPILER_VERSION only for major language-altering changes constant COMPILER_VERSION="3" -- For small changes increment the second letter of COMPILER_SUB_VERSION -- For large changes increment the first letter and reset the second letter --*Please make sure this is 2 chararacters long! Append a space if required.* constant COMPILER_SUB_VERSION="Pe" constant COPYRIGHT_DATE="2002" --This is the oldest version of plotscr.hsd which is supported (feel free to --increment this whenever using an older plotscr.hsd means you're missing out --on new but nonessential features; there's no harm) --*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION* constant MIN_PLOTSCR_VERSION="3P " --these constants are color-flags. May add more colors, up to #F8FF --(these codes are at the end of the first private use area) constant COL_FIRST=#F8E0 constant COLYEL=#F8E0 constant COLRED=#F8E1 constant COLPNK=#F8E2 constant COLWHI=#F8E3 constant COLBWHI=#F8E4 constant COL_LAST=#F8FF --Euphoria's color constants vary depending on the platform, so we avoid them constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE} constant LEX_CHAR=0 constant LEX_SPACE=1 constant LEX_BEGIN=2 constant LEX_END=3 constant LEX_KEYWORD=4 constant LEX_COMMENT=5 constant LEX_STRING=6 constant LEX_COMMA=7 constant LEX_WARN=8 constant LEX_BINARY=9 constant LEX_DIGIT=10 constant LEX_KEYWORD2=11 constant FILE_NAME=1 constant FILE_TEXT=2 constant FILE_LINE_EXTENTS=3 constant POS_FILENAME=1 constant POS_LINE=2 constant POS_COLUMN=3 constant POS_TEXT=4 constant CMD_TEXT=1 constant CMD_POS=2 constant RESERVE_CORE=1 constant RESERVE_UNIMPLEMENTED=2 constant RESERVE_FLOW=3 constant RESERVE_FUNCTION=4 constant RESERVE_SCRIPT=5 constant RESERVE_GLOBAL=6 constant RESERVE_BEGIN=7 constant RESERVE_END=8 constant RESERVE_OPERATOR=9 constant RESERVE_BUILTIN=10 constant RESERVE_CONSTANT=11 constant RESERVE_MACRO=12 constant RESERVE_NAMES={"declaration" ,"unimplemented keyword/operator" ,"flow control" ,"hardcoded function" ,"user script" ,"global variable" ,"bracket "&COLYEL&"("&COLRED ,"bracket "&COLYEL&")"&COLRED ,"operator" ,"builtin command" ,"constant" ,"hardcoded function/macro" } constant PAIR_NUM=1 constant PAIR_NAME=2 constant PAIR_START=1 constant PAIR_END=2 constant OPER_TRUENAME=3 constant OPER_POS=4 constant FUNC_ARGS=3 constant FUNC_POS=4 constant GLB_POS=3 constant CONST_VALUE=1 constant CONST_POS=2 constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args constant KIND_NUMBER=1 constant KIND_FLOW=2 constant KIND_GLOBAL=3 constant KIND_LOCAL=4 constant KIND_MATH=5 constant KIND_FUNCTION=6 constant KIND_SCRIPT=7 constant KIND_REFERENCE=8 --converted to KIND_NUMBER in compiled script constant KIND_OPERATOR=9 --never appears in compiled script constant KIND_PARENS=10 --never appears in compiled script constant KIND_MACRO=11 --never appears in compiled script constant KIND_LONGNAMES={"number" ,"flow control statement" ,"global variable" ,"local variable" ,"built-in function" ,"hard-coded function" ,"script" ,"reference" ,"untranslated operator" ,"order-of-operations-enforcing parenthesis" ,"hard-coded function/macro" } constant TREE_TRUNK=1 constant TREE_BRANCHES=2 constant TIMING_DEPTH=1 constant TIMING_DESCRIPTION=2 constant TIMING_TIME=3 constant TIMING_ACTIVE=4 --Version number for the .hs/.hsp format (not including HSZ files!) constant HSP_FORMAT_VERSION=1 constant CODE_START_BYTE_OFFSET=12 constant SCRIPT_FORMAT_VERSION=2 constant MAXGLOBAL=16383 --------------------------------------------------------------------------- --globals-- --initializations-- sequence compiler_dir compiler_dir="" sequence source_file source_file="" sequence dest_file dest_file="" sequence optlist optlist={} sequence file_list file_list={} integer max_srcpos max_srcpos=0 integer total_lines total_lines=0 sequence cmd cmd={} sequence reuse_ids reuse_ids={} sequence constant_list constant_list=alpha_tree_create() sequence trigger_list trigger_list={} sequence operator_list operator_list={} sequence function_list function_list={} sequence global_list global_list={{},{},{}} sequence string_list string_list={} sequence script_list script_list={} sequence script_cmd script_cmd={} sequence reserved reserved=alpha_tree_create() atom start_time start_time=time() atom run_time run_time=0 sequence timing_data timing_data={} integer timing_depth timing_depth=-1 integer get_cmd_pointer get_cmd_pointer=0 integer autonumber_id autonumber_id=32767 sequence flow_list flow_list={ {0,"do"} ,{1,"begin"} ,{2,"end"} ,{3,"return"} ,{4,"if"} ,{5,"then"} ,{6,"else"} ,{7,"for"} ,{10,"while"} ,{11,"break"} ,{12,"continue"} ,{13,"exitscript"} ,{14,"exitreturning"} ,{15,"switch"} ,{16,"case"} --never appears in compiled script } sequence math_list math_list={ {0,"random",{0,1}} ,{1,"exponent",{0,2}} ,{2,"modulus",{0,1}} ,{3,"divide",{0,1}} ,{4,"multiply",{0,0}} ,{5,"subtract",{0,0}} ,{6,"add",{0,0}} ,{7,"xor",{0,0}} ,{8,"or",{0,0}} ,{9,"and",{0,0}} ,{10,"equal",{0,0}} ,{11,"notequal",{0,0}} ,{12,"lessthan",{0,0}} ,{13,"greaterthan",{0,0}} ,{14,"lessthanorequalto",{0,0}} ,{15,"greaterthanorequalto",{0,0}} ,{16,"setvariable",{0,0}} ,{17,"increment",{0,1}} ,{18,"decrement",{0,1}} ,{19,"not",{0}} ,{20,"logand",{0,0}} ,{21,"logor",{0,0}} ,{22,"logxor",{0,0}} ,{23,"abs",{0}} ,{24,"sign",{0}} ,{25,"sqrt",{0}} } --update is_simple_math_op on adding new operators sequence separator_list separator_list={ "+=","-=","$+","$=","$","+","--","/","*","^^","^" ,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||" } sequence lexer_table lexer_table={} sequence lexer_table_key2 lexer_table_key2={} sequence hex_chars hex_chars="0123456789abcedfABCDEF" sequence hex_char_values hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F} sequence compiled_data compiled_data={} --used inside binary_compile only; compiled data for the current script sequence all_scripts all_scripts={} sequence current_script current_script="" --name of current script integer cur_script_num cur_script_num=0 --index into script_cmd of current script integer colors_enabled colors_enabled=true integer simple_colorcodes simple_colorcodes=false integer error_file error_file=false sequence used_globals used_globals={} sequence used_locals used_locals={} sequence string_table string_table={} sequence plotscr_version plotscr_version="" integer max_used_function max_used_function=0 --maximum id of a function used in a script integer fast_mode fast_mode=false integer end_anchor_kludge end_anchor_kludge=false integer was_warnings was_warnings=false --------------------------------------------------------------------------- --types-- --A position in the script source. An integer, encoding an index in file_list --and a character number that file (where newlines are one character). --See encode_srcpos and decode_srcpos type srcpos(atom pos) --need to allow 0 as a dummy value if pos=floor(pos) and pos>=0 and pos<=max_srcpos then return true else return false end if end type --------------------------------------------------------------------------- --time spent waiting for a user-keypress shouldnt count function timeless_wait_key() atom skip_time integer key skip_time=time() key=wait_key() skip_time=time()-skip_time start_time+=skip_time for i=1 to length(timing_data) do if timing_data[i][TIMING_ACTIVE] then timing_data[i][TIMING_TIME]+=skip_time end if end for return(key) end function --------------------------------------------------------------------------- --Euphoria's built-in sprintf truncates all string elements to 8 bits, --so this is an Unicode-enabled replacement. Supports %d, %s, %g, %% --Beware! This will be way slower than sprintf, but luckily it's not --needed anywhere where speed matters. function sprintf_utf(sequence s,sequence printf_args) integer seg_start, seg_end integer arg_ctr integer code sequence ret if length(printf_args)=0 then return s end if ret="" arg_ctr=1 seg_start=1 seg_end=find('%',s) while seg_end do ret&=s[seg_start..seg_end-1] code=s[seg_end+1] if code='s' then ret&=printf_args[arg_ctr] arg_ctr+=1 elsif code='d' or code='g' then ret&=sprintf({'%',code},printf_args[arg_ctr]) arg_ctr+=1 elsif code='%' then ret&='%' else simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]})) end if seg_start=seg_end+2 seg_end=find_from('%',s,seg_start) end while if seg_start<=length(s) then ret&=s[seg_start..$] end if if arg_ctr!=length(printf_args)+1 then simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1})) end if return ret end function --------------------------------------------------------------------------- --fprintf as UTF8 procedure print_utf(object fh,sequence s,sequence printf_args) if length(printf_args) then s=sprintf_utf(s,printf_args) end if s=toUTF(s,utf_32,utf_8) puts(fh,s) end procedure --------------------------------------------------------------------------- --prints a string with printf to stdout converting color codes procedure color_print(sequence s,sequence printf_args) sequence buffer s=sprintf_utf(s,printf_args) if simple_colorcodes then buffer=s else buffer="" for i=1 to length(s) do if s[i]>=COL_FIRST and s[i]<=COL_LAST then print_utf(stdout,buffer,{}) buffer="" if colors_enabled then text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1]) end if else buffer&=s[i] end if end for end if if length(buffer) then print_utf(stdout,buffer,{}) end if end procedure --------------------------------------------------------------------------- procedure opt_wait_for_key() integer key if not find('k',optlist) then color_print("[Press Any Key]\n",{}) key=timeless_wait_key() end if end procedure --------------------------------------------------------------------------- procedure enter_timing_zone(sequence description) if not find('t',optlist) then return end if timing_depth+=1 timing_data=append(timing_data,{timing_depth,description,-time(),true}) end procedure --------------------------------------------------------------------------- procedure reenter_timing_zone(sequence description) if not find('t',optlist) then return end if for i=1 to length(timing_data) do if equal(timing_data[i][TIMING_DESCRIPTION],description) then timing_depth+=1 timing_data[i][TIMING_DEPTH]=timing_depth --could change... timing_data[i][TIMING_TIME]-=time() timing_data[i][TIMING_ACTIVE]=true return end if end for enter_timing_zone(description) end procedure --------------------------------------------------------------------------- procedure exit_timing_zone() if not find('t',optlist) then return end if for i=length(timing_data) to 1 by -1 do --there can only be one active timing job at each depth at a time if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then timing_data[i][TIMING_TIME]+=time() timing_data[i][TIMING_ACTIVE]=false exit end if end for timing_depth-=1 end procedure --------------------------------------------------------------------------- procedure print_timing_data() sequence indent sequence data sequence tmp1, tmp2 if run_time=0 then --so that all percentages show as 0% run_time=1e100 end if for i=1 to length(timing_data) do data=timing_data[i] indent=repeat(' ',data[TIMING_DEPTH]*4) tmp1=sprintf("%.2f",{data[TIMING_TIME]}) tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time}) color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%% %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]}) end for end procedure --------------------------------------------------------------------------- function html_char_convert(sequence s) sequence buffer sequence result result="" buffer="" for i=1 to length(s) do if s[i]=' ' and i>1 then if s[i-1]=' ' then buffer&=" " else buffer&=s[i] end if elsif s[i]='<' then buffer&="<" elsif s[i]='>' then buffer&=">" else buffer&=s[i] end if end for if length(buffer) then result&=buffer end if return(result) end function --------------------------------------------------------------------------- function error_string_convert(sequence s) sequence buffer sequence result result="" buffer="" for i=1 to length(s) do if s[i]>=COL_FIRST and s[i]<=COL_LAST then if s[i]=COLYEL then buffer&="" elsif s[i]=COLRED then buffer&="" elsif s[i]=COLPNK then buffer&="" elsif s[i]=COLWHI then buffer&="" elsif s[i]=COLBWHI then buffer&="" end if elsif s[i]='\n' then buffer&="
\n" else buffer&=s[i] end if end for if length(buffer) then result&=buffer end if return(result) end function --------------------------------------------------------------------------- --prints a long string wrapped at 80 columns procedure wrap_print(sequence s,sequence arguments) sequence outstring s=sprintf_utf(s,arguments) while length(s) do outstring=before_wrap_point(s) s=after_wrap_point(s) color_print("%s\n",{outstring}) end while end procedure --------------------------------------------------------------------------- procedure error_file_print(sequence s) integer fh if error_file then fh=open(compiler_dir&"hs_error.htm","a") if fh!=failure then print_utf(fh,error_string_convert(s)&"\n",{}) close(fh) end if end if end procedure --------------------------------------------------------------------------- --prints out warning message in red with word wrap procedure simple_warn(sequence s) sequence pos if not find('w',optlist) then --do not warn if -w is set pos=get_position() if pos[2]>1 then print_utf(stdout,"\n",{}) end if wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s}) error_file_print(sprintf_utf("WARNING: %s",{html_char_convert(s)})) was_warnings = true end if end procedure --------------------------------------------------------------------------- --prints out an error message in red with word wrap, then aborts procedure simple_error(sequence s) sequence pos pos=get_position() if pos[2]>1 then print_utf(stdout,"\n",{}) end if wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s}) error_file_print(sprintf_utf("ERROR: %s",{html_char_convert(s)})) if end_anchor_kludge then error_file_print("\n
\n") end if opt_wait_for_key() abort(1) end procedure --------------------------------------------------------------------------- --prints out the copyright info, usage info, and command-line options procedure check_arg_count(sequence args) if length(args)=2 then wrap_print("HamsterSpeak semicompiler v%s%s (C)%s James Paige&Hamster Republic Productions\n",{COMPILER_VERSION,COMPILER_SUB_VERSION,COPYRIGHT_DATE}) wrap_print("Please read LICENSE.txt for GPL License details and disclaimer of liability",{}) wrap_print(COLYEL&"%s [-acdfknstwuyz] source.hss [dest.hs]"&COLWHI&"\n\n",{hs_upper(file_only(args[2]))}) color_print(" -f fast mode. Disables some optimization\n",{}) color_print(" -k do not wait for a keypress when finished\n",{}) color_print(" -n don't add debug info, or a copy of the source code, to .hs file\n",{}) color_print(" -w suppress minor warnings\n",{}) color_print(" -u show warnings for unused variables\n",{}) color_print(" -y overwrite the destination file without asking\n",{}) color_print("\nAdvanced options (not intended for normal use):\n",{}) color_print(" -c colors will be disabled\n",{}) color_print(" -d dump debug report to hs_debug.txt\n",{}) color_print(" -s print the name of each script as it is compiled\n",{}) color_print(" -a same as -s, but including autonumbered scripts\n",{}) color_print(" -t print detailed timing info (for HSpeak developers)\n",{}) color_print(" -z write error messages to hs_error.htm\n",{}) --Undocumented arguments: -- --unicode-cols -- used by Hamster Whisper for piped output -- --reuse-ids scripts.bin -- Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{}) wrap_print("\nThis is a command-line program. You should either run it from the command-line or you should drag and drop your script file onto it.\n",{}) opt_wait_for_key() abort(0) end if end procedure --------------------------------------------------------------------------- --Build the lexer tables, which for each code point 0<=i<256, contains the type --of that character in entry i+1 (have to handle NUL) --Other characters are always LEX_CHAR procedure init_lexer() sequence table_insert --main table table_insert={" (),\n#\t\"[]{}.", {LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_COMMA,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN}} --by default each character is either binary, or an allowed identifier --character. This is for backwards compatibility lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1) for i=1 to length(table_insert[1]) do lexer_table[table_insert[1][i]+1]=table_insert[2][i] end for for i=0 to 9 do lexer_table['0'+i+1]=LEX_DIGIT end for --add the one-character prefix of each operator for i=1 to length(separator_list) do lexer_table[separator_list[i][1]+1]=LEX_KEYWORD end for --keyword suffix parsing table lexer_table_key2=repeat(LEX_CHAR,256) lexer_table_key2[' '+1]=LEX_SPACE lexer_table_key2['\t'+1]=LEX_SPACE --2nd character of each operator for i=1 to length(separator_list) do if length(separator_list[i])=2 then lexer_table_key2[separator_list[i][2]+1]=LEX_KEYWORD2 end if end for --when floating point is added will want a table for number parsing end procedure --------------------------------------------------------------------------- --initializes global variables, and generally gets things ready to roll procedure init() sequence args integer index integer key integer fh args=command_line() compiler_dir=path_only(args[2]) check_arg_count(args) optlist={} index=3 while index<=length(args) do if equal(args[index],"--unicode-cols") then simple_colorcodes=true args=delete_element(args,index) elsif equal(args[index],"--reuse-ids") then color_print("Reusing script ID numbers from previous compile\n",{}) reuse_ids=read_scripts_dot_bin(args[index+1]) args=delete_slice(args,index,index+1) elsif args[index][1]='-' then optlist=optlist&hs_lower(args[index][2..length(args[index])]) args=delete_element(args,index) else index+=1 end if end while check_arg_count(args) source_file=normalize_filename(args[3]) if length(args)>3 then dest_file=normalize_filename(args[4]) else dest_file=normalize_filename(alter_extension(source_file,"hs")) end if if find('f',optlist) then fast_mode=true color_print("Using fast mode. Some size optimization disabled\n",{}) end if if find('c',optlist) then colors_enabled=false end if --'x' used by old versions of Hamster Whisper to specify color code format --However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway --Replaced by --unicode-cols --the semi-undocumented command line argument -z writes a file called --hs_error.htm formatted for HssEd to read if find('z',optlist) then error_file=true if file_exists(compiler_dir&"hs_error.htm") then fh=open(compiler_dir&"hs_error.htm","w") if fh!=failure then puts(fh,"") close(fh) end if end if end if wrap_print("Semicompiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file}) if file_exists(dest_file) then if find('y',optlist) then --found the -y command line arg, overwrite automatically key='y' else while true do --prompt the user to overwrite wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file}) key=timeless_wait_key() color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key}) if hs_lower(key)='y' then exit elsif hs_lower(key)='n' then simple_error("output file overwrite cancelled by user") else wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)}) end if end while end if end if --why the alpha-tree? because the reserved-word list can get HUGE. --we want to be able to look up words in it quickly. A btree or some such --thing would have been even better, but thats alot of trouble :) --FIXME: variable is missing! It doesn't fit in any of the categories reserved=alpha_tree_mass_insert(reserved,{ {"defineconstant",RESERVE_CORE} ,{"definetrigger" ,RESERVE_CORE} ,{"defineoperator",RESERVE_CORE} ,{"globalvariable",RESERVE_CORE} ,{"definefunction",RESERVE_CORE} ,{"definescript" ,RESERVE_CORE} ,{"plotscrversion",RESERVE_CORE} ,{"include" ,RESERVE_CORE} --should never appear, in theory ,{"do" ,RESERVE_FLOW} ,{"begin" ,RESERVE_BEGIN} ,{"end" ,RESERVE_END} ,{"return" ,RESERVE_FLOW} ,{"if" ,RESERVE_FLOW} ,{"then" ,RESERVE_FLOW} ,{"else" ,RESERVE_FLOW} ,{"elseif" ,RESERVE_FLOW} ,{"for" ,RESERVE_FLOW} ,{"cfor" ,RESERVE_UNIMPLEMENTED} ,{"foreach" ,RESERVE_UNIMPLEMENTED} ,{"while" ,RESERVE_FLOW} ,{"break" ,RESERVE_FLOW} ,{"continue" ,RESERVE_FLOW} ,{"exitscript" ,RESERVE_FLOW} ,{"exitreturning" ,RESERVE_FLOW} ,{"switch" ,RESERVE_FLOW} ,{"case" ,RESERVE_FLOW} ,{"=" ,RESERVE_UNIMPLEMENTED} ,{"tracevalue" ,RESERVE_MACRO} ,{"assert" ,RESERVE_MACRO} })--end mass_insert for i=1 to length(math_list) do reserved=alpha_tree_insert(reserved,math_list[i][PAIR_NAME],RESERVE_BUILTIN) end for init_lexer() end procedure --------------------------------------------------------------------------- --returns {filenumber,file position} pair function decode_srcpos_file(srcpos pos) integer val val=pos --0 is an invalid srcpos if val=0 then simple_error("compiler bug: can't decode uninitialised srcpos") end if val-=1 for i=1 to length(file_list) do --an offset of 0 is reserved, but currently unused if val=0 then simple_error("compiler bug: 0 srcpos file offset") end if if val<=length(file_list[i][FILE_TEXT]) then return {i,val} end if val-=length(file_list[i][FILE_TEXT])+1 end for simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos})) end function --------------------------------------------------------------------------- function srcpos_file_number(srcpos pos) sequence temp temp=decode_srcpos_file(pos) return temp[1] end function --------------------------------------------------------------------------- --Returns 1-based character number in file function srcpos_point(srcpos pos) sequence temp temp=decode_srcpos_file(pos) return temp[2] end function --------------------------------------------------------------------------- --Encode offset (1-based characters from start of file) in a file as a srcpos function encode_srcpos(integer filenum, integer offset) srcpos pos pos=1 for i=1 to filenum-1 do pos+=length(file_list[i][FILE_TEXT])+1 end for return pos+offset end function --------------------------------------------------------------------------- --possibly slow: prehaps avoid heavy use --returns {file name, line number, column number, line text}: index with POS_* --column number returned is 0-based function decode_srcpos(srcpos pos) integer point, filenum integer lineno sequence filetext sequence lines sequence temp temp=decode_srcpos_file(pos) filenum=temp[1] point=temp[2] filetext=file_list[filenum][FILE_TEXT] lines=file_list[filenum][FILE_LINE_EXTENTS] lineno=length(lines) for i=1 to length(lines) do if point<=lines[i][PAIR_END] then lineno=i exit end if end for return({file_list[filenum][FILE_NAME],lineno,point-lines[lineno][PAIR_START],filetext[lines[lineno][PAIR_START]..lines[lineno][PAIR_END]]}) end function --------------------------------------------------------------------------- function form_error_text(sequence s,srcpos pos) sequence line sequence src_position sequence column_display integer tab_compensate src_position=decode_srcpos(pos) --?pos --pretty_print(1,src_position,{2}) line=substring_replace(src_position[POS_TEXT],"\t"," ") tab_compensate=3*count('\t',src_position[POS_TEXT][1..src_position[POS_COLUMN]]) column_display=repeat(' ',src_position[POS_COLUMN]+tab_compensate) error_file_print(sprintf_utf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]})) if length(current_script) then return( sprintf_utf( "in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n" &COLBWHI&"%s\n" &"%s^\n" &COLRED&"%s\n" ,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s} ) ) else return( sprintf_utf( "in line %d of "&COLPNK&"%s"&COLRED&"\n" &COLBWHI&"%s\n" &"%s^\n" &COLRED&"%s\n" ,{src_position[POS_LINE],src_position[POS_FILENAME],line,column_display,s} ) ) end if --note that the calling procedure must send/deal with closing tags to error_file_print end function --------------------------------------------------------------------------- procedure src_warn(sequence s,srcpos pos) if not find('w',optlist) then --do not warn if -w is set simple_warn(form_error_text(s,pos)) error_file_print("\n
\n") end if end procedure --------------------------------------------------------------------------- procedure src_error(sequence s,srcpos pos) end_anchor_kludge=true simple_error(form_error_text(s,pos)) end procedure --------------------------------------------------------------------------- procedure load_source(sequence filename,sequence reading_how,srcpos pos) integer fh integer line_start, line_end sequence file_text sequence line_extents -- {start of line, end of line} pairs fh=open(filename,"r") if fh!=failure then wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename}) line_extents={} reenter_timing_zone("read_file") --Autodetect the encoding file_text=read_file(fh, TEXT_MODE, UTF) exit_timing_zone() close(fh) --read_file already converts lineendings, so this usually won't be needed file_text=exclude(file_text,"\r") line_start=1 line_end=find('\n',file_text) while line_end do line_extents=append(line_extents,{line_start,line_end-1}) line_start=line_end+1 line_end=find_from('\n',file_text,line_start) end while if line_start<=length(file_text) then line_extents=append(line_extents,{line_start,length(file_text)}) end if total_lines+=length(line_extents) file_list=append(file_list,{filename,file_text,line_extents}) max_srcpos+=length(file_text)+1 else if pos then src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos) else simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename})) end if end if end procedure --------------------------------------------------------------------------- procedure load_include(sequence include_name,srcpos pos) reenter_timing_zone("load_source/include") --try source directory if file_exists(path_only(source_file)&include_name) then load_source(normalize_filename(path_only(source_file)&include_name),"including",pos) else --try current directory if file_exists(include_name) then load_source(include_name,"including",pos) else --try compiler_directory if file_exists(compiler_dir&include_name) then load_source(normalize_filename(compiler_dir&include_name),"including",pos) else --give up load_source(include_name,"including",pos) end if end if end if exit_timing_zone() end procedure --------------------------------------------------------------------------- procedure show_source_info() if total_lines then wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)}) else simple_error("no data to compile\n") end if end procedure --------------------------------------------------------------------------- procedure lexer_binary_error(srcpos pos) --better use src_error, to track down garbage if accidentally creeping in src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos) end procedure --------------------------------------------------------------------------- --reads and processes the escape codes in a string and returns it enclosed in quote marks function lexer_read_string(sequence s,integer i,integer stop,srcpos pos) sequence string integer escaping integer at1, at2 escaping=false string="\"" while true do if i>stop then --did not find a closing " so not a valid string src_error("Expected \" at end of line to end string (multi-line strings are not supported)",pos+stop) end if if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then lexer_binary_error(pos+i) end if if escaping then if s[i]='"' or s[i]='\\' or s[i]='\'' then --including ' for closer compatibility with Python string&=s[i] elsif s[i]='n' then string&=10 elsif s[i]='t' then string&=9 elsif s[i]='x' then --escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes) at1=0 if i+2<=stop then at1=find(s[i+1],hex_chars) at2=find(s[i+2],hex_chars) end if if at1=0 or at2=0 then src_error("Expected two hexidecimal characters to follow \\x escape code",pos+i) end if string&={16*hex_char_values[at1]+hex_char_values[at2]} i+=2 else --invalid sequence string&='\\' string&=s[i] end if escaping=false else if s[i]='"' then return({i,string&'"'}) elsif s[i]='\\' then escaping=true else string&=s[i] end if end if i+=1 end while end function --------------------------------------------------------------------------- function lexer_read_number(sequence s,integer i,integer stop,srcpos pos) integer state atom val integer sign sequence text integer starti starti=i if s[i]='-' then text="-" i+=1 sign=-1 else text="" sign=1 end if val=0 while i<=stop do if s[i]<256 then state=lexer_table[s[i]+1] else state=LEX_CHAR end if --color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state}) if state=LEX_DIGIT then text&=s[i] val=val*10+s[i]-'0' elsif state=LEX_SPACE then --yes, allowed in numbers too elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_KEYWORD or state=LEX_COMMENT or state=LEX_BINARY then --backtrack one character --known bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead i-=1 exit else --currently, state=LEX_CHAR or state=LEX_WARN or state=LEX_STRING if s[i]='.' then src_error("Floating point values are not yet implemented",pos+i) end if src_error( sprintf_utf("Expected "&COLYEL&"%s"&COLRED&" to be followed by "&COLYEL&","&COLRED&" or "&COLYEL&"("&COLRED&" or "&COLYEL&")"&COLRED&" or an operator" ,{text}) ,pos+i ) end if i+=1 end while if not int32(sign*val) then src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32 bit signed integer. Integers must be between -2147483648 and 2147483647",{text}),pos+starti) end if return({i,text}) end function --------------------------------------------------------------------------- --Clean the rest of a line for seek_include after 'include' occurs function lexer_read_include_line(sequence tokens, sequence s, integer i, srcpos pos) integer at sequence temp if not equal(tokens[1][CMD_TEXT],"include") then src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS]) end if i+=1 s=s[i..$] at=find('\n',s) if at then s=s[1..at-1] end if at=find('#',s) if at then s=s[1..at-1] end if if length(s) then while s[1]=' ' or s[1]='\t' do s=s[2..length(s)] i+=1 end while s=trim_whitespace(s) if s[1]='"' then temp=lexer_read_string(s,2,length(s),pos+i-1) s=temp[2] --so we ignore everything after the string.... ugh end if tokens=append(tokens,{s,pos+i-1}) end if if length(tokens)=1 then src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS]) end if if length(tokens)>2 then src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS]) end if return(tokens) end function --------------------------------------------------------------------------- -- s is the text for the whole file, the interval i to stop is a single line, -- and pos is the srcpos for the beginning of the file function lex_line(sequence s,integer i,integer stop,srcpos pos) integer state sequence tokens sequence ident --identifier text being built up sequence temp sequence masked integer oldi integer found integer textstart --the column at which this identifier starts, otherwise 0 sequence remem_ident integer remem_textstart integer nonnumeral --whether we are definitely inside an identifier, ie. a digit doesn't indicate a number integer backtracked ident="" nonnumeral=false textstart=0 tokens={} while i<=stop do if s[i]<256 then state=lexer_table[s[i]+1] else state=LEX_CHAR end if --color_print("lexing i=%d %d=%s state %d nonnum=%d ident=%s\n",{i,s[i],{s[i]},state,nonnumeral,ident}) if state=LEX_CHAR then ident=ident & toLower(s[i]) if textstart=0 then textstart=i end if nonnumeral=true elsif state=LEX_SPACE then elsif state=LEX_DIGIT then if nonnumeral then ident=ident & s[i] else if length(ident) then --either ident is "-" or it is "" i=textstart ident="" textstart=0 end if temp=lexer_read_number(s,i,stop,pos) tokens=append(tokens,{temp[2],pos+i}) i=temp[1] --nonnumeral remains false end if elsif state=LEX_WARN then ident=ident & toLower(s[i]) if textstart=0 then textstart=i end if nonnumeral=true src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), pos+i) else remem_ident=ident --for backtracking remem_textstart=textstart backtracked=false if length(ident) then tokens=append(tokens,{ident,pos+textstart}) end if ident="" nonnumeral=false textstart=0 if state=LEX_BEGIN then tokens=append(tokens,{"begin",pos+i}) elsif state=LEX_END then tokens=append(tokens,{"end",pos+i}) elsif state=LEX_COMMA then elsif state=LEX_COMMENT then exit elsif state=LEX_STRING then temp=lexer_read_string(s,i+1,stop,pos) tokens=append(tokens,{temp[2],pos+i}) i=temp[1] elsif state=LEX_KEYWORD then --the following mess is in order to allow whitespace in the middle of separators textstart=i masked=s[i..i] --we only support length 1 and 2 separators while i length(s) found=false for j=1 to length(separator_list) do if match(separator_list[j],masked)=1 then tokens=append(tokens,{separator_list[j],pos+textstart}) if length(separator_list[j])=1 then i=textstart end if found=true exit end if end for if found=false then --need to backtrack, in particular for - --color_print("backtracking on '%s'\n", {masked}) backtracked=true ident=remem_ident & masked[1] oldi=i+1 i=textstart if length(remem_ident) then tokens=tokens[1..$-1] textstart=remem_textstart nonnumeral=true elsif equal(ident,"-") then --nonnumeral remains false: we've seen exactly "-" else --a keyword character followed by garbage src_error( sprintf_utf("Expected to see the rest of an operator (like "&COLYEL&":="&COLRED&") but found "&COLYEL&"%s"&COLRED&" instead",{{s[oldi]}}), pos+oldi) end if else textstart=0 end if elsif state=LEX_BINARY then lexer_binary_error(pos+i) end if if backtracked=false then if equal(remem_ident,"include") then --include is a special case, as normal lexing rules don't apply for rest of line if length(ident) then simple_error("compiler bug!\n") end if return lexer_read_include_line(tokens,s,i,pos) end if end if end if i+=1 end while if length(ident) then --ought to check whether ident is 'include'... tokens=append(tokens,{ident,pos+textstart}) end if return(tokens) end function --------------------------------------------------------------------------- --this also checks for rogue strings and $'s function translate_plotstrings(sequence tokens) integer state integer start_token integer i sequence string_func sequence string_token srcpos pos state=0 i=1 while i<=length(tokens) do if compare(tokens[i][CMD_TEXT],"$")=0 then pos=tokens[i][CMD_POS] if state=0 then state=1 start_token=i else --jump to error throw state=1 exit end if end if if state=2 then if tokens[i][CMD_TEXT][1]='"' then string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1]) string_token={sprintf("@$string%d",{length(string_list)}),pos} tokens=tokens[1..start_token-1]&{string_func,{"begin",pos}}&tokens[start_token+1..i-2]&{string_token,{"end",pos}}&tokens[i+1..$] state=0 --may have multiple strings on one line else --we saw a + (or illegal =) inside the string number expression state=1 end if end if if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then string_func={"setstringfromtable",pos} state=2 elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then string_func={"appendstringfromtable",pos} state=2 end if i+=1 end while if state!=0 then --we've seen a surplus $, not right src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos) end if return(tokens) end function --------------------------------------------------------------------------- --Given a command tree, get the original text from the file --OK, this function is horribly complicated just so tracevalue can print --"hero X (me)" instead of "herox(me)". It could be implemented much simpler if --macros were expanded in the lexer, but then compile_commands and --convert_macros would have to be as well function tree_original_text(sequence tree) sequence script_toks sequence subtree srcpos leftmost, rightmost integer left_at, right_at integer filenum integer left_point, right_point integer depth sequence tokens sequence text script_toks=script_cmd[cur_script_num][4] --find leftmost and rightmost tokens making up the expression, not including brackets leftmost=tree[TREE_TRUNK][CMD_POS] subtree=tree while length(subtree[TREE_BRANCHES]) do subtree=subtree[TREE_BRANCHES][1] if subtree[TREE_TRUNK][CMD_POS]32767 then src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos) n=32767 elsif n<-32768 then src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos) n=-32768 end if return(n) end function --------------------------------------------------------------------------- function try_string_to_number(sequence s) atom result result=floor(string_to_object(s[CMD_TEXT],0)) if not string_is_int32(s[CMD_TEXT]) then src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS]) end if return(result) end function --------------------------------------------------------------------------- function enforce_constants(sequence s) --enforces both constants object v v=alpha_tree_data(constant_list,s,{}) if length(v) then v=v[CONST_VALUE] if int32(v) then return(sprintf("%d",{v})) end if end if return(s) end function --------------------------------------------------------------------------- function get_cmd() sequence result if get_cmd_pointer>length(cmd) then src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS]) end if result=cmd[get_cmd_pointer] get_cmd_pointer+=1 result[CMD_TEXT]=enforce_constants(result[CMD_TEXT]) return(result) end function --------------------------------------------------------------------------- function get_cmd_no_constants() sequence result if get_cmd_pointer>length(cmd) then src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS]) end if result=cmd[get_cmd_pointer] get_cmd_pointer+=1 result[CMD_TEXT]=result[CMD_TEXT] return(result) end function --------------------------------------------------------------------------- function get_cmd_block(integer convert_constants) sequence this sequence result result={} this=get_cmd() if compare("begin",this[CMD_TEXT])!=0 then src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS]) end if while true do if convert_constants then this=get_cmd() else this=get_cmd_no_constants() end if if compare("begin",this[CMD_TEXT])=0 then src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS]) elsif compare("end",this[CMD_TEXT])=0 then exit--break out of the while else result=append(result,this) end if end while return(result) end function --------------------------------------------------------------------------- procedure parse_constant_block(sequence block) atom num sequence name for i=1 to length(block) by 2 do if i+1>length(block) then src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS]) end if num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]}) check_undefined_constant(block[i+1],num) name=block[i+1][CMD_TEXT] constant_list=alpha_tree_insert(constant_list,name,{num,block[i+1][CMD_POS]}) reserved=alpha_tree_insert(reserved,name,RESERVE_CONSTANT) end for end procedure --------------------------------------------------------------------------- procedure parse_trigger_block(sequence block) integer num sequence name for i=1 to length(block) by 2 do if i+1>length(block) then src_error("Expected name but script trigger definition block ended",block[i][CMD_POS]) end if num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS]) check_undefined_string(block[i+1],"script trigger name") name=block[i+1][CMD_TEXT] trigger_list=append(trigger_list,{num,name}) reserved=alpha_tree_insert(reserved,name,RESERVE_CORE) end for end procedure --------------------------------------------------------------------------- procedure parse_version_block(sequence block) sequence req_version if length(block)<2 then --Don't have a srcpos simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)") end if plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time) req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd --trim quote marks if plotscr_version[1]!='"' or req_version[1]!='"' then simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')") end if plotscr_version=plotscr_version[2..$-1] req_version=req_version[2..$-1] if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the latest version.") end if --printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION}) if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the latest version of HSpeak.") end if end procedure --------------------------------------------------------------------------- procedure create_global(integer id,sequence name,srcpos pos) integer at at=find(id,global_list[PAIR_NUM]) if at then src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos) else if id>=0 and id<=MAXGLOBAL then global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id) global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name) global_list[GLB_POS]=append(global_list[GLB_POS],pos) reserved=alpha_tree_insert(reserved,name,RESERVE_GLOBAL) else src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos) end if end if end procedure --------------------------------------------------------------------------- procedure parse_global_block(sequence block) integer num for i=1 to length(block) by 2 do if i+1>length(block) then src_error("expected name but globalvariable block ended",block[i][CMD_POS]) end if num=try_string_to_number(block[i]) check_undefined_string(block[i+1],"global variable name") create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS]) end for end procedure --------------------------------------------------------------------------- procedure parse_operator_block(sequence block) integer num sequence name,true for i=1 to length(block) by 3 do if i+2>length(block) then src_error("expected name but defineoperator block ended",block[i][CMD_POS]) end if num=try_string_to_number(block[i]) mustnt_be_a_number(block[i+1]) mustnt_be_a_number(block[i+2]) name=block[i+1][CMD_TEXT] true=block[i+2][CMD_TEXT] operator_list=append(operator_list,{num,name,true,block[i+2][CMD_POS]}) reserved=alpha_tree_insert(reserved,name,RESERVE_OPERATOR) end for end procedure --------------------------------------------------------------------------- --arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to --VAR_ARGS indicating a variable (unlimited) number of arguments. function create_function(sequence list,integer id,sequence name,object arglist,integer func_type,srcpos pos) integer at sequence encoded if func_type=RESERVE_SCRIPT then at=find(id,column(list,PAIR_NUM)) if at then src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{RESERVE_NAMES[func_type],id,list[at][PAIR_NAME]}),pos) end if if id=0 then src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos) elsif id<0 then if length(reuse_ids) then encoded=encode_ohr(name) if length(encoded)>36 then encoded=encoded[1..36] end if at=find_in_column(encoded,reuse_ids,PAIR_NAME) if at then id=reuse_ids[at][PAIR_NUM] end if end if if id<0 then id=autonumber_id loop do autonumber_id-=1 id=autonumber_id until find_in_column(id,reuse_ids,PAIR_NUM)=0 end loop end if end if end if list=append(list,{id,name,arglist,pos}) reserved=alpha_tree_insert(reserved,name,func_type) return(list) end function --------------------------------------------------------------------------- function parse_define_block(sequence block,sequence list,integer func_type) integer num sequence name integer args sequence arglist srcpos name_pos integer i i=1 while i<=length(block) do num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS]) if i+1>length(block) then src_error(sprintf_utf("expected %s name but define block ended",{RESERVE_NAMES[func_type]}),block[i][CMD_POS]) else i+=1 check_undefined_string(block[i],RESERVE_NAMES[func_type]&" name") name=block[i][CMD_TEXT] name_pos=block[i][CMD_POS] if i+1>length(block) then src_error("expected argument count but define block ended",block[i][CMD_POS]) else i+=1 args=try_string_to_number(block[i]) arglist={} if args<0 then list=create_function(list,num,name,VAR_ARGS,func_type,name_pos) else for j=1 to args do if i+1>length(block) then src_error("expected argument default but define block ended",block[i][CMD_POS]) else i+=1 arglist=append(arglist,try_string_to_number(block[i])) end if end for list=create_function(list,num,name,arglist,func_type,name_pos) end if i+=1 end if end if end while return(list) end function --------------------------------------------------------------------------- procedure primary_parse_pass() sequence this enter_timing_zone("Preliminary pass") color_print("preliminary pass\n",{}) get_cmd_pointer=1 while get_cmd_pointer<=length(cmd) do --read a top-level command this=get_cmd() if equal("defineconstant",this[CMD_TEXT]) then parse_constant_block(get_cmd_block(false)) elsif equal("definetrigger",this[CMD_TEXT]) then parse_trigger_block(get_cmd_block(false)) elsif equal("plotscrversion",this[CMD_TEXT]) then parse_version_block(get_cmd_block(false)) end if end while if equal(plotscr_version,"") then simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the latest version.") end if if find_in_column("script",trigger_list,PAIR_NUM)=0 then trigger_list=append(trigger_list,{0,"script"}) end if exit_timing_zone() end procedure --------------------------------------------------------------------------- procedure parse_script(sequence trigger) sequence name sequence arglist sequence s sequence this sequence err_string integer depth integer temp name=get_cmd() if not find_in_column(name[CMD_TEXT],script_list,PAIR_NAME) then --Doesn't appear in a definescript block; if it did the name is already reserved --Note that we won't add it to script_list yet; that happens later in check_script_declarations check_undefined_string(name,"user script name") end if current_script=name[CMD_TEXT] arglist={} while true do if get_cmd_pointer>length(cmd) then src_error(sprintf_utf("script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}),name[CMD_POS]) end if this=get_cmd() if compare("begin",this[CMD_TEXT])=0 then exit--break the while end if --Note that arglist isn't the actual list of arguments, it's just a string of tokens right now. --The actual arglist is built in process_arglist, while compiling each script. arglist=append(arglist,this) end while --every script is nested inside a big fat do() block s={{"do",this[CMD_POS]}} depth=0 while true do s=append(s,this) if compare("end",this[CMD_TEXT])=0 then depth-=1 if depth=0 then exit--break while end if elsif compare("begin",this[CMD_TEXT])=0 then depth+=1 else temp=alpha_tree_data(reserved,this[CMD_TEXT],3) if temp<=RESERVE_UNIMPLEMENTED then err_string=sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",{RESERVE_NAMES[alpha_tree_data(reserved,this[CMD_TEXT],0)],this[CMD_TEXT]} ) if temp=RESERVE_CORE then err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{name[CMD_TEXT]}) end if src_error(err_string,this[CMD_POS]) end if end if if get_cmd_pointer>length(cmd) then src_error( sprintf_utf( "script "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED ,{name[CMD_TEXT]} ) ,name[CMD_POS] ) end if this=get_cmd() end while script_cmd=append(script_cmd,{trigger,name,arglist,s}) current_script="" end procedure --------------------------------------------------------------------------- procedure parse_top_level() sequence this sequence ignore sequence triggers enter_timing_zone("Top level pass") color_print("parsing top-level\n",{}) triggers=column(trigger_list,PAIR_NAME) get_cmd_pointer=1 while get_cmd_pointer<=length(cmd) do --read a top-level command this=get_cmd() if compare("defineconstant",this[CMD_TEXT])=0 then ignore=get_cmd_block(true) elsif compare("definetrigger",this[CMD_TEXT])=0 then ignore=get_cmd_block(true) elsif compare("globalvariable",this[CMD_TEXT])=0 then parse_global_block(get_cmd_block(true)) elsif compare("defineoperator",this[CMD_TEXT])=0 then parse_operator_block(get_cmd_block(true)) elsif compare("definefunction",this[CMD_TEXT])=0 then function_list=parse_define_block(get_cmd_block(true),function_list,RESERVE_FUNCTION) elsif compare("definescript",this[CMD_TEXT])=0 then script_list=parse_define_block(get_cmd_block(true),script_list,RESERVE_SCRIPT) elsif compare("plotscrversion",this[CMD_TEXT])=0 then --ignore get_cmd_block(false) elsif find(this[CMD_TEXT],triggers)>0 then parse_script(this) else check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration") src_error( sprintf_utf( "Expected top-level declaration but found "&COLYEL&"%s"&COLRED ,{this[CMD_TEXT]} ) ,this[CMD_POS] ) end if end while cmd={} exit_timing_zone() end procedure --------------------------------------------------------------------------- procedure dump_script_and_function_info(integer fh,sequence list) sequence this sequence id_string sequence src_position for i=1 to length(list) do this=list[i] if this[PAIR_NUM]>autonumber_id then id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]}) else id_string=sprintf("ID=%d",{this[PAIR_NUM]}) end if src_position=decode_srcpos(this[FUNC_POS]) print_utf(fh,"%s %d\t%s\t%s(",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,id_string ,this[PAIR_NAME] }) if equal(this[FUNC_ARGS],VAR_ARGS) then print_utf(fh,"VARIABLE ARGS",{}) else for j=1 to length(this[FUNC_ARGS]) do if j>1 then print_utf(fh,",",{}) end if if equal(this[FUNC_ARGS][j],NO_DEFAULT) then print_utf(fh,"NONE",{}) else print_utf(fh,"%d",{this[FUNC_ARGS][j]}) end if end for end if print_utf(fh,")\n",{}) end for end procedure --------------------------------------------------------------------------- function seek_string_by_id(integer id,sequence list,sequence name) integer at at=find_in_column(id,list,PAIR_NUM) if at then return(list[at][PAIR_NAME]) else simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id})) end if return("") end function --------------------------------------------------------------------------- function name_lookup(sequence pair) integer at if pair[1]=KIND_NUMBER then return(sprintf("%d",{pair[2]})) elsif pair[1]=KIND_LOCAL then return(sprintf("local%d",{pair[2]})) elsif pair[1]=KIND_GLOBAL then at=find(pair[2],global_list[PAIR_NUM]) if at then return(global_list[PAIR_NAME][at]) else simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]})) end if elsif pair[1]=KIND_FLOW then return(seek_string_by_id(pair[2],flow_list,"flow control structure")) elsif pair[1]=KIND_SCRIPT then return(seek_string_by_id(pair[2],script_list,"user script")) elsif pair[1]=KIND_FUNCTION then return(seek_string_by_id(pair[2],function_list,"hardcoded function")) elsif pair[1]=KIND_MATH then return(seek_string_by_id(pair[2],math_list,"built-in function")) else simple_error(sprintf_utf("decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]})) end if end function --------------------------------------------------------------------------- function binstring_to_int(sequence encoded) --bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles atom temp temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000 if and_bits(temp,#80000000) then return(temp-#100000000) end if return(temp) end function --------------------------------------------------------------------------- function read_word(sequence encoded) --opposite of output_word integer temp temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100 if and_bits(temp,#8000) then return(temp-#10000) end if return(temp) end function --------------------------------------------------------------------------- function dump_script_binary(sequence bin,integer offset,integer depth) sequence result sequence kind_and_id integer kind integer argcount integer new_offset result="" kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])} kind=kind_and_id[1] result&=sprintf_utf("%s%s",{ repeat(' ',depth) --indent ,name_lookup(kind_and_id) }) if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11]) if argcount then result&="(\n" for i=0 to argcount-1 do new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3]) result&=dump_script_binary(bin,new_offset,depth+2) end for result&=repeat(' ',depth)&")\n" else result&="()\n" end if else result&="\n" end if return(result) end function --------------------------------------------------------------------------- function dump_script_tree(sequence tree,integer depth) sequence result result="" for i=1 to length(tree) do result&=sprintf_utf("%s%s",{ repeat(' ',depth)--indent ,tree[i][TREE_TRUNK][CMD_TEXT] }) if length(tree[i][TREE_BRANCHES])>0 then result&="(\n" result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2) result&=repeat(' ',depth)&")\n" else result&="\n" end if end for return(result) end function --------------------------------------------------------------------------- function dump_script_strings(sequence bin) sequence result integer table_start integer offset integer len result="" table_start=binstring_to_int(bin[9..12]) if table_start=0 then return("") end if bin=bin[table_start+1..$] offset=1 while offset1 then debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt") else debug_file="hs_debug.txt" end if fh=open(debug_file,"w") if fh!=failure then wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file}) ------------------------------------- print_utf(fh,"[Scripts]\n",{}) dump_script_and_function_info(fh,script_list) print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Global Variables]\n",{}) for i=1 to length(global_list[PAIR_NUM]) do src_position=decode_srcpos(global_list[GLB_POS][i]) print_utf(fh,"%s %d\tID=%d\t%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,global_list[PAIR_NUM][i] ,global_list[PAIR_NAME][i] }) end for print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Builtin Functions]\n",{}) dump_script_and_function_info(fh,function_list) print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Operators]\n",{}) for i=1 to length(operator_list) do src_position=decode_srcpos(operator_list[i][OPER_POS]) print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,operator_list[i][PAIR_NAME] ,operator_list[i][OPER_TRUENAME] ,operator_list[i][PAIR_NUM] }) end for print_utf(fh,"\n",{}) ------------------------------------- print_utf(fh,"[Script Dumps]\n",{}) for i=1 to length(all_scripts) do src_position=decode_srcpos(all_scripts[i][4]) print_utf(fh,"%s %d\tID=%d\t%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,all_scripts[i][1] ,all_scripts[i][3] }) src_position=decode_srcpos(script_cmd[i][1][CMD_POS]) print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,all_scripts[i][2] ,script_cmd[i][1][CMD_TEXT] }) for j=1 to length(all_scripts[i][6]) do src_position=decode_srcpos(all_scripts[i][6][j][CMD_POS]) print_utf(fh,"%s %d\tvar=%s\n",{ src_position[POS_FILENAME] ,src_position[POS_LINE] ,all_scripts[i][6][j][CMD_TEXT] }) end for print_utf(fh,"%d bytes compiled\n",{length(all_scripts[i][7])}) -- print_utf(fh,"%s\n\n",{dump_script_tree(all_scripts[i][5],0)}) print_utf(fh,"%s",{dump_script_binary(all_scripts[i][7][CODE_START_BYTE_OFFSET+1..$],0,0)}) print_utf(fh,"%s\n\n",{dump_script_strings(all_scripts[i][7])}) end for print_utf(fh,"\n",{}) ------------------------------------- close(fh) else wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file}) end if end if end procedure --------------------------------------------------------------------------- function get_cmd_depth(integer ptr,sequence data,integer depth) sequence result sequence this result={} while true do --if get_key()=27 then abort(1/0) end if if ptr>length(data) then src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS]) end if this=data[ptr] ptr+=1 if compare("end",this[CMD_TEXT])=0 then depth-=1 elsif compare("begin",this[CMD_TEXT])=0 then depth+=1 end if if depth=0 then exit --break out of the while else result=append(result,this) end if end while return({ptr,result}) end function --------------------------------------------------------------------------- --identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis function what_kind_and_id(sequence command,sequence local_vars) integer kind atom id integer keyword sequence s s=command[CMD_TEXT] keyword=alpha_tree_data(reserved,s,0) if string_is_int32(s) then kind=KIND_NUMBER id=string_to_object(s,{}) elsif length(s) and s[1] = '@' then kind=KIND_REFERENCE id=0 -- ID always resolves to 0 for references here, since it is too early to know all -- script IDs. The real work is done in binary_compile_recurse elsif find_in_column(s,local_vars,CMD_TEXT) then kind=KIND_LOCAL id=find_in_column(s,local_vars,CMD_TEXT) elsif keyword=RESERVE_GLOBAL then kind=KIND_GLOBAL id=global_list[PAIR_NUM][find(s,global_list[PAIR_NAME])] elsif keyword=RESERVE_FLOW then kind=KIND_FLOW id=flow_list[find_in_column(s,flow_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_FUNCTION then kind=KIND_FUNCTION id=function_list[find_in_column(s,function_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_SCRIPT then kind=KIND_SCRIPT id=script_list[find_in_column(s,script_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_BUILTIN then kind=KIND_MATH id=math_list[find_in_column(s,math_list,PAIR_NAME)][PAIR_NUM] elsif keyword=RESERVE_MACRO then kind=KIND_MACRO id=0 elsif length(s) and s[1] = '"' then src_error( sprintf_utf( "The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct", {shorten_string(s,15)}), command[CMD_POS] ) else src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS]) end if return({kind,id}) end function --------------------------------------------------------------------------- --identify the kind of a text command function what_kind(sequence command,sequence local_vars, integer look_for_operators) integer kind integer keyword sequence s s=command[CMD_TEXT] keyword=alpha_tree_data(reserved,s,0) if string_is_int32(s) then kind=KIND_NUMBER elsif length(s) and s[1] = '@' then kind=KIND_REFERENCE elsif find_in_column(s,local_vars,CMD_TEXT) then kind=KIND_LOCAL elsif length(s)=0 then kind=KIND_PARENS elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name elsif keyword=RESERVE_GLOBAL then kind=KIND_GLOBAL elsif keyword=RESERVE_FLOW or keyword=RESERVE_BEGIN or keyword=RESERVE_END then kind=KIND_FLOW elsif keyword=RESERVE_FUNCTION then kind=KIND_FUNCTION elsif keyword=RESERVE_SCRIPT then kind=KIND_SCRIPT elsif keyword=RESERVE_BUILTIN then kind=KIND_MATH elsif keyword=RESERVE_MACRO then kind=KIND_MACRO elsif length(s) and s[1] = '"' then src_error( sprintf_utf( "The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct", {shorten_string(s,15)}), command[CMD_POS] ) else src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS]) end if return(kind) end function --------------------------------------------------------------------------- --Check whether a math operator is not an assignment, a compare operator, or 'random' function is_simple_math_op(integer id) return (id >= 1 and id <= 9) or id >= 19 end function --------------------------------------------------------------------------- --this function not used anywhere function how_many_args(sequence name,integer kind) integer result integer at if kind=KIND_PARENS then result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P elsif kind=KIND_FLOW then result=-1 --flow supports an unknown number of args elsif kind=KIND_OPERATOR then result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions elsif kind=KIND_MATH then at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME) result=length(math_list[at][FUNC_ARGS]) elsif kind=KIND_FUNCTION then at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME) if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then result=-1 else result=length(function_list[at][FUNC_ARGS]) end if elsif kind=KIND_SCRIPT then at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME) result=length(script_list[at][FUNC_ARGS]) else --numbers, variables, etc do not permit args result=0 end if return(result) end function --------------------------------------------------------------------------- function takes_args(integer kind) if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO then return(true) else return(false) end if end function --------------------------------------------------------------------------- function get_script_cmd(integer ptr,sequence data,sequence vars) sequence command sequence this sequence after integer kind after={} command=data[ptr] ptr+=1 if compare("end",command[CMD_TEXT])=0 then src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS]) elsif compare("begin",command[CMD_TEXT])=0 then --floaty brackets for order-of-operations-enforcement ptr-=1 command[CMD_TEXT]="" elsif compare("variable",command[CMD_TEXT])=0 then --must ignore variable declaration if ptr<=length(data) then --there is room for args ptr+=1--only increment the pointer when we have args after=get_cmd_depth(ptr,data,1) ptr=after[1] --this is a hack, because we cannot say {n,n}=func() end if --non-command return({ptr,{}}) end if if ptr<=length(data) then --there is room for args kind=what_kind(command,vars,true) --distinguishing between functions with and without args means wait() would be ok but noop() would not if takes_args(kind) then this=data[ptr] if compare("begin",this[CMD_TEXT])=0 then --yes, it has args ptr+=1--only increment the pointer when we have args after=get_cmd_depth(ptr,data,1) ptr=after[1] --this is a hack, because we cannot say {n,n}=func() after=after[2] if length(after)=0 and kind=KIND_PARENS then src_error("found empty parentheses not associated with a function call",command[CMD_POS]) end if end if end if end if return({ptr,{command,after}}) end function --------------------------------------------------------------------------- function compile_commands(sequence script_data,sequence vars) integer ptr sequence this sequence command sequence result result={} ptr=1 while true do this=get_script_cmd(ptr,script_data,vars) ptr=this[1] command=this[2] if length(command)>0 then if length(command[2])>0 then --this command has arguments that need parsing command[2]=compile_commands(command[2],vars) end if result=append(result,command) end if if ptr>length(script_data) then exit --break out of while when there is no more data end if end while return(result) end function --------------------------------------------------------------------------- function gather_local_vars(sequence vars,sequence data) sequence this integer at integer ptr sequence src_position ptr=1 while true do this=data[ptr] ptr+=1 if compare("variable",this[CMD_TEXT])=0 then if ptr>length(data) then src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if this=data[ptr] ptr+=1 if compare("begin",this[CMD_TEXT])=0 then while true do if ptr>length(data) then src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if this=data[ptr] ptr+=1 if compare("end",this[CMD_TEXT])=0 then exit--break the while end if check_undefined_string(this,"local variable name") at=find_in_column(this[CMD_TEXT],vars,CMD_TEXT) if at then src_position=decode_srcpos(vars[at][CMD_POS]) src_error( sprintf_utf( "local variable/argument "&COLYEL&"%s"&COLRED&" is already defined in line %d of "&COLPNK&"%s"&COLRED, {this[CMD_TEXT],src_position[POS_LINE],src_position[POS_FILENAME]} ) ,this[CMD_POS] ) else vars=append(vars,this) end if end while else src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS]) end if end if if ptr>length(data) then exit --break out of the while end if end while return(vars) end function --------------------------------------------------------------------------- --there are only two, hardcoded, macros right now: tracevalue, assert function expand_macros(sequence tree) sequence newtree sequence newargs sequence new_string sequence string_token sequence src_position sequence temp srcpos pos integer ptr ptr=1 while ptr<=length(tree) do --recurse first, because tree[ptr] might be split up (as in 'assert') if length(tree[ptr][TREE_BRANCHES]) then tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES]) end if if equal("tracevalue",tree[ptr][TREE_TRUNK][CMD_TEXT]) then newargs={} tree[ptr][TREE_TRUNK][CMD_TEXT]="tracevalueinternal" pos=tree[ptr][TREE_TRUNK][CMD_POS] for i=1 to length(tree[ptr][TREE_BRANCHES]) do new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i]) string_list=append(string_list,new_string) string_token={sprintf("@$string%d",{length(string_list)}),pos} newargs&={{string_token,{}},tree[ptr][TREE_BRANCHES][i]} end for tree[ptr][TREE_BRANCHES]=newargs end if if equal("assert",tree[ptr][TREE_TRUNK][CMD_TEXT]) then --assert(condition) --> if (not(condition)) then ($assert expression string="condition", assertfailure) pos=tree[ptr][TREE_TRUNK][CMD_POS] src_position=decode_srcpos(pos) if length(tree[ptr][TREE_BRANCHES])!=1 then src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos) end if newtree={{{"if",pos},{}}, {{"then",pos},{}}} newtree[1][TREE_BRANCHES]={{{"not",pos},{tree[ptr][TREE_BRANCHES][1]}}} new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])}) string_list=append(string_list,new_string) string_token={sprintf("@$string%d",{length(string_list)}),pos} temp={{{"setstringfromtable",pos},{}}, {{"assertfailure",pos},{}}} temp[1][TREE_BRANCHES]={{{enforce_constants("assertexpressionstring"),pos},{}},{string_token,{}}} --stringnum, string newtree[2][TREE_BRANCHES]=temp tree=replace_slice(tree,ptr,ptr,newtree) --tree[ptr..ptr]=newtree ptr+=1 end if ptr+=1 end while return(tree) end function --------------------------------------------------------------------------- --tree[ptr] is a partially constructed if node (lacking else child), followed either by an else or elseif node. --Handle else/elseif, parenting to tree[pt], and return the new tree. function build_else_node(sequence tree, integer ptr) sequence elseargs sequence nextcmd integer eptr srcpos pos nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT] pos=tree[ptr+1][TREE_TRUNK][CMD_POS] if equal("else",nextcmd) then --found else tree[ptr][TREE_BRANCHES]&={tree[ptr+1]} tree=delete_element(tree,ptr+1) else -- equal("elseif",nextcmd) --Split off the if elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} } --Grab the rest of this if,elseif,...,else chain, to put inside the else eptr=ptr+2 while eptr<=length(tree) do nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT] --Stop when we see an else but otherwise we don't check correctness until we recurse if equal(nextcmd,"else") then eptr+=1 exit elsif find(nextcmd,{"elseif","then"}) then --continue else exit end if eptr+=1 end while elseargs&=tree[ptr+2..eptr-1] tree=tree[1..ptr]&tree[eptr..$] tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} } end if return tree end function --------------------------------------------------------------------------- --Build switch AST nodes, return new tree function build_switch_node(sequence tree,sequence vars,integer ptr) srcpos pos srcpos argpos sequence args sequence thiscmd sequence rawargs sequence caseargs sequence casecmd sequence lastnodename sequence collected sequence temp integer have_else srcpos elsepos integer expect_case integer kind integer id sequence else_error_msg else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block" pos=tree[ptr][TREE_TRUNK][CMD_POS] if length(tree[ptr][TREE_BRANCHES])>1 then src_error(sprintf_utf( COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one." ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=0 then src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos) end if if ptr>=length(tree) then src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos) end if thiscmd=tree[ptr+1][TREE_TRUNK] if equal("do",thiscmd[CMD_TEXT])=0 then src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS]) end if --found do, processing starts here (build arguments to switch, after the expression) args=tree[ptr][TREE_BRANCHES] --start with the condition, length 1 rawargs=tree[ptr+1][TREE_BRANCHES] --contents of the 'do' lastnodename="" have_else=false expect_case=false collected={} for j=1 to length(rawargs) do thiscmd=rawargs[j][TREE_TRUNK] argpos=thiscmd[CMD_POS] if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"." ,{thiscmd[CMD_TEXT]}),argpos) end if --to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then --Old syntax else(...) block if length(collected) then args&={collected} collected={} end if if have_else then src_error(else_error_msg,elsepos) end if expect_case=false if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then --Last case is new-style syntax and wasn't followed by any statements --Lets just disallow this to prevent a little confusion src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "& "Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions." ,rawargs[j-1][TREE_TRUNK][CMD_POS]) end if if j!=length(rawargs) then if length(rawargs[j][TREE_BRANCHES])=0 then else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?" end if src_error(else_error_msg,argpos) end if --convert it to a do args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} } elsepos=argpos have_else=true elsif equal("case",thiscmd[CMD_TEXT]) then if length(collected) then args&={collected} collected={} end if if have_else then --Note we disallow case(else) case(1) but allow case(else,1) src_error(else_error_msg,elsepos) end if expect_case=false caseargs=rawargs[j][TREE_BRANCHES] --Call a couple passes early here, so that constant expressions are allowed caseargs=collapse_floaty_brackets(caseargs) caseargs=optimize_script(caseargs,vars) --Check contents are only either integers, or "else", and append to args for k=1 to length(caseargs) do casecmd=caseargs[k][TREE_TRUNK] if equal(casecmd[CMD_TEXT],"else") then if have_else then src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS]) end if have_else=true elsepos=argpos --don't emit anything: next do is the 'else' one else temp=what_kind_and_id(casecmd,vars) kind=temp[1] id=temp[2] if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED ,{casecmd[CMD_TEXT]}),casecmd[CMD_POS]) end if args&={caseargs[k]} end if end for else --Disallow "case() do() garbage" if expect_case then src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos) end if if length(collected)=0 then --Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it if equal("do",thiscmd[CMD_TEXT]) then args&={rawargs[j]} expect_case=true else collected={{"do",argpos},{rawargs[j]}} end if else --Continue collecting collected[TREE_BRANCHES]&={rawargs[j]} end if end if lastnodename=thiscmd[CMD_TEXT] end for if length(collected) then args&={collected} end if if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then --Last case (possibly an else) is new-style syntax and wasn't followed by any statements, --so add the missing do. Don't need to do this, but I'd like to keep things tidy --(srcpos is of the last case) args&={ {{"do",argpos},{}} } end if if have_else=false then --Insert a dummy do default block since else has been left out args&={ {{"do",pos},{}} } end if tree[ptr][TREE_BRANCHES]=args --delete the do block return delete_element(tree,ptr+1) end function --------------------------------------------------------------------------- --parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements function normalize_flow_control(sequence tree,sequence vars,sequence parent) integer ptr sequence s sequence nextcmd srcpos pos integer argkind integer var_at ptr=1 while ptr<=length(tree) do s=tree[ptr][TREE_TRUNK][CMD_TEXT] pos=tree[ptr][TREE_TRUNK][CMD_POS] if compare("if",s)=0 then if length(tree[ptr][TREE_BRANCHES])>1 then src_error(sprintf_utf( COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" for complex conditions" ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=0 then src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos) end if if ptr1 then src_error(sprintf_utf( COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" for complex conditions" ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=0 then src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos) end if if ptr4 then src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(tree[ptr][TREE_BRANCHES])}),tree[ptr][TREE_BRANCHES][5][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=3 then --append default step value tree[ptr][TREE_BRANCHES]&={ {{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}} } end if argkind=what_kind(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK],vars,true) if argkind=KIND_LOCAL then --translate into a numeric reference to a variable used_locals=append(used_locals,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]) var_at=find_in_column(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],vars,CMD_TEXT) tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at}) elsif argkind=KIND_GLOBAL then --warn, then translate into a numeric reference to a variable src_warn(sprintf_utf( "Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop" ,{tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]} ),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS]) var_at=find(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME]) tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]}) else --only variables allowed as the first argument of a "for" src_error( sprintf_utf("first argument of "&COLYEL&"for"&COLRED&" statement must be a variable, not %s "&COLYEL&"%s"&COLRED,{ KIND_LONGNAMES[argkind] ,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT] }) ,tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS] ) end if if ptr1 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have only one." ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=0 then src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit script"&COLRED,{}),pos) end if elsif compare("break",s)=0 or compare("continue",s)=0 then if length(tree[ptr][TREE_BRANCHES])>1 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one." ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][2][TREE_TRUNK][CMD_POS]) elsif length(tree[ptr][TREE_BRANCHES])=0 then --append default value tree[ptr][TREE_BRANCHES]={{{"1",tree[ptr][TREE_TRUNK][CMD_POS]},{}}} end if elsif compare("exitscript",s)=0 then if length(tree[ptr][TREE_BRANCHES])>0 then src_error(sprintf_utf( COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit returning"&COLRED ,{length(tree[ptr][TREE_BRANCHES])} ),tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_POS]) end if elsif compare("switch",s)=0 then tree=build_switch_node(tree,vars,ptr) elsif compare("case",s)=0 then src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos) end if tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,tree[ptr][TREE_TRUNK][CMD_TEXT]) ptr+=1 end while return(tree) end function --------------------------------------------------------------------------- --A modification of Dijkstra's shunting-yard algorithm --(This could probably subsume compile_commands too) function convert_operators(sequence tree) sequence new_tree sequence stack sequence opnode integer ptr integer at integer priority integer nonoperator_before stack={} new_tree={} ptr=1 nonoperator_before=false while ptr<=length(tree) do --for each peer branch on the tree at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME) if at>0 then if nonoperator_before=false then --there is no room for the before-operand. Bad! src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS]) elsif ptr=length(tree) then --there is no room for the after-operand. Bad! src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS]) end if priority=operator_list[at][PAIR_NUM] --convert the operator to its true function name (which might be the same) tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME] while length(stack) and priority>=stack[$][1] do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while stack&={{priority,tree[ptr]}} nonoperator_before=false else if nonoperator_before then --Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first while length(stack) do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while end if tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES]) new_tree&={tree[ptr]} nonoperator_before=true end if ptr+=1 end while --copy of above while length(stack) do --pop the operator from the stack and "apply" it to the tree opnode=stack[$][2] stack=stack[1..$-1] opnode[TREE_BRANCHES]=new_tree[$-1..$] new_tree=new_tree[1..$-2]&{opnode} end while return new_tree end function --------------------------------------------------------------------------- function fix_arguments(sequence tree,integer kind,sequence list,sequence vars) integer at,var_at integer argkind integer argnum,maxargs argnum=length(tree[TREE_BRANCHES]) at=find_in_column(tree[TREE_TRUNK][CMD_TEXT],list,PAIR_NAME) if equal(list[at][FUNC_ARGS],VAR_ARGS) then return(tree[TREE_BRANCHES]) else maxargs=length(list[at][FUNC_ARGS]) end if if maxargs < argnum then src_error(sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments" ,{KIND_LONGNAMES[kind],tree[TREE_TRUNK][CMD_TEXT],maxargs,argnum} ),tree[TREE_BRANCHES][maxargs+1][TREE_TRUNK][CMD_POS]) elsif maxargs > argnum then --add defaults if not enough args are present if kind=KIND_MATH then --special processing for math if list[at][PAIR_NUM]<16 or list[at][PAIR_NUM]>=19 then --math shouldnt have defaults src_error(sprintf_utf( "math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d" ,{tree[TREE_TRUNK][CMD_TEXT],argnum,maxargs} ),tree[TREE_TRUNK][CMD_POS]) else --variable assignment commands can have defaults if argnum = 0 then --no defaults for first argument of variable function src_error(sprintf_utf( "variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1" ,{tree[TREE_TRUNK][CMD_TEXT],argnum} ),tree[TREE_TRUNK][CMD_POS]) elsif argnum = 1 then --make defaults for second arg of variable function if list[at][PAIR_NUM]=16 then --setvariable tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{ {"0",tree[TREE_TRUNK][CMD_POS]} ,{} }) else --increment and decrement tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{ {"1",tree[TREE_TRUNK][CMD_POS]} ,{} }) end if end if end if else --normal processing for script and function for i=argnum+1 to maxargs do if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then src_error(sprintf_utf( "%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d" ,{KIND_LONGNAMES[kind],tree[TREE_TRUNK][CMD_TEXT],i} ),tree[TREE_TRUNK][CMD_POS]) end if tree[TREE_BRANCHES]=append(tree[TREE_BRANCHES],{ {sprintf("%d",{list[at][FUNC_ARGS][i]}),tree[TREE_TRUNK][CMD_POS]} ,{} }) end for end if end if --this is as good a time as any to make sure that var manipulation functions point to real variables if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<19 then argkind=what_kind(tree[TREE_BRANCHES][1][TREE_TRUNK],vars,false) if argkind=KIND_LOCAL then --its local. translate it to a numeric reference var_at=find_in_column(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],vars,CMD_TEXT) tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("-%d",{var_at}) elsif argkind=KIND_GLOBAL then --its global. translate it to a numeric reference var_at=find(tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],global_list[PAIR_NAME]) tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]=sprintf("%d",{global_list[PAIR_NUM][var_at]}) else --its not a variable. bad! src_error(sprintf_utf( "first argument of variable manipulation function "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED ,{tree[TREE_TRUNK][CMD_TEXT],KIND_LONGNAMES[argkind],tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT]} ),tree[TREE_BRANCHES][1][TREE_TRUNK][CMD_POS]) end if end if return(tree[TREE_BRANCHES]) end function --------------------------------------------------------------------------- function normalize_arguments(sequence tree,sequence vars) integer kind --unlike the flow normalization and operator translation we do not insert/delete elements from the current level, so we can use a "for" safely, and dont need a "while" for i=1 to length(tree) do kind=what_kind(tree[i][TREE_TRUNK],vars,false) if kind=KIND_SCRIPT then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,script_list,vars) elsif kind=KIND_FUNCTION then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,function_list,vars) elsif kind=KIND_MATH then tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,math_list,vars) end if --number, flow, global, local, parens need no argchecking if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars) end if end for return(tree) end function --------------------------------------------------------------------------- --returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order function output_word(integer n) integer b1,b2 b1=and_bits(n,#FF) b2=floor(and_bits(n,#FFFF)/256) return({b1,b2}) end function --------------------------------------------------------------------------- function convert_to_bytes(sequence s) sequence result result={} for i=1 to length(s) do result&=int_to_bytes(s[i]) end for return(result) end function --------------------------------------------------------------------------- --Encode a unicode (UTF32/native Euphoria) string in the format the engine --requires, which is currently Latin-1 function encode_ohr(sequence string, integer maxbytes=0) for i=1 to length(string) do if string[i]>=256 then string[i]='?' end if end for if maxbytes and maxbytes7 and equal("$string",str_temp[1..7]) then value_temp=value(str_temp[8..length(str_temp)]) result&=KIND_NUMBER result&=table_string(value_temp[2]) else src_error(sprintf_utf("reference "&COLYEL&"@%s"&COLRED&" could not be resolved: must be the name of a global variable or a script",{str_temp}),tree[TREE_TRUNK][CMD_POS]) end if end if end if elsif kind=KIND_GLOBAL then at=find(s,global_list[PAIR_NAME]) result&=kind result&=global_list[PAIR_NUM][at] elsif kind=KIND_LOCAL then at=find_in_column(s,vars,CMD_TEXT) result&=kind result&=at-1 elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then if kind=KIND_SCRIPT then at=find_in_column(s,script_list,PAIR_NAME) id=script_list[at][PAIR_NUM] elsif kind=KIND_FUNCTION then at=find_in_column(s,function_list,PAIR_NAME) id=function_list[at][PAIR_NUM] if max_used_function= initial_data_len then can_reference=false end if result[3+i]=recurse_result end for --Now either remove or replace the temporary stuff if can_reference then compiled_data=compiled_data[1..initial_data_len] else compiled_data[initial_data_len+1 .. initial_data_len+length(result)]=result return(initial_data_len) end if else src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),tree[TREE_TRUNK][CMD_POS]) end if if not fast_mode and can_reference then at=match(result,compiled_data) if at>0 then if initial_data_len!=length(compiled_data) then simple_error("compiler bug! Unexpected compiled_data length") end if --found existing data exactly like this command, so just return a reference to it if at=1 then simple_error("compiler bug! node matches root node") end if return(at-1) end if end if --append data directly compiled_data&=result return(initial_data_len) end function --------------------------------------------------------------------------- function binary_compile(integer id,sequence tree,sequence vars) sequence result integer at integer recurse_ret string_table={} compiled_data={} --binary data is in a mix of 16-bit and 32-bit signed words. --the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit. --output header --the first word is the zero-rooted byte-offset of the first executable code byte result=output_word(CODE_START_BYTE_OFFSET) --the second word is the number of local variables result&=output_word(length(vars)) --the third word is the number of arguments the script takes (also in SCRIPTS.TXT) at=find_in_column(id,script_list,PAIR_NUM) result&=output_word(length(script_list[at][FUNC_ARGS])) --the fourth word is the format of the command data (presently used to indicate 32-bit encoding) result&=output_word(SCRIPT_FORMAT_VERSION) --the fifth&sixth words are a 32bit pointer to the string literal table (in bytes), we don't know it yet result&=output_word(0) result&=output_word(0) --what follows is command data in the format [kindID,Value,argcount,argpointerlist] --numbers and variables have no argcount or argpointerlist --an argpointer is the zero-rooted word-offset of the argument relative --to the start of the executable commands. I realise that this format is --unnecessarily complicated. I had hoped to get benefits of being able to --store frequently reused commands only once and then just point to them, --but in actual practice, it isnt worth the trouble, since the only --commands that tend to be redundant are the really short ones. --the first command is always a "do". there can be only one top-level command if length(tree)!=1 then simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)})) end if reenter_timing_zone("binary_compile_recurse") recurse_ret=binary_compile_recurse(tree[1],vars) if recurse_ret!=0 then simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret})) end if exit_timing_zone() result&=convert_to_bytes(compiled_data) compiled_data={} --append with a table of string literals and give its offset if length(string_table)>0 then result[9..12]=int_to_bytes(length(result)) result&=string_table end if return(result) end function --------------------------------------------------------------------------- --floaty brackets are un-needed after the operators have been translated. function collapse_floaty_brackets(sequence tree) integer i sequence graft i=1 while i<=length(tree) do if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then --found a floaty-bracket graft=tree[i][TREE_BRANCHES] if length(graft)!=1 then src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS]) end if tree[i]=graft[1] --delete_element(tree,i) --tree=insert_sequence(tree,graft,i) else if length(tree[i][TREE_BRANCHES]) then tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES]) end if i+=1 end if end while return(tree) end function --------------------------------------------------------------------------- function sanity_check(sequence tree,sequence vars,sequence parent) sequence s sequence kind_and_id integer kind atom id for i=1 to length(tree) do s=tree[i][TREE_TRUNK][CMD_TEXT] kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars) kind=kind_and_id[1] id=kind_and_id[2] if (compare("if",parent)=0) and i=1 then if kind=KIND_NUMBER then if id then src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),tree[i][TREE_TRUNK][CMD_POS]) else src_warn("Condition is always false",tree[i][TREE_TRUNK][CMD_POS]) end if elsif kind=KIND_FLOW then src_warn(sprintf_utf("Should not use flow control command "&COLYEL&"%s"&COLRED&" as condition for if",{s}),tree[i][TREE_TRUNK][CMD_POS]) end if elsif compare("do",parent)=0 or compare("then",parent)=0 or compare("else",parent)=0 then if kind=KIND_NUMBER then src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here." ,{id}),tree[i][TREE_TRUNK][CMD_POS]) elsif kind=KIND_GLOBAL then src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here." ,{s}),tree[i][TREE_TRUNK][CMD_POS]) elsif kind=KIND_LOCAL then src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here." ,{vars[id][CMD_TEXT]}),tree[i][TREE_TRUNK][CMD_POS]) elsif kind=KIND_MATH and (id<=15 or id>=19) then src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded" ,{s}),tree[i][TREE_TRUNK][CMD_POS]) end if end if if kind=KIND_GLOBAL then if not find(s,used_globals) then used_globals=append(used_globals,s) end if elsif kind=KIND_LOCAL then if not find(s,used_locals) then used_locals=append(used_locals,s) end if end if if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT]) end if end for return(tree) end function --------------------------------------------------------------------------- --simulate a result overflowing a signed 32 bit register function overflow_int32(atom val) --maybe check whether val is so large that this is could be inaccurate? atom ret ret=mod(val,#100000000) if ret>2147483647 then ret-=#100000000 end if return(ret) end function --------------------------------------------------------------------------- function optimized_arg(sequence tree,sequence vars) sequence kind_and_id integer kind atom id object arg1,arg2 kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars) kind=kind_and_id[1] id=kind_and_id[2] if kind=KIND_NUMBER then return(id) elsif kind=KIND_MATH and (id<=15 or id>=19) then arg1=optimized_arg(tree[TREE_BRANCHES][1],vars) if equal(arg1,{}) then arg1=tree[TREE_BRANCHES][1] end if if length(tree[TREE_BRANCHES])>=2 then arg2=optimized_arg(tree[TREE_BRANCHES][2],vars) else arg2=0 end if if equal(arg2,{}) then arg2=tree[TREE_BRANCHES][2] end if if atom(arg1) and atom(arg2) then if id=0 then --random if arg1=arg2 then return(arg1) end if elsif id=1 then --exponent if arg2>0 then --if too large, Euphoria will signal an overflow, but I'm also worried --about overflow_int32 giving inaccurate results if log(abs(arg1)+1)+log(abs(arg2)+1)arg2) elsif id=14 then --lessthanorequalto return(arg1<=arg2) elsif id=15 then --greaterthanorequalto return(arg1>=arg2) elsif id=19 then --not return(arg1=0) elsif id=22 then --logxor return((arg1=0)!=(arg2=0)) elsif id=23 then --abs return(overflow_int32(abs(arg1))) elsif id=24 then --sign return((arg1>0)-(arg1<0)) elsif id=25 then --sqrt if arg1<0 then src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS]) end if return(floor(sqrt(arg1)+0.5)) end if end if --have to allow for side effects of shortcut evaluating operators by only checking first arg if atom(arg1) then if id=4 then --multiply if arg1=0 then return 0 elsif arg1=1 then return arg2 end if elsif id=6 then --add if arg1=0 then return arg2 end if elsif id=20 then --logand if arg1=0 then return 0 end if elsif id=21 then --logor if arg1!=0 then return 1 end if end if end if if atom(arg2) then if id=4 then --multiply if arg2=0 then return 0 elsif arg2=1 then return arg1 end if elsif id=5 then --subtract if arg2=0 then return arg1 end if elsif id=6 then --add if arg2=0 then return arg1 end if end if end if end if return({}) --return nonatom end function --------------------------------------------------------------------------- --goes through a script simplifying expressions that always have the same value function optimize_script(sequence tree,sequence vars) object arg for i=1 to length(tree) do arg=optimized_arg(tree[i],vars) if atom(arg) then tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg) tree[i][TREE_BRANCHES]={} elsif sequence(arg) and length(arg) then tree[i]=arg end if if length(tree[i][TREE_BRANCHES]) then --if there are sub-arguments, recurse tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars) end if end for return(tree) end function --------------------------------------------------------------------------- procedure warn_unused_locals(sequence vars) if find('u',optlist) then for i=1 to length(vars) do if not find(vars[i][CMD_TEXT],used_locals) then src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS]) end if end for end if end procedure --------------------------------------------------------------------------- procedure warn_unused_globals() integer at if find('u',optlist) then for i=1 to length(global_list[PAIR_NUM]) do at=find(global_list[PAIR_NAME][i],used_globals) if not at then src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],global_list[PAIR_NUM][i]}),global_list[GLB_POS][i]) end if end for end if end procedure --------------------------------------------------------------------------- procedure compile_a_script(integer id,sequence trigger_data,sequence name_data,sequence arg_data,sequence script_data) sequence script_tree sequence local_vars sequence binary integer trigger trigger=trigger_list[find(trigger_data[CMD_TEXT],column(trigger_list,PAIR_NAME))][PAIR_NUM] current_script=name_data[CMD_TEXT] local_vars=arg_data --start with argument names (so we can check for conflicts) reenter_timing_zone("gather_local_vars") local_vars=gather_local_vars(local_vars,script_data) exit_timing_zone() used_locals={} reenter_timing_zone("compile_commands") script_tree=compile_commands(script_data,local_vars) exit_timing_zone() reenter_timing_zone("convert_operators") script_tree=convert_operators(script_tree) exit_timing_zone() reenter_timing_zone("expand_macros") script_tree=expand_macros(script_tree) exit_timing_zone() reenter_timing_zone("normalize_flow_control") script_tree=normalize_flow_control(script_tree,local_vars,"") exit_timing_zone() reenter_timing_zone("normalize_arguments") script_tree=normalize_arguments(script_tree,local_vars) exit_timing_zone() reenter_timing_zone("collapse_floaty_brackets") script_tree=collapse_floaty_brackets(script_tree) exit_timing_zone() if not fast_mode then reenter_timing_zone("optimize_script") script_tree=optimize_script(script_tree,local_vars) exit_timing_zone() reenter_timing_zone("sanity_check") script_tree=sanity_check(script_tree,local_vars,"") exit_timing_zone() warn_unused_locals(local_vars) end if reenter_timing_zone("binary_compile") binary=binary_compile(id,script_tree,local_vars) exit_timing_zone() all_scripts=append(all_scripts,{ id --id ,trigger --trigger type ,name_data[CMD_TEXT] --name ,name_data[CMD_POS] --source line ,script_tree ,local_vars ,binary --compiled data to go into the HSZ lumps }) current_script="" end procedure --------------------------------------------------------------------------- --Parse argument declarations in a script argument list function process_arglist(sequence args) sequence arglist sequence defaults integer i integer using_defaults arglist={} defaults={} using_defaults=false i=1 while i<=length(args) do if compare("=",args[i][CMD_TEXT])=0 then src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS]) end if check_undefined_string(args[i],"argument name") if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS]) end if if i2048 then simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1})) records=2048 end if --header size in bytes (offset to location table) result=output_word(6) --file format version result&=output_word(0) --number of records in location table result&=output_word(records) offset=length(result)+records*2 id=0 i=1 encoded_names={} while i<=length(function_list) do if function_list[i][PAIR_NUM]0 then temp=without_extension(fname) --you can't slice a function result directly! :( fname=sprintf("%s~%d.%s",{temp[1..$-1],suffix,extension_only(fname)}) end if --color_print("writing %d %s\n",{i,fname}) if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then simple_error("unable to write a script source lump") end if end for end procedure --------------------------------------------------------------------------- procedure write_output_file() sequence header integer fh object lh enter_timing_zone("Writing output file") header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,SCRIPT_FORMAT_VERSION,max_used_function}) if length(all_scripts) then fh=open(dest_file,"wb") if fh!=-1 then wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file}) --write header and version if write_lump(fh,"HS",header)=false then simple_error("unable to write header") end if --write script index (old file) if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then simple_error("unable to write script index") end if --write script index (new file) if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then simple_error("unable to write binary script index") end if --write script commands list if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then simple_error("unable to write commands listing") end if --write each script for i=1 to length(all_scripts) do if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][1]}),all_scripts[i][7])=false then simple_error(sprintf_utf("unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][3]})) end if end for if not find('n',optlist) then --no debug info --write copy of scripts wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file}) lh=begin_lump(fh,"source.lumped") if equal(lh,false) then simple_error("unable to write scripts source file") end if write_script_files(fh) if end_lump(lh)=false then simple_error("unable to finish write of scripts source file") end if end if close(fh) else simple_error(sprintf_utf("attempt to open"&COLYEL&"%s"&COLRED&" failed",{dest_file})) end if else color_print("no scripts to output\n",{}) end if exit_timing_zone() end procedure --------------------------------------------------------------------------- init() load_and_lex() show_source_info() primary_parse_pass() parse_top_level() compile_each_script() dump_debug_report() write_output_file() run_time=time()-start_time color_print("done (%g seconds)\n",{run_time}) if find('t',optlist) then print_timing_data() end if opt_wait_for_key() if was_warnings = true then abort(2) end if