-- HamsterSpeak Compiler
-- (C) Copyright 2001-2024 James Paige, Ralph Versteegen, and the OHRRPGCE Developers
-- Dual licensed under the GNU GPL v2+ and MIT Licenses. Read LICENSE.txt for terms and disclaimer of liability.
-- This is a compiler for HamsterSpeak scripts used for plotscripting in
-- the O.H.R.RPG.C.E. For more info, visit http://HamsterRepublic.com
-- This code is written in Euphoria 4.0. You can get the Free Open-Source
-- Euphoria compiler from http://OpenEuphoria.com . I also highly recommend
-- David Cuny's EE editor
---------------------------------------------------------------------------
--Changelog
--3W 2024-02-06 Add srcpos debug info, local var names, and srcfiles.txt
--3Vb 2024-02-06 Disallow local/subscript or script/global with the same name
-- 2024-01-11 Fix lexing of identifiers ending in : & | or -
--3Va 2022-04-08 Add exit
--3V 2022-04-07 Add showvalueof, and tracevalueof alias
--3Ug 2021-12-12 Change license to Dual GPL+MIT (with consent of all contributors)
--3Uf 2020-02-29 Accept non-breaking space as whitespace
--3Ue 2019-09-09 Windows: fix double-inclusion detection
--3Ud 2019-07-12 Windows: double-include detection now case-insensitive
--3Uc 2019-03-25 Enforce at most 32 args allowed
--3Ub 2018-11-17 Fix some error messages
--3Ua 2018-08-18 Script can also have attributes
--3U 2018-08-12 @obsolete attribute
--3Tl 2018-05-14 Delete (possibly-bad) .hs output file on error
--3Tk 2018-03-18 Optimise binary_compile; puts nodes in opposite order
--3Tj 2018-03-18 Fix some number and - lexing bugs
--3Ti 2018-03-17 More compiler global_scope optimisations
--3Th 2018-03-15 Optimize compiler, rewriting global_scope lookup
--3Tg 2017-12-31 Fix crash due to missing KIND_LONGNAMES[KIND_KEYWORD]
--3Tf 2017-11-08 Specialise assert(x==y) to use _asserteq
--3Te 2017-10-29 Look for plotscr.hsd/scancode.hsi first in global dirs
--3Td 2017-10-07 Add parent ID to scripts.bin, for --reuse-ids
--3Tc 2017-10-02 Make -j wait for warnings too
--3Tb 2017-09-28 Added -j option; warn on unknown options
--3Ta 2017-07-28 Increased MAXGLOBAL to 50000
-- 2017-06-13 0x, 0o and 0b number prefixes
--3T 2017-06-11 Allow subscript vars/args to shadow outer locals
--3Se 2017-05-17 Disallow extra commas
--3Sd 2017-03-17 --incdir option; check ../share/games/ohrrpgce/ instead
--3Sc 2016-04-11 Check /usr/share/games/ohrrpgce/ for include files
--3Sb 2014-11-11 Better 'include' line lexing & garbage detection
--3Sa 2014-08-23 Fix printing of a lexer error message
--3S 2014-07-22 Automatically include plotscr.hsd and ignore
-- double inclusion; added --include and -b
--3Rg 2013-02-23 Fix a subscript related crash
--3Rf 2013-02-17 Fix several 31-bit int overflows
--3Re 2014-02-17 Trivial change to math builtin warnings
----- 2013-11-09 Prevent misuse of betweenable-operators in
-- declarations, and allow constants for global ids
--3Rc 2013-10-11 subscript implementation changes (add nonlocal kind)
--3Rb 2013-10-10 Remove variable() later, for syntax checking
--3Ra 2013-10-09 Fixed \r lineendings (AGAIN) and slow \r\n handling
--3R 2013-10-05 Add subscript
--3Qc 2013-10-04 Disallow flow statements used as expressions
--3Qb 2013-07-24 Require begin after then, do, etc. (else excluded)
--3Qa 2013-04-11 Warn when plotscr.hsd not included
----- 2013-02-28 Allow non-constant expressions in case blocks again
--3Pe 2013-01-03 Faster compile times
--3Pd 2012-10-24 Fix \x escape code
--3Pc 2012-10-09 Change srcpos encoding to remove source size limits
--3Pb 2012-10-08 \x escape code
--3Pa 2012-08-24 --reuse-ids option
--3P 2012-08-18 Change way include is lexed: quotes no longer needed
-- [], {}, . now totally disallowed
-- Add < and > operators
-- Add plotscrversion block and new HS header lump
-- New switch syntax: do after case optional, case(else)
-- Added elseif
--3Of 2012-08-18 Lexer bugfix
--3Oe 2012-04-11 Add \n and \t escape codes
--3Od 2012-03-03 Change directory search order slightly
--3Oc 2012-02-07 Increased MAXGLOBAL to 16383
--3Ob 2012-02-07 Make the max script global number set by a constant
--3Oa 2012-01-20 Fix lexer allowing newlines in strings
--3O 2012-01-19 Unicode support
--3Nd 2011-10-16 Fix commands.bin generation bug
--3Nc 2011-08-28 Additional expression optimisations
--3Nb 2011-08-28 Fix overflow_int32 and bitwise operator overflow
--3Na 2011-07-25 Add -x option for Hamster Whisper's benefit
--3N 2011-07-12 Add assert
--3Ma 2010-10-21 Correctly strip \r to fix newlines ... again.
--3M 2010-05-02 abs, sign, sqrt; fix error reporting
--3L 2010-04-29 tracevalue support, involving a few internal changes
--3Ke 2010-04-18 Add -t commandline option
--3Kd 2010-04-14 Quite a few bugfixes in optimizer
--3Kc 2010-04-14 Start checking well-formedness of numbers with lexer
--3Kb 2010-04-12 Whole lot of bugfixes in script declarations and
-- checking for unreserved identifier names
--3Ka 2010-04-12 Throw error if source file contains binary
--3K 2010-04-12 Error column reporting (introducing srcpos); lump
-- original source files into source.lumped
--3Ja 2010-04-07 Wrote a (nearly) proper lexer
--3J 2009-12-05 Multiple function names may now alias the same id
--3I 2009-07-27 Export a backup copy of full source to source.txt lump
-- disable with -n option
--3H 2009-06-18 Export function names to commands.bin lump
--3Gk 2009-05-25 Fix short references crash
--3Gj 2009-01-27 Show future-compatability warning when using square
-- brackets [] and curly brackets {} in names
--3Gi 2008-08-03 Disallow orphaned then() and else(), a common mistake
--3Gh 2008-07-14 Disable warnings for unused variables by default, and
-- Add -u option to enable warnings for unused variables.
--3Gg 2008-03-01 Raise global variable limits
--3Gf 2008-02-11 Give user another chance on missing Y key :P
--3Ge 2007-07-01 String conversion rewrite to allow more whitespace
--3Gd 2007-07-01 Allow constants for new syntax default args
--3Gc 2007-05-29 Handle empty paratheses correctly, and use
-- lowercase ".hs" extension
--3Gb 2007-05-09 Throw error on missing include file
--3Ga 2007-05-09 String length not limited to 40
--3G 2007-02-09 Offset to string literal tables given as a 32-bit
-- int, script format version++ to 2.
--3F 2006-12-11 Support for variable number of arguments to functions
-- by giving -1 as the number of args in the define block
--3Ec 2006-11-30 while(true) no longer produces a warning
--3Eb 2006-10-23 Two small fixes to some rare bugs causing crashes
--3Ea 2006-09-03 Actually export triggers in scripts.bin
--3E 2006-08-30 "define trigger" support
--3D 2006-08-16 String literal lookup tables added to end of
-- HSZ lumps replacing setstring + appendstring
--3C 2006-08-15 New syntax for giving default script arguments,
-- and for leaving out definescript
--3B 2006-08-13 HSX lumps renamed to HSZ
--3A 2006-08-10 HamsterSpeak becomes 32-bit. Also, script format
-- version added to HSX header. Current is 1
--2Na 2006-07-07 Minor update to add new logical operators
--2N 2006-07-07 Added not() logic function
--2M 2006-07-06 Set exit code on warnings
--2L 2006-05-13 Added switch statement (+ case keyword)
--2K 2006-05-01 Added @scriptname and @globalvariable syntax to
-- return script or global ID number at compile-time
-- not run-time (for use with "run script by ID" and
-- "read global" and "write global")
--2J 2006-04-10 Added break, continue, exitscript, exitreturning
-- flow statements. Also fixed some return bugs
--2I 2006-04-04 Extended HSX header to include number of arguments
-- to a script, to really fix arguments-overflow-into
-- -locals bug
--2H 2006-03-29 Display better help and wait for keypress when run
-- by double-clicking the icon. Added -k command line
-- option to skip waiting for keypress
--2G 2005-10-03 Additional operators $+ and $=
-- Mention GPL in help text
--2F 2005-07-24 Strings implemented:
-- $id="..." -> setstring
-- $id+"..." -> appendstring
--2E 2005-02-15 Changed license to GPL
-- Added += and -= operators
--2D 2002-08-03 Only a small change, strip out \r from lines of
-- the script as we read them to avoid errors related
-- to busted newlines
--2C 2002-03-05 Fixed some bugs that could cause crashes when
-- non-printable characters exist in the input file.
-- (naturally the script will not compile right, but
-- at least it will not crash)
--2B 2001-06-06 Added := as a commaless separater so it can be
-- defined as an operator
--2A 2001-05-04 Fixed -w command line option when used with -z
--2 First Release
---------------------------------------------------------------------------
without warning --to avoid annoying warnings
without type_check --for a small speed boost
--with profile_time --time profiling
include hsspiffy.e --various routines, sequence manipulation - James Paige
include machine.e --needed for int_to_bytes
include std/math.e --needed for mod
include std/graphics.e --needed for color output
include std/filesys.e
include std/map.e
include euphoria/unicode.e
include euphoria/ucstypes.e
include euphoria/io_with_unicode.e
---------------------------------------------------------------------------
--constants--
constant false=0
constant true=1
-- increment COMPILER_VERSION only for major language-altering changes
constant COMPILER_VERSION="3"
-- For small changes increment the second letter of COMPILER_SUB_VERSION
-- For large changes increment the first letter and reset the second letter
--*Please make sure this is 2 chararacters long! Append a space if required.*
constant COMPILER_SUB_VERSION="W "
constant COPYRIGHT_DATE="2024"
--Known single-character commandline options
constant VALID_OPTIONS="abcdfjknstwuyz"
--This is the oldest version of plotscr.hsd which is supported (feel free to
--increment this whenever using an older plotscr.hsd means you're missing out
--on new but nonessential features; there's no harm)
--*Should be 3 characters long, same captialisation as COMPILER_SUB_VERSION*
constant MIN_PLOTSCR_VERSION="3Tf"
--these constants are color-flags. May add more colors, up to #F8FF
--These values are also passed directly to Hamster Whisper.
--(these codes are at the end of the first private use area)
constant COL_FIRST=#F8E0
constant COLYEL=#F8E0
constant COLRED=#F8E1
constant COLPNK=#F8E2
constant COLWHI=#F8E3
constant COLBWHI=#F8E4
constant COL_LAST=#F8FF
--Map from the above to the color constants in std/graphics.e
--(which incidentally vary from the constants in graphics.e)
constant HS_TO_EU_COLORS={YELLOW, RED, BRIGHT_RED, WHITE, BRIGHT_WHITE}
--These are character classes used in lexer_table
constant LEX_CHAR=0 --Most character which are valid in identifiers. Everything above char 255
constant LEX_SPACE=1 --space and tab
constant LEX_BEGIN=2 --(
constant LEX_END=3 --)
constant LEX_OPERATOR=4 --The first character of an operator
constant LEX_COMMENT=5 --#
constant LEX_STRING=6 --"
constant LEX_COMMA=7 --,
constant LEX_WARN=8 --[]{}.
constant LEX_BINARY=9 --Nonprintable characters
constant LEX_DIGIT=10 --0-9
constant LEX_HEXDIGIT=11 --a-fA-F
constant LEX_OPERATOR2=12 --Only used in lexer_table_key2!!
constant LEX_NEWLINE=13 --\n
constant FILE_NAME=1 --With whatever path was needed to open it
constant FILE_TEXT=2 --Entire contents of the file
constant FILE_LINE_EXTENTS=3 --List of {start,end} pairs; inclusive indices, not including newlines
constant FILE_OFFSET=4 --Used by srcpos's
constant FILE_LUMP=5
--POS_* index sequence returned by decode_srcpos
constant POS_FILENAME=1 --FILE_NAME of the file
constant POS_LINE=2 --1-based line number
constant POS_COLUMN=3 --0-based column number
constant POS_LENGTH=4 --token length in characters (might be 0)
constant POS_LINETEXT=5 --The line containing the token
constant POS_TOKTEXT=6 --The text of the token
constant CMD_TEXT=1
constant CMD_POS=2
--For global_scope
constant SYM_KIND=1 --KIND_*
constant SYM_ID=2 --ID or 0
constant SYM_POS=3 --srcpos or 0
constant PAIR_NUM=1
constant PAIR_NAME=2
constant PAIR_START=1
constant PAIR_END=2
constant OPER_TRUENAME=3
constant OPER_POS=4
--FUNC_* are for elements of math_list, function_list or script_list
--(When changing these, also update create_function() and math_list)
constant FUNC_ID=1 --(aka PAIR_NUM)
constant FUNC_NAME=2 --(aka PAIR_NAME)
constant FUNC_ARGS=3 --Either list of default values (each an integer or NO_DEFAULT), or VAR_ARGS
constant FUNC_ATTRS=4 --List of attributes as a list of {string,TokenList} pairs where the
--TokenList are the arguments. Like a Node except children are Tokens instead of Nodes.
constant FUNC_FLAGS=5 --Bitvector of FLAG_* bits. Also indicates presence of attributes. See attribute_list
constant FUNC_POS=6 --srcpos of start of function/script. Not used by math_list
--ATTR_* are for attribute_list
constant ATTR_NAME=1
constant ATTR_FLAGS=2 --An integer which gets OR'd into FUNC_FLAGS
constant FLAG_OBSOLETE=#01 --Has @obsolete attribute
constant GLB_POS=3 --srcpos of globalvariable definition
constant CONST_VALUE=1
constant CONST_POS=2
constant VAR_FRAME=3 --VarToken index, integer giving frame number of a variable (0 is current script, 1 is enclosing...)
constant VAR_ID=4 --VarToken index, integer giving ID in frame
constant REUSE_PARENT_AND_NAME=2
constant NO_DEFAULT={} --when appears in script_list[FUNC_ARGS], indicates arg with no default value
constant VAR_ARGS=0 --when appears AS function_list[FUNC_ARGS], indicates unlimited num of args
--KIND_* constants are used BOTH to describe the types of raw tokens, and tokens
--which been parsed into Nodes in an AST, although many of them can only appear
--as one or the other.
constant KIND_NUMBER=1 --int32. ID is the value
constant KIND_FLOW=2 --Flow control (flow_list), including begin and end.
constant KIND_GLOBAL=3 --Global variable (global_list)
constant KIND_LOCAL=4 --Local variable
constant KIND_MATH=5 --Math builtin function (math_list)
constant KIND_FUNCTION=6 --Builtin command (function_list)
constant KIND_SCRIPT=7 --Call to script (script_list)
constant KIND_NONLOCAL=8 --Nonlocal variable
--The following never appear in compiled scripts
constant KIND_REFERENCE=9 --Reference to a global or script. Converted to KIND_NUMBER in compiled script
constant KIND_OPERATOR=10 --Infix binary operator created by defineoperator. (operator_list)
--If a name like 'mod' is both operator and something else, it gets added
--to global_scope as the other thing, not KIND_OPERATOR.
constant KIND_PARENS=11 --Node created by extra pair of brackets around an expression
constant KIND_MACRO=12 --Expanded by hspeak, e.g. assert
constant KIND_KEYWORD=13 --Other builtin keywords that can appear in scripts but aren't flow constructs.
--Currently just 'variable' and 'subscript'.
constant KIND_CONSTANT=14 --A constant which hasn't been translated to a KIND_NUMBER yet
constant KIND_SEPARATOR=15 --Auto-delimiting tokens like +, if they haven't been turned into a
--KIND_OPERATOR yet. Can only be used in defineoperator. (separator_list)
constant KIND_ATTRIBUTE=16 --An @attribute
constant KIND_LAST_VALID_IN_SCRIPT=16
constant KIND_TOPLEVEL=17 --Toplevel keyword such as defineconstant, or script trigger, eg. plotscript
constant KIND_UNIMPLEMENTED=18
constant KIND_LAST=18
constant KIND_LONGNAMES={"number"
,"flow control statement"
,"global variable"
,"local variable"
,"built-in function"
,"hard-coded function"
,"script"
,"non-local variable"
,"reference"
,"operator"
,"order-of-operations-enforcing parentheses"
,"hard-coded function/macro"
,"keyword"
,"constant"
,"untranslated operator"
,"attribute declaration"
,"top-level declaration"
,"unimplemented keyword"
,"function attribute"
}
constant nil_sym={0,0,0}
constant TREE_TRUNK=1
constant TREE_BRANCHES=2
constant TIMING_DEPTH=1
constant TIMING_DESCRIPTION=2
constant TIMING_TIME=3
constant TIMING_ACTIVE=4
--Version number for the .hs/.hsp format (not including HSZ files!)
constant HSP_FORMAT_VERSION=1
constant CODE_START_BYTE_OFFSET=32
constant HSZ_FORMAT_VERSION=3
constant MAXGLOBAL=50000 --largest ID
constant MAX_LOCALS=100 --max number of locals per script (not including nonlocals)
constant MAX_ARGS=32 --max number of arguments to a script, function or builting
constant MAX_NEST_DEPTH=4
---------------------------------------------------------------------------
--globals-- --initializations--
string compiler_dir compiler_dir=""
string include_dir include_dir=""
string source_file source_file=""
string dest_file dest_file=""
integer dest_file_fh dest_file_fh=0 --For use by error handlers only!
integer cleanup_dest cleanup_dest=false --Whether to delete dest_file on error
sequence optlist optlist={}
sequence file_list file_list={} --type {FILE_NAME,FILE_TEXT,FILE_LINE_EXTENTS}
sequence additional_includes
additional_includes={}
srcpos max_srcpos max_srcpos=0 --Maximum possible value of a valid srcpos (for typechecking)
srcpos script_position script_position=0 --All srcpos's emitted to a .hsz are relative to this. (Length 0)
integer debug_info debug_info=false
integer total_lines total_lines=0
sequence cmd cmd={} --TokenList (but type checking is very slow)
sequence reuse_ids reuse_ids={} --type {PAIR_NUM, REUSE_PARENT_AND_NAME={parent_id, PAIR_NAME}}
map constant_list constant_list=map:new()
sequence trigger_list trigger_list={} --type {PAIR_NUM, PAIR_NAME}
sequence operator_list operator_list={} --type {PAIR_NUM, PAIR_NAME, OPER_TRUENAME, OPER_POS}
sequence function_list function_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS, FUNC_ATTRS, FUNC_FLAGS}
sequence global_list global_list={{},{},{}} --NOTE: three lists {PAIR_NUM, PAIR_NAME, GLB_POS}, not a list of globals
sequence string_list string_list={} --list of strings
sequence script_list script_list={} --type {PAIR_NUM, PAIR_NAME, FUNC_ARGS, FUNC_POS, FUNC_ATTRS, FUNC_FLAGS}
--See also math_list, below
--(PAIR_NAME is the full name)
map global_scope global_scope=map:new() --Maps all names in global scope EXCEPT operators to {SYM_KIND, SYM_ID, SYM_POS} tuples
--SCRIPT_* are for all_scripts.
--The number of data fields used is variable, between _SCRIPT_MIN_LAST and _SCRIPT_MAX_LAST
constant SCRIPT_TRIGGER_TOK=1 --Trigger token (type {CMD_TEXT, CMD_POS}), e.g. "script"
constant SCRIPT_NAME=2 --String, not fully-scoped
constant SCRIPT_POS=3 --srcpos of script name token
constant SCRIPT_FULL_NAME=4 --Full-scoped script name
constant SCRIPT_ARG_TOKS=5 --List of argument name tokens (type {CMD_TEXT, CMD_POS})
--Initially the tokens may include default values (like "a=4"), which are later removed
constant SCRIPT_BODY_TOKS=6 --List of tokens (type {CMD_TEXT, CMD_POS}) in the body, of the form do,begin,...,end
constant SCRIPT_PARENT_IDX=7 --Index in all_scripts of parent, or 0 for none
constant SCRIPT_NEST_DEPTH=8 --Nesting depth. A script is 0, subscript 1, sub-subscript 2...
constant SCRIPT_ATTRIBUTES=9 --Temporary store, do not use. These attributes get copied to script_list's FUNC_ATTRS
constant SCRIPT_END_POS=10 --A srcpos of the last token of the script ('end' or ')')
constant _SCRIPT_MIN_LAST=10
--The following are added later
constant SCRIPT_ID=11 --Script ID. Value not appended until all IDs are assigned in check_script_declarations
constant SCRIPT_TRIGGER_ID=12 --trigger ID. SCRIPT_TRIGGER_ID and later not appended until compile_a_script
constant SCRIPT_NUM_NONLOCALS=13
constant SCRIPT_VARIABLES=14 --VarList of locals and nonlocals
constant SCRIPT_AST=15
constant SCRIPT_BINARY=16
constant _SCRIPT_MAX_LAST=16
sequence all_scripts all_scripts={} --list of 'Script's == {SCRIPT_*} (variable length)
atom start_time start_time=time()
atom run_time run_time=0
sequence timing_data timing_data={}
integer timing_depth timing_depth=-1
integer get_cmd_pointer get_cmd_pointer=0
integer autonumber_id autonumber_id=32767
sequence flow_list flow_list={
{0,"do"}
,{1,"begin"}
,{2,"end"}
,{3,"return"}
,{4,"if"}
,{5,"then"}
,{6,"else"}
,{7,"for"}
,{10,"while"}
,{11,"break"}
,{12,"continue"}
,{13,"exitscript"}
,{14,"exitreturning"}
,{15,"switch"}
,{16,"case"} --never appears in compiled script
,{17,"subscript"} --never appears in compiled script
}
--elseif is not a flow type
sequence flow_requiring_brackets --else is excluded, because of case(else)
flow_requiring_brackets={
"do",
"if",
"then",
"elseif",
"for",
"while",
"exitreturning",
"switch",
"case"
}
sequence math_list math_list={
--{FUNC_ID, FUNC_NAME, FUNC_ARGS, FUNC_ATTRS, FUNC_FLAGS} (FUNC_POS omitted)
{0,"random", {0,1}, {}, 0}
,{1,"exponent", {0,2}, {}, 0}
,{2,"modulus", {0,1}, {}, 0}
,{3,"divide", {0,1}, {}, 0}
,{4,"multiply", {0,0}, {}, 0}
,{5,"subtract", {0,0}, {}, 0}
,{6,"add", {0,0}, {}, 0}
,{7,"xor", {0,0}, {}, 0}
,{8,"or", {0,0}, {}, 0}
,{9,"and", {0,0}, {}, 0}
,{10,"equal", {0,0}, {}, 0}
,{11,"notequal", {0,0}, {}, 0}
,{12,"lessthan", {0,0}, {}, 0}
,{13,"greaterthan", {0,0}, {}, 0}
,{14,"lessthanorequalto", {0,0}, {}, 0}
,{15,"greaterthanorequalto", {0,0}, {}, 0}
,{16,"setvariable", {0,0}, {}, 0}
,{17,"increment", {0,1}, {}, 0}
,{18,"decrement", {0,1}, {}, 0}
,{19,"not", {0}, {}, 0}
,{20,"logand", {0,0}, {}, 0}
,{21,"logor", {0,0}, {}, 0}
,{22,"logxor", {0,0}, {}, 0}
,{23,"abs", {0}, {}, 0}
,{24,"sign", {0}, {}, 0}
,{25,"sqrt", {0}, {}, 0}
}
--update is_simple_math_op on adding new operators
sequence attribute_list attribute_list={ --type {ATTR_NAME, ATTR_FLAGS}
{"@obsolete",FLAG_OBSOLETE}
}
sequence separator_list separator_list={
"+=","-=","$+","$=","$","+","--","/","*","^^","^"
,"==","<>",">>","<<","<=",">=",">","<",":=","=","&&","||"
}
sequence lexer_table lexer_table={} --maps from ASCII value+1 (note +1!) to LEX_* constant
sequence lexer_table_key2 lexer_table_key2={}
string hex_chars hex_chars="0123456789abcedfABCDEF"
sequence hex_char_values hex_char_values={0,1,2,3,4,5,6,7,8,9,#a,#b,#c,#e,#d,#f,#A,#B,#C,#D,#E,#F}
sequence compiled_data compiled_data={} --used inside binary_compile only; compiled data for the current script
map node_lookup --used inside binary_compile only; map from serialised nodes to offsets
string current_script current_script="" --name of current script
integer cur_script_num cur_script_num=0 --index into all_scripts of current script
integer colors_enabled colors_enabled=true
integer simple_colorcodes simple_colorcodes=false
integer error_file error_file=false
map used_globals used_globals=map:new() --set of ids (keys are ids, values are true)
map used_locals used_locals=map:new() --set of ids (keys are ids, values are true)
sequence string_table string_table={} --binary data
string plotscr_version plotscr_version=""
integer max_used_function max_used_function=0 --maximum id of a function used in a script
integer fast_mode fast_mode=false
integer end_anchor_kludge end_anchor_kludge=false
integer was_warnings was_warnings=false
---------------------------------------------------------------------------
--types--
--A position in the script source (where newlines are one character).
--Encoded in 32-bit integer, where:
--bits 0..7 : Length of the token in characters, including whitespace
-- (aside from leading/trailing whitespace), capped to 254.
-- (255 reserved for future use)
--bit 8 : Virtual flag
--bits 9..31: The (1-based) character number of the start of the token in the file
-- plus the file's FILE_OFFSET
--
--The first file has FILE_OFFSET=1, to ensure srcpos 0 is never used.
--Offset FILE_OFFSET+0 is reserved to refer to the file as a whole (unused);
--FILE_OFFSET+1 is the start of the file.
--
--See also encode_srcpos and decode_srcpos
type srcpos(object pos)
--need to allow 0 as a dummy value, and ignore max_srcpos if it's uninitialised)
if atom(pos) and pos=floor(pos) and pos>=0 and (max_srcpos=0 or pos<=max_srcpos) then
--odd, 'or' and 'and' only seem to shortcut when used in if condition, not
--in a general expression
return true
else
return false
end if
end type
--Not the same as Euphoria's string type, which is limited to extended ASCII
type string(object str)
if not sequence(str) then
return false
end if
--Too slow
-- for i=1 to length(str) do
-- if not integer(str[i]) or str[i]<0 then
-- return false
-- end if
-- end for
return true
end type
type Kind(integer kind)
return kind>=0 and kind<=KIND_LAST
end type
--SYM_KIND,SYM_ID,SYM_POS tuple
type Symbol(object sym)
return length(sym)=3 and Kind(sym[SYM_KIND]) and srcpos(sym[SYM_POS])
end type
--A CMD_TEXT,CMD_POS pair
type Token(object tok)
if sequence(tok) and length(tok)=2 and string(tok[1]) and srcpos(tok[2]) then
return true
else
return false
end if
end type
--A CMD_TEXT,CMD_POS,VAR_FRAME,VAR_ID tuple describing a local/nonlocal variable
type VarToken(object var)
if length(var)=4 and string(var[1]) and srcpos(var[2]) and integer(var[3]) and integer(var[4]) then
return true
end if
return false
end type
--sequence of Tokens
type TokenList(object list)
if not sequence(list) then
return false
end if
for i=1 to length(list) do
if not Token(list[i]) then
return false
end if
end for
return true
end type
--sequence of VarTokens
type VarList(object vars)
if not sequence(vars) then
return false
end if
for i=1 to length(vars) do
if not VarToken(vars[i]) then
return false
end if
end for
return true
end type
--{Token,children}
type Node(object node)
if sequence(node) and length(node)=2 and Token(node[1]) and sequence(node[2]) then
for i=1 to length(node[2]) do
if not Node(node[2][i]) then
return false
end if
end for
return true
else
return false
end if
end type
--sequence of Nodes
type NodeList(object list)
if not sequence(list) then
return false
end if
for i=1 to length(list) do
if not Node(list[i]) then
return false
end if
end for
return true
end type
--An element of all_scripts
type Script(object script)
if sequence(script) and length(script)>=_SCRIPT_MIN_LAST and length(script)<=_SCRIPT_MAX_LAST then
return true
else
return false
end if
end type
---------------------------------------------------------------------------
--time spent waiting for a user-keypress shouldnt count
function timeless_wait_key()
atom skip_time
integer key
skip_time=time()
key=wait_key()
skip_time=time()-skip_time
start_time+=skip_time
for i=1 to length(timing_data) do
if timing_data[i][TIMING_ACTIVE] then
timing_data[i][TIMING_TIME]+=skip_time
end if
end for
return(key)
end function
---------------------------------------------------------------------------
--Euphoria's built-in sprintf truncates all string elements to 8 bits,
--so this is an Unicode-enabled replacement. Supports %d, %s, %g, %%
--Beware! This will be way slower than sprintf, but luckily it's not
--needed anywhere where speed matters.
function sprintf_utf(string s, sequence printf_args)
integer seg_start, seg_end
integer arg_ctr
integer code
string ret
if length(printf_args)=0 then
return s
end if
ret=""
arg_ctr=1
seg_start=1
seg_end=find('%',s)
while seg_end do
ret&=s[seg_start..seg_end-1]
code=s[seg_end+1]
if code='s' then
ret&=printf_args[arg_ctr]
arg_ctr+=1
elsif code='d' or code='g' then
ret&=sprintf({'%',code},printf_args[arg_ctr])
arg_ctr+=1
elsif code='%' then
ret&='%'
else
simple_error(sprintf("sprintf_utf: unrecognised format code %%%s\n",{s[seg_end+1]}))
end if
seg_start=seg_end+2
seg_end=find('%',s,seg_start)
end while
if seg_start<=length(s) then
ret&=s[seg_start..$]
end if
if arg_ctr!=length(printf_args)+1 then
simple_error(sprintf("sprintf_utf: recieved %d format arguments, only used %d\n",{length(printf_args),arg_ctr-1}))
end if
return ret
end function
---------------------------------------------------------------------------
--fprintf as UTF8
procedure print_utf(object fh, string s, sequence printf_args)
if length(printf_args) then
s=sprintf_utf(s,printf_args)
end if
s=toUTF(s,utf_32,utf_8)
puts(fh,s)
end procedure
---------------------------------------------------------------------------
--prints a string with printf to stdout converting color codes
procedure color_print(string s, sequence printf_args)
string buffer
s=sprintf_utf(s,printf_args)
if simple_colorcodes then
buffer=s
else
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
print_utf(stdout,buffer,{})
buffer=""
if colors_enabled then
text_color(HS_TO_EU_COLORS[s[i]-COL_FIRST+1])
end if
else
buffer&=s[i]
end if
end for
end if
if length(buffer) then
print_utf(stdout,buffer,{})
end if
end procedure
---------------------------------------------------------------------------
procedure opt_wait_for_key(integer error_or_warning)
if (not error_or_warning and find('j',optlist)) or find('k',optlist) then
--skip
else
color_print("[Press Any Key]\n",{})
timeless_wait_key()
end if
end procedure
---------------------------------------------------------------------------
procedure enter_timing_zone(string description)
if not find('t',optlist) then
return
end if
timing_depth+=1
timing_data=append(timing_data,{timing_depth,description,-time(),true})
end procedure
---------------------------------------------------------------------------
procedure reenter_timing_zone(string description)
if not find('t',optlist) then
return
end if
for i=1 to length(timing_data) do
if equal(timing_data[i][TIMING_DESCRIPTION],description) then
timing_depth+=1
timing_data[i][TIMING_DEPTH]=timing_depth --could change...
timing_data[i][TIMING_TIME]-=time()
timing_data[i][TIMING_ACTIVE]=true
return
end if
end for
enter_timing_zone(description)
end procedure
---------------------------------------------------------------------------
procedure exit_timing_zone()
if not find('t',optlist) then
return
end if
for i=length(timing_data) to 1 by -1 do
--there can only be one active timing job at each depth at a time
if timing_data[i][TIMING_ACTIVE] and timing_data[i][TIMING_DEPTH]=timing_depth then
timing_data[i][TIMING_TIME]+=time()
timing_data[i][TIMING_ACTIVE]=false
exit
end if
end for
timing_depth-=1
end procedure
---------------------------------------------------------------------------
procedure print_timing_data()
string indent
sequence data
string tmp1, tmp2
if run_time=0 then
--so that all percentages show as 0%
run_time=1e100
end if
for i=1 to length(timing_data) do
data=timing_data[i]
indent=repeat(' ',data[TIMING_DEPTH]*4)
tmp1=sprintf("%.2f",{data[TIMING_TIME]})
tmp2=sprintf("%5.2g",{100*data[TIMING_TIME]/run_time})
color_print("%s "&COLBWHI&"%s"&COLWHI&"s %s%% %s\n",{indent,tmp1,tmp2,data[TIMING_DESCRIPTION]})
end for
end procedure
---------------------------------------------------------------------------
function html_char_convert(string s)
string buffer
string result
result=""
buffer=""
for i=1 to length(s) do
if s[i]=' ' and i>1 then
if s[i-1]=' ' then
buffer&=" "
else
buffer&=s[i]
end if
elsif s[i]='<' then
buffer&="<"
elsif s[i]='>' then
buffer&=">"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
function error_string_convert(string s)
string buffer
string result
result=""
buffer=""
for i=1 to length(s) do
if s[i]>=COL_FIRST and s[i]<=COL_LAST then
if s[i]=COLYEL then
buffer&=""
elsif s[i]=COLRED then
buffer&=""
elsif s[i]=COLPNK then
buffer&=""
elsif s[i]=COLWHI then
buffer&=""
elsif s[i]=COLBWHI then
buffer&=""
end if
elsif s[i]='\n' then
buffer&="
\n"
else
buffer&=s[i]
end if
end for
if length(buffer) then
result&=buffer
end if
return(result)
end function
---------------------------------------------------------------------------
--prints a long string wrapped at 80 columns
procedure wrap_print(string s, sequence arguments)
string outstring
s=sprintf_utf(s,arguments)
while length(s) do
outstring=before_wrap_point(s)
s=after_wrap_point(s)
color_print("%s\n",{outstring})
end while
end procedure
---------------------------------------------------------------------------
procedure error_file_print(string s)
integer fh
if error_file then
fh=open(compiler_dir&"hs_error.htm","a")
if fh!=failure then
print_utf(fh,error_string_convert(s)&"\n",{})
close(fh)
end if
end if
end procedure
---------------------------------------------------------------------------
--prints out warning message in red with word wrap
procedure simple_warn(string s)
sequence pos
if not find('w',optlist) then
--do not warn if -w is set
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"WARNING: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("WARNING: %s",{html_char_convert(s)}))
was_warnings = true
end if
end procedure
---------------------------------------------------------------------------
--prints out an error message in red with word wrap, then aborts
procedure simple_error(string s)
sequence pos
pos=get_position()
if pos[2]>1 then
print_utf(stdout,"\n",{})
end if
wrap_print(COLRED&"ERROR: %s"&COLWHI&"\n",{s})
error_file_print(sprintf_utf("ERROR: %s",{html_char_convert(s)}))
if end_anchor_kludge then
error_file_print("\n
\n")
end if
opt_wait_for_key(true)
--Custom doesn't check hspeak's error code (due to difficulty retrieving it after spawning),
--it only checks whether the output .hs file was created.
if cleanup_dest then
if dest_file_fh then
close(dest_file_fh)
end if
delete_file(dest_file)
end if
abort(1)
end procedure
---------------------------------------------------------------------------
--prints out usage info if not enough arguments
procedure check_arg_count(sequence args)
if length(args)=2 then
wrap_print("HamsterSpeak Compiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION})
wrap_print("(C)2001-%s James Paige, Ralph Versteegen, and the OHRRPGCE Developers\n",{COPYRIGHT_DATE})
wrap_print("Please read LICENSE.txt for GNU GPL & MIT dual license details and disclaimer of liability",{})
wrap_print(COLYEL&"%s [-%s] [long-options] source.hss [dest.hs]"&COLWHI&"\n\n",{file_only(args[2]),VALID_OPTIONS})
color_print(" -f fast mode. Disables some optimization\n",{})
color_print(" -j don't wait for a keypress when finished if there's no error or warning\n",{})
color_print(" -k don't wait for a keypress when finished\n",{})
color_print(" -n don't add debug info, or a copy of the source code, to .hs file\n",{})
color_print(" -w suppress warnings\n",{})
color_print(" -u show warnings for unused variables\n",{})
color_print(" -y overwrite the destination file without asking\n",{})
color_print(" --incdir \n" &
" also search this directory for include files\n",{})
color_print(" --include \n" &
" include an additional file\n",{})
color_print("\nAdvanced options (not intended for normal use):\n",{})
color_print(" -b don't include plotscr.hsd automatically\n",{})
color_print(" -c colors will be disabled\n",{})
color_print(" -d dump debug report to hs_debug.txt\n",{})
color_print(" -s print the name of each plotscript as it is compiled\n",{})
color_print(" -a same as -s, but printing all scripts\n",{})
color_print(" -t print detailed timing info (for HSpeak developers)\n",{})
color_print(" -z write error messages to hs_error.htm\n",{})
--Undocumented arguments:
-- --unicode-cols
-- used by Hamster Whisper for piped output
-- --reuse-ids scripts.bin
-- Read the provided scripts.bin file from a previous compilation, and reuse script ID mapping
wrap_print("\nFor more info about HamsterSpeak visit "&COLBWHI&"http://HamsterRepublic.com/ohrrpgce"&COLWHI&"\n",{})
wrap_print("\nThis is a command-line program. You should either run it from the command-line, or drag and drop your script file onto it. You don't ever need to run HSpeak yourself; Custom will do that for you.\n",{})
opt_wait_for_key(false)
abort(0)
end if
end procedure
---------------------------------------------------------------------------
--Build the lexer tables, which for each code point 0<=i<256, contains the type
--of that character in entry i+1 (have to handle NUL)
--Other characters above code 255 are always LEX_CHAR
procedure init_lexer()
sequence table_insert
--main table
--(\xa0 is non-breaking space)
table_insert={" (),\n#\t\"[]{}.\xa0",
{LEX_SPACE,LEX_BEGIN,LEX_END,LEX_COMMA,LEX_NEWLINE,LEX_COMMENT,LEX_SPACE,LEX_STRING,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_WARN,LEX_SPACE}}
--by default each character is either binary, or an allowed identifier
--character. This is for backwards compatibility
--NOTE the +1!
lexer_table=repeat(LEX_BINARY,32) & repeat(LEX_CHAR,127-32) & repeat(LEX_BINARY,#A1-127) & repeat(LEX_CHAR,256-#A1)
for i=1 to length(table_insert[1]) do
lexer_table[table_insert[1][i]+1]=table_insert[2][i]
end for
for i=0 to 9 do
lexer_table['0'+i+1]=LEX_DIGIT
end for
for i=0 to 5 do
lexer_table['a'+i+1]=LEX_HEXDIGIT
lexer_table['A'+i+1]=LEX_HEXDIGIT
end for
--add the one-character prefix of each operator
for i=1 to length(separator_list) do
lexer_table[separator_list[i][1]+1]=LEX_OPERATOR
end for
--keyword suffix parsing table
lexer_table_key2=repeat(LEX_CHAR,256)
lexer_table_key2[' '+1]=LEX_SPACE
lexer_table_key2['\t'+1]=LEX_SPACE
--2nd character of each operator
for i=1 to length(separator_list) do
if length(separator_list[i])=2 then
lexer_table_key2[separator_list[i][2]+1]=LEX_OPERATOR2
end if
end for
--when floating point is added will want a table for number parsing
end procedure
---------------------------------------------------------------------------
--initializes global variables, and generally gets things ready to roll
procedure init()
sequence args
integer index
integer key
integer fh
sequence sym
--Process arguments
args=command_line()
compiler_dir=path_only(args[2])
check_arg_count(args)
wrap_print("HamsterSpeak Compiler v%s%s\n",{COMPILER_VERSION,COMPILER_SUB_VERSION})
optlist={}
index=3
while index<=length(args) do
--These are undocumented
if equal(args[index],"--unicode-cols") then
simple_colorcodes=true
args=delete_element(args,index)
elsif equal(args[index],"--reuse-ids") and index+1<=length(args) then
color_print(COLWHI&"Reading script IDs from previous compile\n",{})
reuse_ids=read_scripts_dot_bin(args[index+1])
args=delete_slice(args,index,index+1)
elsif equal(args[index],"--include") and index+1<=length(args) then
additional_includes&={args[index+1]}
args=delete_slice(args,index,index+1)
elsif equal(args[index],"--incdir") and index+1<=length(args) then
include_dir=args[index+1]
args=delete_slice(args,index,index+1)
elsif args[index][1]='-' then
optlist=optlist&hs_lower(args[index][2..length(args[index])])
args=delete_element(args,index)
else
index+=1
end if
end while
for i=1 to length(optlist) do
if not find(optlist[i], VALID_OPTIONS) then
simple_warn("Unknown commandline option -" & optlist[i])
end if
end for
check_arg_count(args)
source_file=normalize_filename(args[3])
if length(args)>3 then
dest_file=normalize_filename(args[4])
else
dest_file=normalize_filename(alter_extension(source_file,"hs"))
end if
if find('f',optlist) then
fast_mode=true
color_print("Using fast mode. Some size optimization disabled\n",{})
end if
if find('c',optlist) then
colors_enabled=false
end if
if not find('n',optlist) then
debug_info=true
end if
--'x' used by old versions of Hamster Whisper to specify color code format
--However this clashes with Latin-1 characters and doesn't work with UTF-8 anyway
--Replaced by --unicode-cols
--the semi-undocumented command line argument -z writes a file called
--hs_error.htm formatted for HssEd to read
if find('z',optlist) then
error_file=true
if file_exists(compiler_dir&"hs_error.htm") then
fh=open(compiler_dir&"hs_error.htm","w")
if fh!=failure then
puts(fh,"")
close(fh)
end if
end if
end if
wrap_print("Compiling "&COLBWHI&"%s"&COLWHI&" to "&COLBWHI&"%s"&COLWHI&"\n",{source_file,dest_file})
if file_exists(dest_file) then
if find(extension_only(dest_file),{"hss","txt"}) then
simple_error("Refusing to write output to "&dest_file&" because only input files should have hss/txt extension!")
end if
if find('y',optlist) then
--found the -y command line arg, overwrite automatically
else
while true do
--prompt the user to overwrite
wrap_print("file "&COLBWHI&"%s"&COLWHI&" already exists. Overwrite it? (Y/N)",{dest_file})
key=timeless_wait_key()
color_print(" "&COLYEL&"%s"&COLWHI&"\n",{key})
if hs_lower(key)='y' then
exit
elsif hs_lower(key)='n' then
simple_error("output file overwrite cancelled by user")
else
wrap_print(COLYEL&"%s"&COLWHI&"? "&COLYEL&"%s"&COLWHI&"!? How is that Y or N?",{key,hs_upper(key)})
end if
end while
end if
end if
--From now on, delete dest_file on error
cleanup_dest=true
--Init global data tables
global_scope=map:new_from_kvpairs({
{"defineconstant",{KIND_TOPLEVEL,0,0}}
,{"definetrigger" ,{KIND_TOPLEVEL,0,0}}
,{"defineoperator",{KIND_TOPLEVEL,0,0}}
,{"globalvariable",{KIND_TOPLEVEL,0,0}}
,{"definefunction",{KIND_TOPLEVEL,0,0}}
,{"definescript" ,{KIND_TOPLEVEL,0,0}}
,{"plotscrversion",{KIND_TOPLEVEL,0,0}}
,{"include" ,{KIND_TOPLEVEL,0,0}} --should never appear, in theory
,{"cfor" ,{KIND_UNIMPLEMENTED,0,0}}
,{"foreach" ,{KIND_UNIMPLEMENTED,0,0}}
,{"=" ,{KIND_UNIMPLEMENTED,0,0}}
,{"elseif" ,{KIND_FLOW,-1,0}} --not in flow_list... shouldn't that be a problem?
,{"exit" ,{KIND_FLOW,-1,0}} --not in flow_list
--keywords have special case handling
,{"subscript" ,{KIND_KEYWORD,0,0}} --subscript is also in flow_list!
,{"variable" ,{KIND_KEYWORD,0,0}}
,{"tracevalue" ,{KIND_MACRO,0,0}}
,{"tracevalueof" ,{KIND_MACRO,0,0}}
,{"showvalueof" ,{KIND_MACRO,0,0}}
,{"assert" ,{KIND_MACRO,0,0}}
-- Stray "$"'s are picked up by the lexer (not totally ideal maybe)
})
for i=1 to length(flow_list) do
sym=flow_list[i]
if map:has(global_scope, sym[PAIR_NAME])=false then --omit subscript
map:put(global_scope, sym[PAIR_NAME], {KIND_FLOW,sym[PAIR_NUM],0})
end if
end for
for i=1 to length(math_list) do
sym=math_list[i]
map:put(global_scope, sym[PAIR_NAME], {KIND_MATH,sym[PAIR_NUM],0})
end for
-- Adding the separators (operators) here prevents them from being used before
-- defineoperator is handled (either a block before include,plotscr.hsd, or in
-- e.g. defineconstant)
for i=1 to length(separator_list) do
if map:has(global_scope,separator_list[i])=false then --omit =
map:put(global_scope,separator_list[i],{KIND_SEPARATOR,i,0})
end if
end for
for i=1 to length(attribute_list) do
map:put(global_scope,attribute_list[i][ATTR_NAME],{KIND_ATTRIBUTE,i,0})
end for
--Append subscript trigger id (this is needed for scripts.bin)
trigger_list=append(trigger_list,{-1,"subscript"})
--Lexer finite state transition tables
init_lexer()
end procedure
---------------------------------------------------------------------------
--Returns offset component of a srcpos (usually you want to use decode_srcpos_file
--instead if you want to know the file offset)
function srcpos_offset(srcpos pos)
integer ret
--It appears to be a Euphoria euc bug that it's necessary to explicitly cast to integer
ret=floor(pos/power(2,9))
return ret
end function
---------------------------------------------------------------------------
--Returns length of a srcpos (between 0 and 254)
function srcpos_length(srcpos pos)
return and_bits(pos,power(2,8)-1)
end function
---------------------------------------------------------------------------
--returns {filenumber,file position} pair, later being 1-based character offset
function decode_srcpos_file(srcpos pos)
integer len, offset
--0 is an invalid srcpos
if pos=0 then
simple_error("compiler bug: can't decode uninitialised srcpos")
end if
integer point
point=srcpos_offset(pos)
if not integer(point) or point<=0 then
simple_error(sprintf("compiler bug: bad srcpos offset %d", {point}))
end if
for i=1 to length(file_list) do
len=length(file_list[i][FILE_TEXT])
offset=file_list[i][FILE_OFFSET]
if point<=offset+len then
return {i,point-offset}
exit
end if
end for
simple_error(sprintf_utf("compiler bug: can't decode invalid srcpos %d",{pos}))
end function
---------------------------------------------------------------------------
function srcpos_file_number(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[1]
end function
---------------------------------------------------------------------------
--Returns 1-based character number in file
function srcpos_file_offset(srcpos pos)
sequence temp
temp=decode_srcpos_file(pos)
return temp[2]
end function
---------------------------------------------------------------------------
--Combine base/file offset (usually FILE_OFFSET), token offset (in
--characters from start of file counting from 1) and token length into
--a non-virtual a srcpos. The redundancy between baseoffset and
--offset is for clarity.
function encode_srcpos(integer baseoffset, integer offset, integer len)
if len > 254 then
len = 254
end if
if offset < 1 or len < 0 then
--offset 0 is allowed, it refers to the file. But if you wanted it you'd
--just write file_list[i][FILE_OFFSET]. So disallow it here.
simple_error(sprintf("Compiler bug! encode_srcpos(%d,%d,%d)\n",{baseoffset,offset,len}))
end if
return (baseoffset+offset)*power(2,9)+len
end function
---------------------------------------------------------------------------
--This transforms a srcpos into one marked as virtual. Indicates a roughly relevant position, but
--not something that actually occurs in the source. Use for inserted dummy code
function virtual_pos(srcpos pos)
return(or_bits(pos,256))
end function
---------------------------------------------------------------------------
--Return just the original text of a token.
--Unlike decode_srcpos, this is quite fast
function get_srcpos_text(srcpos pos)
integer point, tokenlen, filenum
sequence temp
temp=decode_srcpos_file(pos)
filenum=temp[1]
point=temp[2]
tokenlen=srcpos_length(pos)
if tokenlen=0 then
return ""
else
return file_list[filenum][FILE_TEXT][point..point+tokenlen-1]
end if
end function
---------------------------------------------------------------------------
--Probably slow: avoid heavy use.
--Returns {file name, line number, column number, token length, line text, token text}: index with POS_*
--Column number returned is 0-based
function decode_srcpos(srcpos pos)
integer point, tokenlen, filenum, columnnum
integer lineno, line_start, line_end
string filetext, linetext, tokentext
sequence lines
sequence temp
if pos=0 then
simple_error("compiler bug: can't decode uninitialised srcpos")
end if
temp=decode_srcpos_file(pos)
filenum=temp[1]
point=temp[2]
tokenlen=srcpos_length(pos)
--Find the line it's on
filetext=file_list[filenum][FILE_TEXT]
lines=file_list[filenum][FILE_LINE_EXTENTS]
for i=1 to length(lines) do
if point>=lines[i][PAIR_START] then
lineno=i
else
exit
end if
end for
line_start=lines[lineno][PAIR_START]
line_end=lines[lineno][PAIR_END]
linetext=filetext[line_start..line_end]
-- +1 for 1-based column indexing
columnnum=point-line_start+1
if tokenlen=0 then
tokentext=""
else
tokentext=linetext[columnnum..min({length(linetext),columnnum+tokenlen-1})]
end if
return({file_list[filenum][FILE_NAME],lineno,columnnum,tokenlen,linetext,tokentext})
end function
---------------------------------------------------------------------------
function form_error_text(string s, srcpos pos)
integer poscol, poslen, posend
string posline, line_display
sequence src_position
string column_display, caret_display
integer tab_compensate, token_tab_compensate
src_position=decode_srcpos(pos)
--?pos
--pretty_print(1,src_position,{2})
posline=src_position[POS_LINETEXT]
poscol=src_position[POS_COLUMN]
poslen=src_position[POS_LENGTH]
--Expand tabs, because we don't know how the terminal displays then.
line_display=substring_replace(posline,"\t"," ")
posline&=" " --Used to point to the newline at the end
posend=poscol+large(0,poslen-1)
if posend>length(posline) then --One past the end of the actual line is allowed
simple_warn(sprintf_utf("Compiler bug: srcpos past end of line %d", {src_position[POS_LINE]}))
posend=length(posline)
end if
--Count number of tabs in the line before and in the token,
--so that we can determine their display length
tab_compensate=3*count('\t',posline[1..poscol-1])
token_tab_compensate=3*count('\t',posline[poscol..posend])
caret_display=repeat('^',large(1,poslen)+token_tab_compensate)
column_display=repeat(' ',poscol-1+tab_compensate)
error_file_print(sprintf_utf("\n",{src_position[POS_FILENAME],src_position[POS_LINE]}))
if length(current_script) then
return(
sprintf_utf(
"in script "&COLYEL&"%s"&COLRED&" on line %d in "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s%s\n"
&COLRED&"%s\n"
,{current_script,src_position[POS_LINE],src_position[POS_FILENAME],line_display,column_display,caret_display,s}
)
)
else
return(
sprintf_utf(
"in line %d of "&COLPNK&"%s"&COLRED&"\n"
&COLBWHI&"%s\n"
&"%s%s\n"
&COLRED&"%s\n"
,{src_position[POS_LINE],src_position[POS_FILENAME],line_display,column_display,caret_display,s}
)
)
end if
--note that the calling procedure must send/deal with closing tags to error_file_print
end function
---------------------------------------------------------------------------
procedure src_warn(string s, srcpos pos)
if not find('w',optlist) then
--do not warn if -w is set
simple_warn(form_error_text(s,pos))
error_file_print("\n
\n")
end if
end procedure
---------------------------------------------------------------------------
procedure src_error(string s, srcpos pos)
end_anchor_kludge=true
simple_error(form_error_text(s,pos))
end procedure
---------------------------------------------------------------------------
--Find newlines in a file (possibly mixed type), returns line extents
--({start of line, end of line} pairs, both inclusive, newlines excluded)
--and the file_text with Mac lineendings converted to Unix, because the engine's
--decode_srcpos expects source.lumped contents don't use Mac lineendings.
--Note also read_file() adds a newline at the end if none.
function split_lines(string file_text)
integer line_start=1
integer last_13=-1
sequence line_extents = {}
for i=1 to length(file_text) do
if file_text[i]=13 then
line_extents=append(line_extents, {line_start,i-1})
last_13=i
line_start=i+1
if i+1<=length(file_text) and file_text[i+1]!=10 then -- A \r lineending
file_text[i]=10
end if
elsif file_text[i]=10 then
if last_13!=i-1 then -- handle {13,10} "\r\n"
line_extents=append(line_extents, {line_start,i-1})
end if
line_start=i+1
end if
end for
if line_start<=length(file_text) then
line_extents=append(line_extents, {line_start,length(file_text)})
end if
return {line_extents,file_text}
end function
---------------------------------------------------------------------------
--Read a file, adding to file_list.
--pos is used only for location of include statement, may be 0.
procedure load_source(string filename, string reading_how, srcpos pos)
integer fh
string file_text
sequence line_extents -- {start of line, end of line} pairs
integer offset -- FILE_OFFSET value
-- First file starts at FILE_OFFSET=1, to ensure srcpos 0 is never used,
-- and each file's FILE_OFFSET+0 is reserved. First char of first file
-- has offset 2.
offset=1
if length(file_list) then
offset=file_list[$][FILE_OFFSET]+length(file_list[$][FILE_TEXT])+1
end if
fh=open(filename,"r")
if fh!=failure then
wrap_print("%s "&COLBWHI&"%s"&COLWHI&"\n",{reading_how,filename})
reenter_timing_zone("read_file")
--Autodetect the encoding
file_text=read_file(fh, TEXT_MODE, UTF)
exit_timing_zone()
close(fh)
sequence temp=split_lines(file_text)
line_extents=temp[1]
file_text=temp[2]
total_lines+=length(line_extents)
file_list=append(file_list,{filename,file_text,line_extents,offset,""})
--Maximum valid srcpos points to length 1 virtual token at end of file.
--max_srcpos is used to do srcpos type checking... so we need to disable that
--check before we can assign to it!
max_srcpos=0
max_srcpos=virtual_pos(encode_srcpos(offset,length(file_text),1))
else
if pos then
src_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}),pos)
else
simple_error(sprintf_utf("file "&COLYEL&"%s"&COLRED&" not found\n",{filename}))
end if
end if
end procedure
---------------------------------------------------------------------------
--Load and lex, recursively finding includes
procedure load_and_lex_source(string filename, string reading_how, srcpos pos, sequence include_stack)
string canon_filename
--We don't really support \ in paths on Unix.
canon_filename=normalize_filename(filename)
ifdef WINDOWS then
canon_filename=hs_lower(canon_filename)
end ifdef
if find(canon_filename,include_stack) then
src_error(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included recursively",{filename}),pos)
end if
include_stack&={canon_filename}
if find(canon_filename,column(file_list,FILE_NAME)) then
--This exact same include file has already been included.
--Silently ignore this include. Double includes are allowed because of automatic inclusion.
--src_warn(sprintf_utf("File "&COLPNK&"%s"&COLRED&" is being included multiple times",{include_name}),pos)
return
end if
--Reads one additional file
reenter_timing_zone("load_and_lex_source/reading")
load_source(canon_filename,reading_how,pos)
exit_timing_zone()
--Lex that file, and load and lex all included files
lex_file(length(file_list), include_stack)
end procedure
---------------------------------------------------------------------------
function find_include_file_globally(string include_name)
if file_exists(compiler_dir&include_name) then
return normalize_filename(compiler_dir&include_name)
end if
--in case we're installed at $prefix/games, try $prefix/share/games/ohrrpgce/
if file_exists(compiler_dir&"../share/games/ohrrpgce/"&include_name) then
return normalize_filename(compiler_dir&"../share/games/ohrrpgce/"&include_name)
end if
--Failure: return filename, used for error messages
return include_name
end function
---------------------------------------------------------------------------
--Search for an include file
function find_include_file(string include_name)
sequence ret
if equal(include_name,"plotscr.hsd") or equal(include_name,"scancode.hsi") then
--Prefer to use fundamental include files found in global locations, because
--plotscr.hsd and scancode.hsi will be exported too, when exporting scripts
--from Custom.
return find_include_file_globally(include_name)
end if
--try source directory
if file_exists(path_only(source_file)&include_name) then
return normalize_filename(path_only(source_file)&include_name)
end if
if file_exists(include_dir&include_name) then
return include_dir&include_name
end if
--try current directory (which will be where the .rpg is)
if file_exists(include_name) then
return include_name
end if
--try global locations
return find_include_file_globally(include_name)
end function
---------------------------------------------------------------------------
procedure show_source_info()
if total_lines then
wrap_print("%d lines read from %d files\n",{total_lines,length(file_list)})
else
simple_error("no data to compile\n")
end if
if file_list[$][FILE_OFFSET]+length(file_list[$][FILE_TEXT])>power(2,23) then
simple_warn("You have over 8 MB of scripts! Unfortunately compile and in-game errors will report the wrong line numbers for some errors.")
end if
end procedure
---------------------------------------------------------------------------
procedure lexer_binary_error(srcpos pos)
--better use src_error, to track down garbage if accidentally creeping in
src_error("This file contains binary gunk. Are you sure that this is the text file you want to compile?",pos)
end procedure
---------------------------------------------------------------------------
--Lex a string, processing the escape codes.
--s is the source text (typically whole file contents), i is the starting offset in s,
--1 past the opening ", and stop is last permissible offset in s to include.
--Returns {closing_quote_offset,quoted_string} (i.e. enclosed in quote marks, but with escape codes processed)
function lexer_read_string(string s, integer i, integer stop, integer posoff)
string string
integer escaping
integer at1, at2
escaping=false
string="\""
while true do
if i>stop then --did not find a closing " so not a valid string
src_error("Expected \" at end of line to end string (multi-line strings are not supported)", encode_srcpos(posoff,i,1))
end if
if s[i]<256 and lexer_table[s[i]+1]=LEX_BINARY then
lexer_binary_error(encode_srcpos(posoff,i,1))
end if
if escaping then
if s[i]='"' or s[i]='\\' or s[i]='\'' then
--including ' for closer compatibility with Python
string&=s[i]
elsif s[i]='n' then
string&=10
elsif s[i]='t' then
string&=9
elsif s[i]='x' then
--escape 8 bit values (later will add \u and \U for 16 bit and 32 bit escapes)
at1=0
at2=0
if i+2<=stop then
at1=find(s[i+1],hex_chars)
at2=find(s[i+2],hex_chars)
end if
if at1=0 or at2=0 then
src_error("Expected two hexidecimal characters to follow \\x escape code",encode_srcpos(posoff,i-1,4))
end if
string&={16*hex_char_values[at1]+hex_char_values[at2]}
i+=2
else
--invalid sequence
string&='\\'
string&=s[i]
end if
escaping=false
else
if s[i]='"' then
return({i,string&'"'})
elsif s[i]='\\' then
escaping=true
else
string&=s[i]
end if
end if
i+=1
end while
end function
---------------------------------------------------------------------------
--Lex a signed or unsigned integer literal.
--Returns {newi, text} with text is the string representation of an integer.
function lexer_read_number(string s, integer i, integer stop, integer posoff)
integer state
atom val
integer sign,base
string text
integer textstart
integer textend
integer digits
textstart=i
textend=i
--Process negative prefix, because it isn't treated as a unary operator
if s[i]='-' then
text="-"
i+=1
sign=-1
else
text=""
sign=1
end if
--Skip whitespace
while i<=stop and (s[i]=' ' or s[i]='\t') do
i+=1
end while
--Check for base prefix 0x 0o or 0b. No separating space allowed!
base=10
if i+1<=stop and s[i]='0' then
if s[i+1]='x' or s[i+1]='X' then
base=16
i+=2
elsif s[i+1]='o' or s[i+1]='O' then
base=8
i+=2
elsif s[i+1]='b' or s[i+1]='B' then
base=2
i+=2
end if
end if
val=0
digits=0
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR
end if
--color_print("lexing number i=%d %d=%s state %d\n",{i,s[i],{s[i]},state})
if state=LEX_DIGIT then --0-9
integer digit
digit=s[i]-'0'
if digit>=base then
src_error(sprintf_utf("Found digit "&COLYEL&"%s"&COLRED&" which is not permitted in a base-%d number",{s[i],base}),encode_srcpos(posoff,i,1))
end if
text&=s[i]
textend=i
val=val*base+digit
digits+=1
elsif base=16 and state=LEX_HEXDIGIT then --a-fA-F
text&=s[i]
textend=i
val=val*16+(toLower(s[i])-'a'+10)
digits+=1
elsif state=LEX_SPACE then --yes, allowed in numbers too
elsif state=LEX_COMMA or state=LEX_BEGIN or state=LEX_END or state=LEX_OPERATOR or state=LEX_COMMENT or state=LEX_BINARY or state=LEX_NEWLINE then
--backtrack one character
--FIXME: bug: if this looks like part of a separator (KEYWORD) but isn't, then have granted a free comma where probably should throw an error instead
i-=1
exit
else --currently, state=LEX_CHAR or state=LEX_HEXDIGIT or state=LEX_WARN or state=LEX_STRING
if s[i]='.' then
src_error("Non-whole (decimal) numbers are not yet implemented", encode_srcpos(posoff,textstart,i-textstart+1))
end if
if equal(text,"-") then
exit --Jumps down to the 'if digits=0' error case
end if
src_error(
sprintf_utf("Found unexpected character "&COLYEL&"%s"&COLRED&" while reading a number, "&COLYEL&"%s"&COLRED&". Names such as variables can't start with a digit. Or maybe you forgot a comma or operator?"
,{s[i],text})
,encode_srcpos(posoff,i,1)
)
end if
i+=1
end while
if digits=0 then
--Encountered just '-' or '0x' or '-0x' (etc)
if equal(text,"-") then
src_error(COLYEL&"-"&COLRED&" can only be used to take the negation of a number, not other values/expressions (sorry!). Write "&COLYEL&"-1 * ..."&COLRED&" instead. Write "&COLYEL&"x -- y"&COLRED&" for subtraction.", encode_srcpos(posoff, textstart, 1))
else
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" looks like the start of a number, but there are no digits!",{s[textstart..i-1]}), encode_srcpos(posoff, textstart, i-textstart+1))
end if
end if
if base!=10 then
--Special behaviour for non-decimals: silently overflow large 32 bit unsigned values to negatives
if val>=#80000000 and val<#100000000 then
val=val-#100000000
end if
--Convert to text
text=sprintf("%d",{sign*val})
end if
if not int32(sign*val) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is too big to be stored in a 32-bit signed integer. Numbers must be between -2147483648 and 2147483647",{text}),encode_srcpos(posoff,textstart,textend-textstart+1))
end if
return({textend,text})
end function
---------------------------------------------------------------------------
--Clean the rest of a line for seek_include after 'include' occurs
--s is the whole line, i is end of the 'include', posoff is the file position at start of line
function lexer_read_include_line(TokenList tokens, string s, integer i, integer stop, integer posoff)
integer at
integer whitespace_len
sequence temp
string filename_string
srcpos pos
integer isquoted
if not equal(tokens[1][CMD_TEXT],"include") then
src_error(COLYEL&"include"&COLRED&" must be the only statement on the line. Maybe you tried to use this reserved keyword illegally?",tokens[$][CMD_POS])
end if
filename_string=""
--Trim the 'include', comments, and whitespace,
i+=1 --Skip the comma or whatever character ended the 'include'
s=s[i..stop]
if length(s) then
--Need to increment i while stripping
while s[1]=' ' or s[1]='\t' do
s=s[2..length(s)]
i+=1
end while
whitespace_len=0
--Optionally allow quotes around the filename.
--A quote mark in the middle of a filename doesn't need any escaping
isquoted=(s[1]='"')
if isquoted then
temp=lexer_read_string(s,2,length(s),posoff+i-1)
-- temp is {closing_quote_offset,quoted_string}
pos=encode_srcpos(posoff,i,temp[1])
s=s[temp[1]+1..$]
--s is now whatever is after the string
i+=temp[1] --1 past the closing "
filename_string=temp[2] --enclosed in quotes
whitespace_len=length(s)
--Trim comment and whitespace to check for anything after the filename string
at=find('#',s)
if at then
s=s[1..at-1]
whitespace_len=at-1
end if
s=trim_whitespace(s)
if length(s) then
src_error("Found unexpected text after "&COLYEL&"include, \"filename\""&COLRED,encode_srcpos(posoff,i,whitespace_len))
end if
else
--Trim comment and whitespace: for an unquoted filename this delimits the filename
at=find('#',s)
if at then
s=s[1..at-1]
end if
s=trim_whitespace(s) --Trims only from the right because we already striped left
filename_string=s
pos=encode_srcpos(posoff,i,length(s))
end if
tokens=append(tokens,{filename_string,pos})
end if
if length(tokens)=1 then
src_error("Expected name of file to follow "&COLYEL&"include"&COLRED,tokens[1][CMD_POS])
end if
if length(tokens)>2 then
src_error(sprintf_utf("Found unexpected syntax "&COLYEL&"%s"&COLRED&" instead of filename to include. Syntax of an include statement is "&COLYEL&"include, filename"&COLRED&" possibly with a path",{tokens[2][CMD_TEXT]}),tokens[2][CMD_POS])
end if
return(tokens)
end function
---------------------------------------------------------------------------
-- s is usually the text for the whole file, the interval i..stop is a single line,
-- and posoff is the srcpos offset for the beginning of the file
function lex_line(sequence s, integer i, integer stop, integer posoff)
integer state
TokenList tokens
string ident --identifier text being built up
sequence temp
string masked
integer found
integer textstart --the offset at which this identifier starts, otherwise 0
integer textend --the offset at which the last non-whitespace character was read
string remem_ident
integer remem_textstart
integer backtracked
integer last_state --A LEX_* constant; the last state that was not LEX_SPACE
integer last_state_i --Value at i where last_state was set
ident=""
last_state=LEX_NEWLINE
last_state_i=0
textstart=0
tokens={}
--pretty_print(stdout, {"lex_line posoff: ", posoff, "i:", i, "srcpos:", decode_srcpos(encode_srcpos(posoff,i,0))}, {2})
while i<=stop do
if s[i]<256 then
state=lexer_table[s[i]+1]
else
state=LEX_CHAR --other unicode
end if
--color_print("lexing i=%d %d=%s state %d ident=%s\n",{i,s[i],{s[i]},state,ident})
if state=LEX_CHAR or state=LEX_HEXDIGIT then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
textend=i
elsif state=LEX_SPACE then
elsif state=LEX_DIGIT then
if length(ident) then
ident=ident & s[i]
textend=i
else
--Start of a number not starting with - (those are parsed below)
temp=lexer_read_number(s,i,stop,posoff) --{offset of end of number, number string}
tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
i=temp[1]
end if
elsif state=LEX_WARN then
ident=ident & toLower(s[i])
if textstart=0 then
textstart=i
end if
textend=i
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is reserved and not allowed in names (identifiers)!", {s[i]}), encode_srcpos(posoff,i,1))
else
--We've reached the end of a token... unless it's only the first character
--of a two-character operator.
remem_ident=ident --for backtracking
remem_textstart=textstart
backtracked=false
if length(ident) then
tokens=append(tokens,{ident,encode_srcpos(posoff,textstart,textend+1-textstart)})
end if
ident=""
textstart=0
if state=LEX_BEGIN then --(
tokens=append(tokens,{"begin",encode_srcpos(posoff,i,1)})
elsif state=LEX_END then --)
if last_state=LEX_COMMA then
--foo(1,). This probably doesn't particularly need to be disallowed
--Could also check for ) but that will be caught later
end if
tokens=append(tokens,{"end",encode_srcpos(posoff,i,1)})
elsif state=LEX_COMMA then
--Check we didn't just finish an identifier, to allow using : (which is LEX_OPERATOR) on the end of an identifier.
if length(remem_ident)=0 and (last_state=LEX_COMMA or last_state=LEX_BEGIN or last_state=LEX_OPERATOR or last_state=LEX_OPERATOR2) then
--foo(, 1) or foo(1, , 2) or , but not ),
src_error(
sprintf_utf("Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",{}),
encode_srcpos(posoff,i,1))
end if
elsif state=LEX_NEWLINE then
elsif state=LEX_COMMENT then
exit
elsif state=LEX_STRING then
temp=lexer_read_string(s,i+1,stop,posoff) --{final offset, string}
tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
i=temp[1]
elsif state=LEX_OPERATOR then
--A character which the start of some auto-separating sequence such as +=.
--Read the next non-space character. If it's LEX_OPERATOR2 we greedily
--add it, otherwise we do nothing (we only support length 1 and 2
--separators). However, we might have have been too greedy (not a value
--two-char separator), and need to put it back.
textstart=i
masked=s[i..i]
integer lookahead_state
--This while loop is in order to allow whitespace in the middle of separators.
while i stop
--Lookup for separators which are prefixes of masked
found=false
for j=1 to length(separator_list) do
if match(separator_list[j],masked)=1 then
--very ugly
if length(separator_list[j])=1 then
--Backtrack if necessary to first character in masked, otherwise remain at second
i=textstart
end if
tokens=append(tokens,{separator_list[j],encode_srcpos(posoff,textstart,i-textstart+1)})
found=true
exit
end if
end for
if found=false then
--need to backtrack, in particular for -
--color_print("backtracking on '%s'\n", {masked})
backtracked=true
ident=remem_ident & masked[1]
i=textstart
if length(remem_ident) then
--This character appeared inside a normal token, resume reading that token
tokens=tokens[1..$-1]
textstart=remem_textstart
elsif equal(ident,"-") then
--'-' appears at the start of a token: this is only allowed to be a number.
--We haven't appended anything to tokens that needs removing.
ident=""
temp=lexer_read_number(s,i,stop,posoff)
tokens=append(tokens,{temp[2],encode_srcpos(posoff,i,temp[1]-i+1)})
i=temp[1]
textstart=0
state=LEX_DIGIT --Sensible last_state to avoid 'extra comma' error
else
--a keyword character followed by garbage
src_error(
"Expected a two-character operator (like "&COLYEL&":="&COLRED&") but the second character (e.g. "&COLYEL&"="&COLRED&") is missing",
encode_srcpos(posoff,i,1))
end if
else
textstart=0
if last_state=LEX_COMMA then
-- ,
--(Could check for ( but that will be detected later anyway.)
--Allowed if it's a unary operator. There's only one, since - is not an operator.
if s[i]!='$' then
src_error(
"Found an extra comma which previous HSpeak versions ignored, but which might have a different meaning in the future. Remove it.",
encode_srcpos(posoff,last_state_i,1))
end if
end if
end if
elsif state=LEX_BINARY then
lexer_binary_error(encode_srcpos(posoff,i,1))
end if
if backtracked=false then
if equal(remem_ident,"include") then
--include is a special case, as normal lexing rules don't apply for rest of line
if length(ident) then
simple_error("compiler bug! [before lexer_read_include_line]\n")
end if
return lexer_read_include_line(tokens,s,i,stop,posoff)
end if
end if
--End of a complete token
end if
if state!=LEX_SPACE then
last_state_i=i
last_state=state
end if
i+=1
end while
if length(ident) then
--ought to check whether ident is 'include'...
tokens=append(tokens,{ident,encode_srcpos(posoff,textstart,textend+1-textstart)})
end if
return(tokens)
end function
---------------------------------------------------------------------------
--this also checks for rogue strings and $'s
function translate_plotstrings(TokenList tokens)
integer state
integer start_token
integer i
Token string_func
Token string_token
srcpos pos,vpos
state=0
i=1
while i<=length(tokens) do
if compare(tokens[i][CMD_TEXT],"$")=0 then
pos=tokens[i][CMD_POS]
vpos=virtual_pos(pos)
if state=0 then
state=1
start_token=i
else
--jump to error throw
state=1
exit
end if
end if
if state=2 then
if tokens[i][CMD_TEXT][1]='"' then
string_list=append(string_list,tokens[i][CMD_TEXT][2..$-1])
string_token={sprintf("@$string%d",{length(string_list)}),tokens[i][CMD_POS]}
tokens=tokens[1..start_token-1]&{string_func,{"begin",vpos}}&tokens[start_token+1..i-2]&{string_token,{"end",vpos}}&tokens[i+1..$]
state=0 --may have multiple strings on one line
else --we saw a + (or illegal =) inside the string number expression
state=1
end if
end if
if state=1 and compare(tokens[i][CMD_TEXT],"=")=0 then
string_func={"setstringfromtable",pos}
state=2
elsif state=1 and compare(tokens[i][CMD_TEXT],"+")=0 then
string_func={"appendstringfromtable",pos}
state=2
end if
i+=1
end while
if state!=0 then
--we've seen a surplus $, not right
src_error(COLYEL&"$"&COLRED&" may only be used as part of a $...=\"...\" or $...+\"...\" construct",pos)
end if
return(tokens)
end function
---------------------------------------------------------------------------
--Given a command tree, get the original text from the file
--OK, this function is horribly complicated just so tracevalue can print
--"hero X (me)" instead of "herox(me)". It could be implemented much simpler if
--macros were expanded in the lexer, but then build_ast and
--convert_macros would have to be as well
function tree_original_text(Node tree)
TokenList script_toks
Node subtree
srcpos leftmost, rightmost
integer left_at, right_at
integer filenum
integer left_point, right_point
integer depth
TokenList tokens
string text
script_toks=all_scripts[cur_script_num][SCRIPT_BODY_TOKS]
--Find leftmost and rightmost tokens making up the expression, not including brackets
--(Do we care about virtual flags?)
--NOTE: we're making an assumption about srcpos encoding here, that
--if token A begins before token B, then srcpos A < srcpos B
leftmost=tree[TREE_TRUNK][CMD_POS]
subtree=tree
while length(subtree[TREE_BRANCHES]) do
subtree=subtree[TREE_BRANCHES][1]
if subtree[TREE_TRUNK][CMD_POS]= SCRIPT_VARIABLES then
local_vars=all_scripts[cur_script_num][SCRIPT_VARIABLES]
for idx=1 to length(local_vars) do
if equal(local_vars[idx][CMD_TEXT],s[CMD_TEXT]) then
if local_vars[idx][VAR_FRAME]=0 then --is local
src_position=decode_srcpos(local_vars[idx][CMD_POS])
src_error(
sprintf_utf(
COLYEL&"%s"&COLRED&" can't be used as a %s because it's already defined as a local variable/argument on line %d of "&COLPNK&"%s"&COLRED,
{s[CMD_TEXT],expect,src_position[POS_LINE],src_position[POS_FILENAME]}
)
,s[CMD_POS]
)
end if
end if
end for
end if
end if
end procedure
---------------------------------------------------------------------------
function force_16_bit(atom n, srcpos pos)
if n>32767 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"32767"&COLRED,{n}),pos)
n=32767
elsif n<-32768 then
src_warn(sprintf_utf("number "&COLYEL&"%d"&COLRED&" is out of range for a 16-bit signed integer, and will be truncated to "&COLYEL&"-32768"&COLRED,{n}),pos)
n=-32768
end if
return(n)
end function
---------------------------------------------------------------------------
function try_string_to_number(Token s)
atom result
result=floor(string_to_object(s[CMD_TEXT],0))
if not string_is_int32(s[CMD_TEXT]) then
src_error(sprintf_utf("Expected number but found "&COLYEL&"%s"&COLRED,{s[CMD_TEXT]}),s[CMD_POS])
end if
return(result)
end function
---------------------------------------------------------------------------
function enforce_constants(string s)
--enforces both constants
object v
v=map:get(constant_list,s,{})
if length(v) then
v=v[CONST_VALUE]
if int32(v) then
return(sprintf("%d",{v}))
end if
end if
return(s)
end function
---------------------------------------------------------------------------
function get_cmd()
Token result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=enforce_constants(result[CMD_TEXT])
return(result)
end function
---------------------------------------------------------------------------
function get_cmd_no_constants()
Token result
if get_cmd_pointer>length(cmd) then
src_error("Unexpected end of file",cmd[length(cmd)][CMD_POS])
end if
result=cmd[get_cmd_pointer]
get_cmd_pointer+=1
result[CMD_TEXT]=result[CMD_TEXT]
return(result)
end function
---------------------------------------------------------------------------
--Returns a list of tokens enclosed in begin,end, with no nested begin,end allowed.
function get_cmd_block(integer convert_constants)
Token this
TokenList result
result={}
this=get_cmd()
if compare("begin",this[CMD_TEXT])!=0 then
src_error(sprintf_utf("Expected "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" bracket but found "&COLYEL&"%s"&COLRED,{this[CMD_TEXT]}),this[CMD_POS])
end if
while true do
if convert_constants then
this=get_cmd()
else
this=get_cmd_no_constants()
end if
if compare("begin",this[CMD_TEXT])=0 then
src_error("Recursive "&COLYEL&"begin"&COLRED&" and "&COLYEL&"("&COLRED&" brackets are not permitted in this block",this[CMD_POS])
elsif compare("end",this[CMD_TEXT])=0 then
exit--break out of the while
else
result=append(result,this)
end if
end while
return(result)
end function
---------------------------------------------------------------------------
--Get a block containing balanced begin,end pairs.
--Returns {ptr,TokenList}
--Initial ptr is 1 if we already grabbed the opening 'begin'.
function get_cmd_depth(integer ptr, TokenList data, integer depth)
TokenList result
Token this
result={}
while true do
--if get_key()=27 then abort(1/0) end if
if ptr>length(data) then
src_error("block ended prematurely. Missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&"?",data[length(data)][CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
depth-=1
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
end if
if depth=0 then
exit --break out of the while
else
result=append(result,this)
end if
end while
return({ptr,result})
end function
---------------------------------------------------------------------------
--Like get_cmd_block, but allowed to contain balanced begin,end pairs.
function get_cmd_block_nestable(integer convert_constants)
sequence ptr_and_block
TokenList block
ptr_and_block=get_cmd_depth(get_cmd_pointer,cmd,0)
get_cmd_pointer=ptr_and_block[1]
block=ptr_and_block[2][2..$] --trim leading begin
if convert_constants then
for i=1 to length(block) do
block[i][CMD_TEXT]=enforce_constants(block[i][CMD_TEXT])
end for
end if
return block
end function
---------------------------------------------------------------------------
procedure parse_constant_block(TokenList block)
atom num
string name
srcpos pos
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name to follow but defineconstant block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_constant(block[i+1],num)
name=block[i+1][CMD_TEXT]
pos=block[i+1][CMD_POS]
map:put(constant_list,name,{num,pos})
map:put(global_scope,name,{KIND_CONSTANT,num,pos})
end for
end procedure
---------------------------------------------------------------------------
procedure parse_trigger_block(TokenList block)
integer num
string name
srcpos pos
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("Expected name but script trigger definition block ended",block[i][CMD_POS])
end if
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
check_undefined_string(block[i+1],"script trigger name")
name=block[i+1][CMD_TEXT]
pos=block[i+1][CMD_POS]
trigger_list=append(trigger_list,{num,name})
map:put(global_scope,name,{KIND_TOPLEVEL,0,pos})
end for
end procedure
---------------------------------------------------------------------------
procedure parse_version_block(TokenList block)
string req_version
if length(block)<2 then
--Don't have a srcpos
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (empty 'plotscr version' block)")
end if
plotscr_version=block[1][CMD_TEXT] --version ID of this plotscr.hsd version (equal to the version of HSpeak at the time)
req_version=block[2][CMD_TEXT] --the minimum version of HSpeak which supports this plotscr.hsd
--trim quote marks
if plotscr_version[1]!='"' or req_version[1]!='"' then
simple_error(COLYEL&"plotscr.hsd"&COLRED&" is malformed (expected strings in 'plotscr version')")
end if
plotscr_version=plotscr_version[2..$-1]
req_version=req_version[2..$-1]
if compare(plotscr_version, MIN_PLOTSCR_VERSION) < 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
end if
--printf(stdout,"'%s' '%s'\n", {req_version, COMPILER_VERSION&COMPILER_SUB_VERSION})
if compare(req_version, COMPILER_VERSION&COMPILER_SUB_VERSION) > 0 then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" (version "&trim_whitespace(plotscr_version)&") from a newer version of the OHRRPGCE than supported by this version of HSpeak. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE.")
end if
end procedure
---------------------------------------------------------------------------
procedure create_global(atom id, string name, srcpos pos)
integer at
at=find(id,global_list[PAIR_NUM])
if at then
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{id,global_list[PAIR_NAME][at]}),pos)
else
if id>=0 and id<=MAXGLOBAL then
global_list[PAIR_NUM]=append(global_list[PAIR_NUM],id)
global_list[PAIR_NAME]=append(global_list[PAIR_NAME],name)
global_list[GLB_POS]=append(global_list[GLB_POS],pos)
map:put(global_scope,name,{KIND_GLOBAL,id,pos})
else
src_error(sprintf_utf("global variable ID "&COLYEL&"%d"&COLRED&" is not permitted. Valid IDs are 0 to %d",{id,MAXGLOBAL}),pos)
end if
end if
end procedure
---------------------------------------------------------------------------
procedure parse_global_block(TokenList block)
atom num
for i=1 to length(block) by 2 do
if i+1>length(block) then
src_error("expected name but globalvariable block ended",block[i][CMD_POS])
end if
num=try_string_to_number({enforce_constants(block[i][CMD_TEXT]),block[i][CMD_POS]})
check_undefined_string(block[i+1],"global variable name")
create_global(num,block[i+1][CMD_TEXT],block[i][CMD_POS])
end for
end procedure
---------------------------------------------------------------------------
procedure parse_operator_block(TokenList block)
atom num
Symbol sym
Kind kind
string name,truename
srcpos pos
for i=1 to length(block) by 3 do
if i+2>length(block) then
src_error("expected name but defineoperator block ended",block[i][CMD_POS])
end if
num=try_string_to_number(block[i])
mustnt_be_a_number(block[i+1])
mustnt_be_a_number(block[i+2])
name=block[i+1][CMD_TEXT]
truename=block[i+2][CMD_TEXT]
pos=block[i+2][CMD_POS]
operator_list=append(operator_list,{num,name,truename,pos})
sym=map:get(global_scope,name,nil_sym)
kind=sym[SYM_KIND]
if kind then
if kind!=KIND_SEPARATOR and kind!=KIND_MATH then
src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" may not be used as an operator",{KIND_LONGNAMES[kind],name}),pos)
else
--Existing names do not get added to global_scope, because we need to be able to use
--global_scope to map names like 'mod' to the math function rather than the operator.
--Instead, operator_list is consulted directly to lookup operators.
end if
else
--Add to global_scope to prevent it from being reused for anything else.
map:put(global_scope,name,{KIND_OPERATOR,length(operator_list),pos})
end if
end for
end procedure
---------------------------------------------------------------------------
--Decide which ID number to assign to an autonumbered script.
--name isn't fully-scoped.
function pick_script_id(integer parent_id, string name)
integer at, id
sequence encoded
if length(reuse_ids) then
encoded=encode_ohr(name)
if length(encoded)>36 then
encoded=encoded[1..36]
end if
at=find_in_column({parent_id,encoded},reuse_ids,REUSE_PARENT_AND_NAME)
if at then
return reuse_ids[at][PAIR_NUM]
end if
end if
loop do
autonumber_id-=1
id=autonumber_id
until find_in_column(id,reuse_ids,PAIR_NUM)=0
end loop
return id
end function
---------------------------------------------------------------------------
--Compute bits to add to FUNC_FLAGS from a FUNC_ATTRS sequence
function compute_attribute_bits(sequence attributes)
integer at
integer attr_bits
attr_bits=0
for i=1 to length(attributes) do
at=find_in_column(attributes[i][CMD_TEXT],attribute_list,ATTR_NAME)
if at=0 then
simple_error("Compiler bug! compute_attribute_bits can't handle "&attributes[i][CMD_TEXT])
end if
attr_bits=or_bits(attr_bits,attribute_list[at][ATTR_FLAGS])
end for
return attr_bits
end function
---------------------------------------------------------------------------
--arglist is either a sequence of default values (which may equal to the NO_DEFAULT constant), or it's equal to
--VAR_ARGS indicating a variable (unlimited) number of arguments.
--id may only be -1 (autonumbered) when there is no parent script (so that the unscoped
--and fully-scoped name are the same), otherwise pick_script_id must be called explicitly!
function create_function(sequence list, integer id, string name, object arglist, integer func_type, srcpos pos, sequence attributes)
integer at, attr_bits
if func_type=KIND_SCRIPT then
at=find(id,column(list,PAIR_NUM))
if at then
src_error(sprintf_utf("%s ID "&COLYEL&"%d"&COLRED&" is already defined as "&COLYEL&"%s"&COLRED,{KIND_LONGNAMES[func_type],id,list[at][PAIR_NAME]}),pos)
end if
if id=0 or id<-1 then
src_error(sprintf_utf("ID "&COLYEL&"%d"&COLRED&" is not valid",{id}),pos)
elsif id=-1 then
--autonumber
id=pick_script_id(0,name)
end if
end if
attr_bits=compute_attribute_bits(attributes)
list=append(list,{id,name,arglist,attributes,attr_bits,pos})
map:put(global_scope,name,{func_type,id,pos})
return(list)
end function
---------------------------------------------------------------------------
--Read zero or more attributes (token prefixed with @ to set them apart, optionally with parameters)
--from current position 'ptr' in 'block'.
--We assume that this is never called on a token that might be a reference instead of an attribute
--(eg by first checking for KIND_ATTRIBUTE)
function parse_attributes(TokenList block, integer ptr)
sequence attributes
sequence attr_args, ptr_and_block
string attr_name
integer at
attributes={}
while ptr<=length(block) and block[ptr][CMD_TEXT][1]='@' do
attr_name=block[ptr][CMD_TEXT]
at=find_in_column(attr_name,attribute_list,ATTR_NAME)
if at=0 then
src_error("Unrecognised function attribute "&COLYEL&attr_name&COLRED, block[ptr][CMD_POS])
end if
if ptr+10 and i>length(block) then
src_error(sprintf_utf("expected %s id to follow, but define block ended",{KIND_LONGNAMES[func_type]}),block[i-1][CMD_POS])
end if
--ID
num=force_16_bit(try_string_to_number(block[i]),block[i][CMD_POS])
if i+1>length(block) then
src_error(sprintf_utf("expected %s name but define block ended",{KIND_LONGNAMES[func_type]}),block[i][CMD_POS])
end if
--Name
i+=1
check_undefined_string(block[i],KIND_LONGNAMES[func_type]&" name")
name=block[i][CMD_TEXT]
name_pos=block[i][CMD_POS]
if i+1>length(block) then
src_error("expected argument count but define block ended",block[i][CMD_POS])
end if
--Get arglist
i+=1
args=try_string_to_number(block[i])
arglist={}
if args<0 then
if func_type=KIND_SCRIPT then
src_error("Number of arguments for a script can't be negative (variable number of arguments aren't supported)",block[i][CMD_POS])
end if
arglist=VAR_ARGS
else
for j=1 to args do
if i+1>length(block) then
src_error("expected argument default but define block ended",block[i][CMD_POS])
end if
i+=1
arglist=append(arglist,try_string_to_number(block[i]))
end for
if args>MAX_ARGS then
src_error(sprintf_utf("Number of arguments (%d) is more than allowed (%d)",{args,MAX_ARGS}),block[i][CMD_POS])
end if
end if
i+=1
list=create_function(list,num,name,arglist,func_type,name_pos,attributes)
end while
return(list)
end function
---------------------------------------------------------------------------
procedure preliminary_parse_pass()
Token this
enter_timing_zone("Preliminary pass")
color_print("preliminary pass\n",{})
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
if equal("defineconstant",this[CMD_TEXT]) then
parse_constant_block(get_cmd_block(false))
elsif equal("definetrigger",this[CMD_TEXT]) then
parse_trigger_block(get_cmd_block(false))
elsif equal("plotscrversion",this[CMD_TEXT]) then
parse_version_block(get_cmd_block(false))
end if
end while
--Check for very old plotscr.hsd
if equal(plotscr_version,"") then
if find("plotscr.hsd",column(file_list,FILE_NAME)) then
simple_error("You have included a copy of "&COLYEL&"plotscr.hsd"&COLRED&" from an old release of the OHRRPGCE. Please use the copies of plotscr.hsd and HSpeak provided with the OHRRPGCE..")
end if
--If plotscr.hsd is not included then we're compiling with -b, so don't complain
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
--Parse a script or subscript, finding the list of tokens (NOT yet transformed into a tree),
--and appending an entry to all_scripts.
--parent_name is a fully-scoped name of parent script, or ""
--if this script isn't a subscript of another.
--Returns Token with name of parsed script
function parse_script(Token trigger, string parent_name, integer parent_idx, sequence attributes)
Token name
string full_name
TokenList arglist
TokenList body
Token this
string err_string
string previous_script
sequence src_position
string msg
integer depth
integer nest_depth
Symbol sym
integer temp
integer idx
sequence subscript_attributes
sequence attrs_and_ptr
name=get_cmd()
if parent_idx=0 then
full_name=name[CMD_TEXT]
nest_depth=0
else
full_name=sprintf_utf("%s<-%s",{parent_name,name[CMD_TEXT]})
nest_depth=all_scripts[parent_idx][SCRIPT_NEST_DEPTH]+1
if nest_depth>MAX_NEST_DEPTH then
src_error(
sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is nested too deeply (%d layers deep), which is more than is supported. Reorganise your scripts to not use so much nesting."
,{trigger[CMD_TEXT],name[CMD_TEXT],nest_depth})
,name[CMD_POS]
)
end if
end if
--Check name not already in use... unless it appears in a definescript block; if it did the name is already reserved.
--Note that we won't add it to script_list or global_scope yet; that happens later in check_script_declarations
if not find_in_column(full_name,script_list,PAIR_NAME) then
cur_script_num=parent_idx --temporarily to check for subscripts
check_undefined_string(name, COLYEL &trigger[CMD_TEXT] &COLRED &" name")
cur_script_num=0 --This probably isn't needed
end if
previous_script=current_script
current_script=full_name --name[CMD_TEXT]
--Parse arguments
arglist={}
while true do
if get_cmd_pointer>length(cmd) then
src_error(sprintf_utf("%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{trigger[CMD_TEXT],name[CMD_TEXT]}),name[CMD_POS])
end if
this=get_cmd()
if compare("begin",this[CMD_TEXT])=0 then
exit--break the while
end if
--Note that arglist isn't the actual list of arguments, it's just a string of tokens right now (including defaults).
--The actual arglist is built in process_arglist, while compiling each script.
arglist=append(arglist,this)
end while
--Create all_scripts entry now for recursive calls; SCRIPT_BODY_TOKS and SCRIPT_END_POS filled in later,
--and more fields are appended in compile_a_script
all_scripts=append(all_scripts,{
trigger
,name[CMD_TEXT]
,name[CMD_POS]
,full_name
,arglist
,{} --SCRIPT_BODY_TOKS
,parent_idx
,nest_depth
,attributes
,0 --SCRIPT_END_POS
})
idx=length(all_scripts)
--Read the body
--Every script is nested inside a big fat do() block
body={{"do",virtual_pos(name[CMD_POS])}}
depth=0
subscript_attributes={}
while true do
--If we just read some attribute statements, they must precede a subscript
if length(subscript_attributes)>0 and equal("subscript",this[CMD_TEXT])=0 then
src_error(
sprintf_utf(
"Expected a subscript to follow function attribute "&COLYEL&"%s"&COLRED&", but found "&COLYEL&"%s"&COLRED
,{subscript_attributes[$][CMD_TEXT],this[CMD_TEXT]}
)
, this[CMD_POS])
end if
body=append(body,this)
if compare("end",this[CMD_TEXT])=0 then
depth-=1
if depth=0 then
all_scripts[idx][SCRIPT_END_POS]=this[CMD_POS]
exit--break while
end if
elsif compare("begin",this[CMD_TEXT])=0 then
depth+=1
elsif compare("subscript",this[CMD_TEXT])=0 then
--Recurse, so body of subscript doesn't appear in this script.
--The subscript node stays, for syntax checking, but is removed in normalize_flow_control
parse_script(this,full_name,idx,subscript_attributes)
subscript_attributes={}
body&={{"begin",this[CMD_POS]},{"end",this[CMD_POS]}}
else
sym=map:get(global_scope,this[CMD_TEXT],nil_sym)
temp=sym[SYM_KIND]
if temp=KIND_ATTRIBUTE then
--Read all subscript attributes, and don't put them in this script's body
body=body[1..$-1]
attrs_and_ptr=parse_attributes(cmd,get_cmd_pointer-1)
subscript_attributes=attrs_and_ptr[1]
get_cmd_pointer=attrs_and_ptr[2]
elsif temp>KIND_LAST_VALID_IN_SCRIPT then
err_string=sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" is not permitted inside a script.",
{KIND_LONGNAMES[temp], this[CMD_TEXT]}
)
if temp=KIND_TOPLEVEL then
err_string&=sprintf_utf(" Perhaps "&COLYEL&"%s"&COLRED&" has an extra "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED&" or is missing a "&COLYEL&")"&COLRED&" or "&COLYEL&"end"&COLRED&".",{name[CMD_TEXT]})
end if
src_error(err_string,this[CMD_POS])
end if
end if
if get_cmd_pointer>length(cmd) then
src_error(
sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" is missing "&COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED
,{trigger[CMD_TEXT],name[CMD_TEXT]}
)
,name[CMD_POS]
)
end if
this=get_cmd()
end while
all_scripts[idx][SCRIPT_BODY_TOKS]=body
current_script=previous_script
return name
end function
---------------------------------------------------------------------------
procedure parse_top_level()
Token this
sequence triggers, attributes
sequence attrs_and_ptr
integer triggeridx
enter_timing_zone("Top level pass")
color_print("parsing top-level\n",{})
triggers=column(trigger_list,PAIR_NAME)
attributes={}
get_cmd_pointer=1
while get_cmd_pointer<=length(cmd) do
--read a top-level command
this=get_cmd()
triggeridx=find(this[CMD_TEXT],triggers)
--If we just read some attribute statements, they must precede a script
if length(attributes)>0 and triggeridx=0 then
src_error(
sprintf_utf(
"Expected a script to follow function attribute "&COLYEL&"%s"&COLRED&", but found "&COLYEL&"%s"&COLRED
,{attributes[$][CMD_TEXT],this[CMD_TEXT]}
)
, this[CMD_POS])
end if
if compare("defineconstant",this[CMD_TEXT])=0 then
get_cmd_block(true)
elsif compare("definetrigger",this[CMD_TEXT])=0 then
get_cmd_block(true)
elsif compare("globalvariable",this[CMD_TEXT])=0 then
parse_global_block(get_cmd_block(true))
elsif compare("defineoperator",this[CMD_TEXT])=0 then
parse_operator_block(get_cmd_block(true))
elsif compare("definefunction",this[CMD_TEXT])=0 then
function_list=parse_define_block(get_cmd_block_nestable(true),function_list,KIND_FUNCTION)
elsif compare("definescript",this[CMD_TEXT])=0 then
script_list=parse_define_block(get_cmd_block(true),script_list,KIND_SCRIPT)
elsif compare("plotscrversion",this[CMD_TEXT])=0 then
--ignore
get_cmd_block(false)
elsif triggeridx>0 then
--"script"/etc. Negative trigger IDs are subscripts
if trigger_list[triggeridx][PAIR_NUM]<0 then
src_error(
sprintf_utf(
COLYEL&"%s"&COLRED&" can only occur inside another script. Use "&COLYEL&"script"&COLRED&" instead for regular scripts."
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
else
parse_script(this,{},0,attributes)
attributes={}
end if
elsif this[CMD_TEXT][1]='@' then
--This will keep reading all consecutive attributes until there are no more
attrs_and_ptr=parse_attributes(cmd,get_cmd_pointer-1)
attributes=attrs_and_ptr[1]
get_cmd_pointer=attrs_and_ptr[2]
else
check_for_reserved(this[CMD_TEXT],this[CMD_POS],"top-level declaration")
src_error(
sprintf_utf(
"Expected top-level declaration but found "&COLYEL&"%s"&COLRED
,{this[CMD_TEXT]}
)
,this[CMD_POS]
)
end if
end while
if length(attributes)>0 then
--If we just read some attribute statements, they must precede a script
src_error("Expected a script to follow function attribute, but file ended", this[CMD_POS])
end if
cmd={} --Free memory
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
procedure dump_script_and_function_info(integer fh, sequence list)
sequence this
string id_string
sequence src_position
for i=1 to length(list) do
this=list[i]
if this[PAIR_NUM]>autonumber_id then
id_string=sprintf("AUTONUMBER=%d",{this[PAIR_NUM]})
else
id_string=sprintf("ID=%d",{this[PAIR_NUM]})
end if
src_position=decode_srcpos(this[FUNC_POS])
print_utf(fh,"%s %d\t%s\t%s(",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,id_string
,this[PAIR_NAME]
})
if equal(this[FUNC_ARGS],VAR_ARGS) then
print_utf(fh,"VARIABLE ARGS",{})
else
for j=1 to length(this[FUNC_ARGS]) do
if j>1 then
print_utf(fh,",",{})
end if
if equal(this[FUNC_ARGS][j],NO_DEFAULT) then
print_utf(fh,"REQ",{})
else
print_utf(fh,"%d",{this[FUNC_ARGS][j]})
end if
end for
end if
print_utf(fh,")\n",{})
end for
end procedure
---------------------------------------------------------------------------
function seek_string_by_id(integer id, sequence list, string name)
integer at
at=find_in_column(id,list,PAIR_NUM)
if at then
return(list[at][PAIR_NAME])
else
simple_error(sprintf_utf("decompiler couldnt find %s ID "&COLYEL&"%d"&COLRED,{name,id}))
end if
return("")
end function
---------------------------------------------------------------------------
--Used while dumping script binary
function name_lookup(sequence pair, VarList locals)
integer at
integer frame,id
if pair[1]=KIND_NUMBER then
return(sprintf("%d",{pair[2]}))
elsif pair[1]=KIND_LOCAL or pair[1]=KIND_NONLOCAL then
frame=and_bits(pair[2],#FF00)/#100
id=and_bits(pair[2],#FF)
for i=1 to length(locals) do
if locals[i][VAR_FRAME]=frame and locals[i][VAR_ID]=id then
return locals[i][CMD_TEXT]
end if
end for
simple_error(sprintf_utf("HSpeak bug: decompiler found illegal variable ID "&COLYEL&"%d*256+%d"&COLRED,{frame,id}))
elsif pair[1]=KIND_GLOBAL then
at=find(pair[2],global_list[PAIR_NUM])
if at then
return(global_list[PAIR_NAME][at])
else
simple_error(sprintf_utf("decompiler couldnt find global variable ID "&COLYEL&"%d"&COLRED,{pair[2]}))
end if
elsif pair[1]=KIND_FLOW then
return(seek_string_by_id(pair[2],flow_list,"flow control structure"))
elsif pair[1]=KIND_SCRIPT then
return(seek_string_by_id(pair[2],script_list,"user script"))
elsif pair[1]=KIND_FUNCTION then
return(seek_string_by_id(pair[2],function_list,"hardcoded function"))
elsif pair[1]=KIND_MATH then
return(seek_string_by_id(pair[2],math_list,"built-in function"))
else
simple_error(sprintf_utf("HSpeak bug: decompiler found illegal kind "&COLYEL&"%d"&COLRED,{pair[1]}))
end if
end function
---------------------------------------------------------------------------
function binstring_to_int(sequence encoded)
--bytes_to_int is NOT the opposite of int_to_bytes, it can't handle negative numbers, which int_to_bytes mangles
atom temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100+and_bits(encoded[3],#FF)*#10000+and_bits(encoded[4],#FF)*#1000000
if and_bits(temp,#80000000) then
return(temp-#100000000)
end if
return(temp)
end function
---------------------------------------------------------------------------
function read_word(sequence encoded)
--opposite of output_word. encoded should be length 2 or more
integer temp
temp=and_bits(encoded[1],#FF)+and_bits(encoded[2],#FF)*#100
if and_bits(temp,#8000) then
return(temp-#10000)
end if
return(temp)
end function
---------------------------------------------------------------------------
--Convenience wrapper around read_word, to read an int16 with offset given
--in int16 words and starting from 0 instead of 1.
function read_int16(sequence encoded, integer offset)
return read_word(encoded[1+2*offset..2+2*offset])
end function
---------------------------------------------------------------------------
function dump_script_binary(sequence bin, integer offset, integer depth, VarList locals)
string result
sequence kind_and_id
integer kind
integer argcount
integer new_offset
result=""
kind_and_id={binstring_to_int(bin[offset*4+1..offset*4+4]),binstring_to_int(bin[offset*4+5..offset*4+8])}
kind=kind_and_id[1]
result&=sprintf_utf("%s%s",{
repeat(' ',depth) --indent
,name_lookup(kind_and_id,locals)
})
if kind=KIND_FLOW or kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_MATH then
argcount=binstring_to_int(bin[1+offset*4+8..1+offset*4+11])
if argcount then
result&="(\n"
for i=0 to argcount-1 do
new_offset=binstring_to_int(bin[1+(offset+3+i)*4..1+(offset+3+i)*4+3])
result&=dump_script_binary(bin,new_offset,depth+2,locals)
end for
result&=repeat(' ',depth)&")\n"
else
result&="()\n"
end if
else
result&="\n"
end if
return(result)
end function
---------------------------------------------------------------------------
function dump_script_tree(NodeList tree, integer depth)
string result
result=""
for i=1 to length(tree) do
result&=sprintf_utf("%s%s",{
repeat(' ',depth)--indent
,tree[i][TREE_TRUNK][CMD_TEXT]
})
if length(tree[i][TREE_BRANCHES])>0 then
result&="(\n"
result&=dump_script_tree(tree[i][TREE_BRANCHES],depth+2)
result&=repeat(' ',depth)&")\n"
else
result&="\n"
end if
end for
return(result)
end function
---------------------------------------------------------------------------
function dump_script_strings(sequence bin)
string result
integer table_start
integer offset
integer len
result=""
table_start=binstring_to_int(bin[9..12])
if table_start=0 then
return("")
end if
bin=bin[table_start+1..$]
offset=1
while offset1 then
debug_file=normalize_filename(path_only(dest_file)&"hs_debug.txt")
else
debug_file="hs_debug.txt"
end if
fh=open(debug_file,"w")
if fh!=failure then
wrap_print("writing debug report file "&COLBWHI&"%s"&COLWHI&"\n",{debug_file})
-------------------------------------
print_utf(fh,"[Scripts]\n",{})
dump_script_and_function_info(fh,script_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Global Variables]\n",{})
for i=1 to length(global_list[PAIR_NUM]) do
src_position=decode_srcpos(global_list[GLB_POS][i])
print_utf(fh,"%s %d\tID=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,global_list[PAIR_NUM][i]
,global_list[PAIR_NAME][i]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Builtin Functions]\n",{})
dump_script_and_function_info(fh,function_list)
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Operators]\n",{})
for i=1 to length(operator_list) do
src_position=decode_srcpos(operator_list[i][OPER_POS])
print_utf(fh,"%s %d\t%s\t%s\tPriority=%d\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,operator_list[i][PAIR_NAME]
,operator_list[i][OPER_TRUENAME]
,operator_list[i][PAIR_NUM]
})
end for
print_utf(fh,"\n",{})
-------------------------------------
print_utf(fh,"[Script Dumps]\n",{})
for i=1 to length(all_scripts) do
script=all_scripts[i]
src_position=decode_srcpos(script[SCRIPT_POS])
print_utf(fh,"%s %d\tID=%d\t%s (%s)\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,script[SCRIPT_ID]
,script[SCRIPT_NAME]
,script[SCRIPT_FULL_NAME]
})
idx=script[SCRIPT_PARENT_IDX]
if idx then
src_position=decode_srcpos(script[SCRIPT_POS])
print_utf(fh,"%s %d\tparent=ID %d\t%s (access to %d nonlocals)\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,all_scripts[idx][SCRIPT_ID]
,all_scripts[idx][SCRIPT_NAME]
,script[SCRIPT_NUM_NONLOCALS]
})
end if
src_position=decode_srcpos(script[SCRIPT_TRIGGER_TOK][CMD_POS])
print_utf(fh,"%s %d\tTrigger=%d\t%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,script[SCRIPT_TRIGGER_ID]
,script[SCRIPT_TRIGGER_TOK][CMD_TEXT]
})
locals=script[SCRIPT_VARIABLES]
for j=1 to length(locals) do
src_position=decode_srcpos(locals[j][CMD_POS])
print_utf(fh,"%s %d\tvar(frame %d id %d)=%s\n",{
src_position[POS_FILENAME]
,src_position[POS_LINE]
,locals[j][VAR_FRAME]
,locals[j][VAR_ID]
,locals[j][CMD_TEXT]
})
end for
print_utf(fh,"%d bytes compiled\n",{length(script[SCRIPT_BINARY])})
-- print_utf(fh,"%s\n\n",{dump_script_tree(script[SCRIPT_AST],0)})
print_utf(fh,"%s",{dump_script_binary(script[SCRIPT_BINARY][CODE_START_BYTE_OFFSET+1..$],0,0,script[SCRIPT_VARIABLES])})
print_utf(fh,"%s\n\n",{dump_script_strings(script[SCRIPT_BINARY])})
end for
print_utf(fh,"\n",{})
-------------------------------------
close(fh)
else
wrap_print("Error opening debug report file "&COLBWHI&"%s"&COLRED&"\n",{debug_file})
end if
exit_timing_zone()
end if
end procedure
---------------------------------------------------------------------------
--Look up the name of a [sub]script in all current scopes, returning all_scripts
--index if found, or 0 if not
function lookup_scoped_script_name(string name)
string s
integer at
integer idx
idx=cur_script_num
while true do
if idx then
s=all_scripts[idx][SCRIPT_FULL_NAME]&"<-"&name
else
s=name
end if
at=find_in_column(s,all_scripts,SCRIPT_FULL_NAME)
if at then
return at
end if
if idx=0 then
return 0
end if
idx=all_scripts[idx][SCRIPT_PARENT_IDX]
end while
end function
---------------------------------------------------------------------------
--identify the kind and id of a text command. Does not support untranslated operators or floaty parethesis
function what_kind_and_id(Token command, VarList local_vars, integer look_for_operators)
integer kind
integer at
atom id
Symbol sym
string s
s=command[CMD_TEXT]
if length(s)=0 then
kind=KIND_PARENS
id=0
elsif (s[1] >= '0' and s[1] <= '9') or s[1] = '-' then
kind=KIND_NUMBER
id=string_to_object(s,{})
elsif look_for_operators and find_in_column(s,operator_list,PAIR_NAME) then
kind=KIND_OPERATOR --this MUST go before KIND_MATH, because some operators and math functions have the same name
id=0
else
sym=map:get(global_scope,s,nil_sym)
if sym[SYM_KIND] then
kind=sym[SYM_KIND]
id=sym[SYM_ID]
elsif s[1] = '@' then
kind=KIND_REFERENCE
id=0 -- ID always resolves to 0 for references here, since it is too early to know all
-- script IDs (really? Surely false). The real work is done in binary_compile_recurse
elsif s[1] = '"' then
src_error(
sprintf_utf(
"The string "&COLYEL&"%s"&COLRED&" is illegal here: strings may only be used as part of a $...=\"...\" or $...+\"...\" construct",
{shorten_string(s,15)}),
command[CMD_POS]
)
else
--Local variables can't shadow any names in global scope, they only shadow
--nonlocal variables. So cen check this last
at=find_in_column(s,local_vars,CMD_TEXT)
if at then
if local_vars[at][VAR_FRAME]=0 then
kind=KIND_LOCAL
else
kind=KIND_NONLOCAL
end if
id=local_vars[at][VAR_ID] + 256*local_vars[at][VAR_FRAME]
else
--It may be a subscript, in which case the scoped name needs to be looked up
at=lookup_scoped_script_name(s)
if at then
kind=KIND_SCRIPT
id=all_scripts[at][SCRIPT_ID]
else
src_error(sprintf_utf("Unrecognised name "&COLYEL&"%s"&COLRED&". It has not been defined as script, constant, variable, or anything else",{s}),command[CMD_POS])
end if
end if
end if
end if
return({kind,id})
end function
---------------------------------------------------------------------------
--Check whether a math operator is not an assignment, a compare operator, or 'random'
function is_simple_math_op(integer id)
return (id >= 1 and id <= 9) or id >= 19
end function
---------------------------------------------------------------------------
--Given name (CMD_TEXT) of a Node, return whether it is a block: all its arguments are
--statements as opposed to expressions (if, for, while, switch, which take a mix need special handling)
function block_command(string name)
return compare("do",name)=0 or compare("then",name)=0 or compare("else",name)=0
end function
---------------------------------------------------------------------------
procedure check_is_expression(Node node, string parent)
Symbol sym
Kind kind
--Local variables are always expressions
sym=map:get(global_scope,node[TREE_TRUNK][CMD_TEXT],nil_sym)
kind=sym[SYM_KIND]
--This function is always called after convert_macros, so we don't need to
--worry about whether a particular macro is an expression
if kind=KIND_FLOW or kind=KIND_KEYWORD then
src_error(
sprintf_utf(COLYEL&"%s"&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&"%s"&COLRED&")"
,{node[TREE_TRUNK][CMD_TEXT],parent})
,node[TREE_TRUNK][CMD_POS])
end if
end procedure
---------------------------------------------------------------------------
--this function not used anywhere
function how_many_args(string name, integer kind)
integer result
integer at
if kind=KIND_PARENS then
result=-1 --parens support (n,operator,n) but if one of n is an operator, it comes out to be more :P
elsif kind=KIND_FLOW then
result=-1 --flow supports an unknown number of args
elsif kind=KIND_OPERATOR then
result=0 -- its important that operators behave as zero-arg-thingamabobs before they are translated into builtin math functions
elsif kind=KIND_MATH then
at=find_in_column(name[CMD_TEXT],math_list,PAIR_NAME)
result=length(math_list[at][FUNC_ARGS])
elsif kind=KIND_FUNCTION then
at=find_in_column(name[CMD_TEXT],function_list,PAIR_NAME)
if equal(function_list[at][FUNC_ARGS],VAR_ARGS) then
result=-1
else
result=length(function_list[at][FUNC_ARGS])
end if
elsif kind=KIND_SCRIPT then
at=find_in_column(name[CMD_TEXT],script_list,PAIR_NAME)
result=length(script_list[at][FUNC_ARGS])
elsif kind=KIND_MACRO then
result=-1 --unknown number of args
else
--numbers, variables, etc do not permit args
result=0
end if
return(result)
end function
---------------------------------------------------------------------------
--Something that potentially takes args
function takes_args(integer kind)
if kind=KIND_PARENS or kind=KIND_FLOW or kind=KIND_MATH or kind=KIND_FUNCTION or kind=KIND_SCRIPT or kind=KIND_MACRO or kind=KIND_KEYWORD then
return(true)
else
return(false)
end if
end function
---------------------------------------------------------------------------
--Get next AST node from a list of tokens by pairing up begin,end
--Returns {new_ptr,{token,children}} where children is a TokenList not a NodeList
function get_script_node(integer ptr, TokenList data, VarList vars)
Token command
sequence ptr_and_block
TokenList block
sequence temp
integer kind
integer followed_by_begin
block={}
command=data[ptr]
ptr+=1
if compare("end",command[CMD_TEXT])=0 then
src_error(COLYEL&"end"&COLRED&" or "&COLYEL&")"&COLRED&" without "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,command[CMD_POS])
elsif compare("begin",command[CMD_TEXT])=0 then
--floaty brackets for order-of-operations-enforcement
ptr-=1
command[CMD_TEXT]=""
end if
temp=what_kind_and_id(command,vars,true) --check for KIND_OPERATOR too
kind=temp[1]
followed_by_begin=false
if ptr<=length(data) then
--there is room for args
followed_by_begin=equal("begin",data[ptr][CMD_TEXT])
--distinguishing between functions with and without args means wait() would be ok but noop() would not
if takes_args(kind) then
if followed_by_begin then
--yes, it has args
ptr+=1--only increment the pointer when we have args
ptr_and_block=get_cmd_depth(ptr,data,1)
ptr=ptr_and_block[1]
block=ptr_and_block[2]
if length(block)=0 and kind=KIND_PARENS then
src_error("found empty parentheses not associated with a function call",command[CMD_POS])
end if
end if
end if
end if
if kind=KIND_FLOW then
--Require certain flow control types to be followed by parentheses.
--We have to check this here or earlier, because after building the parse tree
--there will be no difference between "()" and omitted parentheses.
--(Note that several later checks in normalize_flow_control are redundant to this)
--FIXME: Ideally we wouldn't actually check anything here, but instead preserve presence
--of empty parentheses to check later (it will matter in future anyway according to typing plan)
--FIXME: excludes else, because of 'case(else)'
if find(command[CMD_TEXT],flow_requiring_brackets) then
if not followed_by_begin then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" must be followed by "&COLYEL&"begin"&COLRED&" or "&COLYEL&"("&COLRED,{command[CMD_TEXT]}),command[CMD_POS])
end if
end if
--else has a special check for 'else,if' mistake.
--'not followed_by_begin' excludes 'else(...)if'
if equal("else",command[CMD_TEXT]) and ptr<=length(data) and equal("if",data[ptr][CMD_TEXT]) and not followed_by_begin then
src_error(COLYEL&"else if"&COLRED&" may not be written with a separator in-between",command[CMD_POS])
--temp=get_script_node(ptr, data, vars) --hack to allow if necessary
--temp[2][1][CMD_TEXT]="elseif"
--return temp
end if
end if
return({ptr,{command,block}})
end function
---------------------------------------------------------------------------
--Returns a NodeList
function build_ast(TokenList script_body_toks, VarList vars)
integer ptr
sequence ptr_and_seminode
sequence seminode --Not a Node, has a TokenList instead of NodeList
Node node
NodeList result
result={}
ptr=1
while true do
ptr_and_seminode=get_script_node(ptr,script_body_toks,vars)
ptr=ptr_and_seminode[1]
seminode=ptr_and_seminode[2]
if length(seminode[TREE_BRANCHES])>0 then
--this seminode has arguments that need parsing
node={seminode[TREE_TRUNK],build_ast(seminode[TREE_BRANCHES],vars)}
else
node=seminode
end if
result=append(result,node)
if ptr>length(script_body_toks) then
exit --break out of while when there is no more data
end if
end while
return(result)
end function
---------------------------------------------------------------------------
--When variables are inherited by a subscript all frame numbers need to be incremented
function shift_variables_frame(VarList vars)
for i=1 to length(vars) do
vars[i][VAR_FRAME]+=1
end for
return vars
end function
---------------------------------------------------------------------------
--Given a VarList with all local & nonlocal variables, returns a VarList
--of just the locals, in-order
function filter_local_variables(VarList vars)
sequence ret
integer next_idx
ret={}
next_idx=0
for i=length(vars) to 1 by -1 do
if vars[i][VAR_FRAME]=0 then
--Is local
if vars[i][VAR_ID]!=next_idx then
--This is really an assumption, not a bug
simple_error("compiler bug! Local vars are not in order")
end if
next_idx+=1
ret=append(ret,vars[i])
end if
end for
return ret
end function
---------------------------------------------------------------------------
procedure add_local_variable(Token var)
Script script
VarList vars
integer id
script=all_scripts[cur_script_num]
vars=script[SCRIPT_VARIABLES]
--First check it's not already defined at global scope
check_undefined_string(var,"local variable name")
--Always add to scope of current script (frame == 0)
id=length(vars)-script[SCRIPT_NUM_NONLOCALS]
--Extend the Token into a VarToken, and prefix to the list, so that it takes
--precedence over shadowed variables
all_scripts[cur_script_num][SCRIPT_VARIABLES]={var & {0,id}} & vars
end procedure
---------------------------------------------------------------------------
--Find the variable declarations in a script
procedure gather_local_vars(TokenList data)
Token this
integer ptr
ptr=1
while true do
this=data[ptr]
ptr+=1
if compare("variable",this[CMD_TEXT])=0 then
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("begin",this[CMD_TEXT])=0 then
while true do
if ptr>length(data) then
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
this=data[ptr]
ptr+=1
if compare("end",this[CMD_TEXT])=0 then
exit--break the while
end if
add_local_variable(this)
end while
else
src_error(sprintf_utf(COLYEL&"variable"&COLRED&" should be followed by "&COLYEL&"(name)"&COLRED,{}),this[CMD_POS])
end if
end if
if ptr>length(data) then
exit --break out of the while
end if
end while
end procedure
---------------------------------------------------------------------------
--There are only two, hardcoded, macros right now: tracevalue, assert.
function expand_macros(NodeList tree)
string cmd
string tracecmd
NodeList newtree
NodeList newargs
string new_string
Node string_node, assertstr_node
sequence src_position
sequence temp
srcpos pos, vpos
integer ptr
ptr=1
while ptr<=length(tree) do
--recurse first, because tree[ptr] might be split up (as in 'assert')
if length(tree[ptr][TREE_BRANCHES]) then
tree[ptr][TREE_BRANCHES]=expand_macros(tree[ptr][TREE_BRANCHES])
end if
cmd=tree[ptr][TREE_TRUNK][CMD_TEXT]
if equal(cmd,"tracevalue") or equal(cmd,"tracevalueof") then
tracecmd="tracevalueinternal"
elsif equal(cmd,"showvalueof") then
tracecmd="showvalueofinternal"
else
tracecmd=""
end if
if length(tracecmd) then
newargs={}
tree[ptr][TREE_TRUNK][CMD_TEXT]=tracecmd
pos=tree[ptr][TREE_TRUNK][CMD_POS]
for i=1 to length(tree[ptr][TREE_BRANCHES]) do
new_string=tree_original_text(tree[ptr][TREE_BRANCHES][i])
string_list=append(string_list,new_string)
string_node={ {sprintf("@$string%d",{length(string_list)}),virtual_pos(pos)}, {} }
newargs&={string_node,tree[ptr][TREE_BRANCHES][i]}
end for
tree[ptr][TREE_BRANCHES]=newargs
end if
if equal(cmd,"assert") then
--assert(condition) --> if (not(condition)) then ($assert expression string="condition", assertfailure)
-- and
--assert(x == y) --> if(_asserteq(x, y, assert expression string, "condition")) then (assertfailure)
pos=tree[ptr][TREE_TRUNK][CMD_POS]
vpos=virtual_pos(pos)
src_position=decode_srcpos(pos)
if length(tree[ptr][TREE_BRANCHES])!=1 then
src_error(sprintf_utf(COLYEL&"assert"&COLRED&" statement takes exactly one argument: a condition. It has %d",{length(tree[ptr][TREE_BRANCHES])}),pos)
end if
new_string=sprintf_utf("%s:%d: %s",{src_position[POS_FILENAME],src_position[POS_LINE],tree_original_text(tree[ptr][TREE_BRANCHES][1])})
string_list=append(string_list,new_string)
string_node={ {sprintf("@$string%d",{length(string_list)}),vpos}, {} }
assertstr_node={ {enforce_constants("assertexpressionstring"),vpos}, {} }
newtree={{{"if",vpos},{}}, {{"then",vpos},{}}}
if equal(tree[ptr][TREE_BRANCHES][1][TREE_TRUNK][CMD_TEXT],"equal") then
--Use _asserteq
sequence x,y
temp=tree[ptr][TREE_BRANCHES][1][TREE_BRANCHES] --x, y
x=temp[1]
y=temp[2]
--if(...)
newtree[1][TREE_BRANCHES]={{ {"_asserteq",vpos}, {x,y,assertstr_node,string_node} }}
--then(assertfailure)
newtree[2][TREE_BRANCHES]={{ {"assertfailure",vpos}, {} }}
else
--if(not(condition))
newtree[1][TREE_BRANCHES]={{ {"not",vpos}, {tree[ptr][TREE_BRANCHES][1]} }}
temp={{{"setstringfromtable",vpos},{}}, {{"assertfailure",vpos},{}}}
temp[1][TREE_BRANCHES]={assertstr_node,string_node} --stringnum, string
--then(...)
newtree[2][TREE_BRANCHES]=temp
end if
tree=replace_slice(tree,ptr,ptr,newtree) --tree[ptr..ptr]=newtree
ptr+=1
end if
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--tree[ptr] is a partially constructed if Node (lacking else child), followed either by an else or elseif Node.
--Handle else/elseif, parenting to tree[pt], and return the new tree.
function build_else_node(NodeList tree, integer ptr)
NodeList elseargs
string nextcmd
integer eptr
srcpos pos
nextcmd=tree[ptr+1][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr+1][TREE_TRUNK][CMD_POS]
if equal("else",nextcmd) then
--found else
tree[ptr][TREE_BRANCHES]&={tree[ptr+1]}
tree=delete_element(tree,ptr+1)
else -- equal("elseif",nextcmd)
--Split off the if
elseargs={ {{"if",pos},tree[ptr+1][TREE_BRANCHES]} }
--Grab the rest of this if,elseif,...,else chain, to put inside the else
eptr=ptr+2
while eptr<=length(tree) do
nextcmd=tree[eptr][TREE_TRUNK][CMD_TEXT]
--Stop when we see an else but otherwise we don't check correctness until we recurse
if equal(nextcmd,"else") then
eptr+=1
exit
elsif find(nextcmd,{"elseif","then"}) then
--continue
else
exit
end if
eptr+=1
end while
elseargs&=tree[ptr+2..eptr-1]
tree=tree[1..ptr]&tree[eptr..$]
tree[ptr][TREE_BRANCHES]&={ {{"else",pos},elseargs} }
end if
return tree
end function
---------------------------------------------------------------------------
--Build switch AST Nodes, return new tree
function build_switch_node(NodeList tree, VarList vars, integer ptr)
srcpos pos
srcpos argpos
NodeList args
Token thiscmd
NodeList rawargs
NodeList caseargs
Token casecmd
string lastnodename
sequence collected --either a Node or {}
sequence temp
integer have_else
srcpos elsepos
integer expect_case
integer kind
atom id
string else_error_msg
else_error_msg=COLYEL&"else"&COLRED&" case should be placed last inside a "&COLYEL&"switch"&COLRED&" block"
pos=tree[ptr][TREE_TRUNK][CMD_POS]
args=tree[ptr][TREE_BRANCHES] --start with the condition, length 1
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&"switch"&COLRED&" statement has %d expressions. It should have only one. (The body of the switch should go in its "&COLYEL&"do(...)"&COLRED&" block instead.)"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(COLYEL&"switch"&COLRED&" statement has no expression to match! Write "&COLYEL&"switch (expression) do (...)"&COLRED,pos)
end if
check_is_expression(args[1],"switch")
if ptr>=length(tree) then
src_error(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED,pos)
end if
thiscmd=tree[ptr+1][TREE_TRUNK]
if equal("do",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch"&COLRED&" should be followed by "&COLYEL&"do"&COLRED&", not by "&COLYEL&"%s"&COLRED&".",{thiscmd[CMD_TEXT]}),thiscmd[CMD_POS])
end if
--found do, processing starts here (build arguments to switch, after the expression)
rawargs=tree[ptr+1][TREE_BRANCHES] --contents of the 'do'
lastnodename=""
have_else=false
expect_case=false
collected={}
for j=1 to length(rawargs) do
thiscmd=rawargs[j][TREE_TRUNK]
argpos=thiscmd[CMD_POS]
if j=1 and equal("case",thiscmd[CMD_TEXT])=0 then
src_error(sprintf_utf(COLYEL&"switch() do("&COLRED&" should be followed with a "&COLYEL&"case"&COLRED&", not with "&COLYEL&"%s"&COLRED&"."
,{thiscmd[CMD_TEXT]}),argpos)
end if
--to resolve the ambiguity of an else, we always suppose that they belong to an 'if' (or elseif) if possible
if equal("else",thiscmd[CMD_TEXT]) and find(lastnodename,{"if","then","elseif"})=0 then
--Old syntax else(...) block
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
src_error(else_error_msg,elsepos)
end if
expect_case=false
if length(args) and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case is new-style syntax and wasn't followed by any statements
--Lets just disallow this to prevent a little confusion
src_error(COLYEL&"case(...) else(...)"&COLRED&" is ambiguous. "&
"Write either "&COLYEL&"case(..., else) ..."&COLRED&" or "&COLYEL&"case(...) do() else(...)"&COLRED&" as fits your intentions."
,rawargs[j-1][TREE_TRUNK][CMD_POS])
end if
if j!=length(rawargs) then
if length(rawargs[j][TREE_BRANCHES])=0 then
else_error_msg&=". Did you forget to enclose the else block with brackets, like "&COLYEL&"else(...)"&COLRED&"?"
end if
src_error(else_error_msg,argpos)
end if
--convert it to a do
args&={ {{"do",argpos},rawargs[j][TREE_BRANCHES]} }
elsepos=argpos
have_else=true
elsif equal("case",thiscmd[CMD_TEXT]) then
if length(collected) then
args&={collected}
collected={}
end if
if have_else then
--Note we disallow case(else) case(1) but allow case(else,1)
src_error(else_error_msg,elsepos)
end if
expect_case=false
caseargs=rawargs[j][TREE_BRANCHES]
--Call a couple passes early here, so that constant expressions are allowed
caseargs=collapse_floaty_brackets(caseargs)
caseargs=optimize_script(caseargs,vars)
--Check contents are only either integers, or "else", and append to args
for k=1 to length(caseargs) do
casecmd=caseargs[k][TREE_TRUNK]
if equal(casecmd[CMD_TEXT],"else") then
if have_else then
src_error(COLYEL&"else"&COLRED&" appears multiple times inside this "&COLYEL&"switch"&COLRED&" block",casecmd[CMD_POS])
end if
have_else=true
elsepos=argpos
--don't emit anything: next do is the 'else' one
else
temp=what_kind_and_id(casecmd,vars,false)
kind=temp[1]
id=temp[2]
if kind=KIND_FLOW or (kind=KIND_MATH and is_simple_math_op(id)=false) then
src_error(sprintf_utf(COLYEL&"%s"&COLRED&" is not allowed inside a "&COLYEL&"case"&COLRED
,{casecmd[CMD_TEXT]}),casecmd[CMD_POS])
end if
args&={caseargs[k]}
end if
end for
else
--Disallow "case() do() garbage"
if expect_case then
src_error(sprintf_utf("Expected "&COLYEL&"case"&COLRED&" or "&COLYEL&"else"&COLRED&" to follow "&COLYEL&"case(...) do(...)"&COLRED&" but found "&COLYEL&"%s"&COLRED,{thiscmd[CMD_TEXT]}),argpos)
end if
if length(collected)=0 then
--Start collecting into a do. But if it is a do (old syntax), don't double encapsulate it
if equal("do",thiscmd[CMD_TEXT]) then
args&={rawargs[j]}
expect_case=true
else
collected={{"do",argpos},{rawargs[j]}}
end if
else
--Continue collecting
collected[TREE_BRANCHES]&={rawargs[j]}
end if
end if
lastnodename=thiscmd[CMD_TEXT]
end for
if length(collected) then
args&={collected}
end if
if length(args)>1 and equal(args[$][TREE_TRUNK][CMD_TEXT],"do")=false then
--Last case (possibly an else) is new-style syntax and wasn't followed by any statements,
--so add the missing do. Don't need to do this, but I'd like to keep things tidy
--(srcpos is of the last case)
args&={ {{"do",virtual_pos(argpos)},{}} }
end if
if have_else=false then
--Insert a dummy do default block since else has been left out
args&={ {{"do",virtual_pos(pos)},{}} }
end if
tree[ptr][TREE_BRANCHES]=args
--delete the do block
return delete_element(tree,ptr+1)
end function
---------------------------------------------------------------------------
--Given a variable token for 'for' or 'setvariable', etc. (parent) and list of locals/non-locals 'vars',
--return a an integer ID token, or throw an error if not a variable.
function convert_variable_to_id(Token var, VarList locals, string parent)
sequence kind_and_id
atom id
integer argkind
integer var_at
kind_and_id=what_kind_and_id(var,locals,false)
argkind=kind_and_id[1]
id=kind_and_id[2]
if argkind=KIND_LOCAL or argkind=KIND_NONLOCAL then
id=-1-id
elsif argkind=KIND_GLOBAL then
--id unmodified
else
--It's not a variable. bad!
src_error(sprintf_utf(
"first argument of "&COLYEL&"%s"&COLRED&" must be a variable, not %s "&COLYEL&"%s"&COLRED
,{parent,KIND_LONGNAMES[argkind],var[CMD_TEXT]}
),var[CMD_POS])
end if
return {sprintf("%d",{id}),var[CMD_POS]}
end function
---------------------------------------------------------------------------
--parse the script tree and make if absorb then and else, for and while absorb do, switch absorb stuff, check correctness of flow statements
--Note that several of these checks are now redundant to the check for 'begin' performed in get_script_node
function normalize_flow_control(NodeList tree, VarList vars, string parent)
integer ptr
string s
string nextcmd
NodeList args
Token ctr
srcpos pos
Symbol sym
Kind kind
sequence temp
integer var_at
ptr=1
while ptr<=length(tree) do
s=tree[ptr][TREE_TRUNK][CMD_TEXT]
pos=tree[ptr][TREE_TRUNK][CMD_POS]
args=tree[ptr][TREE_BRANCHES]
sym=map:get(global_scope,s,nil_sym)
kind=sym[SYM_KIND]
if kind=KIND_FLOW or kind=KIND_KEYWORD then
if equal(s,"then") or equal(s,"else") or equal(s,"do") then
--these are checked for correct parents below.
elsif block_command(parent)=false then
src_error(COLYEL&s&COLRED&" may not be used as part of an expression (found as an argument to "&COLYEL&parent&COLRED&")",pos)
end if
end if
if kind=KIND_FLOW then
if compare("if",s)=0 then
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&"if"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&"if"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
check_is_expression(args[1],s)
if ptr1 then
src_error(sprintf_utf(
COLYEL&"while"&COLRED&" statement has %d conditions. It should have only one. Use "&COLYEL&"&&"&COLRED&" and "&COLYEL&"||"&COLRED&" to combine multiple conditions"
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&"while"&COLRED&" statement has no condition. It should have one.",{}),pos)
end if
check_is_expression(args[1],s)
if ptr4 then
src_error(sprintf_utf(COLYEL&"for"&COLRED&" statement has too many arguments (%d)",{length(args)}),args[5][TREE_TRUNK][CMD_POS])
elsif length(args)=3 then
--append default step value
args&={ {{"1",virtual_pos(pos)},{}} }
end if
ctr=args[1][TREE_TRUNK]
temp=what_kind_and_id(ctr,vars,false)
kind=temp[1]
if kind=KIND_LOCAL then
map:put(used_locals,temp[2],true)
elsif kind=KIND_GLOBAL then
--warn, then translate into a numeric reference to a variable
src_warn(sprintf_utf(
"Using global variable "&COLYEL&"%s"&COLRED&" as the counter in a "&COLYEL&"for"&COLRED&" loop"
,{ctr[CMD_TEXT]}
),ctr[CMD_POS])
end if
args[1][TREE_TRUNK]=convert_variable_to_id(ctr,vars,"for")
check_is_expression(args[2],s)
check_is_expression(args[3],s)
check_is_expression(args[4],s)
--Handle "do"
if ptr1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have only zero or one."
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=1 then
tree[ptr][TREE_TRUNK][CMD_TEXT]="exitreturning"
elsif length(args)=0 then
tree[ptr][TREE_TRUNK][CMD_TEXT]="exitscript"
end if
elsif compare("return",s)=0 or compare("exitreturning",s)=0 then
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have only one."
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
src_error(sprintf_utf(COLYEL&s&COLRED&" statement has no argument. It should have one. Prehaps you meant to use "&COLYEL&"exit"&COLRED&" or "&COLYEL&"exit script"&COLRED,{}),pos)
end if
elsif compare("break",s)=0 or compare("continue",s)=0 then
if length(args)>1 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have no more than one."
,{length(args)}
),args[2][TREE_TRUNK][CMD_POS])
elsif length(args)=0 then
--append default value
tree[ptr][TREE_BRANCHES]={{{"1",virtual_pos(pos)},{}}}
end if
elsif compare("exitscript",s)=0 then
if length(args)>0 then
src_error(sprintf_utf(
COLYEL&s&COLRED&" statement has %d arguments. It should have none. Prehaps you meant to use "&COLYEL&"exit"&COLRED&" or "&COLYEL&"exit returning"&COLRED
,{length(args)}
),args[1][TREE_TRUNK][CMD_POS])
end if
elsif compare("switch",s)=0 then
tree=build_switch_node(tree,vars,ptr)
elsif compare("case",s)=0 then
src_error(COLYEL&"case"&COLRED&" is not allowed outside of "&COLYEL&"switch"&COLRED,pos)
end if
elsif kind=KIND_KEYWORD then
if compare("subscript",s)=0 or compare("variable",s)=0 then
--Does nothing; remove. (Kept until now to check they isn't placed in a silly place)
--(Note: any attributes prefixing a subscript have already been removed in
--parse_script())
tree=delete_element(tree,ptr)
continue
end if
end if
if equal(s,"") then
--Floaty brackets (they haven't been removed yet). Look to parent instead.
s=parent
end if
tree[ptr][TREE_BRANCHES]=normalize_flow_control(tree[ptr][TREE_BRANCHES],vars,s)
ptr+=1
end while
return(tree)
end function
---------------------------------------------------------------------------
--A modification of Dijkstra's shunting-yard algorithm
--(This could probably subsume build_ast too)
function convert_operators(NodeList tree)
NodeList new_tree
sequence stack --{int,Node} pairs
Node opnode
integer ptr
integer at
integer priority
integer nonoperator_before
stack={}
new_tree={}
ptr=1
nonoperator_before=false
while ptr<=length(tree) do
--for each peer branch on the tree
at=find_in_column(tree[ptr][TREE_TRUNK][CMD_TEXT],operator_list,PAIR_NAME)
if at>0 then
if nonoperator_before=false then
--there is no room for the before-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its left-side operand (did not expect to see an operator here)",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
elsif ptr=length(tree) then
--there is no room for the after-operand. Bad!
src_error(sprintf_utf("operator "&COLYEL&"%s"&COLRED&" is missing its right-side operand",{tree[ptr][TREE_TRUNK][CMD_TEXT]}),tree[ptr][TREE_TRUNK][CMD_POS])
end if
priority=operator_list[at][PAIR_NUM]
--convert the operator to its true function name (which might be the same)
tree[ptr][TREE_TRUNK][CMD_TEXT]=operator_list[at][OPER_TRUENAME]
while length(stack) and priority>=stack[$][1] do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
stack&={{priority,tree[ptr]}}
nonoperator_before=false
else
if nonoperator_before then
--Two non-ops in a row means moved onto the new argument in this argument list, so finish processing operators first
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
end if
tree[ptr][TREE_BRANCHES]=convert_operators(tree[ptr][TREE_BRANCHES])
new_tree&={tree[ptr]}
nonoperator_before=true
end if
ptr+=1
end while
--copy of above
while length(stack) do
--pop the operator from the stack and "apply" it to the tree
opnode=stack[$][2]
stack=stack[1..$-1]
opnode[TREE_BRANCHES]=new_tree[$-1..$]
new_tree=new_tree[1..$-2]&{opnode}
end while
return new_tree
end function
---------------------------------------------------------------------------
--Get the parameters for a certain attribute of a script or function, if it exists
--func_defn: an element of script_list or function_list
--Returns a NodeList, possible empty
function attribute_params(string name, sequence func_defn)
integer attridx
attridx=find_in_column(name,func_defn[FUNC_ATTRS],1)
if attridx=0 then
return {}
else
return func_defn[FUNC_ATTRS][attridx][TREE_BRANCHES]
end if
end function
---------------------------------------------------------------------------
--Ensure correct number of arguments, add defaults, and translate variables to IDs in assignments
--And also some extra warnings...
function fix_arguments(Node tree, integer kind, integer id, sequence list, VarList vars)
integer at
integer argnum,maxargs
NodeList args, params
integer fatal
string message, param
string funcname
srcpos funcpos
funcname=tree[TREE_TRUNK][CMD_TEXT]
funcpos=tree[TREE_TRUNK][CMD_POS]
args=tree[TREE_BRANCHES]
argnum=length(args)
--Lookup ID because name might be scoped
at=find_in_column(id,list,PAIR_NUM)
if equal(list[at][FUNC_ARGS],VAR_ARGS) then
return(args)
else
maxargs=length(list[at][FUNC_ARGS])
end if
if maxargs < argnum then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" takes at most %d arguments but is being passed %d arguments"
,{KIND_LONGNAMES[kind],funcname,maxargs,argnum}
),args[maxargs+1][TREE_TRUNK][CMD_POS])
elsif maxargs > argnum then
--add defaults if not enough args are present
if kind=KIND_MATH then
--special processing for math
if list[at][PAIR_NUM]<=15 or list[at][PAIR_NUM]>=19 then
--math shouldnt have defaults
src_error(sprintf_utf(
"math function "&COLYEL&"%s"&COLRED&" is being passed %d arguments but it should always have %d"
,{funcname,argnum,maxargs}
),funcpos)
else
--variable assignment commands can have defaults
if argnum = 0 then
--no defaults for first argument of variable function
src_error(sprintf_utf(
"variable manipulation function "&COLYEL&"%s"&COLRED&" has %d arguments - it needs at least 1"
,{funcname,argnum}
),funcpos)
elsif argnum = 1 then
--make defaults for second arg of variable function
if list[at][PAIR_NUM]=16 then
--setvariable
args=append(args,{
{"0",virtual_pos(funcpos)}
,{}
})
else
--increment and decrement
args=append(args,{
{"1",virtual_pos(funcpos)}
,{}
})
end if
end if
end if
else
--normal processing for script and function
for i=argnum+1 to maxargs do
if equal(list[at][FUNC_ARGS][i],NO_DEFAULT) then
src_error(sprintf_utf(
"%s "&COLYEL&"%s"&COLRED&" has no default for missing argument %d"
,{KIND_LONGNAMES[kind],funcname,i}
),funcpos)
end if
args=append(args,{
{sprintf("%d",{list[at][FUNC_ARGS][i]}),virtual_pos(funcpos)}
,{}
})
end for
end if
end if
--Translate variables in the variable manipulation functions
if kind=KIND_MATH and list[at][PAIR_NUM]>=16 and list[at][PAIR_NUM]<=18 then
args[1][TREE_TRUNK]=convert_variable_to_id(args[1][TREE_TRUNK],vars,funcname)
end if
--Obsolescence warnings
--(This seems as good a spot to do this as any, because we've looked up the function in 'list' already)
--Syntax: @obsolete([error,] [replacement_cmd,] ["message"]) (arg order can var)
if and_bits(list[at][FUNC_FLAGS],FLAG_OBSOLETE)!=0 then
params=attribute_params("@obsolete",list[at])
message=""
fatal=false
for i=1 to length(params) do
param=params[i][CMD_TEXT]
if equal(param,"error") then
fatal=true
elsif param[1]='"' then
message&=param[2..$-1]
else
message&=sprintf_utf("Use replacement function "&COLYEL&"%s"&COLRED&" instead. ",{param})
end if
end for
if fatal then
message=sprintf_utf("Function "&COLYEL&"%s"&COLRED&" is obsolete and can no longer be used. %s",{funcname,message})
src_error(message,funcpos)
else
message=sprintf_utf("Function "&COLYEL&"%s"&COLRED&" is obsolete; avoid it. %s",{funcname,message})
src_warn(message,funcpos)
end if
end if
return(args)
end function
---------------------------------------------------------------------------
--Calls fix_arguments on each non-flow node with arguments
function normalize_arguments(NodeList tree, VarList vars)
sequence kind_and_id
integer kind
atom id
--Unlike the flow normalization and operator translation we do not insert/delete elements from
--the current level, so we can use a "for" safely, and dont need a "while"
for i=1 to length(tree) do
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars,false)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_SCRIPT then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,script_list,vars)
elsif kind=KIND_FUNCTION then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,function_list,vars)
elsif kind=KIND_MATH then
tree[i][TREE_BRANCHES]=fix_arguments(tree[i],kind,id,math_list,vars)
end if --number, flow, global, local, parens need no argchecking
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=normalize_arguments(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--returns a two-char string that represents a 16-bit word in least-signifigant-byte-first order
function output_word(integer n)
integer b1,b2
b1=and_bits(n,#FF)
b2=floor(and_bits(n,#FFFF)/256)
return({b1,b2})
end function
---------------------------------------------------------------------------
function convert_to_bytes(sequence s)
sequence result
result={}
for i=1 to length(s) do
result&=int_to_bytes(s[i])
end for
return(result)
end function
---------------------------------------------------------------------------
--Encode a unicode (UTF32/native Euphoria) string in the format the engine
--requires, which is currently Latin-1
function encode_ohr(string string, integer maxbytes=0)
for i=1 to length(string) do
if string[i]>=256 then
string[i]='?'
end if
end for
if maxbytes and maxbytes7 and equal("$string",str_temp[1..7]) then
value_temp=value(str_temp[8..length(str_temp)])
id=table_string(value_temp[2])
else
src_error(sprintf_utf("Reference "&COLYEL&"@%s"&COLRED&" not understood: references must be the name of a global variable or a script",{str_temp}),cmdpos)
end if
end if
end if
kind=KIND_NUMBER
end if
result&=kind
result&=id
if kind=KIND_NUMBER then
--No children and no debug info
elsif kind=KIND_GLOBAL or kind=KIND_LOCAL or kind=KIND_NONLOCAL then
--No children, debug info
if debug_info then
result&=cmdpos-script_position
end if
elsif kind=KIND_SCRIPT or kind=KIND_FUNCTION or kind=KIND_FLOW or kind=KIND_MATH then
--Use id from what_kind_and_id
if kind=KIND_FUNCTION then
if max_used_functionMAX_ARGS then
src_error(sprintf_utf("Number of arguments being passed (%d) is more than allowed (%d)",{numargs,MAX_ARGS}),cmdpos)
end if
result&=numargs
--Add placeholders for each argoffset
result&=repeat(0,numargs)
if debug_info then
result&=cmdpos-script_position
end if
if nodeoffset=0 then
--Special case: Most nodes are added after their children, but the root
--node needs to have offset 0, so append it already
compiled_data&=result
end if
for i=1 to numargs do
--actually evaluate each argument and set the real offsets
recurse_result=binary_compile_recurse(tree[TREE_BRANCHES][i],vars)
if recurse_result >= nodeoffset then
can_reference=false
end if
result[3+i]=recurse_result
end for
if nodeoffset=0 then
--Root node: replace the dummy argument offsets, and we're done.
compiled_data[1..length(result)]=result
return(nodeoffset)
end if
nodeoffset=length(compiled_data)
else
src_error(sprintf_utf("Compiler Bug! Illegal kind "&COLYEL&"%d"&COLRED&" for "&COLYEL&"%s"&COLRED,{kind,s}),cmdpos)
end if
--If we've have multiple copies of the exact same node (subtree), deduplicate them.
--Don't bother with nodes with >4 children: unlikely to repeat.
--The integer(result[2]) test here is to work around a bug in Euphoria (at least in 4.0.3 32-bit):
--map:hash throws an error if given an int which doesn't fit in a 31-bit integer
if not fast_mode and length(result)<=7 and integer(result[2]) then
if can_reference then --If false, this node must be unique, so map:get won't find anything.
at=map:get(node_lookup,result,-1)
if at>-1 then
return at
end if
end if
map:put(node_lookup,result,nodeoffset)
end if
--append data directly
compiled_data&=result
return(nodeoffset)
end function
---------------------------------------------------------------------------
function get_parent_id(Script script)
integer parent_idx
parent_idx=script[SCRIPT_PARENT_IDX]
if parent_idx=0 then
return(0)
else
return(all_scripts[parent_idx][SCRIPT_ID])
end if
end function
---------------------------------------------------------------------------
function binary_compile(Script script)
NodeList tree
sequence result
integer recurse_ret
sequence localvars
string_table={}
compiled_data={}
node_lookup=map:new()
--binary data is in a mix of 16-bit and 32-bit signed words.
--the header is (mostly) 16-bit and the data (script format version 1+) is 32-bit.
--output header (0-based offsets given here)
--0..1: the zero-rooted byte-offset of the first executable code byte
result=output_word(CODE_START_BYTE_OFFSET)
--2..3: the number of local variables (including args but not nonlocals)
result&=output_word(length(script[SCRIPT_VARIABLES]) - script[SCRIPT_NUM_NONLOCALS])
--4..5: the number of arguments the script takes (also in SCRIPTS.TXT)
result&=output_word(length(script[SCRIPT_ARG_TOKS]))
--6..7: the format of the command data (presently used to indicate 32-bit encoding)
result&=output_word(HSZ_FORMAT_VERSION)
--8..11: a 32bit offset to the string literal table (in bytes), we don't know it yet
result&=output_word(0)
result&=output_word(0)
--12..13: parent script ID or 0 if none
result&=output_word(get_parent_id(script))
--14..15: Nesting depth
result&=output_word(script[SCRIPT_NEST_DEPTH])
--16..17: number of nonlocals
result&=output_word(script[SCRIPT_NUM_NONLOCALS])
--18..21: the length of the string table in 32-bit words, also unknown
result&=output_word(0)
result&=output_word(0)
--22..23: a bitfield detailing optional features.
if debug_info then
--First bit: non-leaf cmd nodes appended with srcpos
result&=output_word(1)
else
result&=output_word(0)
end if
--24..27: the offset of the local variable name table, in 32-bit words
--after the header, or 0 if not present. Currently unknown.
result&=output_word(0)
result&=output_word(0)
--28..31: the srcpos offset of the start of the script
integer script_start
script_start=srcpos_offset(script[SCRIPT_TRIGGER_TOK][CMD_POS])
result&=int_to_bytes(script_start)
script_position=script_start*power(2,9) --aka encode_srcpos(0,script_start-1,0)
--what follows is command data in the format [kindID,Value,argcount,argpointerlist]
--numbers and variables have no argcount or argpointerlist
--an argpointer is the zero-rooted word-offset of the argument relative
--to the start of the executable commands. I realise that this format is
--unnecessarily complicated. I had hoped to get benefits of being able to
--store frequently reused commands only once and then just point to them,
--but in actual practice, it isnt worth the trouble, since the only
--commands that tend to be redundant are the really short ones.
--the first command is always a "do". there can be only one top-level command
tree=script[SCRIPT_AST]
if length(tree)!=1 then
simple_error(sprintf_utf("compiler bug! script tree has %s root nodes",{length(tree)}))
end if
reenter_timing_zone("binary_compile_recurse")
recurse_ret=binary_compile_recurse(tree[1],script[SCRIPT_VARIABLES])
if recurse_ret!=0 then
simple_error(sprintf_utf("compiler bug! Root binary_compile_recurse call returned %d",{recurse_ret}))
end if
exit_timing_zone()
result&=convert_to_bytes(compiled_data)
compiled_data={}
--append with a table of string literals and give its offset and length
if length(string_table)>0 then
if remainder((length(result)-CODE_START_BYTE_OFFSET),4) != 0 then
simple_error("compiler bug! string table not aligned")
end if
if remainder(length(string_table),4) != 0 then
simple_error("compiler bug! string table not padded")
end if
result[9..12]=int_to_bytes(length(result))
result[19..22]=int_to_bytes(length(string_table)/4)
result&=string_table
end if
if debug_info then
--append local variable names table
if remainder(length(result)-CODE_START_BYTE_OFFSET,4) != 0 then
simple_error("compiler bug! varname table not aligned")
end if
localvars=filter_local_variables(script[SCRIPT_VARIABLES])
if length(localvars) then
result[25..28]=int_to_bytes((length(result)-CODE_START_BYTE_OFFSET)/4)
--Could use seek_string_by_id instead of filter_local_variables
result&=variable_names_to_table(localvars)
end if
--Otherwise remains 0
end if
return(result)
end function
---------------------------------------------------------------------------
--floaty brackets are un-needed after the operators have been translated.
function collapse_floaty_brackets(NodeList tree)
integer i
NodeList graft
i=1
while i<=length(tree) do
if length(tree[i][TREE_TRUNK][CMD_TEXT])=0 then
--found a floaty-bracket
graft=tree[i][TREE_BRANCHES]
if length(graft)!=1 then
src_error(sprintf_utf("Found unexpected pair of brackets containing %d expressions. Either the brackets should not exist at all, or should only enclose a single expression",{length(graft)}),tree[i][TREE_TRUNK][CMD_POS])
end if
tree[i]=graft[1]
--delete_element(tree,i)
--tree=insert_sequence(tree,graft,i)
else
if length(tree[i][TREE_BRANCHES]) then
tree[i][TREE_BRANCHES]=collapse_floaty_brackets(tree[i][TREE_BRANCHES])
end if
i+=1
end if
end while
return(tree)
end function
---------------------------------------------------------------------------
--parent is CMD_TEXT of parent node
function sanity_check(NodeList tree, VarList vars, string parent)
string s
sequence kind_and_id
integer kind
atom id
srcpos pos
for i=1 to length(tree) do
s=tree[i][TREE_TRUNK][CMD_TEXT]
pos=tree[i][TREE_TRUNK][CMD_POS]
kind_and_id=what_kind_and_id(tree[i][TREE_TRUNK],vars,false)
kind=kind_and_id[1]
id=kind_and_id[2]
if i=1 and equal("if",parent) then
if kind=KIND_NUMBER then
if id then
src_warn(sprintf_utf("Condition is always true ("&COLYEL&"%d"&COLRED&")",{id}),pos)
else
src_warn("Condition is always false",pos)
end if
end if
elsif block_command(parent) then
if kind=KIND_NUMBER then
src_warn(sprintf_utf("Expected script, function, or flow control, but found an expression with value "&COLYEL&"%d"&COLRED&". It will do nothing here."
,{id}),pos)
elsif kind=KIND_GLOBAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found global variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),pos)
elsif kind=KIND_LOCAL or kind=KIND_NONLOCAL then
src_warn(sprintf_utf("Expected script, function, or flow control, but found local variable "&COLYEL&"%s"&COLRED&". It will do nothing here."
,{s}),pos)
elsif kind=KIND_MATH and (id<=15 or id=19 or id>=22) then
-- setvariable, increment, decrement, logand, logor have side effects
src_warn(sprintf_utf("Expected a statement but found built-in function "&COLYEL&"%s"&COLRED&", returning a value that is being discarded"
,{s}),pos)
end if
end if
if kind=KIND_GLOBAL then
map:put(used_globals,id,true)
elsif kind=KIND_LOCAL then
map:put(used_locals,id,true)
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=sanity_check(tree[i][TREE_BRANCHES],vars,tree[i][TREE_TRUNK][CMD_TEXT])
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--simulate a result overflowing a signed 32 bit register
function overflow_int32(atom val)
--maybe check whether val is so large that this is could be inaccurate?
atom ret
ret=mod(val,#100000000)
if ret>2147483647 then
ret-=#100000000
end if
return(ret)
end function
---------------------------------------------------------------------------
function optimized_arg(Node tree, VarList vars)
sequence kind_and_id
integer kind
atom id
object arg1,arg2
kind_and_id=what_kind_and_id(tree[TREE_TRUNK],vars,false)
kind=kind_and_id[1]
id=kind_and_id[2]
if kind=KIND_NUMBER then
return(id)
elsif kind=KIND_MATH and (id<=15 or id>=19) then
arg1=optimized_arg(tree[TREE_BRANCHES][1],vars)
if equal(arg1,{}) then
arg1=tree[TREE_BRANCHES][1]
end if
if length(tree[TREE_BRANCHES])>=2 then
arg2=optimized_arg(tree[TREE_BRANCHES][2],vars)
else
arg2=0
end if
if equal(arg2,{}) then
arg2=tree[TREE_BRANCHES][2]
end if
if atom(arg1) and atom(arg2) then
if id=0 then
--random
if arg1=arg2 then
return(arg1)
end if
elsif id=1 then
--exponent
if arg2>0 then
--if too large, Euphoria will signal an overflow, but I'm also worried
--about overflow_int32 giving inaccurate results
if log(abs(arg1)+1)+log(abs(arg2)+1)arg2)
elsif id=14 then
--lessthanorequalto
return(arg1<=arg2)
elsif id=15 then
--greaterthanorequalto
return(arg1>=arg2)
elsif id=19 then
--not
return(arg1=0)
elsif id=22 then
--logxor
return((arg1=0)!=(arg2=0))
elsif id=23 then
--abs
return(overflow_int32(abs(arg1)))
elsif id=24 then
--sign
return((arg1>0)-(arg1<0))
elsif id=25 then
--sqrt
if arg1<0 then
src_error(sprintf_utf("Found the expression (or equivalent) "&COLYEL&"sqrt(%d)"&COLRED,{arg1}),tree[TREE_TRUNK][CMD_POS])
end if
return(floor(sqrt(arg1)+0.5))
end if
end if
--have to allow for side effects of shortcut evaluating operators by only checking first arg
if atom(arg1) then
if id=4 then
--multiply
if arg1=0 then
return 0
elsif arg1=1 then
return arg2
end if
elsif id=6 then
--add
if arg1=0 then
return arg2
end if
elsif id=20 then
--logand
if arg1=0 then
return 0
end if
elsif id=21 then
--logor
if arg1!=0 then
return 1
end if
end if
end if
if atom(arg2) then
if id=4 then
--multiply
if arg2=0 then
return 0
elsif arg2=1 then
return arg1
end if
elsif id=5 then
--subtract
if arg2=0 then
return arg1
end if
elsif id=6 then
--add
if arg2=0 then
return arg1
end if
end if
end if
end if
return({}) --return nonatom
end function
---------------------------------------------------------------------------
--goes through a script simplifying expressions that always have the same value
function optimize_script(NodeList tree, VarList vars)
object arg
for i=1 to length(tree) do
arg=optimized_arg(tree[i],vars)
if atom(arg) then
tree[i][TREE_TRUNK][CMD_TEXT]=sprintf("%d",arg)
tree[i][TREE_BRANCHES]={}
elsif sequence(arg) and length(arg) then
tree[i]=arg
end if
if length(tree[i][TREE_BRANCHES]) then
--if there are sub-arguments, recurse
tree[i][TREE_BRANCHES]=optimize_script(tree[i][TREE_BRANCHES],vars)
end if
end for
return(tree)
end function
---------------------------------------------------------------------------
--FIXME: if a local is used only in a subscript it is reported as unused
procedure warn_unused_locals(VarList vars)
for i=1 to length(vars) do
if vars[i][VAR_FRAME]=0 and not map:has(used_locals,vars[i][VAR_ID]) then
src_warn(sprintf_utf("local variable "&COLYEL&"%s"&COLRED&" is never used",{vars[i][CMD_TEXT]}),vars[i][CMD_POS])
end if
end for
end procedure
---------------------------------------------------------------------------
procedure warn_unused_globals()
integer id
for i=1 to length(global_list[PAIR_NUM]) do
id=global_list[PAIR_NUM][i]
if not map:has(used_globals,id) then
src_warn(sprintf_utf("global variable "&COLYEL&"%s"&COLRED&" ID "&COLYEL&"%d"&COLRED&" is never used",{global_list[PAIR_NAME][i],id}),global_list[GLB_POS][i])
end if
end for
end procedure
---------------------------------------------------------------------------
--script_idx is all_scripts index
procedure compile_a_script(integer script_idx)
Script script
NodeList script_tree
VarList local_vars
TokenList args
integer num_nonlocals
integer parent_idx
integer trigger_idx
integer trigger
cur_script_num=script_idx
script=all_scripts[script_idx]
trigger_idx=find_in_column(script[SCRIPT_TRIGGER_TOK][CMD_TEXT],trigger_list,PAIR_NAME)
trigger=trigger_list[trigger_idx][PAIR_NUM]
current_script=script[SCRIPT_FULL_NAME]
parent_idx=script[SCRIPT_PARENT_IDX]
reenter_timing_zone("gather_local_vars")
if parent_idx then
local_vars=shift_variables_frame(all_scripts[parent_idx][SCRIPT_VARIABLES])
else
local_vars={}
end if
num_nonlocals=length(local_vars)
script&={
trigger --trigger type
,num_nonlocals
,local_vars
}
all_scripts[script_idx]=script
--Get all locals (modifies all_scripts[cur_script_num])
args=script[SCRIPT_ARG_TOKS] --The arguments are the first several local variables
for i=1 to length(args) do
--Checks for name validity
add_local_variable(args[i])
end for
gather_local_vars(script[SCRIPT_BODY_TOKS])
--script[SCRIPT_VARIABLES] is now outdated
script=all_scripts[script_idx]
local_vars=script[SCRIPT_VARIABLES]
if length(local_vars)-num_nonlocals > MAX_LOCALS then
simple_error(
sprintf_utf("Script "&COLYEL&"%s"&COLRED&" has %d arguments and variables but at most %d are supported",
{script[SCRIPT_FULL_NAME],length(local_vars)-num_nonlocals,MAX_LOCALS})
)
end if
exit_timing_zone()
used_locals=map:new()
reenter_timing_zone("build_ast")
script_tree=build_ast(script[SCRIPT_BODY_TOKS],local_vars)
exit_timing_zone()
reenter_timing_zone("convert_operators")
script_tree=convert_operators(script_tree)
exit_timing_zone()
reenter_timing_zone("expand_macros")
script_tree=expand_macros(script_tree)
exit_timing_zone()
reenter_timing_zone("normalize_flow_control")
script_tree=normalize_flow_control(script_tree,local_vars,"")
exit_timing_zone()
reenter_timing_zone("normalize_arguments")
script_tree=normalize_arguments(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("collapse_floaty_brackets")
script_tree=collapse_floaty_brackets(script_tree)
exit_timing_zone()
if not fast_mode then
reenter_timing_zone("optimize_script")
script_tree=optimize_script(script_tree,local_vars)
exit_timing_zone()
reenter_timing_zone("sanity_check")
script_tree=sanity_check(script_tree,local_vars,"")
exit_timing_zone()
if find('u',optlist) then
warn_unused_locals(local_vars)
end if
end if
script&={
script_tree
,{} --compiled data to go into the HSZ lumps
}
all_scripts[script_idx]=script
reenter_timing_zone("binary_compile")
all_scripts[script_idx][SCRIPT_BINARY]=binary_compile(script)
exit_timing_zone()
current_script=""
cur_script_num=0
end procedure
---------------------------------------------------------------------------
--Parse argument declarations (initially containing default values)
--in a script argument list, returns {arg_names_list, default_vals_list} pair
--where any default values have been removed from the argument names
function process_arglist(TokenList args)
TokenList arglist
sequence defaults
integer i
integer using_defaults
arglist={}
defaults={}
using_defaults=false
i=1
while i<=length(args) do
if compare("=",args[i][CMD_TEXT])=0 then
src_error("Syntax error: spurious = in script argument list",args[i][CMD_POS])
end if
check_undefined_string(args[i],"argument name")
if find(args[i][CMD_TEXT],column(arglist,CMD_TEXT)) then
src_error(sprintf_utf("Multiple script arguments named "&COLYEL&"%s"&COLRED&" in argument list",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
if ilength(args) then
src_error(sprintf_utf("Syntax error: expected default value for script argument "&COLYEL&"%s"&COLRED,{args[i][CMD_TEXT]}),args[i+1][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults&=try_string_to_number({enforce_constants(args[i+2][CMD_TEXT]),args[i+2][CMD_POS]})
using_defaults=true
i+=3
else
if using_defaults=true then
--any argument with a default must be followed with arguments with defaults
src_error(sprintf_utf("Default value missing for argument "&COLYEL&"%s"&COLRED&": the previous argument had a default value, so all following ones must too",{args[i][CMD_TEXT]}),args[i][CMD_POS])
end if
arglist=append(arglist,args[i])
defaults=append(defaults,NO_DEFAULT)
i+=1
end if
end while
return({arglist,defaults})
end function
---------------------------------------------------------------------------
--Check that every declared script has been defined, with the same number of arguments,
--and each script appears exactly once. Basically, compare script_list and all_scripts
--Also handle argument default values.
--Before this function, scripts only have IDs (stored in script_list) if they are declared in a definescript.
--Afterwards all scripts have IDs assigned (in all_scripts)
procedure check_script_declarations()
integer at
sequence args_and_defaults, args, defaults
sequence scripts_found
sequence src_position, attributes
string fullname
srcpos script_pos
integer id, parent_id
scripts_found=repeat(false,length(script_list))
for i=1 to length(all_scripts) do
fullname=all_scripts[i][SCRIPT_FULL_NAME]
parent_id=get_parent_id(all_scripts[i])
current_script=fullname --global
script_pos=all_scripts[i][SCRIPT_POS]
attributes=all_scripts[i][SCRIPT_ATTRIBUTES] --Attributes get copied to script_list, where they belong
--at=find(fullname,column(script_list,PAIR_NAME))
at=find_in_column(fullname,script_list,PAIR_NAME)
args_and_defaults=process_arglist(all_scripts[i][SCRIPT_ARG_TOKS])
args=args_and_defaults[1]
defaults=args_and_defaults[2]
if not at then
--define script optional, add to script list
id=pick_script_id(parent_id,all_scripts[i][PAIR_NAME])
script_list=create_function(script_list,id,current_script,defaults,KIND_SCRIPT,script_pos,attributes)
at=length(script_list)
scripts_found&={true}
else
if scripts_found[at]=true then
simple_error(sprintf_utf("Compiler bug; two scripts named %s but this error not handled properly",{current_script}))
end if
scripts_found[at]=true
--The srcpos stored for this script will be from definescript. Repoint it at the actual script
--(doesn't really matter, all_scripts would have ended up with that srcpos anyway)
script_list[at][FUNC_POS]=script_pos
script_list[at][FUNC_ATTRS]&=attributes
if length(args) != length(script_list[at][FUNC_ARGS]) then
src_error(
sprintf_utf(
"Script "&COLYEL&"%s"&COLRED&" has %d arguments named, but has %d arguments in its definescript declaration"
,{current_script,length(args),length(script_list[at][FUNC_ARGS])}
)
,script_pos
)
end if
for j=1 to length(script_list[at][FUNC_ARGS]) do
if not equal(defaults[j],NO_DEFAULT) and not equal(defaults[j],script_list[at][FUNC_ARGS][j]) then
src_error(
sprintf_utf(
"Default value for argument "&COLYEL&"%s"&COLRED&" does not match default given in script definition, %d"
,{all_scripts[i][SCRIPT_ARG_TOKS][j][CMD_TEXT],script_list[at][FUNC_ARGS][j]}
)
,script_pos
)
end if
end for
end if
if length(args)>MAX_ARGS then
src_error(sprintf_utf("Number of arguments (%d) is more than allowed (%d)",{length(args),MAX_ARGS}),script_pos)
end if
all_scripts[i][SCRIPT_ARG_TOKS]=args
all_scripts[i]&={script_list[at][PAIR_NUM]} --append SCRIPT_ID
end for
current_script=""
for i=1 to length(scripts_found) do
if scripts_found[i]=false then
src_error(
sprintf_utf("Script "&COLYEL&"%s"&COLRED&" was declared using "&COLYEL&"define script"&COLRED&" but is missing.",{script_list[i][PAIR_NAME]})
,script_list[i][FUNC_POS]
)
end if
end for
end procedure
---------------------------------------------------------------------------
procedure compile_each_script()
sequence count
enter_timing_zone("Compiling scripts")
color_print("compiling scripts",{})
count=repeat(0,length(file_list))
enter_timing_zone("check_script_declarations")
check_script_declarations()
exit_timing_zone()
for i=1 to length(all_scripts) do
-- color_print("%s\n",{all_scripts[i][SCRIPT_NAME]})
reenter_timing_zone("compile_a_script")
compile_a_script(i)
exit_timing_zone()
count[srcpos_file_number(all_scripts[i][SCRIPT_POS])]+=1
color_print(".",{})
end for
color_print("\n",{})
if not fast_mode and find('u',optlist) then
warn_unused_globals()
end if
for i=1 to length(count) do
if count[i] then
wrap_print("compiled %d scripts from "&COLBWHI&"%s"&COLWHI&"\n",{count[i],file_list[i][FILE_NAME]})
end if
end for
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
function generate_scripts_dot_txt()
sequence result
integer print_scripts
result=""
print_scripts = 0
if find('a',optlist) then
print_scripts = 2
elsif find('s',optlist) then
print_scripts = 1
end if
for i=1 to length(script_list) do
result&=encode_ohr(sprintf_utf("%s\r\n%d\r\n%d\r\n",{script_list[i][PAIR_NAME],script_list[i][PAIR_NUM],length(script_list[i][FUNC_ARGS])}))
for j=1 to length(script_list[i][FUNC_ARGS]) do
if equal(script_list[i][FUNC_ARGS][j],NO_DEFAULT) then
result&="0\r\n"
else
result&=sprintf("%d\r\n",{script_list[i][FUNC_ARGS][j]})
end if
end for
if (print_scripts=1 and script_list[i][PAIR_NUM]=44 then
--Scripts compiled by older copies of hspeak didn't include the parent ID
parent_id=read_int16(record,21)
end if
ret&={ {id, {parent_id,name}} }
end for
--pretty_print(stdout,ret,{3})
return ret
end function
---------------------------------------------------------------------------
function generate_commands_dot_bin()
sequence result
sequence encoded_names
integer i, id, name_num
integer offset
integer records
function_list=sort(function_list)
if length(function_list) then
records=function_list[$][PAIR_NUM]+1
else
records=0
end if
if records>2048 then
simple_warn(sprintf_utf("Maximum function id was %d, but will not export function names above id 2047",{records-1}))
records=2048
end if
--header size in bytes (offset to location table)
result=output_word(6)
--file format version
result&=output_word(0)
--number of records in location table
result&=output_word(records)
offset=length(result)+records*2
id=0
i=1
encoded_names={}
while i<=length(function_list) do
if function_list[i][PAIR_NUM]47 then
temp=without_extension(fname)
fname=temp[1..46-length(ext)]&"."&ext
end if
suffix=count(fname,names_used)
names_used=append(names_used,fname)
if suffix>0 then
temp=without_extension(fname) --you can't slice a function result directly! :(
fname=sprintf_utf("%s~%d.%s",{temp[1..$-1],suffix,ext})
end if
file_list[i][FILE_LUMP]=fname
--color_print("writing %d %s\n",{i,fname})
if write_lump(fh,fname,file_list[i][FILE_TEXT])=false then
simple_error("unable to write a script source lump")
end if
end for
end procedure
---------------------------------------------------------------------------
function generate_srcfiles_dot_txt()
sequence result
result=""
for i=1 to length(file_list) do
result&=sprintf_utf("file=%s\nlump=%s\noffset=%d\nlength=%d\n",
{
absolutize_path(file_list[i][FILE_NAME]),
file_list[i][FILE_LUMP],
file_list[i][FILE_OFFSET],
length(file_list[i][FILE_TEXT])
})
end for
return(result)
end function
---------------------------------------------------------------------------
procedure write_output_file()
sequence header
integer fh
object lh
enter_timing_zone("Writing output file")
header=sprintf("HamsterSpeak\n%s\n%d\n%s\n%d\n%d\n",{COMPILER_VERSION & COMPILER_SUB_VERSION,HSP_FORMAT_VERSION,plotscr_version,HSZ_FORMAT_VERSION,max_used_function})
if length(all_scripts) then
fh=open(dest_file,"wb")
dest_file_fh=fh
if fh!=-1 then
wrap_print("writing output file "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
--write header and version
if write_lump(fh,"HS",header)=false then
simple_error("unable to write header")
end if
--write script index (old file)
if write_lump(fh,"scripts.txt",generate_scripts_dot_txt())=false then
simple_error("unable to write script index")
end if
--write script index (new file)
if write_lump(fh,"scripts.bin",generate_scripts_dot_bin())=false then
simple_error("unable to write binary script index")
end if
--write script commands list
if write_lump(fh,"commands.bin",generate_commands_dot_bin())=false then
simple_error("unable to write commands listing")
end if
--write each script
for i=1 to length(all_scripts) do
if write_lump(fh,sprintf("%d.hsz",{all_scripts[i][SCRIPT_ID]}),all_scripts[i][SCRIPT_BINARY])=false then
simple_error(sprintf_utf("internal problem: unable to write script "&COLYEL&"%s"&COLRED,{all_scripts[i][SCRIPT_NAME]}))
end if
end for
if debug_info then
--write copy of scripts
wrap_print("copying script source code into "&COLBWHI&"%s"&COLWHI&"\n",{dest_file})
lh=begin_lump(fh,"source.lumped")
if equal(lh,false) then
simple_error("unable to write scripts source file")
end if
write_script_files(fh)
if end_lump(lh)=false then
simple_error("unable to finish write of scripts source file")
end if
--write source file table (must occur after write_script_files)
if write_lump(fh,"srcfiles.txt",generate_srcfiles_dot_txt())=false then
simple_error("unable to write source files listing")
end if
end if
close(fh)
else
simple_error(sprintf_utf("attempt to open "&COLYEL&"%s"&COLRED&" failed",{dest_file}))
end if
else
color_print("no scripts to output\n",{})
end if
exit_timing_zone()
end procedure
---------------------------------------------------------------------------
init()
load_and_lex_all()
show_source_info()
preliminary_parse_pass()
parse_top_level()
compile_each_script()
dump_debug_report()
write_output_file()
run_time=time()-start_time
color_print("done (%g seconds)\n",{run_time})
if find('t',optlist) then
print_timing_data()
end if
opt_wait_for_key(was_warnings)
if was_warnings = true then
abort(2)
end if