\ Text substitution - reference implementation

\ Run it with:
  \ gforth substitute.f -e bye
  \ ficl substitute.f
  \ forth4p substitute.f
  \ pfe -q -y substitute.f
  \ spf4 'REQUIRE CASE-INS lib/ext/caseins.f s" substitute.f" included bye'

decimal

: [defined] bl word find nip 0<> ; immediate

[defined] [undefined] 0= [if]
: [undefined] postpone [defined] 0= ; immediate
[then]

[undefined] bounds [if]
: bounds        \ addr len -- addr+len addr
  over + swap  ;
[then]

[undefined] -rot [if]
: -rot          \ a b c -- c a b
  rot rot  ;
[then]

[undefined] /string [if]
: /string
  tuck - >r + r> ;
[then]

[undefined] place [if]
: place         \ c-addr1 u c-addr2 --
\ Copy the string described by c-addr1 u as a counted string at
\ the memory address described by c-addr2.
  2dup 2>r
  1 chars + swap move
  2r> c!  ;
[then]

[undefined] scan [if]
: scan          \ c-addr u char -- c-addr' u'
  >r begin dup while  over c@ r@ - while 
    1 /string
  repeat then r> drop ;
[then]

[undefined] 2variable [if]
: 2variable
  create 2 cells allot ;
[then]

char % constant delim
\ Character used as the substitution name delimiter.

wordlist constant wid-subst
\ Wordlist ID of the wordlist used to hold substitution names
\ and replacement text.

create Name 256 chars allot
\ 256 buffer: Name	\ -- addr
\ Scratch buffer to hold substitution name as a counted string.
variable DestLen	\ -- addr
\ Maximum length of the destination buffer.
2variable Dest		\ -- addr
\ Holds destination string current length and address.
variable SubstErr	\ -- addr
\ Holds zero or an error code.

[defined] VFXforth [if]		\ VFX Forth
: makeSubst	\ caddr len -- caddr
\ Given a name string create a substution and storage space.
\ Return the address of the buffer for the substitution text.
\ This word requires carnal knowledge of the host Forth.
\ Some systems may need to perform case conversion here.
  get-current >r  wid-subst set-current
  ($create)			\ like CREATE but takes caddr/len
  r> set-current
  here  256 allot  0 over c!	\ create buffer space
;
[then]

[defined] (WID-CREATE) [if]	\ SwiftForth
: makeSubst	\ caddr len -- caddr
  wid-subst (WID-CREATE)	\ like CREATE but takes caddr/len/wid
  LAST @ >CREATE !
  here  256 allot  0 over c!	\ create buffer space
;
[then]

[undefined] makeSubst [if]
wordlist constant execute-parsing-wordlist

get-current execute-parsing-wordlist set-current

: X ( xt -- )
  previous execute
  source >in ! drop ; immediate \ skip remaining input

set-current

: >order ( wid -- )
  >r get-order 1+ r> swap set-order ;

: execute-parsing ( ... c-addr u xt -- ... )
  >r dup >r
  dup 2 chars + allocate throw >r  \ construct the string to be EVALUATEd
  s" X " r@ swap chars move
  r@ 2 chars + swap chars move
  r> r> 2 + r> rot dup >r rot ( xt c-addr1 u1 r: c-addr1 )
  execute-parsing-wordlist >order  \ make sure the right X is executed
  ['] evaluate catch               \ now EVALUATE the string
  r> free throw throw ;            \ cleanup
	
: makeSubst \ caddr len -- caddr
  get-current >r wid-subst set-current
  ['] create execute-parsing
  r> set-current
  here  256 allot  0 over c!
;
[then]

: findSubst	\ caddr len -- xt flag | 0
\ Given a name string, find the substitution. Return xt and flag
\ if found, or just zero if not found. Some systems may need to
\ perform case conversion here.
  wid-subst search-wordlist
;

: replaces	\ text tlen name nlen --
\ Define the string text/tlen as the text to substitute for the
\ substitution named name/nlen. If the substitution does not
\ exist it is created.
  2dup findSubst if
    nip nip  execute			\ get buffer address
  else
    makeSubst
  then
  place					\ copy as counted string
;

: addDest	\ char --
\ Add the character to the destination string.
  Dest @ DestLen @ < if
    Dest 2@ + c!  1 chars Dest +!
  else
    drop  -1 SubstErr !
  then
;

: formName	\ caddr len -- caddr' len'
\ Given a source string pointing at a leading delimiter, place
\ the name string in the name buffer.
  1 /string  2dup delim scan >r drop	\ find length of residue
  2dup r> - dup >r Name place		\ save name in buffer
  r> 1 chars + /string			\ step over name and trailing %
;

: >dest		\ caddr len --
\ Add a string to the output string.
  bounds
  ?do  i c@ addDest  1 chars +loop
;

: processName	\ -- flag
\ Process the last substitution name. Return true if found,
\ 0 if not found.
  Name count findSubst dup >r if
    execute count >dest
  else
    delim addDest  Name count >dest  delim addDest
  then
  r>
;

: substitute	\ src slen dest dlen -- dest dlen' n
\ Expand the source string using substitutions. Note that this
\ version is simplistic, performs no error checking, and requires
\ a global buffer and global variables.
  DestLen !  0 Dest 2!  0 -rot		\ -- 0 src slen
  0 SubstErr !
  begin
    dup 0 >
   while
    over c@ delim <> if			\ character not %
      over c@ addDest  1 /string
    else
      over 1 chars + c@ delim = if	\ %% for one output %
        delim addDest  2 /string	\ add one % to output
      else
        formName processName
        if  rot 1+ -rot  then		\ count substitutions
      then
    then
  repeat
  2drop  Dest 2@  rot SubstErr @
  if  drop  SubstErr @  then
;

: unescape	\ c-addr1 len1 c-addr2 -- c-addr2 len2
\ Replace each '%' character in the input string c-addr1/len1 by
\ two '%' characters. The output is represented by caddr2/len2.
\ If you pass a string through UNESCAPE and then SUBSTITUTE,
\ you get the original string.
  dup 2swap  over + swap ?do
    i c@ [char] % =
    if  [char] % over c! 1+  then
    i c@ over c! 1+
  loop
  over -
;


\ Tests
\ =====

create tb  256 allot	\ -- addr
\ Buffer for text.
create db  256 allot	\ -- addr
\ destination buffer for text.

: >tb		\ caddr len -- caddr' len
\ Place string in TB, and return the string. Done
\ this way to avoid problems with transient regions.
  tb place  tb count
;

: .sub		\ caddr len n --
\ Display the result of a substitution.
  cr . ." Substitutions, result:" type ." :"
;

: tsub		\ caddr len --
\ Run the substitution text and display the results.
  db 256 substitute .sub
;

s" hello" >tb s" hl" replaces
s" world" >tb s" wld" replaces

s" Start: %hl%,%wld%! :End" tsub
s" Hello, world!" tsub
s" aaa%foobar%bbb" tsub
s" aaa%%bbb" tsub
