quotes.fs 4.99 KB
Newer Older
1 2
\ quote: S\" and .\" words

Anton Ertl's avatar
Anton Ertl committed
3
\ Copyright (C) 2002,2003,2005,2007,2008,2018 Free Software Foundation, Inc.
4 5 6 7 8

\ This file is part of Gforth.

\ Gforth is free software; you can redistribute it and/or
\ modify it under the terms of the GNU General Public License
9
\ as published by the Free Software Foundation, either version 3
10 11 12 13 14 15 16 17
\ of the License, or (at your option) any later version.

\ This program is distributed in the hope that it will be useful,
\ but WITHOUT ANY WARRANTY; without even the implied warranty of
\ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
\ GNU General Public License for more details.

\ You should have received a copy of the GNU General Public License
18
\ along with this program. If not, see http://www.gnu.org/licenses/.
19

20 21 22 23 24 25 26 27 28
[ifundef] umin
: umin ( u1 u2 -- u )
    2dup u>
    if
	swap
    then
    drop ;
[then]

29 30
: char/ ; immediate

31 32
: parse-num-x  ( c-addr1 umax -- c-addr2 c )
    >r 0. rot source chars + over - char/ r> umin >number
33 34
    drop rot rot drop ;

35 36
: parse-num ( c-addr1 umax base -- c-addr2 c )
    ['] parse-num-x swap base-execute ;
37 38

create \-escape-table
39 40 41 42
 7 c, #bs c,  'c c,   'd c, #esc c,   #ff c,   'g c,
'h c,  'i c,  'j c,   'k c,  #lf c,   #lf c,  #lf c,
'o c,  'p c,  '" c,  #cr c,   's c,  #tab c,   'u c,
11 c,  'w c,  'x c,   'y c,    0 c,
43

44 45 46
: \-escape, ( c-addr1 -- c-addr2 )
    \ c-addr1 points at a char right after a '\', c-addr2 points right after
    \ the whole sequence, the translated chars are appended to the dictionary.
anton's avatar
anton committed
47
    dup c@
48
    dup 'U' = if
49
	drop char+ 8 16 parse-num xc, exit
50
    endif
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
    dup 'u' = if
	drop char+ 4 16 parse-num
	dup $DC00 and $D800 = if
	    >r  count '\' = >r count 'u' = r> and if
		4 16 parse-num dup $DC00 and $DC00 = if
		    $3FF and r> $3FF and #10 lshift or $10000 +
		    xc,  exit
		else
		    r> xc, xc,  exit
		endif
	    endif
	    -2 + r>
	endif
	xc, exit
    endif
    dup 'x' = if
	drop char+ 2 16 parse-num c, exit
68
    endif
69 70
    dup '0' '8' within if
	drop 3 8 parse-num c, exit
71
    endif
72
    dup 'n' = if
anton's avatar
anton committed
73 74 75 76 77 78
	\ \-escapes were designed to translate to one character, so
	\ this is quite ugly: copy all but the last char right away
	drop newline 1-
	2dup here swap chars dup allot move
	chars + c@
    else
79
        dup 'm' = if \ crlf; ugly, because it's two characters
80 81
            #cr c, \ first half, the rest follows below
        endif
82 83
	dup 'a' '{' within if
	    dup 'a' - chars \-escape-table + c@
84
	    tuck = IF '\' c, THEN
anton's avatar
anton committed
85
	endif
86
    endif
87
    c, char+ ;
88

89
: \"-parse ( "string"<"> -- c-addr u ) \ gforth-internal  backslash-quote-parse
90
\G parses string, translating @code{\}-escapes to characters (as in
91 92
\G C).  The resulting string resides at @code{here}.  See @code{S\"}
\G for the supported @code{\-escapes}.
93
    here >r
94 95
    >in @ chars source chars over + >r + begin ( parse-area R: here parse-end )
	dup r@ < while
96 97
	    dup c@ '" <> while
		dup c@ dup '\ = if ( parse-area c R: here parse-end )
98
		    drop char+ dup r@ = abort" unfinished \-escape"
99
		    \-escape,
100 101 102 103 104 105 106
		else
		    c, char+
		endif
	repeat then
    char+ source >r - r> min char/ >in !
    r> drop
    here r> - dup negate allot
107
    here swap char/ ;
108 109

:noname \"-parse save-mem ;
110
:noname \"-parse save-mem 2dup postpone sliteral drop free throw ;
111
interpret/compile: s\" ( compilation 'ccc"' -- ; run-time -- c-addr u )	\ gforth	s-backslash-quote
112 113 114 115 116 117
\G Like @code{S"}, but translates C-like \-escape-sequences, as follows:
\G @code{\a} BEL (alert), @code{\b} BS, @code{\e} ESC (not in C99), @code{\f}
\G FF, @code{\n} newline, @code{\r} CR, @code{\t} HT, @code{\v} VT, @code{\"}
\G ", @code{\\} \, @code{\}[0-7]@{1,3@} octal numerical character value
\G (non-standard), @code{\x}[0-9a-f]@{0,2@} hex numerical character value
\G (standard only with two digits), @code{\u}[0-9a-f]@{4@} for unicode
118
\G codepoints (auto-merges surrogate pairs), @code{\U}[0-9a-f]@{8@} for
119 120
\G extended unicode code points; a @code{\} before any other character is
\G reserved.
Anton Ertl's avatar
Anton Ertl committed
121 122 123 124 125 126
\G @*
\G Note that @code{\x}XX produces raw bytes, while @code{\u}XXXX and
\G @code{\U}XXXXXXXX produce code points for the current encoding.
\G E.g., if we use UTF-8 encoding and want to encode @"a (code point
\G U+00E4), you can write the letter @"a itself, or write @code{\xc3\xa4}
\G (the UTF-8 bytes for this code point), @code{\u00e4}, or @code{\U000000e4}.
127 128

:noname \"-parse type ;
129
:noname postpone s\" postpone type ;
130
interpret/compile: .\" ( compilation 'ccc"' -- ; run-time -- )	\ gforth	dot-backslash-quote
131 132
\G Like @code{."}, but translates C-like \-escape-sequences (see
\G @code{S\"}).
133

anton's avatar
anton committed
134
0 [if] \ test
135 136
    s" 123" drop 10 parse-num-x 123 <> throw drop .s
    s" 123a" drop 10 parse-num   123 <> throw drop .s
137 138 139
    s" x1fg" drop \-escape, here 1- c@ 31 <> throw drop .s
    s" 00129" drop \-escape, here 1- c@ 10 <> throw drop .s
    s" a" drop \-escape, here 1- c@ 7 <> throw drop .s
140
    \"-parse " s" " str= 0= throw .s
141
    \"-parse \a\b\c\e\f\n\r\t\v\100\x40xabcde" dump
anton's avatar
anton committed
142
    s\" \a\bcd\e\fghijklm\12op\"\rs\tu\v" \-escape-table over str= 0= throw
143
    s\" \w\0101\x041\"\\" name wAA"\ str= 0= throw
144 145
    s\" s\\\" \\" ' evaluate catch 0= throw
[endif]