;;;; -*- coding utf-8 -*- ;;;; test-utf8.stk -- Test of UTF-8 strings ;;;; ;;;; Copyright © 2011 Erick Gallesio - Polytech'Nice-Sophia ;;;; ;;;; ;;;; This program is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 2 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; This program is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program; if not, write to the Free Software ;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, ;;;; USA. ;;;; ;;;; Author: Erick Gallesio [eg@unice.fr] ;;;; Creation date: 27-May-2011 23:35 (eg) ;;;; Last file update: 8-Oct-2011 22:13 (eg) ;;;; (require "test") (define *lang-is-utf8?* (let ((lang (getenv "LANG"))) (and lang (or (string-find? "UTF8" lang) (string-find? "utf8" lang))))) (test-section "Unicode Characters") ;;------------------------------------------------------------------ (test-subsection "basic functions") (test "length.1" 4 (string-length "«€œ»")) (test "length.2" 8 (string-length "♜♞♝♛♚♝♞♜")) (test "length.3" 8 (string-length (make-string 8 #\♟))) (test "const" "♟♟♟♟♟♟♟♟" (make-string 8 #\♟)) (test-subsection "Chibi Scheme unicode tests") (test "prechibi.1" '(1056 1056 1091 1089 1089 1082 1080 1081) (map char->integer (cons #\Р (string->list "Русский")))) (test "chibi.1" #\Р (string-ref "Русский" 0)) (test "chibi.2" #\и (string-ref "Русский" 5)) (test "chibi.3" #\й (string-ref "Русский" 6)) (test "chibi.4" 7 (string-length "Русский")) (test "chibi.5" #\日 (string-ref "日本語" 0)) (test "chibi.6" #\本 (string-ref "日本語" 1)) (test "chibi.7" #\語 (string-ref "日本語" 2)) (test "chibi.8" 3 (string-length "日本語")) (test "chibi.9" '(#\日 #\本 #\語) (string->list "日本語")) (test "chibi.10" "日本語" (list->string '(#\日 #\本 #\語))) (test "chibi.11" "日本" (substring "日本語" 0 2)) (test "chibi.12" "本語" (substring "日本語" 1 3)) (test "chibi.13" "日-語" (let ((s (substring "日本語" 0 3))) (string-set! s 1 #\-) s)) (test "chibi.14" "日本人" (let ((s (substring "日本語" 0 3))) (string-set! s 2 #\人) s)) (test "chibi.15" "字字字" (make-string 3 #\字)) (test "chibi.16" "字字字" (let ((s (make-string 3))) (string-fill! s #\字) s)) ;; Following tests come from ;;http://www.smyles.com/projects/r6gambit/darcs/test/r6rs/unicode.sls (test "gambit.1" #\I (char-upcase #\i)) (test "gambit.2" #\i (char-downcase #\i)) #;(test "gambit.3" #\I (char-titlecase #\i)) ;; not R7 (test "gambit.4" #\i (char-foldcase #\i)) (test "gambit.5" #\xDF (char-upcase #\xDF)) (test "gambit.6" #\xDF (char-downcase #\xDF)) #;(test "gambit.7" #\xDF (char-titlecase #\xDF)) ;; not R7 (test "gambit.8" #\xDF (char-foldcase #\xDF)) (test "gambit.9" #\x3A3 (char-upcase #\x3A3)) (when *lang-is-utf8?* (test "gambit.10" #\x3C3 (char-downcase #\x3A3))) #;(test "gambit.11" #\x3A3 (char-titlecase #\x3A3)) ;; not R7 (when *lang-is-utf8?* (test "gambit.12" #\x3C3 (char-foldcase #\x3A3))) (when *lang-is-utf8?* (test "gambit.13" #\x3A3 (char-upcase #\x3C2))) (test "gambit.14" #\x3C2 (char-downcase #\x3C2)) #;(test "gambit.15" #\x3A3 (char-titlecase #\x3C2)) ;; not R7 (test "gambit.16" #\x3C3 (char-foldcase #\x3C2)) (test "gambit.17" #f (char-ci? #\z #\Z)) (test "gambit.29" #f (char-ci>? #\Z #\z)) (test "gambit.30" #f (char-ci>? #\a #\Z)) (test "gambit.31" #t (char-ci>? #\Z #\a)) (test "gambit.32" #t (char-ci>=? #\Z #\z)) (test "gambit.33" #t (char-ci>=? #\z #\Z)) (test "gambit.34" #t (char-ci>=? #\z #\Z)) (test "gambit.35" #f (char-ci>=? #\a #\z)) (test "gambit.36" #t (char-alphabetic? #\a)) (test "gambit.37" #f (char-alphabetic? #\1)) (test "gambit.38" #t (char-numeric? #\1)) (test "gambit.39" #f (char-numeric? #\a)) (test "gambit.40" #t (char-whitespace? #\space)) #;(test "gambit.41" #t (char-whitespace? #\x00A0)) ;; not clear (test "gambit.42" #f (char-whitespace? #\a)) (test "gambit.43" #f (char-upper-case? #\a)) (test "gambit.44" #t (char-upper-case? #\A)) (when *lang-is-utf8?* (test "gambit.45" #t (char-upper-case? #\x3A3))) (test "gambit.46" #t (char-lower-case? #\a)) (test "gambit.47" #f (char-lower-case? #\A)) (when *lang-is-utf8?* (test "gambit.48" #t (char-lower-case? #\x3C3))) #;(test "gambit.49" #t (char-lower-case? #\x00AA)) ;; not clear #;(test "gambit.50" #f (char-title-case? #\a)) ;; Not R7 #;(test "gambit.51" #f (char-title-case? #\A)) ;; Not R7 #;(test "gambit.52" #f (char-title-case? #\I)) ;; Not R7 #;(test "gambit.53" #t (char-title-case? #\x01C5)) ;; Not R7 #;(test "gambit.54" 'Ll (char-general-category #\a)) ;; Not R7 #;(test "gambit.55" 'Zs (char-general-category #\space)) ;; Not R7 #;(test "gambit.56" 'Cn (char-general-category #\x10FFFF)) ;; Not R7 (test "gambit.57" "HI" (string-upcase "Hi")) (test "gambit.58" "HI" (string-upcase "HI")) (test "gambit.59" "hi" (string-downcase "Hi")) (test "gambit.60" "hi" (string-downcase "hi")) (test "gambit.61" "hi" (string-foldcase "Hi")) (test "gambit.G2" "hi" (string-foldcase "HI")) (test "gambit.63" "hi" (string-foldcase "hi")) #; (test "gambit.64" "STRASSE" (string-upcase "Stra\xDF;e")) ;; not R7 (test "gambit.65" "stra\xDF;e" (string-downcase "Stra\xDF;e")) #;(test "gambit.66" "strasse" (string-foldcase "Stra\xDF;e")) ;; not R7 (test "gambit.67" "strasse" (string-downcase "STRASSE")) (when *lang-is-utf8?* (test "gambit.68" "\x3C3;" (string-downcase "\x3A3;"))) (test "gambit.69" "\x39E;\x391;\x39F;\x3A3;" (string-upcase "\x39E;\x391;\x39F;\x3A3;")) #;(test "gambit.70" "\x3BE;\x3B1;\x3BF;\x3C2;" ;; not R7 (string-downcase "\x39E;\x391;\x39F;\x3A3;")) #;(test "gambit.71" "\x3BE;\x3B1;\x3BF;\x3C3;\x3C2;" ;; not R7 (string-downcase "\x39E;\x391;\x39F;\x3A3;\x3A3;")) #;(test "gambit.72" "\x3BE;\x3B1;\x3BF;\x3C2; \x3C3;" (string-downcase "\x39E;\x391;\x39F;\x3A3; \x3A3;")) ;; not R7 (when *lang-is-utf8?* (test "gambit.73" "\x3BE;\x3B1;\x3BF;\x3C3;" (string-foldcase "\x39E;\x391;\x39F;\x3A3;")) (test "gambit.74" "\x39E;\x391;\x39F;\x3A3;" (string-upcase "\x3BE;\x3B1;\x3BF;\x3C3;")) (test "gambit.75" "\x39E;\x391;\x39F;\x3A3;" (string-upcase "\x3BE;\x3B1;\x3BF;\x3C2;")) ) (test "gambit.76" "Knock Knock" (string-titlecase "kNock KNoCK")) #;(test "gambit.77" "Who's There?" ;; not clear (string-titlecase "who's there?") equal?) (test "gambit.78" "R6Rs" (string-titlecase "r6rs")) (test "gambit.79" "R6Rs" (string-titlecase "R6RS")) #;(test "gambit.80" (string-downcase "A\x3A3;'x") "a\x3C3;'x") ; ' is a MidLetter (test "gambit.81" #t (string-ci? "a" "Z")) (test "gambit.88" #f (string-ci>? "A" "z")) (test "gambit.89" #t (string-ci>? "Z" "a")) (test "gambit.90" #t (string-ci>? "z" "A")) (test "gambit.91" #f (string-ci>? "z" "Z")) (test "gambit.92" #f (string-ci>? "Z" "z")) (test "gambit.93" #t (string-ci=? "z" "Z")) (test "gambit.94" #f (string-ci=? "z" "a")) #;(test "gambit.95" #t (string-ci=? "Stra\xDF;e" "Strasse")) ;; Not R7 #;(test "gambit.96" #t (string-ci=? "Stra\xDF;e" "STRASSE")) ;; Not R7 #;(test "gambit.97" #t (string-ci=? "\x39E;\x391;\x39F;\x3A3;" ;; Not R7 "\x3BE;\x3B1;\x3BF;\x3C2;")) (when *lang-is-utf8?* (test "gambit.98" #t (string-ci=? "\x39E;\x391;\x39F;\x3A3;" "\x3BE;\x3B1;\x3BF;\x3C3;"))) (test "gambit.99" #t (string-ci<=? "a" "Z")) (test "gambit.100" #t (string-ci<=? "A" "z")) (test "gambit.101" #f (string-ci<=? "Z" "a")) (test "gambit.102" #f (string-ci<=? "z" "A")) (test "gambit.103" #t (string-ci<=? "z" "Z")) (test "gambit.104" #t (string-ci<=? "Z" "z")) (test "gambit.105" #f (string-ci>=? "a" "Z")) (test "gambit.106" #f (string-ci>=? "A" "z")) (test "gambit.107" #t (string-ci>=? "Z" "a")) (test "gambit.108" #t (string-ci>=? "z" "A")) (test "gambit.109" #t (string-ci>=? "z" "Z")) (test "gambit.110" #t (string-ci>=? "Z" "z")) ;;// ;;// (test (string-normalize-nfd "\xE9;") "\x65;\x301;") ;;// (test (string-normalize-nfc "\xE9;") "\xE9;") ;;// (test (string-normalize-nfd "\x65;\x301;") "\x65;\x301;") ;;// (test (string-normalize-nfc "\x65;\x301;") "\xE9;") ;;// ;;// (test (string-normalize-nfkd "\xE9;") "\x65;\x301;") ;;// (test (string-normalize-nfkc "\xE9;") "\xE9;") ;;// (test (string-normalize-nfkd "\x65;\x301;") "\x65;\x301;") ;;// (test (string-normalize-nfkc "\x65;\x301;") "\xE9;") ;;// ;;------------------------------------------------------------------ (test-section-end)