[Tcsh] "Readable" Unicode in setenv

H.Merijn Brand tcsh at tux.freedom.nl
Fri Nov 12 15:32:42 UTC 2021


If I have an environment variable that is to contain something Unicodish,
I currently have to do something similar to

 % setenv EURO_CH `perl -CO -e'print "\N{EURO SIGN}"'`
or
 % setenv EURO_CH `perl -CO -e'print "\x{20ac}"'`

so this works
 % echo $EURO_CH
 €

I browsed the tcsh manual, but could not find anything that would hint
at doing this natively. Is there a (hidden) feature to set Unicode
characters from the command line by their name or hex value (in the
current encoding)? Something similar to

 % setenv EURO_CH "\x{20ac}"
 % setenv EURO_CH "\u20AC"

In my digging, I found that

 % setenv TAB_CH "\t"

just sets the environment variable TAB_CH to a literal '\' followed by
a 't', which was kinda surprising to me, as I expected a literal TAB to
be in there. The translation turned out to be done by echo in sh.func.c, so

 % echo $TAB_CH

translated \t to TAB. To do the same for \x{20ac}, \xbf, and \u0020ac
I changed sh.func.c as shown below, but I eventually want those escapes
to end up literally in the environment. Thoughts welcome.

--8<---
diff --git a/sh.func.c b/sh.func.c
index cdfb6d8d..cbc4ff41 100644
--- a/sh.func.c
+++ b/sh.func.c
@@ -1196,6 +1196,22 @@ doglob(Char **v, struct command *c)
     flush();
 }

+static Char
+parse_hex_range(Char **cp, int l)
+{
+    int  ui = 0;
+    char ub[9];
+
+    if (l > 8) return 0; /* Unsupported length */
+
+    while (**cp && ui < l && isxdigit(**cp)) {
+       ub[ui++] = (char)**cp;
+       (*cp)++;
+    }
+    ub[ui] = (char)0;
+    return strtol (ub, NULL, 16);
+}
+
 static void
 xecho(int sep, Char **v)
 {
@@ -1289,6 +1305,28 @@ xecho(int sep, Char **v)
                    if (*cp >= '0' && *cp < '8')
                        c = c * 8 + *cp++ - '0';
                    break;
+               case 'x':
+                   if (*cp == '{' && isxdigit(*(cp + 1))) { /* \x{20ac} */
+                       cp++;
+                       c = parse_hex_range (&cp, 8);
+                       if (*cp != '}')
+                           stderror(ERR_NAME | ERR_VARBEGIN);
+                       cp++;
+                   }
+                   else if (isxdigit(*cp)) {   /* \x9f */
+                       c = parse_hex_range (&cp, 2);
+                   }
+                   else /* backward compat */
+                       xputchar('\\' | QUOTE);
+                   break;
+               case 'u':
+                   if (isxdigit(*cp)) {        /* \u0020ac */
+                       c = parse_hex_range (&cp, 6);
+                   }
+                   else /* backward compat */
+                       xputchar('\\' | QUOTE);
+                   break;
+
                case '\0':
                    c = '\\';
                    cp--;
-->8---

-- 
H.Merijn Brand  https://tux.nl   Perl Monger   http://amsterdam.pm.org/
using perl5.00307 .. 5.33        porting perl5 on HP-UX, AIX, and Linux
https://tux.nl/email.html http://qa.perl.org https://www.test-smoke.org
                           
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <https://mailman.astron.com/pipermail/tcsh/attachments/20211112/c89534fc/attachment.asc>


More information about the Tcsh mailing list