urlencode.awk (3733B)
# Taken from http://www.shelldorado.com/scripts/cmds/urlencode
##########################################################################
# Title      :  urlencode - encode URL data
# Author     :  Heiner Steven (heiner.steven@odn.de)
# Date       :  2000-03-15
# Requires   :  awk
# Categories :  File Conversion, WWW, CGI
# SCCS-Id.   :  @(#) urlencode 1.4 06/10/29
##########################################################################
# Description
#       Encode data according to
#               RFC 1738: "Uniform Resource Locators (URL)" and
#               RFC 1866: "Hypertext Markup Language - 2.0" (HTML)
#
#       This encoding is used e.g. for the MIME type
#               "application/x-www-form-urlencoded"
#
# Notes
#    o  The default behaviour is not to encode the line endings. This
#       may not be what was intended, because the result will be
#       multiple lines of output (which cannot be used in a URL or an
#       HTTP "POST" request). If the desired output should be one
#       line, use the "-l" option.
#
#    o  The "-l" option assumes that the end-of-line is denoted by
#       the character LF (ASCII 10). This is not true for Windows or
#       Mac systems, where the end of a line is denoted by the two
#       characters CR LF (ASCII 13 10).
#       We use this for symmetry; data processed in the following way:
#               cat | urlencode -l | urldecode -l
#       should (and will) result in the original data
#
#    o  Large lines (or binary files) will break many AWK
#       implementations. If you get the message
#               awk: record `...' too long
#                record number xxx
#       consider using GNU AWK (gawk).
#
#    o  urlencode will always terminate its output with an EOL
#       character
#
# Thanks to Stefan Brozinski for pointing out a bug related to non-standard
# locales.
#
# See also
#       urldecode
##########################################################################

PN=$(basename "$0")                     # Program name (used in messages)
VER='1.4'

# The awk interpreter can be overridden from the environment, e.g.
#       AWK=gawk urlencode file
: "${AWK=awk}"

# Usage - print a short help text to standard error and exit with status 1.
Usage () {
    cat >&2 <<EOF
$PN - encode URL data, $VER
usage: $PN [-l] [file ...]
    -l:  encode line endings (result will be one line of output)

The default is to encode each input line on its own.
EOF
    exit 1
}

# Msg - write each argument as a separate line to standard error,
#       prefixed with the program name.
Msg () {
    for MsgLine
    do printf '%s\n' "$PN: $MsgLine" >&2
    done
}

# Fatal - print the arguments via Msg, then exit with status 1.
Fatal () { Msg "$@"; exit 1; }

# Parse the command line with the "getopts" builtin. Unlike re-splitting
# the output of the external "getopt" command, this keeps file names
# containing blanks or glob characters intact, and an invalid option is
# reliably detected. The leading ":" in the option string silences the
# builtin's own diagnostics; the usage text is printed instead.
EncodeEOL=no
while getopts :hl Opt
do
    case "$Opt" in
        l)  EncodeEOL=yes;;
        *)  Usage;;                     # -h, or an invalid option
    esac
done
shift "$((OPTIND - 1))"                 # what remains are the file names

# Force byte-wise character handling inside awk. LC_ALL takes precedence
# over LANG, so setting LANG alone has no effect when the caller has
# LC_ALL (or LC_CTYPE) set to e.g. a UTF-8 locale.
LANG=C
LC_ALL=C
export LANG LC_ALL

# $AWK is left unquoted so that a value such as "gawk --posix" still
# splits into a command and its options.
# The shell value of EncodeEOL ("yes" or "no") is spliced into the awk
# program text below; it is only ever set by this script.
# awk runs last, so its exit status becomes the exit status of the script.
$AWK '
    BEGIN {
        # We assume an awk implementation that is just plain dumb.
        # We will convert a character to its ASCII value with the
        # table ord[], and produce two-digit hexadecimal output
        # without the printf("%02X") feature.

        EOL = "%0A"             # "end of line" string (encoded)

        # hextab[n] is the hexadecimal digit for n (0..15)
        split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
        hextab [0] = 0

        # ord[c] is the numeric value (1..255) of the character c
        for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0

        if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0
    }
    {
        encoded = ""
        len = length ($0)
        for ( i=1; i<=len; ++i ) {
            c = substr ($0, i, 1)
            if ( c ~ /[a-zA-Z0-9.-]/ ) {
                encoded = encoded c             # safe character
            } else if ( c == " " ) {
                encoded = encoded "+"           # special handling
            } else {
                # unsafe character, encode it as a two-digit hex-number
                code = ord [c]
                lo = code % 16
                hi = int (code / 16)
                encoded = encoded "%" hextab [hi] hextab [lo]
            }
        }
        if ( EncodeEOL ) {
            # "-l": join all records, each followed by an encoded LF
            printf ("%s", encoded EOL)
        } else {
            print encoded
        }
    }
' "$@"