Select Git revision
-
Mirar (Pontus Hagland) authored
Rev: src/modules/MIME/mime.c:1.11
Mirar (Pontus Hagland) authoredRev: src/modules/MIME/mime.c:1.11
mime.c 21.04 KiB
/*
* $Id: mime.c,v 1.11 1998/04/04 01:15:40 mirar Exp $
*
* RFC1521 functionality for Pike
*
* Marcus Comstedt 1996-1997
*/
#include "config.h"
#include "global.h"
RCSID("$Id: mime.c,v 1.11 1998/04/04 01:15:40 mirar Exp $");
#include "stralloc.h"
#include "pike_macros.h"
#include "object.h"
#include "program.h"
#include "interpret.h"
#include "builtin_functions.h"
#include "error.h"
/* SIGNED is written in front of `char' wherever this file wants a
   character type that can hold -1 (the "invalid" marker in the reverse
   lookup tables) and negative input bytes.  When the compiler defines
   __CHAR_UNSIGNED__ it expands to `signed'; otherwise it expands to
   nothing (presumably because plain char is then already signed). */
#ifdef __CHAR_UNSIGNED__
#define SIGNED signed
#else
#define SIGNED
#endif
/** Forward declarations of functions implementing Pike functions **/
/* Each of these receives, in args, the number of arguments the caller
   has placed on the Pike stack; the arguments themselves are read from
   sp[-args] .. sp[-1].  They are registered under their Pike names in
   pike_module_init(). */
static void f_decode_base64( INT32 args );
static void f_encode_base64( INT32 args );
static void f_decode_qp( INT32 args );
static void f_encode_qp( INT32 args );
static void f_decode_uue( INT32 args );
static void f_encode_uue( INT32 args );
static void f_tokenize( INT32 args );
static void f_quote( INT32 args );
/** Global tables **/
/* The base64 alphabet, indexed by 6-bit value.  The array is exactly 64
   characters long, so no terminating NUL is stored. */
static char base64tab[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Reverse base64 mapping, indexed by (character - ' ').  Filled in by
   pike_module_init(); entries for characters that are not part of the
   alphabet hold -1. */
static SIGNED char base64rtab[0x80-' '];
/* The (upper case) hexadecimal digits, indexed by 4-bit value. */
static char qptab[16] = "0123456789ABCDEF";
/* Reverse hexadecimal mapping, indexed by (character - '0').  Filled in
   by pike_module_init(), which enters both upper and lower case digits;
   all other entries hold -1. */
static SIGNED char qprtab[0x80-'0'];
/* Lexical character classes stored in rfc822ctype[] and switched on by
   the tokenizer: control character, whitespace, atom character, special
   character, '(' , ')', '[', ']' and '"' respectively. */
#define CT_CTL 0
#define CT_WHITE 1
#define CT_ATOM 2
#define CT_SPECIAL 3
#define CT_LPAR 4
#define CT_RPAR 5
#define CT_LBRACK 6
#define CT_RBRACK 7
#define CT_QUOTE 8
/* Lexical class (one of the CT_ values) for every possible byte value.
   Set up by pike_module_init(). */
unsigned char rfc822ctype[256];
/** Externally available functions **/
/* Initialize and start module */
void pike_module_init( void )
{
int i;
/* Init reverse base64 mapping */
memset( base64rtab, -1, sizeof(base64rtab) );
for (i = 0; i < 64; i++)
base64rtab[base64tab[i] - ' '] = i;
/* Init reverse qp mapping */
memset( qprtab, -1, sizeof(qprtab) );
for (i = 0; i < 16; i++)
qprtab[qptab[i]-'0'] = i;
for (i = 10; i < 16; i++)
/* Lower case hex digits */
qprtab[qptab[i] - ('0' + 'A' - 'a')] = i;
/* Init lexical properties of characters for MIME.tokenize() */
memset( rfc822ctype, CT_ATOM, sizeof(rfc822ctype) );
for (i = 0; i < 32; i++)
rfc822ctype[i] = CT_CTL;
rfc822ctype[127] = CT_CTL;
rfc822ctype[' '] = CT_WHITE;
rfc822ctype['\t'] = CT_WHITE;
rfc822ctype['('] = CT_LPAR;
rfc822ctype[')'] = CT_RPAR;
rfc822ctype['['] = CT_LBRACK;
rfc822ctype[']'] = CT_LBRACK;
rfc822ctype['"'] = CT_QUOTE;
for(i=0; i<10; i++)
rfc822ctype[(int)"<>@,;:\\/?="[i]] = CT_SPECIAL;
/* Add global functions */
add_function_constant( "decode_base64", f_decode_base64,
"function(string:string)", OPT_TRY_OPTIMIZE );
add_function_constant( "encode_base64", f_encode_base64,
"function(string,void|int:string)",OPT_TRY_OPTIMIZE );
add_function_constant( "decode_qp", f_decode_qp,
"function(string:string)", OPT_TRY_OPTIMIZE );
add_function_constant( "encode_qp", f_encode_qp,
"function(string,void|int:string)",OPT_TRY_OPTIMIZE );
add_function_constant( "decode_uue", f_decode_uue,
"function(string:string)", OPT_TRY_OPTIMIZE );
add_function_constant( "encode_uue", f_encode_uue,
"function(string,void|string:string)",
OPT_TRY_OPTIMIZE);
add_function_constant( "tokenize", f_tokenize,
"function(string:array(string|int))",
OPT_TRY_OPTIMIZE );
add_function_constant( "quote", f_quote,
"function(array(string|int):string)",
OPT_TRY_OPTIMIZE );
}
/* Restore and exit module.
   The body is empty: this file performs no cleanup of its own when the
   module is exited. */
void pike_module_exit( void )
{
}
/** Functions implementing Pike functions **/
/* MIME.decode_base64()
 *
 * Expects exactly one string on the Pike stack and replaces it with the
 * base64-decoded data.  Characters that are neither part of the base64
 * alphabet nor '=' are skipped, which is why the output is collected in
 * a dynamic buffer rather than a preallocated string.
 */
static void f_decode_base64( INT32 args )
{
  dynamic_buffer out;
  SIGNED char *in;
  /* acc collects 6 bits per input character below a marker bit that
     starts out as bit 0; after four characters the marker has reached
     bit 24 and the low 24 bits are complete. */
  INT32 left, acc = 1;
  int npads = 0;

  /* error() is relied upon not to return */
  if (args != 1)
    error( "Wrong number of arguments to MIME.decode_base64()\n" );
  if (sp[-1].type != T_STRING)
    error( "Wrong type of argument to MIME.decode_base64()\n" );

  out.s.str = NULL;
  initialize_buf( &out );

  in = (SIGNED char *)sp[-1].u.string->str;
  for (left = sp[-1].u.string->len; left--; in++) {
    /* Look up the 6 bit value; -1 for anything outside the table or
       not in the alphabet */
    int sextet = (*in >= ' ') ? base64rtab[*in - ' '] : -1;

    if (sextet >= 0) {
      acc = (acc << 6) | sextet;
      if (acc >= 0x1000000) {
        /* Marker reached bit 24: three whole bytes are available */
        low_my_putchar( acc>>16, &out );
        low_my_putchar( acc>>8, &out );
        low_my_putchar( acc, &out );
        acc = 1;
      }
    } else if (*in == '=') {
      /* Pad character: count it and drop the two unused bits */
      npads++;
      acc >>= 2;
    }
  }

  /* One pad means two bytes remain in acc, two pads mean one byte */
  if (npads == 1)
    low_my_putchar( acc>>8, &out );
  if (npads == 1 || npads == 2)
    low_my_putchar( acc, &out );

  /* Swap the argument for the result */
  pop_n_elems( 1 );
  push_string( low_free_buf( &out ) );
}
/* Helper for encode_base64(): turns groups*3 input bytes at *srcp into
 * groups*4 base64 characters at *destp and advances both pointers past
 * what was consumed/produced.
 *
 * If insert_crlf is nonzero, a CR LF pair is written after every 19th
 * group.  The return value is the number of groups written since the
 * last such line break (always 0 when insert_crlf is zero).
 */
static int do_b64_encode( INT32 groups, unsigned char **srcp, char **destp,
                          int insert_crlf )
{
  unsigned char *in = *srcp;
  char *out = *destp;
  int on_line = 0;

  for (; groups; groups--) {
    /* Assemble three bytes into one 24 bit quantity */
    INT32 bits = (in[0] << 16) | (in[1] << 8) | in[2];

    /* Emit it as four 6 bit digits, most significant first */
    out[0] = base64tab[bits >> 18];
    out[1] = base64tab[(bits >> 12) & 63];
    out[2] = base64tab[(bits >> 6) & 63];
    out[3] = base64tab[bits & 63];
    in += 3;
    out += 4;

    /* Break the line after 19 groups (76 characters) */
    if (insert_crlf) {
      on_line++;
      if (on_line == 19) {
        *out++ = 13;
        *out++ = 10;
        on_line = 0;
      }
    }
  }

  /* Hand the advanced positions back to the caller */
  *srcp = in;
  *destp = out;
  return on_line;
}
/* MIME.encode_base64()
 *
 * Takes a string and optionally a second argument from the Pike stack
 * and replaces them with the base64 encoding of the string.  Line
 * breaks (CR LF after every 76 output characters) are inserted unless
 * the second argument is a nonzero integer.
 */
static void f_encode_base64( INT32 args )
{
  struct pike_string *res;
  unsigned char *in;
  char *out;
  INT32 ngroups;
  int tail_len, want_crlf;

  /* error() is relied upon not to return */
  if (args != 1 && args != 2)
    error( "Wrong number of arguments to MIME.encode_base64()\n" );
  if (sp[-args].type != T_STRING)
    error( "Wrong type of argument to MIME.encode_base64()\n" );

  /* Number of 24 bit groups (the last one possibly incomplete), and the
     number of bytes really present in the last group */
  ngroups = (sp[-args].u.string->len+2)/3;
  tail_len = (sp[-args].u.string->len-1)%3+1;
  want_crlf = (args != 2 || sp[-1].type != T_INT || sp[-1].u.integer == 0);

  /* Four characters per group plus two per line break */
  res = begin_shared_string( ngroups*4 + (want_crlf ? (ngroups/19)*2 : 0) );
  in = (unsigned char *)sp[-args].u.string->str;
  out = res->str;

  if (ngroups) {
    /* The last group is encoded from a zero padded copy so that no
       bytes beyond the end of the input are read. */
    unsigned char tail[3], *tailp = tail;
    int pad;

    /* All groups but the last.  If exactly 18 groups sit on the current
       line, the last group completes it and the line break that was
       budgeted for after it is never written, so give those two bytes
       back. */
    if (do_b64_encode( ngroups-1, &in, &out, want_crlf ) == 18)
      res->len -= 2;

    tail[0] = in[0];
    tail[1] = (tail_len > 1 ? in[1] : 0);
    tail[2] = (tail_len > 2 ? in[2] : 0);

    /* Encode the last group, then overwrite the digits that stem only
       from padding bytes with '=' */
    do_b64_encode( 1, &tailp, &out, 0 );
    for (pad = tail_len; pad < 3; pad++)
      *--out = '=';
  }

  /* Swap the arguments for the result */
  pop_n_elems( args );
  push_string( end_shared_string( res ) );
}
/* MIME.decode_qp()
 *
 * Expects exactly one string on the Pike stack and replaces it with the
 * quoted-printable decoded data.  "=XX" (two hex digits, either case)
 * becomes the corresponding byte, "=" followed by CR, LF or CR LF is a
 * soft line break and vanishes, and an "=" followed by anything else is
 * dropped on its own.  Everything else is copied unchanged.
 */
static void f_decode_qp( INT32 args )
{
  dynamic_buffer out;
  SIGNED char *in;
  INT32 left;

  /* error() is relied upon not to return */
  if (args != 1)
    error( "Wrong number of arguments to MIME.decode_qp()\n" );
  if (sp[-1].type != T_STRING)
    error( "Wrong type of argument to MIME.decode_qp()\n" );

  /* The amount of encoded vs. raw data is unknown, hence the dynamic
     buffer */
  out.s.str=NULL;
  initialize_buf(&out);

  in = (SIGNED char *)sp[-1].u.string->str;
  left = sp[-1].u.string->len;
  while (left--) {
    /* c is the current character; in now points at the one after it
       and left counts the characters from in onwards */
    SIGNED char c = *in++;

    if (c != '=') {
      /* Raw data */
      low_my_putchar( c, &out );
      continue;
    }

    if (left > 0 && (in[0] == 10 || in[0] == 13)) {
      /* Soft line break: swallow CR, LF or CR LF */
      if (in[0] == 13) {
        left--;
        in++;
      }
      if (left > 0 && in[0] == 10) {
        left--;
        in++;
      }
    } else if (left >= 2 && in[0] >= '0' && in[1] >= '0' &&
               qprtab[in[0]-'0'] >= 0 && qprtab[in[1]-'0'] >= 0) {
      /* Two hexadecimal digits */
      int hi = qprtab[in[0]-'0'];
      int lo = qprtab[in[1]-'0'];
      low_my_putchar( (hi<<4)|lo, &out );
      left -= 2;
      in += 2;
    }
  }

  /* Swap the argument for the result */
  pop_n_elems( 1 );
  push_string( low_free_buf( &out ) );
}
/* MIME.encode_qp()
 *
 * Takes a string and optionally a second argument from the Pike stack
 * and replaces them with the quoted-printable encoding of the string.
 * Bytes 33..126 except '=' (61) are copied as they are; every other
 * byte becomes "=XX".  Soft line breaks ("=" CR LF) are inserted to
 * keep lines short unless the second argument is a nonzero integer.
 */
static void f_encode_qp( INT32 args )
{
  dynamic_buffer out;
  unsigned char *in;
  INT32 left;
  int col = 0;
  int want_crlf;

  /* error() is relied upon not to return */
  if (args != 1 && args != 2)
    error( "Wrong number of arguments to MIME.encode_qp()\n" );
  if (sp[-args].type != T_STRING)
    error( "Wrong type of argument to MIME.encode_qp()\n" );

  want_crlf = (args != 2 || sp[-1].type != T_INT || sp[-1].u.integer == 0);

  /* The output size depends on how many bytes need escaping, so a
     dynamic buffer is used */
  out.s.str = NULL;
  initialize_buf( &out );

  in = (unsigned char *)sp[-args].u.string->str;
  for (left = sp[-args].u.string->len; left--; in++) {
    unsigned char c = *in;

    if (c < 33 || c > 126 || c == 61) {
      /* Needs a hex escape, which occupies three columns */
      low_my_putchar( '=', &out );
      low_my_putchar( qptab[c>>4], &out );
      low_my_putchar( qptab[c&15], &out );
      col += 3;
    } else {
      /* Can always be represented by itself */
      low_my_putchar( c, &out );
      col++;
    }

    /* Keep the lines from growing too long */
    if (col >= 73 && want_crlf) {
      low_my_putchar( '=', &out );
      low_my_putchar( 13, &out );
      low_my_putchar( 10, &out );
      col = 0;
    }
  }

  /* Swap the arguments for the result */
  pop_n_elems( args );
  push_string( low_free_buf( &out ) );
}
/* MIME.decode_uue()
 *
 * Expects exactly one string on the Pike stack.  The string is searched
 * for the first occurrence of "begin "; decoding starts on the line
 * after it and continues until the input runs out, a line is shorter
 * than its length byte promises, or a line starts with 'e' ("end").
 *
 * The argument is replaced by the decoded data, or by the integer 0 if
 * no complete "begin" line was found.
 */
static void f_decode_uue( INT32 args )
{
  if (args != 1)
    error( "Wrong number of arguments to MIME.decode_uue()\n" );
  else if(sp[-1].type != T_STRING)
    error( "Wrong type of argument to MIME.decode_uue()\n" );
  else {
    /* Decode string in sp[-1].u.string.  This is done much like in
       the base64 case, but we'll look for the "begin" line first. */
    dynamic_buffer buf;
    char *src;
    INT32 cnt;

    src = sp[-1].u.string->str;
    cnt = sp[-1].u.string->len;

    /* cnt always counts the characters from src onwards.  It ends up
       as -1 if the loop runs off the end of the input. */
    while (cnt--)
      if(*src++=='b' && cnt>5 && !memcmp(src, "egin ", 5))
        break;

    if (cnt>=0)
      /* We found the string "begin".  Now skip to EOL */
      while (cnt--)
        if (*src++=='\n')
          break;

    if (cnt<0) {
      /* Could not find "begin.*\n", return 0.  The output buffer has
         deliberately not been set up yet, so there is nothing to
         release on this path. */
      pop_n_elems( 1 );
      push_int( 0 );
      return;
    }

    /* There is something to decode; only now set up the output buffer */
    buf.s.str = NULL;
    initialize_buf( &buf );

    for (;;) {
      int l, g;

      /* If we run out of input, or the line starts with "end", we are done */
      if (cnt<=0 || *src=='e')
        break;

      /* Get the length byte, calculate the number of groups, and
         check that we have sufficient data */
      l=(*src++-' ')&63;
      --cnt;
      g = (l+2)/3;
      /* l becomes minus the number of surplus bytes in the last group
         (0, -1 or -2) */
      l -= g*3;
      if ((cnt -= g*4) < 0)
        break;

      while (g--) {
        /* Read 24 bits of data */
        INT32 d = ((*src++-' ')&63)<<18;
        d |= ((*src++-' ')&63)<<12;
        d |= ((*src++-' ')&63)<<6;
        d |= ((*src++-' ')&63);
        /* Output it into the buffer */
        low_my_putchar( d>>16, &buf );
        low_my_putchar( d>>8, &buf );
        low_my_putchar( d, &buf );
      }

      /* If the line didn't contain an even multiple of 24 bits, remove
         spurious bytes from the buffer (one per iteration until l has
         counted up to 0) */
      while (l++)
        low_make_buf_space( -1, &buf );

      /* Skip to EOL */
      while (cnt-- && *src++!=10);
    }

    /* Return the result */
    pop_n_elems( 1 );
    push_string( low_free_buf( &buf ) );
  }
}
/* Helper for encode_uue(): turns groups*3 input bytes at *srcp into
 * uuencoded lines at *destp and advances both pointers.
 *
 * Each line starts with a length byte and holds at most 15 groups.  The
 * caller intends to append one more group holding `last' bytes itself,
 * so the first line that is not completely full gets `last' added to
 * its length byte (if all lines are full, a line consisting of only
 * that length byte is started).  Lines are separated by CR LF, but no
 * line break is written after the final line.
 */
static void do_uue_encode( INT32 groups, unsigned char **srcp, char **destp,
                           INT32 last )
{
  unsigned char *in = *srcp;
  char *out = *destp;

  while (groups || last) {
    /* Number of groups going onto this line, and the byte count that
       its length byte announces */
    int on_line = (groups < 15 ? groups : 15);
    int announced = 3*on_line;
    int k;

    if (on_line < 15) {
      /* Room left on this line: account for the caller's extra bytes */
      announced += last;
      last = 0;
    }
    *out++ = ' ' + announced;
    groups -= on_line;

    for (k = on_line; k; k--) {
      /* Assemble 24 bits and split them into four 6 bit digits */
      INT32 bits = (in[0] << 16) | (in[1] << 8) | in[2];
      int q0 = bits >> 18;
      int q1 = (bits >> 12) & 63;
      int q2 = (bits >> 6) & 63;
      int q3 = bits & 63;

      /* A zero digit is written as ` rather than as a space */
      *out++ = (q0 ? ' ' + q0 : '`');
      *out++ = (q1 ? ' ' + q1 : '`');
      *out++ = (q2 ? ' ' + q2 : '`');
      *out++ = (q3 ? ' ' + q3 : '`');
      in += 3;
    }

    if (groups || last) {
      /* Another line follows, so terminate this one */
      *out++ = 13;
      *out++ = 10;
    }
  }

  /* Hand the advanced positions back to the caller */
  *srcp = in;
  *destp = out;
}
/* MIME.encode_uue()
 *
 * Takes a data string and optionally a second argument from the Pike
 * stack and replaces them with the uuencoded form of the data: a
 * "begin 644 <filename>" line, the data lines, a line holding a single
 * ` and an "end" line, each terminated by CR LF.  The second argument
 * is used as the filename when it is a string; otherwise the name
 * "attachment" is used.
 */
static void f_encode_uue( INT32 args )
{
if (args != 1 && args != 2)
error( "Wrong number of arguments to MIME.encode_uue()\n" );
else if (sp[-args].type != T_STRING ||
(args == 2 && sp[-1].type != T_VOID && sp[-1].type != T_STRING &&
sp[-1].type != T_INT))
error( "Wrong type of argument to MIME.encode_uue()\n" );
else {
/* Encode string in sp[-args].u.string. If args == 2, there may be
a filename in sp[-1].u.string. If we don't get a filename, use
the generic filename "attachment"... */
char *dest, *filename = "attachment";
struct pike_string *str;
unsigned char *src = (unsigned char *) sp[-args].u.string->str;
/* Calculate number of 24 bit groups, and actual # of bytes in last grp */
INT32 groups = (sp[-args].u.string->len + 2)/3;
int last= (sp[-args].u.string->len - 1)%3 + 1;
/* Get the filename if provided.  Note that it is treated as a C string
below (strlen and %s), i.e. it ends at its first NUL character. */
if (args == 2 && sp[-1].type == T_STRING)
filename = sp[-1].u.string->str;
/* Allocate the space we need. This includes space for the actual
data, linebreaks and the "begin" and "end" lines (including filename):
4 bytes per group, 3 bytes (length byte, CR, LF) per line of up to 15
groups, and 20 bytes for "begin 644 " + CR LF (12) and the trailing
"`\r\nend\r\n" (8). */
str = begin_shared_string( groups*4 + ((groups + 14)/15)*3 +
strlen( filename ) + 20 );
dest = str->str;
/* Write the begin line containing the filename.  sprintf also stores a
terminating NUL after the line; that byte lies within the space
allocated above and is overwritten by whatever is written next. */
sprintf(dest, "begin 644 %s\r\n", filename);
dest += 12 + strlen(filename);
if (groups) {
/* Temporary storage for the last group, as we may have to read
an extra byte or two and don't want to get any page-faults. */
unsigned char tmp[3], *tmpp=tmp;
char *kp, k;
int i;
/* Encode all groups but the last one; the length bytes written here
already account for the `last' bytes that are appended below. */
do_uue_encode( groups-1, &src, &dest, last );
/* Copy the last group into temporary storage */
tmp[1] = tmp[2] = 0;
for (i = 0; i < last; i++)
tmp[i] = *src++;
/* Remember the address and contents of the last character written.
This will get overwritten by a fake length byte which we will
then replace with the original character */
k = *--dest;
kp = dest;
do_uue_encode( 1, &tmpp, &dest, 0 );
/* Restore the saved character */
*kp = k;
/* Replace final nulls with pad characters if necessary */
switch (last) {
case 1:
dest[-2] = '`';
case 2:
dest[-1] = '`';
}
/* Add a final linebreak after the last group */
*dest++ = 13;
*dest++ = 10;
}
/* Put a terminating line (length byte `) and the "end" line into buffer */
memcpy( dest, "`\r\nend\r\n", 8 );
/* Return the result */
pop_n_elems( args );
push_string( end_shared_string( str ) );
}
}
/* MIME.tokenize()
 *
 * Expects exactly one argument on the Pike stack: a string, or an array
 * whose first element is then used in its place.  The string is split
 * into lexical tokens according to the classes in rfc822ctype[] and the
 * argument is replaced by an array of the tokens:
 *
 *   - special characters, ')' and ']' become integers (the character),
 *   - atoms become strings,
 *   - quoted-strings become strings without the surrounding quotes and
 *     with \ escapes resolved,
 *   - domain literals become strings including the brackets, with
 *     \ escapes resolved,
 *   - comments (which may nest) and whitespace are dropped.
 *
 * A control character outside of quotes/brackets/comments raises an
 * error.  A quoted-string, domain literal or comment that is not closed
 * extends to the end of the input; no character beyond the end of the
 * input is ever examined or copied.
 */
static void f_tokenize( INT32 args )
{
  if (args != 1)
    error( "Wrong number of arguments to MIME.tokenize()\n" );

  if (sp[-1].type == T_ARRAY)
  {
    /* take first entry from array */
    struct array *a=sp[-1].u.array;
    if (a->size>0)
    {
      sp--;
      push_svalue(a->item+0);
      free_array(a);
    }
  }

  if (sp[-1].type != T_STRING)
    error( "Wrong type of argument to MIME.tokenize()\n" );
  else {
    /* Tokenize string in sp[-1].u.string.  We'll just push the
       tokens on the stack, and then do an aggregate_array just
       before exiting. */
    unsigned char *src = (unsigned char *)sp[-1].u.string->str;
    struct array *arr;
    struct pike_string *str;
    INT32 cnt = sp[-1].u.string->len, n = 0, l, e;
    char *p;

    while (cnt>0)
      switch (rfc822ctype[*src]) {
      case CT_SPECIAL:
      case CT_RBRACK:
      case CT_RPAR:
        /* Individual special character, push as a char (= int) */
        push_int( *src++ );
        n++;
        --cnt;
        break;

      case CT_ATOM:
        /* Atom, find length then push as a string */
        for (l=1; l<cnt; l++)
          if (rfc822ctype[src[l]] != CT_ATOM)
            break;
        push_string( make_shared_binary_string( (char *)src, l ) );
        n++;
        src += l;
        cnt -= l;
        break;

      case CT_QUOTE:
        /* Quoted-string, find length then push as a string while removing
           escapes.  A backslash only counts as an escape if there is a
           character after it; this keeps l from running past cnt. */
        for (e = 0, l = 1; l < cnt; l++)
          if (src[l] == '"')
            break;
          else
            if (src[l] == '\\' && l+1 < cnt) {
              e++;
              l++;
            }
        /* l is the distance to the ending " (or to the end of the input
           if there is none), and e is the number of \ escapes encountered
           on the way */
        str = begin_shared_string( l-e-1 );
        /* Copy the string and remove \ escapes, using the same rule for
           what constitutes an escape as the scan above */
        for (p = str->str, e = 1; e < l; e++)
          *p++ = ((src[e] == '\\' && e+1 < l)? src[++e] : src[e]);
        /* Push the resulting string */
        push_string( end_shared_string( str ) );
        n++;
        /* Also consume the closing quote, if there was one */
        if (l < cnt)
          l++;
        src += l;
        cnt -= l;
        break;

      case CT_LBRACK:
        /* Domain literal.  Handled just like quoted-string, except that
           ] marks the end of the token, not ", and that the brackets
           are kept in the result. */
        for (e = 0, l = 1; l < cnt; l++)
          if(src[l] == ']')
            break;
          else
            if(src[l] == '\\' && l+1 < cnt) {
              e++;
              l++;
            }
        /* Include the closing ] in the token if there was one.  After
           this, l is the number of input characters making up the token
           and e is the number of \ escapes among them */
        if (l < cnt)
          l++;
        str = begin_shared_string( l-e );
        /* Copy the literal and remove \ escapes */
        for (p = str->str, e = 0; e < l; e++)
          *p++ = ((src[e] == '\\' && e+1 < l)? src[++e] : src[e]);
        /* Push the resulting string */
        push_string( end_shared_string( str ) );
        n++;
        src += l;
        cnt -= l;
        break;

      case CT_LPAR:
        /* Comment.  Nested comments are allowed, so we'll use e to
           keep track of the nesting level. */
        for (e = 1, l = 1; l < cnt; l++)
          if (src[l] == '(')
            /* One level deeper nesting */
            e++;
          else if(src[l] == ')') {
            /* End of comment level.  If nesting reaches 0, we're done */
            if(!--e)
              break;
          } else
            /* Skip escaped characters */
            if(src[l] == '\\')
              l++;
        /* Skip the comment altogether: past the closing ) if it was
           found, otherwise exactly to the end of the input */
        if (l < cnt)
          l++;
        else
          l = cnt;
        src += l;
        cnt -= l;
        break;

      case CT_WHITE:
        /* Whitespace, just ignore it */
        src++;
        --cnt;
        break;

      default:
        error( "Invalid character in header field\n" );
      }

    /* Create the resulting array and push it */
    arr = aggregate_array( n );
    pop_n_elems( 1 );
    push_array( arr );
  }
}
/* Helper for quote(): decides whether the len characters at str can be
 * written as an atom, i.e. without surrounding quotes.
 *
 * Returns 1 if str is non-empty and consists solely of characters below
 * 0x80 that rfc822ctype[] classifies as CT_ATOM, and 0 otherwise.
 */
static int check_atom_chars( unsigned char *str, INT32 len )
{
  INT32 i;

  /* An atom has at least one character */
  if (len < 1)
    return 0;

  /* The first unsuitable character settles the matter */
  for (i = 0; i < len; i++)
    if (str[i] >= 0x80 || rfc822ctype[str[i]] != CT_ATOM)
      return 0;

  return 1;
}
/* MIME.quote()
 *
 * Expects exactly one array on the Pike stack and replaces it with a
 * string built from its elements: an integer element is written as the
 * single character it denotes, a string element is written as it is if
 * check_atom_chars() accepts it and as a quoted-string otherwise.  Two
 * string elements in a row are separated by one space.  Any other
 * element type raises an error.
 */
static void f_quote( INT32 args )
{
  struct svalue *elem;
  INT32 left;
  dynamic_buffer out;
  int after_string = 0;

  if (args != 1)
    error( "Wrong number of arguments to MIME.quote()\n" );
  else if (sp[-1].type != T_ARRAY)
    error( "Wrong type of argument to MIME.quote()\n" );

  /* The output is collected in a dynamic buffer */
  out.s.str = NULL;
  initialize_buf( &out );

  elem = sp[-1].u.array->item;
  for (left = sp[-1].u.array->size; left--; elem++) {
    struct pike_string *s;

    if (elem->type == T_INT) {
      /* Single special character */
      low_my_putchar( elem->u.integer, &out );
      after_string = 0;
      continue;
    }

    if (elem->type != T_STRING) {
      /* Neither int nor string: release the buffer and give up */
      toss_buffer( &out );
      error( "Wrong type of argument to MIME.quote()\n" );
    }

    s = elem->u.string;

    /* Keep adjacent strings apart with a single space */
    if (after_string)
      low_my_putchar( ' ', &out );

    if (check_atom_chars((unsigned char *)s->str, s->len)) {
      /* Valid atom, no quotes needed */
      low_my_binary_strcat( s->str, s->len, &out );
    } else {
      /* Must be written as a quoted-string */
      INT32 i;

      low_my_putchar( '"', &out );
      for (i = 0; i < s->len; i++) {
        char c = s->str[i];
        /* ", \ and CR need a backslash even inside quotes */
        if (c == '"' || c == '\\' || c == '\r')
          low_my_putchar( '\\', &out );
        low_my_putchar( c, &out );
      }
      low_my_putchar( '"', &out );
    }
    after_string = 1;
  }

  /* Swap the argument for the result */
  pop_n_elems( 1 );
  push_string( low_free_buf( &out ) );
}