diff --git a/src/modules/image/blit.c b/src/modules/image/blit.c
index fbff9cf4cc2c7fd57a1332fe242cca999e0ef3e8..6c2988a3a8ed5a11656c762766485675179b6905 100644
--- a/src/modules/image/blit.c
+++ b/src/modules/image/blit.c
@@ -23,6 +23,30 @@ struct program *image_program;
 #define min(a,b) ((a)<(b)?(a):(b))
 #define max(a,b) ((a)<(b)?(b):(a))
 
+#if 0
+#include <sys/resource.h>
+#define CHRONO(X) chrono(X)
+
+static void chrono(const char *x)   /* static: matrix.c defines its own chrono() */
+{  /* Debug aid: print label x, total user CPU time and user time since the previous call to stderr. */
+   struct rusage r;
+   static struct rusage rold;   /* previous sample; starts zeroed, so the first delta equals the total */
+   getrusage(RUSAGE_SELF,&r);
+   fprintf(stderr,"%s: %ld.%06ld - %ld.%06ld\n",x,
+	   (long)r.ru_utime.tv_sec,(long)r.ru_utime.tv_usec,   /* casts: time_t/suseconds_t need not be long */
+	   /* user time since the previous call; borrow one second when the usec difference is negative */
+	   (long)(((r.ru_utime.tv_usec-rold.ru_utime.tv_usec<0)?-1:0)
+	   +r.ru_utime.tv_sec-rold.ru_utime.tv_sec),
+           (long)(((r.ru_utime.tv_usec-rold.ru_utime.tv_usec<0)?1000000:0)
+           + r.ru_utime.tv_usec-rold.ru_utime.tv_usec)
+	   );
+
+   rold=r;
+}
+#else
+#define CHRONO(X)
+#endif
+
 /***************** internals ***********************************/
 
 #define apply_alpha(x,y,alpha) \
@@ -93,12 +117,16 @@ void img_box_nocheck(INT32 x1,INT32 y1,INT32 x2,INT32 y2)
 void img_blit(rgb_group *dest,rgb_group *src,INT32 width,
 	      INT32 lines,INT32 moddest,INT32 modsrc)
 {
+CHRONO("image_blit begin");   /* timing probe; CHRONO() expands to nothing while the chrono block is under #if 0 */
+   /* Copy `lines` rows of `width` rgb_group elements; dest and src step by moddest and modsrc elements per row. */
    while (lines--)
    {
       MEMCPY(dest,src,sizeof(rgb_group)*width);
       dest+=moddest;
       src+=modsrc;
    }
+CHRONO("image_blit end");
+
 }
 
 void img_crop(struct image *dest,
@@ -241,7 +269,11 @@ void image_paste_alpha(INT32 args)
 void image_paste_mask(INT32 args)
 {
    struct image *img,*mask;
-   INT32 x1,y1,x,y,x2,y2;
+   INT32 x1,y1,x,y,x2,y2,smod,dmod,mmod;
+   rgb_group *s,*d,*m;
+   float q;
+
+CHRONO("image_paste_mask init");
 
    if (args<2)
       error("illegal number of arguments to image->paste_mask()\n");
@@ -273,19 +305,37 @@ void image_paste_mask(INT32 args)
    x2=min(THIS->xsize-x1,min(img->xsize,mask->xsize));
    y2=min(THIS->ysize-y1,min(img->ysize,mask->ysize));
 
-   for (x=max(0,-x1); x<x2; x++)
-      for (y=max(0,-y1); y<y2; y++)
+CHRONO("image_paste_mask begin");
+   /* s, m, d: first source, mask and destination element of the clipped region (row stride = xsize) */
+   s=img->img+max(0,-x1)+max(0,-y1)*img->xsize;
+   m=mask->img+max(0,-x1)+max(0,-y1)*mask->xsize;
+   d=THIS->img+max(0,-x1)+x1+(y1+max(0,-y1))*THIS->xsize;
+   x=max(0,-x1);
+   smod=img->xsize-(x2-x);   /* *mod: elements to skip after one clipped row to reach the start of the next */
+   mmod=mask->xsize-(x2-x);
+   dmod=THIS->xsize-(x2-x);
+   /* reciprocal of 255, so the per-channel blend below multiplies instead of dividing */
+   q=1.0/255;
+   /* rows are the outer loop, so s, m and d only ever move forward through their images */
+   for (y=max(0,-y1); y<y2; y++)
+   {
+      for (x=max(0,-x1); x<x2; x++)
       {
-	 pixel(THIS,x+x1,y+y1).r=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).r*(long)(255-pixel(mask,x,y).r)+
-			     pixel(img,x,y).r*(long)pixel(mask,x,y).r)/255);
-	 pixel(THIS,x+x1,y+y1).g=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).g*(long)(255-pixel(mask,x,y).g)+
-			     pixel(img,x,y).g*(long)pixel(mask,x,y).g)/255);
-	 pixel(THIS,x+x1,y+y1).b=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).b*(long)(255-pixel(mask,x,y).b)+
-			     pixel(img,x,y).b*(long)pixel(mask,x,y).b)/255);
+	 if (m->r==255) d->r=s->r;   /* mask 255: take the source channel unchanged */
+	 else if (m->r==0) ;         /* mask 0: keep the destination channel (was a self-assignment) */
+	 else d->r=(unsigned char)(((d->r*(255-m->r))+(s->r*m->r))*q);   /* weighted mix, q = 1/255 */
+	 if (m->g==255) d->g=s->g;
+	 else if (m->g==0) ;
+	 else d->g=(unsigned char)(((d->g*(255-m->g))+(s->g*m->g))*q);
+	 if (m->b==255) d->b=s->b;
+	 else if (m->b==0) ;
+	 else d->b=(unsigned char)(((d->b*(255-m->b))+(s->b*m->b))*q);
+	 s++; m++; d++;   /* advance all three images by one element */
       }
+      s+=smod; m+=mmod; d+=dmod;
+   }
+CHRONO("image_paste_mask end");
+
    pop_n_elems(args);
    THISOBJ->refs++;
    push_object(THISOBJ);
@@ -295,6 +345,9 @@ void image_paste_alpha_color(INT32 args)
 {
    struct image *img,*mask;
    INT32 x1,y1,x,y,x2,y2;
+   rgb_group rgb,*d,*m;
+   INT32 mmod,dmod;
+   float q;
 
    if (args!=1 && args!=4 && args!=6 && args!=3)
       error("illegal number of arguments to image->paste_alpha_color()\n");
@@ -330,19 +383,37 @@ void image_paste_alpha_color(INT32 args)
    x2=min(THIS->xsize-x1,mask->xsize);
    y2=min(THIS->ysize-y1,mask->ysize);
 
-   for (x=max(0,-x1); x<x2; x++)
-      for (y=max(0,-y1); y<y2; y++)
+CHRONO("image_paste_alpha_color begin");
+   /* m, d: first mask and destination element of the clipped region (row stride = xsize) */
+   m=mask->img+max(0,-x1)+max(0,-y1)*mask->xsize;
+   d=THIS->img+max(0,-x1)+x1+(y1+max(0,-y1))*THIS->xsize;
+   x=max(0,-x1);
+   mmod=mask->xsize-(x2-x);   /* *mod: elements to skip after one clipped row to reach the start of the next */
+   dmod=THIS->xsize-(x2-x);
+   /* reciprocal of 255, so the per-channel blend below multiplies instead of dividing */
+   q=1.0/255;
+   /* local copy of THIS->rgb, the colour that is blended in through the mask */
+   rgb=THIS->rgb;
+
+   for (y=max(0,-y1); y<y2; y++)
+   {
+      for (x=max(0,-x1); x<x2; x++)
       {
-	 pixel(THIS,x+x1,y+y1).r=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).r*(long)(255-pixel(mask,x,y).r)+
-			     THIS->rgb.r*(long)pixel(mask,x,y).r)/255);
-	 pixel(THIS,x+x1,y+y1).g=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).g*(long)(255-pixel(mask,x,y).g)+
-			     THIS->rgb.g*(long)pixel(mask,x,y).g)/255);
-	 pixel(THIS,x+x1,y+y1).b=
-            (unsigned char)((pixel(THIS,x+x1,y+y1).b*(long)(255-pixel(mask,x,y).b)+
-			     THIS->rgb.b*(long)pixel(mask,x,y).b)/255);
+	 if (m->r==255) d->r=rgb.r;
+	 else if (m->r==0) ;
+	 else d->r=(unsigned char)(((d->r*(255-m->r))+(rgb.r*m->r))*q);
+	 if (m->g==255) d->g=rgb.g;
+	 else if (m->g==0) ;
+	 else d->g=(unsigned char)(((d->g*(255-m->g))+(rgb.g*m->g))*q);
+	 if (m->b==255) d->b=rgb.b;
+	 else if (m->b==0) ;
+	 else d->b=(unsigned char)(((d->b*(255-m->b))+(rgb.b*m->b))*q);
+	 m++; d++;
       }
+      m+=mmod; d+=dmod;
+   }
+CHRONO("image_paste_alpha_color end");
+
    pop_n_elems(args);
    THISOBJ->refs++;
    push_object(THISOBJ);
diff --git a/src/modules/image/matrix.c b/src/modules/image/matrix.c
index 2153b3c36bdea8102e4a962647cc4d228f397846..ac0c651273f9c3be7f095ecf12caddc14539bd1c 100644
--- a/src/modules/image/matrix.c
+++ b/src/modules/image/matrix.c
@@ -23,6 +23,25 @@ struct program *image_program;
 #define min(a,b) ((a)<(b)?(a):(b))
 #define max(a,b) ((a)<(b)?(b):(a))
 
+#undef MATRIX_CHRONO
+#ifdef MATRIX_CHRONO
+#include <sys/resource.h>
+#define CHRONO(X) chrono(X)
+
+static void chrono(const char *x)   /* static: blit.c defines its own chrono() */
+{  /* Debug aid: print label x followed by user, system and combined CPU time to stderr. */
+   struct rusage r;
+   getrusage(RUSAGE_SELF,&r);
+   fprintf(stderr,"%s: %ld.%06ld %ld.%06ld %ld.%06ld\n",x,
+	   (long)r.ru_utime.tv_sec,(long)r.ru_utime.tv_usec,   /* casts: time_t/suseconds_t need not be long */
+	   (long)r.ru_stime.tv_sec,(long)r.ru_stime.tv_usec,
+	   (long)(r.ru_stime.tv_sec+r.ru_utime.tv_sec+(r.ru_stime.tv_usec+r.ru_utime.tv_usec)/1000000),
+	   (long)((r.ru_stime.tv_usec+r.ru_utime.tv_usec)%1000000));   /* usec sum can reach 1999998: carry it */
+}
+#else
+#define CHRONO(X)
+#endif
+
 /***************** internals ***********************************/
 
 #define apply_alpha(x,y,alpha) \
@@ -457,10 +476,12 @@ static void img_skewx(struct image *src,
    d=dest->img=malloc(sizeof(rgb_group)*dest->xsize*dest->ysize);
    if (!d) return;
    s=src->img;
-   
+
    xmod=diff/src->ysize;
    rgb=dest->rgb;
 
+   CHRONO("skewx begin");   /* no "\n" in the label: chrono() prints ": <times>" and the newline itself */
+
    y=src->ysize;
    while (y--)
    {
@@ -504,6 +525,8 @@ static void img_skewx(struct image *src,
       while (j--) *(d++)=rgb;
       x0+=xmod;
    }
+
+   CHRONO("skewx end");   /* no "\n" in the label: chrono() prints ": <times>" and the newline itself */
 }
 
 static void img_skewy(struct image *src,
@@ -531,6 +554,8 @@ static void img_skewy(struct image *src,
    ymod=diff/src->xsize;
    rgb=dest->rgb;
 
+CHRONO("skewy begin");   /* no "\n" in the label: chrono() prints ": <times>" and the newline itself */
+
    x=src->xsize;
    while (x--)
    {
@@ -576,6 +601,9 @@ static void img_skewy(struct image *src,
       d-=dest->ysize*xsz-1;
       y0+=ymod;
    }
+
+CHRONO("skewy end");   /* no "\n" in the label: chrono() prints ": <times>" and the newline itself */
+
 }
 
 void image_skewx(INT32 args)
@@ -707,11 +735,11 @@ void img_rotate(INT32 args,int xpn)
 
    dest2.img=d0.img=NULL;
 
-   if (angle<-135) angle-=360*(int)(angle/360);
-   else if (angle>225) angle-=360*(int)(angle/360);
+   if (angle<-135) angle-=360*(int)((angle-225)/360);   /* below the window: add whole turns, result lands in (-135,225] */
+   else if (angle>225) angle-=360*(int)((angle+135)/360);   /* above the window: subtract whole turns, result lands in [-135,225) */
    if (angle<-45) 
    { 
-      img_cw(THIS,&dest2); 
+      img_ccw(THIS,&dest2); 
       angle+=90; 
    }
    else if (angle>135) 
@@ -722,8 +750,8 @@ void img_rotate(INT32 args,int xpn)
    }
    else if (angle>45) 
    { 
-      img_ccw(THIS,&dest2);  
-      angle-=180; 
+      img_cw(THIS,&dest2);  
+      angle-=90; 
    }
    else dest2=*THIS;