diff --git a/src/acconfig.h b/src/acconfig.h index 91d671adecc5d708f7b4d3c87fde50ff3c7e5fd8..4a98e1329817e2755a7735de3ae1c558fb399f9f 100644 --- a/src/acconfig.h +++ b/src/acconfig.h @@ -1,5 +1,5 @@ /* - * $Id: acconfig.h,v 1.46 1999/08/10 00:18:58 mast Exp $ + * $Id: acconfig.h,v 1.47 1999/08/11 22:13:18 hubbe Exp $ */ #ifndef MACHINE_H #define MACHINE_H @@ -295,6 +295,9 @@ /* The last argument to accept() is an ACCEPT_SIZE_T * */ #define ACCEPT_SIZE_T int +/* Can we compile in MMX support? */ +#undef TRY_USE_MMX + @BOTTOM@ /* NT stuff */ diff --git a/src/configure.in b/src/configure.in index b9749426dbe5c8a7c3ba60b862f87a6fc318a903..09023aca179f774458c75ba3379080bff6ba5ef0 100644 --- a/src/configure.in +++ b/src/configure.in @@ -1,4 +1,4 @@ -AC_REVISION("$Id: configure.in,v 1.306 1999/08/10 12:35:19 grubba Exp $") +AC_REVISION("$Id: configure.in,v 1.307 1999/08/11 22:13:19 hubbe Exp $") AC_INIT(interpret.c) AC_CONFIG_HEADER(machine.h) @@ -1093,7 +1093,7 @@ thread.h dlfcn.h dld.h dl.h sys/times.h sched.h sys/procfs.h sys/param.h \ winsock.h sys/ioct.h sys/socket.h malloc.h netinet/in.h sys/wait.h winbase.h \ grp.h pwd.h passwd.h group.h winsock2.h signal.h sys/file.h poll.h sys/poll.h \ socket.h ieeefp.h fp_class.h floatingpoint.h sys/priocntl.h sched.h \ -windows.h errno.h stddef.h) +windows.h errno.h stddef.h mmx.h) AC_CHECK_SIZEOF(char *,4) AC_CHECK_SIZEOF(long,4) @@ -2235,6 +2235,28 @@ else AC_MSG_RESULT(not by cast) fi +############################################################################# +AC_MSG_CHECKING(Working MMX) + +AC_CACHE_VAL(pike_cv_sys_has_working_mmx, +[ +AC_TRY_LINK([ +#include <mmx.h> +],[ +{ + mmx_t a; + mmx_t b; + paddw(a,b); +} +], pike_cv_sys_has_working_mmx=yes,pike_cv_sys_has_working_mmx=no) +]) + +AC_MSG_RESULT($pike_cv_sys_has_working_mmx) + +if test "x$pike_cv_sys_has_working_mmx" = xyes ; then + AC_DEFINE(TRY_USE_MMX) +fi + ############################################################################# AC_MSG_CHECKING(if float conversion can cause SIGFPE) diff --git a/src/main.c b/src/main.c index fba091089396707a4dd4d326a495b3c4685c4d71..f5066fdbe9b6530be9c0d572d80ca5c17e8a7079 100644 --- a/src/main.c +++ b/src/main.c @@ -5,7 +5,7 @@ \*/ /**/ #include "global.h" -RCSID("$Id: main.c,v 1.73 1999/06/02 21:21:38 marcus Exp $"); +RCSID("$Id: main.c,v 1.74 1999/08/11 22:13:21 hubbe Exp $"); #include "fdlib.h" #include "backend.h" #include "module.h" @@ -44,6 +44,11 @@ RCSID("$Id: main.c,v 1.73 1999/06/02 21:21:38 marcus Exp $"); #include <sys/resource.h> #endif +#ifdef TRY_USE_MMX +#include <mmx.h> +int try_use_mmx; +#endif + char *master_file; char **ARGV; @@ -132,6 +137,10 @@ int dbm_main(int argc, char **argv) extern char **environ; #endif +#ifdef TRY_USE_MMX + try_use_mmx=mmx_ok(); +#endif + ARGV=argv; fd_init(); diff --git a/src/main.h b/src/main.h index c9d0dfb9ed34ac53489a63ddbba84832a7c5e0c1..930e94c9cbf2cc454c8102f530629d076f743269 100644 --- a/src/main.h +++ b/src/main.h @@ -5,7 +5,7 @@ \*/ /* - * $Id: main.h,v 1.9 1998/04/13 14:30:52 grubba Exp $ + * $Id: main.h,v 1.10 1999/08/11 22:13:22 hubbe Exp $ */ #ifndef MAIN_H #define MAIN_H @@ -15,6 +15,10 @@ extern int d_flag, t_flag, a_flag, l_flag, c_flag, p_flag, debug_options; extern int default_t_flag; +#ifdef TRY_USE_MMX +extern int try_use_mmx; +#endif + #define DEBUG_SIGNALS 1 #define NO_TAILRECURSION 2 diff --git a/src/modules/Image/layers.c b/src/modules/Image/layers.c index 5f1e260aecbd3454d8fd71bcae164957e0f24234..93d9364879510ecf51465499afda79416b2a0a6e 100644 --- a/src/modules/Image/layers.c +++ b/src/modules/Image/layers.c @@ -1,7 +1,7 @@ /* **! module Image **! note -**! $Id: layers.c,v 1.33 1999/08/10 12:57:36 mirar Exp $ +**! $Id: layers.c,v 1.34 1999/08/11 22:13:30 hubbe Exp $ **! class Layer **! see also: layers **! @@ -203,7 +203,7 @@ #include <math.h> /* floor */ -RCSID("$Id: layers.c,v 1.33 1999/08/10 12:57:36 mirar Exp $"); +RCSID("$Id: layers.c,v 1.34 1999/08/11 22:13:30 hubbe Exp $"); #include "image_machine.h" @@ -1397,6 +1397,8 @@ static void lm_normal(rgb_group *s,rgb_group *l,rgb_group *d, /* operators from template */ +#if 0 + #define LM_FUNC lm_add #define L_TRUNC(X) MINIMUM(255,(X)) #define L_OPER(A,B) ((A)+(int)(B)) @@ -1405,6 +1407,152 @@ static void lm_normal(rgb_group *s,rgb_group *l,rgb_group *d, #undef L_TRUNC #undef L_OPER +#else + +#define L_TRUNC(X) MINIMUM(255,(X)) +#define L_OPER(A,B) ((A)+(int)(B)) + +#ifdef TRY_USE_MMX +#include <mmx.h> +#endif + +static void lm_add(rgb_group *s,rgb_group *l,rgb_group *d, + rgb_group *sa,rgb_group *la,rgb_group *da, + int len,double alpha) +{ + if (alpha==0.0) + { + MEMCPY(d,s,sizeof(rgb_group)*len); + MEMCPY(da,sa,sizeof(rgb_group)*len); + return; + } + else if (alpha==1.0) + { + if (!la) /* no layer alpha => full opaque */ + { +#ifdef TRY_USE_MMX + extern int try_use_mmx; + if(try_use_mmx) + { + /* Strangely enough, this doesn't seem to make things + * any faster. Guess I should take a look at the generated + * assembler code... + * /Hubbe + */ + + int num=sizeof(rgb_group) * len; + unsigned char *source=(char *)s; + unsigned char *dest=(char *)d; + unsigned char *sourcel=(char *)l; + + while (num-->0 && (7&(int)dest)) + { + *dest=L_TRUNC(L_OPER(*source,*sourcel)); + source++; + sourcel++; + dest++; + } + + + while(num > 16) + { + movq_m2r(*source, mm0); + source+=8; + movq_m2r(*source, mm1); + source+=8; + paddusb_m2r(*sourcel, mm0); + sourcel+=8; + paddusb_m2r(*sourcel, mm1); + sourcel+=8; + movq_r2m(mm0,*dest); + dest+=8; + movq_r2m(mm1,*dest); + dest+=8; + num-=16; + } + emms(); + while (num-->0) + { + *dest=L_TRUNC(L_OPER(*source,*sourcel)); + source++; + sourcel++; + dest++; + } + } + else +#endif + { + while (len--) + { + d->r=L_TRUNC(L_OPER(s->r,l->r)); + d->g=L_TRUNC(L_OPER(s->g,l->g)); + d->b=L_TRUNC(L_OPER(s->b,l->b)); + *da=white; + l++; s++; sa++; da++; d++; + } + } + } + else + while (len--) + { + if (la->r==COLORMAX && la->g==COLORMAX && la->b==COLORMAX) + { + d->r=L_TRUNC(L_OPER(s->r,l->r)); + d->g=L_TRUNC(L_OPER(s->g,l->g)); + d->b=L_TRUNC(L_OPER(s->b,l->b)); + *da=white; + } + else if (la->r==0 && la->g==0 && la->b==0) + { + *d=*s; + *da=*sa; + } + else + { + d->r=L_TRUNC(L_OPER(s->r,l->r)); + ALPHA_ADD(s,d,d,sa,la,da,r); + d->g=L_TRUNC(L_OPER(s->g,l->g)); + ALPHA_ADD(s,d,d,sa,la,da,g); + d->b=L_TRUNC(L_OPER(s->b,l->b)); + ALPHA_ADD(s,d,d,sa,la,da,b); + } + l++; s++; la++; sa++; da++; d++; + } + } + else + { + if (!la) /* no layer alpha => full opaque */ + while (len--) + { + d->r=L_TRUNC(L_OPER(s->r,l->r)); + ALPHA_ADD_V_NOLA(s,d,d,sa,da,alpha,r); + d->g=L_TRUNC(L_OPER(s->g,l->g)); + ALPHA_ADD_V_NOLA(s,d,d,sa,da,alpha,g); + d->b=L_TRUNC(L_OPER(s->b,l->b)); + ALPHA_ADD_V_NOLA(s,d,d,sa,da,alpha,b); + l++; s++; sa++; da++; d++; + } + else + while (len--) + { + d->r=L_TRUNC(L_OPER(s->r,l->r)); + ALPHA_ADD_V(s,d,d,sa,la,da,alpha,r); + d->g=L_TRUNC(L_OPER(s->g,l->g)); + ALPHA_ADD_V(s,d,d,sa,la,da,alpha,g); + d->b=L_TRUNC(L_OPER(s->b,l->b)); + ALPHA_ADD_V(s,d,d,sa,la,da,alpha,b); + l++; s++; la++; sa++; da++; d++; + } + } +} + +#undef L_TRUNC +#undef L_OPER + +#endif + + + #define LM_FUNC lm_subtract #define L_TRUNC(X) MAXIMUM(0,(X)) #define L_OPER(A,B) ((A)-(int)(B)) @@ -2872,7 +3020,7 @@ void init_image_layers(void) char buf[100]; char buf2[sizeof(INT32)]; int i; - + for (i=0; i<LAYER_MODES; i++) layer_mode[i].ps=make_shared_string(layer_mode[i].name);