25 #include "lv_common.h"
/*
 * Forward declarations of the portable C implementations.
 * visual_mem_initialize() installs these before it assigns any
 * architecture-specific variant.
 */
/* Copy n bytes from src to dest (forwards to memcpy in the definition). */
32 static void *mem_copy_c (
void *dest,
const void *src, visual_size_t n);
/* Byte fill (forwards to memset in the definition). */
33 static void *mem_set8_c (
void *dest,
int c, visual_size_t n);
/* Fill based on the low 16 bits of c. */
34 static void *mem_set16_c (
void *dest,
int c, visual_size_t n);
/* Fill using c as a 32-bit pattern. */
35 static void *mem_set32_c (
void *dest,
int c, visual_size_t n);
/*
 * Row-by-row copy between buffers with separate pitches.
 * The fifth parameter is called width here but row_bytes in the definition.
 */
36 static void *mem_copy_pitch_c (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows);
40 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/*
 * Forward declarations of the x86 / x86-64 variants (inside the
 * VISUAL_ARCH_X86 / VISUAL_ARCH_X86_64 guard).
 * Naming: _mmx uses plain movq loads/stores, _mmx2 adds prefetchnta and
 * movntq stores, _3dnow adds the prefetch instruction.
 */
/* Linear copies of n bytes. */
41 static void *mem_copy_3dnow (
void *dest,
const void *src, visual_size_t n);
42 static void *mem_copy_mmx (
void *dest,
const void *src, visual_size_t n);
43 static void *mem_copy_mmx2 (
void *dest,
const void *src, visual_size_t n);
/* Row-by-row copies; dest advances by pitch1 and src by pitch2 per row. */
45 static void *mem_copy_pitch_mmx (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows);
46 static void *mem_copy_pitch_mmx2 (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows);
47 static void *mem_copy_pitch_3dnow (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows);
/* Fills based on an 8-bit value. */
49 static void *mem_set8_mmx (
void *dest,
int c, visual_size_t n);
50 static void *mem_set8_mmx2 (
void *dest,
int c, visual_size_t n);
/* Fills based on a 16-bit value. */
52 static void *mem_set16_mmx (
void *dest,
int c, visual_size_t n);
53 static void *mem_set16_mmx2 (
void *dest,
int c, visual_size_t n);
/* Fills based on a 32-bit value. */
55 static void *mem_set32_mmx (
void *dest,
int c, visual_size_t n);
56 static void *mem_set32_mmx2 (
void *dest,
int c, visual_size_t n);
/*
 * Chooses the implementations behind visual_mem_copy,
 * visual_mem_copy_pitch, visual_mem_set, visual_mem_set16 and
 * visual_mem_set32.
 *
 * The portable C routines are assigned first; on x86 / x86-64 builds the
 * MMX, 3DNow! and mmx2 routines are assigned afterwards.
 * NOTE(review): the conditions guarding each group of assignments are not
 * visible in this excerpt -- presumably CPU capability checks; confirm.
 */
69 void visual_mem_initialize ()
/* Baseline: portable C implementations. */
74 visual_mem_copy = mem_copy_c;
75 visual_mem_copy_pitch = mem_copy_pitch_c;
77 visual_mem_set = mem_set8_c;
78 visual_mem_set16 = mem_set16_c;
79 visual_mem_set32 = mem_set32_c;
81 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/* MMX group: replaces all five entry points. */
84 visual_mem_copy = mem_copy_mmx;
85 visual_mem_copy_pitch = mem_copy_pitch_mmx;
87 visual_mem_set = mem_set8_mmx;
88 visual_mem_set16 = mem_set16_mmx;
89 visual_mem_set32 = mem_set32_mmx;
/* 3DNow! group: only the two copy entry points have 3DNow! variants. */
95 visual_mem_copy = mem_copy_3dnow;
96 visual_mem_copy_pitch = mem_copy_pitch_3dnow;
/* mmx2 group: replaces all five entry points; assigned last. */
100 visual_mem_copy = mem_copy_mmx2;
101 visual_mem_copy_pitch = mem_copy_pitch_mmx2;
103 visual_mem_set = mem_set8_mmx2;
104 visual_mem_set16 = mem_set16_mmx2;
105 visual_mem_set32 = mem_set32_mmx2;
115 visual_return_val_if_fail (nbytes > 0, NULL);
117 buf = malloc (nbytes);
132 visual_return_val_if_fail (nbytes > 0, NULL);
136 visual_mem_set (buf, 0, nbytes);
143 return realloc (ptr, nbytes);
/*
 * Portable copy: forwards directly to memcpy().
 *
 * dest: destination buffer.
 * src:  source buffer; as with memcpy(), it must not overlap dest.
 * n:    number of bytes to copy.
 * Returns the value returned by memcpy(), i.e. dest.
 */
151 static void *mem_copy_c (
void *dest,
const void *src, visual_size_t n)
153 return memcpy(dest, src, n);
/*
 * Portable byte fill: forwards directly to memset().
 *
 * dest: buffer to fill.
 * c:    fill value; memset() converts it to unsigned char.
 * n:    number of bytes to fill.
 * Returns the value returned by memset(), i.e. dest.
 */
157 static void *mem_set8_c (
void *dest,
int c, visual_size_t n)
159 return memset(dest, c, n);
/*
 * Portable 16-bit fill. Only the pattern setup is visible in this excerpt;
 * the fill loops and the unit in which n is counted are not shown.
 */
163 static void *mem_set16_c (
void *dest,
int c, visual_size_t n)
/*
 * Tail of an initializer whose start is not visible here: places c in the
 * upper 16 bits (presumably of a 32-bit pattern holding c twice -- confirm).
 * NOTE(review): c is a signed int, so c << 16 can overflow when bit 15 or
 * above is set; casting to uint32_t before shifting would avoid that.
 */
169 ((c << 16) & 0xffff0000);
/* Low 16 bits of c as a single 16-bit element. */
170 uint16_t setflag16 = c & 0xffff;
/*
 * Portable 32-bit fill. Only the pattern setup is visible in this excerpt;
 * the fill loop and the unit in which n is counted are not shown.
 */
186 static void *mem_set32_c (
void *dest,
int c, visual_size_t n)
/* The fill pattern is c converted to an unsigned 32-bit value. */
189 uint32_t setflag32 = c;
/*
 * Portable pitched copy: copies `rows` rows of `row_bytes` bytes each,
 * using memcpy() per row.
 *
 * pitch1 / pitch2: not referenced in the visible lines; presumably the
 * byte strides of dest and src between rows (the MMX variants advance
 * dest by pitch1 and src by pitch2) -- confirm.
 * The forward declaration names the fifth parameter width instead of
 * row_bytes.
 */
199 static void *mem_copy_pitch_c (
void *dest,
const void *src,
int pitch1,
int pitch2,
int row_bytes,
int rows)
/* Byte-wise read cursor over the source. */
202 const uint8_t *s = src;
/* One memcpy per row; the per-row cursor advance is not visible here. */
205 for (i = 0; i < rows; i++) {
206 memcpy(d, s, row_bytes);
216 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/*
 * MMX copy from src to dest.
 *
 * Only part of the body is visible in this excerpt: the 64-byte block
 * transfer and the hand-off from the 32-bit cursor to the byte cursor.
 * The surrounding loops and the declaration of d are not shown.
 */
218 static void *mem_copy_mmx (
void *dest,
const void *src, visual_size_t n)
/* 32-bit and byte-sized read cursors over the source. */
221 const uint32_t *s = src;
223 const uint8_t *sc = src;
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
227 (
/* Load 64 bytes from s (%0) into mm0..mm7. */
"\n\t movq (%0), %%mm0"
228 "\n\t movq 8(%0), %%mm1"
229 "\n\t movq 16(%0), %%mm2"
230 "\n\t movq 24(%0), %%mm3"
231 "\n\t movq 32(%0), %%mm4"
232 "\n\t movq 40(%0), %%mm5"
233 "\n\t movq 48(%0), %%mm6"
234 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to d (%1). */
235 "\n\t movq %%mm0, (%1)"
236 "\n\t movq %%mm1, 8(%1)"
237 "\n\t movq %%mm2, 16(%1)"
238 "\n\t movq %%mm3, 24(%1)"
239 "\n\t movq %%mm4, 32(%1)"
240 "\n\t movq %%mm5, 40(%1)"
241 "\n\t movq %%mm6, 48(%1)"
242 "\n\t movq %%mm7, 56(%1)"
/*
 * No outputs; inputs are s and d in registers; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm
 * this is safe with the compilers in use.
 */
243 ::
"r" (s),
"r" (d) :
"memory");
/* Continue from where the 32-bit cursor stopped, now byte by byte. */
260 sc = (
const uint8_t *) s;
/*
 * mmx2 copy from src to dest: like the MMX copy, but prefetches the source
 * with prefetchnta and stores with movntq (non-temporal hint).
 *
 * Only part of the body is visible in this excerpt: the 64-byte block
 * transfer and the hand-off from the 32-bit cursor to the byte cursor.
 * The surrounding loops and the declaration of d are not shown.
 */
268 static void *mem_copy_mmx2 (
void *dest,
const void *src, visual_size_t n)
/* 32-bit and byte-sized read cursors over the source. */
271 const uint32_t *s = src;
273 const uint8_t *sc = src;
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
277 (
/* Prefetch source data 256 and 320 bytes ahead of s (%0). */
"\n\t prefetchnta 256(%0)"
278 "\n\t prefetchnta 320(%0)"
/* Load 64 bytes from s into mm0..mm7. */
279 "\n\t movq (%0), %%mm0"
280 "\n\t movq 8(%0), %%mm1"
281 "\n\t movq 16(%0), %%mm2"
282 "\n\t movq 24(%0), %%mm3"
283 "\n\t movq 32(%0), %%mm4"
284 "\n\t movq 40(%0), %%mm5"
285 "\n\t movq 48(%0), %%mm6"
286 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to d (%1) with non-temporal stores. */
287 "\n\t movntq %%mm0, (%1)"
288 "\n\t movntq %%mm1, 8(%1)"
289 "\n\t movntq %%mm2, 16(%1)"
290 "\n\t movntq %%mm3, 24(%1)"
291 "\n\t movntq %%mm4, 32(%1)"
292 "\n\t movntq %%mm5, 40(%1)"
293 "\n\t movntq %%mm6, 48(%1)"
294 "\n\t movntq %%mm7, 56(%1)"
/*
 * No outputs; inputs are s and d in registers; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm
 * this is safe with the compilers in use.
 */
295 ::
"r" (s),
"r" (d) :
"memory");
/* Continue from where the 32-bit cursor stopped, now byte by byte. */
312 sc = (
const uint8_t *) s;
/*
 * 3DNow! copy from src to dest: like the MMX copy, but issues the prefetch
 * instruction on the source before each 64-byte block.
 *
 * Only part of the body is visible in this excerpt: the 64-byte block
 * transfer and the hand-off from the 32-bit cursor to the byte cursor.
 * The surrounding loops and the declaration of d are not shown.
 */
320 static void *mem_copy_3dnow (
void *dest,
const void *src, visual_size_t n)
/* 32-bit and byte-sized read cursors over the source. */
323 const uint32_t *s = src;
325 const uint8_t *sc = src;
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
329 (
/* Prefetch source data 256 and 320 bytes ahead of s (%0). */
"\n\t prefetch 256(%0)"
330 "\n\t prefetch 320(%0)"
/* Load 64 bytes from s into mm0..mm7. */
331 "\n\t movq (%0), %%mm0"
332 "\n\t movq 8(%0), %%mm1"
333 "\n\t movq 16(%0), %%mm2"
334 "\n\t movq 24(%0), %%mm3"
335 "\n\t movq 32(%0), %%mm4"
336 "\n\t movq 40(%0), %%mm5"
337 "\n\t movq 48(%0), %%mm6"
338 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to d (%1). */
339 "\n\t movq %%mm0, (%1)"
340 "\n\t movq %%mm1, 8(%1)"
341 "\n\t movq %%mm2, 16(%1)"
342 "\n\t movq %%mm3, 24(%1)"
343 "\n\t movq %%mm4, 32(%1)"
344 "\n\t movq %%mm5, 40(%1)"
345 "\n\t movq %%mm6, 48(%1)"
346 "\n\t movq %%mm7, 56(%1)"
/*
 * No outputs; inputs are s and d in registers; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm
 * this is safe with the compilers in use.
 */
347 ::
"r" (s),
"r" (d) :
"memory");
/* Continue from where the 32-bit cursor stopped, now byte by byte. */
364 sc = (
const uint8_t *) s;
/*
 * MMX pitched copy: processes `rows` rows, advancing dest by pitch1 bytes
 * and src by pitch2 bytes after each row.
 *
 * width: not referenced in the visible lines; presumably the number of
 * bytes per row (the C variant calls it row_bytes) -- confirm.
 * The declarations of d, i, n and inner_d, and the loop headers around the
 * block/word/byte copies, are not visible in this excerpt.
 */
373 static void *mem_copy_pitch_mmx (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows)
/* Row-start read cursor over the source. */
376 const uint32_t *s = src;
379 for (i = 0; i < rows; i++) {
/* Per-row cursors: 32-bit and byte views, starting at the row start. */
381 const uint32_t *inner_s;
382 uint8_t *inner_dc = (uint8_t*) d;
383 const uint8_t *inner_sc = (
const uint8_t*) s;
/*
 * Copy single bytes while VISUAL_ALIGNED(inner_dc, 4) is false and n > 4
 * (presumably until the destination is 4-byte aligned; the macro is
 * defined elsewhere).
 */
386 while (!VISUAL_ALIGNED(inner_dc, 4) && n > 4) {
387 *inner_dc++ = *inner_sc++;
/* Switch to the 32-bit cursors at the current position. */
391 inner_d = (uint32_t*) inner_dc;
392 inner_s = (
const uint32_t*) inner_sc;
394 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
397 (
/* Load 64 bytes from inner_s (%0) into mm0..mm7. */
"\n\t movq (%0), %%mm0"
398 "\n\t movq 8(%0), %%mm1"
399 "\n\t movq 16(%0), %%mm2"
400 "\n\t movq 24(%0), %%mm3"
401 "\n\t movq 32(%0), %%mm4"
402 "\n\t movq 40(%0), %%mm5"
403 "\n\t movq 48(%0), %%mm6"
404 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to inner_d (%1). */
405 "\n\t movq %%mm0, (%1)"
406 "\n\t movq %%mm1, 8(%1)"
407 "\n\t movq %%mm2, 16(%1)"
408 "\n\t movq %%mm3, 24(%1)"
409 "\n\t movq %%mm4, 32(%1)"
410 "\n\t movq %%mm5, 40(%1)"
411 "\n\t movq %%mm6, 48(%1)"
412 "\n\t movq %%mm7, 56(%1)"
/*
 * No outputs; inputs are inner_s and inner_d; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm.
 */
413 ::
"r" (inner_s),
"r" (inner_d) :
"memory");
/* Copy one 32-bit word (loop condition not visible here). */
426 *inner_d++ = *inner_s++;
/* Switch back to the byte cursors for the remainder of the row. */
430 inner_dc = (uint8_t*) inner_d;
431 inner_sc = (
const uint8_t*) inner_s;
/* Copy one byte (loop condition not visible here). */
434 *inner_dc++ = *inner_sc++;
/* Advance to the next row: dest by pitch1 bytes, src by pitch2 bytes. */
436 d = (uint32_t*)((uint8_t*) d + pitch1);
437 s = (
const uint32_t*)((
const uint8_t*) s + pitch2);
/*
 * mmx2 pitched copy: processes `rows` rows, advancing dest by pitch1 bytes
 * and src by pitch2 bytes after each row. The 64-byte block transfer
 * prefetches with prefetchnta and stores with movntq.
 *
 * width: not referenced in the visible lines; presumably the number of
 * bytes per row (the C variant calls it row_bytes) -- confirm.
 * The declarations of d, i, n and inner_d, and the loop headers around the
 * block/word/byte copies, are not visible in this excerpt.
 */
443 static void *mem_copy_pitch_mmx2 (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows)
/* Row-start read cursor over the source. */
446 const uint32_t *s = src;
449 for (i = 0; i < rows; i++) {
/* Per-row cursors: 32-bit and byte views, starting at the row start. */
451 const uint32_t *inner_s;
452 uint8_t *inner_dc = (uint8_t*) d;
453 const uint8_t *inner_sc = (
const uint8_t*) s;
/*
 * Copy single bytes while VISUAL_ALIGNED(inner_dc, 4) is false and n > 4
 * (presumably until the destination is 4-byte aligned; the macro is
 * defined elsewhere).
 */
456 while (!VISUAL_ALIGNED(inner_dc, 4) && n > 4) {
457 *inner_dc++ = *inner_sc++;
/* Switch to the 32-bit cursors at the current position. */
461 inner_d = (uint32_t*) inner_dc;
462 inner_s = (
const uint32_t*) inner_sc;
464 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
467 (
/* Prefetch source data 256 and 320 bytes ahead of inner_s (%0). */
"\n\t prefetchnta 256(%0)"
468 "\n\t prefetchnta 320(%0)"
/* Load 64 bytes from inner_s into mm0..mm7. */
469 "\n\t movq (%0), %%mm0"
470 "\n\t movq 8(%0), %%mm1"
471 "\n\t movq 16(%0), %%mm2"
472 "\n\t movq 24(%0), %%mm3"
473 "\n\t movq 32(%0), %%mm4"
474 "\n\t movq 40(%0), %%mm5"
475 "\n\t movq 48(%0), %%mm6"
476 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to inner_d (%1) with non-temporal stores. */
477 "\n\t movntq %%mm0, (%1)"
478 "\n\t movntq %%mm1, 8(%1)"
479 "\n\t movntq %%mm2, 16(%1)"
480 "\n\t movntq %%mm3, 24(%1)"
481 "\n\t movntq %%mm4, 32(%1)"
482 "\n\t movntq %%mm5, 40(%1)"
483 "\n\t movntq %%mm6, 48(%1)"
484 "\n\t movntq %%mm7, 56(%1)"
/*
 * No outputs; inputs are inner_s and inner_d; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm.
 */
485 ::
"r" (inner_s),
"r" (inner_d) :
"memory");
/* Copy one 32-bit word (loop condition not visible here). */
498 *inner_d++ = *inner_s++;
/* Switch back to the byte cursors for the remainder of the row. */
502 inner_dc = (uint8_t*) inner_d;
503 inner_sc = (
const uint8_t*) inner_s;
/* Copy one byte (loop condition not visible here). */
506 *inner_dc++ = *inner_sc++;
/* Advance to the next row: dest by pitch1 bytes, src by pitch2 bytes. */
508 d = (uint32_t*)((uint8_t*) d + pitch1);
509 s = (
const uint32_t*)((
const uint8_t*) s + pitch2);
/*
 * 3DNow! pitched copy: processes `rows` rows, advancing dest by pitch1
 * bytes and src by pitch2 bytes after each row. The 64-byte block transfer
 * issues the prefetch instruction on the source first.
 *
 * width: not referenced in the visible lines; presumably the number of
 * bytes per row (the C variant calls it row_bytes) -- confirm.
 * The declarations of d, i, n and inner_d, and the loop headers around the
 * block/word/byte copies, are not visible in this excerpt.
 */
515 static void *mem_copy_pitch_3dnow (
void *dest,
const void *src,
int pitch1,
int pitch2,
int width,
int rows)
/* Row-start read cursor over the source. */
518 const uint32_t *s = src;
521 for (i = 0; i < rows; i++) {
/* Per-row cursors: 32-bit and byte views, starting at the row start. */
523 const uint32_t *inner_s;
524 uint8_t *inner_dc = (uint8_t*) d;
525 const uint8_t *inner_sc = (
const uint8_t*) s;
/*
 * Copy single bytes while VISUAL_ALIGNED(inner_dc, 4) is false and n > 4
 * (presumably until the destination is 4-byte aligned; the macro is
 * defined elsewhere).
 */
528 while (!VISUAL_ALIGNED(inner_dc, 4) && n > 4) {
529 *inner_dc++ = *inner_sc++;
/* Switch to the 32-bit cursors at the current position. */
533 inner_d = (uint32_t*) inner_dc;
534 inner_s = (
const uint32_t*) inner_sc;
536 #if defined(VISUAL_ARCH_X86) || defined(VISUAL_ARCH_X86_64)
/* Inline-asm operand list: moves one 64-byte block through mm0..mm7. */
539 (
/* Prefetch source data 256 and 320 bytes ahead of inner_s (%0). */
"\n\t prefetch 256(%0)"
540 "\n\t prefetch 320(%0)"
/* Load 64 bytes from inner_s into mm0..mm7. */
541 "\n\t movq (%0), %%mm0"
542 "\n\t movq 8(%0), %%mm1"
543 "\n\t movq 16(%0), %%mm2"
544 "\n\t movq 24(%0), %%mm3"
545 "\n\t movq 32(%0), %%mm4"
546 "\n\t movq 40(%0), %%mm5"
547 "\n\t movq 48(%0), %%mm6"
548 "\n\t movq 56(%0), %%mm7"
/* Store the same 64 bytes to inner_d (%1). */
549 "\n\t movq %%mm0, (%1)"
550 "\n\t movq %%mm1, 8(%1)"
551 "\n\t movq %%mm2, 16(%1)"
552 "\n\t movq %%mm3, 24(%1)"
553 "\n\t movq %%mm4, 32(%1)"
554 "\n\t movq %%mm5, 40(%1)"
555 "\n\t movq %%mm6, 48(%1)"
556 "\n\t movq %%mm7, 56(%1)"
/*
 * No outputs; inputs are inner_s and inner_d; clobbers "memory".
 * NOTE(review): mm0..mm7 are written but not listed as clobbers -- confirm.
 */
557 ::
"r" (inner_s),
"r" (inner_d) :
"memory");
/* Copy one 32-bit word (loop condition not visible here). */
570 *inner_d++ = *inner_s++;
/* Switch back to the byte cursors for the remainder of the row. */
574 inner_dc = (uint8_t*) inner_d;
575 inner_sc = (
const uint8_t*) inner_s;
/* Copy one byte (loop condition not visible here). */
578 *inner_dc++ = *inner_sc++;
/* Advance to the next row: dest by pitch1 bytes, src by pitch2 bytes. */
580 d = (uint32_t*)((uint8_t*) d + pitch1);
581 s = (
const uint32_t*)((
const uint8_t*) s + pitch2);
/*
 * MMX byte fill.
 *
 * Visible here: the pattern setup, the register broadcast and one 64-byte
 * store block. The declaration of d, the loops around the store block and
 * any tail handling are not visible in this excerpt.
 */
587 static void *mem_set8_mmx (
void *dest,
int c, visual_size_t n)
/*
 * Tail of the setflag32 initializer (its first term is not visible here):
 * places the low byte of c in bytes 1, 2 and 3 of the 32-bit pattern.
 * NOTE(review): c is a signed int, so c << 24 can overflow when bit 7 or
 * above is set; casting to uint32_t before shifting would avoid that.
 */
593 ((c << 8) & 0xff00) |
594 ((c << 16) & 0xff0000) |
595 ((c << 24) & 0xff000000);
/* Low byte of c as a single 8-bit element. */
596 uint8_t setflag8 = c & 0xff;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
599 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
600 "\n\t movd (%0), %%mm1"
601 "\n\t psllq $32, %%mm1"
602 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
603 "\n\t movq %%mm0, %%mm2"
604 "\n\t movq %%mm0, %%mm1"
605 "\n\t movq %%mm2, %%mm3"
606 "\n\t movq %%mm1, %%mm4"
607 "\n\t movq %%mm0, %%mm5"
608 "\n\t movq %%mm2, %%mm6"
609 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
610 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes of the pattern to d (%0). */
614 (
"\n\t movq %%mm0, (%0)"
615 "\n\t movq %%mm1, 8(%0)"
616 "\n\t movq %%mm2, 16(%0)"
617 "\n\t movq %%mm3, 24(%0)"
618 "\n\t movq %%mm4, 32(%0)"
619 "\n\t movq %%mm5, 40(%0)"
620 "\n\t movq %%mm6, 48(%0)"
621 "\n\t movq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
622 ::
"r" (d) :
"memory");
/*
 * mmx2 byte fill: same pattern setup as the MMX variant, but the 64-byte
 * store block uses movntq (non-temporal hint).
 *
 * The declaration of d, the loops around the store block and any tail
 * handling are not visible in this excerpt.
 */
645 static void *mem_set8_mmx2 (
void *dest,
int c, visual_size_t n)
/*
 * Tail of the setflag32 initializer (its first term is not visible here):
 * places the low byte of c in bytes 1, 2 and 3 of the 32-bit pattern.
 * NOTE(review): c is a signed int, so c << 24 can overflow when bit 7 or
 * above is set; casting to uint32_t before shifting would avoid that.
 */
651 ((c << 8) & 0xff00) |
652 ((c << 16) & 0xff0000) |
653 ((c << 24) & 0xff000000);
/* Low byte of c as a single 8-bit element. */
654 uint8_t setflag8 = c & 0xff;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
657 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
658 "\n\t movd (%0), %%mm1"
659 "\n\t psllq $32, %%mm1"
660 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
661 "\n\t movq %%mm0, %%mm2"
662 "\n\t movq %%mm0, %%mm1"
663 "\n\t movq %%mm2, %%mm3"
664 "\n\t movq %%mm1, %%mm4"
665 "\n\t movq %%mm0, %%mm5"
666 "\n\t movq %%mm2, %%mm6"
667 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
668 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes to d (%0), non-temporal. */
672 (
"\n\t movntq %%mm0, (%0)"
673 "\n\t movntq %%mm1, 8(%0)"
674 "\n\t movntq %%mm2, 16(%0)"
675 "\n\t movntq %%mm3, 24(%0)"
676 "\n\t movntq %%mm4, 32(%0)"
677 "\n\t movntq %%mm5, 40(%0)"
678 "\n\t movntq %%mm6, 48(%0)"
679 "\n\t movntq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
680 ::
"r" (d) :
"memory");
/*
 * MMX 16-bit fill.
 *
 * Visible here: the pattern setup, the register broadcast and one 64-byte
 * store block. The declaration of d, the loops around the store block, the
 * unit in which n is counted and any tail handling are not visible.
 */
703 static void *mem_set16_mmx (
void *dest,
int c, visual_size_t n)
/*
 * Tail of the setflag32 initializer (its first term is not visible here):
 * places the low 16 bits of c in the upper half of the 32-bit pattern.
 * NOTE(review): c is a signed int, so c << 16 can overflow when bit 15 or
 * above is set; casting to uint32_t before shifting would avoid that.
 */
709 ((c << 16) & 0xffff0000);
/* Low 16 bits of c as a single 16-bit element. */
710 uint16_t setflag16 = c & 0xffff;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
713 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
714 "\n\t movd (%0), %%mm1"
715 "\n\t psllq $32, %%mm1"
716 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
717 "\n\t movq %%mm0, %%mm2"
718 "\n\t movq %%mm0, %%mm1"
719 "\n\t movq %%mm2, %%mm3"
720 "\n\t movq %%mm1, %%mm4"
721 "\n\t movq %%mm0, %%mm5"
722 "\n\t movq %%mm2, %%mm6"
723 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
724 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes of the pattern to d (%0). */
728 (
"\n\t movq %%mm0, (%0)"
729 "\n\t movq %%mm1, 8(%0)"
730 "\n\t movq %%mm2, 16(%0)"
731 "\n\t movq %%mm3, 24(%0)"
732 "\n\t movq %%mm4, 32(%0)"
733 "\n\t movq %%mm5, 40(%0)"
734 "\n\t movq %%mm6, 48(%0)"
735 "\n\t movq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
736 ::
"r" (d) :
"memory");
/*
 * mmx2 16-bit fill: same pattern setup as the MMX variant, but the 64-byte
 * store block uses movntq (non-temporal hint).
 *
 * The declaration of d, the loops around the store block, the unit in
 * which n is counted and any tail handling are not visible in this excerpt.
 */
759 static void *mem_set16_mmx2 (
void *dest,
int c, visual_size_t n)
/*
 * Tail of the setflag32 initializer (its first term is not visible here):
 * places the low 16 bits of c in the upper half of the 32-bit pattern.
 * NOTE(review): c is a signed int, so c << 16 can overflow when bit 15 or
 * above is set; casting to uint32_t before shifting would avoid that.
 */
765 ((c << 16) & 0xffff0000);
/* Low 16 bits of c as a single 16-bit element. */
766 uint16_t setflag16 = c & 0xffff;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
769 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
770 "\n\t movd (%0), %%mm1"
771 "\n\t psllq $32, %%mm1"
772 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
773 "\n\t movq %%mm0, %%mm2"
774 "\n\t movq %%mm0, %%mm1"
775 "\n\t movq %%mm2, %%mm3"
776 "\n\t movq %%mm1, %%mm4"
777 "\n\t movq %%mm0, %%mm5"
778 "\n\t movq %%mm2, %%mm6"
779 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
780 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes to d (%0), non-temporal. */
784 (
"\n\t movntq %%mm0, (%0)"
785 "\n\t movntq %%mm1, 8(%0)"
786 "\n\t movntq %%mm2, 16(%0)"
787 "\n\t movntq %%mm3, 24(%0)"
788 "\n\t movntq %%mm4, 32(%0)"
789 "\n\t movntq %%mm5, 40(%0)"
790 "\n\t movntq %%mm6, 48(%0)"
791 "\n\t movntq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
792 ::
"r" (d) :
"memory");
/*
 * MMX 32-bit fill.
 *
 * Visible here: the pattern setup, the register broadcast and one 64-byte
 * store block. The declaration of d, the loops around the store block, the
 * unit in which n is counted and any tail handling are not visible.
 */
815 static void *mem_set32_mmx (
void *dest,
int c, visual_size_t n)
/* The fill pattern is c converted to an unsigned 32-bit value. */
818 uint32_t setflag32 = c;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
821 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
822 "\n\t movd (%0), %%mm1"
823 "\n\t psllq $32, %%mm1"
824 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
825 "\n\t movq %%mm0, %%mm2"
826 "\n\t movq %%mm0, %%mm1"
827 "\n\t movq %%mm2, %%mm3"
828 "\n\t movq %%mm1, %%mm4"
829 "\n\t movq %%mm0, %%mm5"
830 "\n\t movq %%mm2, %%mm6"
831 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
832 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes of the pattern to d (%0). */
836 (
"\n\t movq %%mm0, (%0)"
837 "\n\t movq %%mm1, 8(%0)"
838 "\n\t movq %%mm2, 16(%0)"
839 "\n\t movq %%mm3, 24(%0)"
840 "\n\t movq %%mm4, 32(%0)"
841 "\n\t movq %%mm5, 40(%0)"
842 "\n\t movq %%mm6, 48(%0)"
843 "\n\t movq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
844 ::
"r" (d) :
"memory");
/*
 * mmx2 32-bit fill: same pattern setup as the MMX variant, but the 64-byte
 * store block uses movntq (non-temporal hint).
 *
 * The declaration of d, the loops around the store block, the unit in
 * which n is counted and any tail handling are not visible in this excerpt.
 */
860 static void *mem_set32_mmx2 (
void *dest,
int c, visual_size_t n)
/* The fill pattern is c converted to an unsigned 32-bit value. */
863 uint32_t setflag32 = c;
/* Inline-asm operand list: broadcast setflag32 into all of mm0..mm7. */
866 (
/* mm0 = setflag32 in both 32-bit halves (low | low << 32). */
"\n\t movd (%0), %%mm0"
867 "\n\t movd (%0), %%mm1"
868 "\n\t psllq $32, %%mm1"
869 "\n\t por %%mm1, %%mm0"
/* Replicate mm0 into mm1..mm7. */
870 "\n\t movq %%mm0, %%mm2"
871 "\n\t movq %%mm0, %%mm1"
872 "\n\t movq %%mm2, %%mm3"
873 "\n\t movq %%mm1, %%mm4"
874 "\n\t movq %%mm0, %%mm5"
875 "\n\t movq %%mm2, %%mm6"
876 "\n\t movq %%mm1, %%mm7"
/* No outputs; input is the address of setflag32; clobbers "memory". */
877 ::
"r" (&setflag32) :
"memory");
/* Inline-asm operand list: write 64 bytes to d (%0), non-temporal. */
881 (
"\n\t movntq %%mm0, (%0)"
882 "\n\t movntq %%mm1, 8(%0)"
883 "\n\t movntq %%mm2, 16(%0)"
884 "\n\t movntq %%mm3, 24(%0)"
885 "\n\t movntq %%mm4, 32(%0)"
886 "\n\t movntq %%mm5, 40(%0)"
887 "\n\t movntq %%mm6, 48(%0)"
888 "\n\t movntq %%mm7, 56(%0)"
/* No outputs; input is d; clobbers "memory". */
889 ::
"r" (d) :
"memory");