From 2e07c41450fc993c02621e478a11debc2d54c717 Mon Sep 17 00:00:00 2001 From: Tim Rowledge Date: Mon, 6 Sep 2021 15:24:59 -0700 Subject: [PATCH 1/2] Update BitBltArmSimdSourceWord.s to latest ben avison version Half a dozen small bug fixes from ben Co-Authored-By: bavison <3324657+bavison@users.noreply.github.com> --- .../BitBltPlugin/BitBltArmSimdSourceWord.s | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdSourceWord.s b/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdSourceWord.s index 27233f5d5f..bd50e3e8bf 100644 --- a/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdSourceWord.s +++ b/platforms/Cross/plugins/BitBltPlugin/BitBltArmSimdSourceWord.s @@ -528,7 +528,7 @@ counter SETA counter + 4 SourceWord GenerateFunctions 1, 4,, \ FLAG_COLOUR_MAP :OR: FLAG_DST_WRITEONLY :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_NO_EXPAND_SKEW, 2, \ - "stride_s,map,bitptrs,orig_w,scratch", \ + "stride_s,map,bitptrs,scratch,orig_w", \ "x,stride_s,bitptrs", orig_w,, init ; leading_pixels_reg=wk3 ; ******************************************************************** @@ -951,7 +951,7 @@ counter SETA counter + 4 MACRO SourceWord1_2_128bits_head $src, $fixed_skew, $intra_preloads - Read2Words src, 3, carry, $fixed_skew, skew, $wk0 + Read2Words src, 2, carry, $fixed_skew, skew, $wk0 MEND MACRO @@ -959,8 +959,8 @@ counter SETA counter + 4 LCLA counter counter SETA 0 WHILE counter < 16 - MSR CPSR_f, $wk3 - MOV $wk3, $wk3, LSL #4 + MSR CPSR_f, $wk2 + MOV $wk2, $wk2, LSL #4 ORRPL $wk0, ht, $wk0, LSL #2 ORRMI $wk0, ht_info, $wk0, LSL #2 ORRNE $wk0, ht, $wk0, LSL #2 @@ -973,8 +973,8 @@ counter SETA counter + 4 WEND counter SETA 0 WHILE counter < 16 - MSR CPSR_f, $wk3 - MOV $wk3, $wk3, LSL #4 + MSR CPSR_f, $wk2 + MOV $wk2, $wk2, LSL #4 ORRPL $wk1, ht, $wk1, LSL #2 ORRMI $wk1, ht_info, $wk1, LSL #2 ORRNE $wk1, ht, $wk1, LSL #2 @@ -987,8 +987,13 @@ counter SETA counter + 4 WEND counter SETA 0 WHILE counter < 16 + 
[ counter = 0 + MSR CPSR_f, $wk3 + MOV $wk4, $wk3, LSL #4 + | MSR CPSR_f, $wk4 MOV $wk4, $wk4, LSL #4 + ] ORRPL $wk2, ht, $wk2, LSL #2 ORRMI $wk2, ht_info, $wk2, LSL #2 ORRNE $wk2, ht, $wk2, LSL #2 @@ -5661,4 +5666,4 @@ SourceWord GenerateFunctions 0, 32, _scalar, \ ; ******************************************************************** - END + END \ No newline at end of file From 5c87395f34719e5251e2a8034ee1b26bff911cee Mon Sep 17 00:00:00 2001 From: Tim Rowledge Date: Mon, 6 Sep 2021 15:34:55 -0700 Subject: [PATCH 2/2] Update mvm files for ARMv8 assert and debug squeak.cog.spur builds These may not really need updating, since bitblt performance is not generally crucial when debugging. On the other hand, one should debug the same code as the production build. The differences in optimisation levels may need some discussion. --- building/linux64ARMv8/squeak.cog.spur/build.assert/mvm | 2 ++ building/linux64ARMv8/squeak.cog.spur/build.debug/mvm | 2 ++ 2 files changed, 4 insertions(+) diff --git a/building/linux64ARMv8/squeak.cog.spur/build.assert/mvm b/building/linux64ARMv8/squeak.cog.spur/build.assert/mvm index bd688299d5..9b1dc40bf4 100755 --- a/building/linux64ARMv8/squeak.cog.spur/build.assert/mvm +++ b/building/linux64ARMv8/squeak.cog.spur/build.assert/mvm @@ -3,6 +3,7 @@ set -e # assert Cog Spur VM with VM profiler and threaded heartbeat INSTALLDIR=assert/sqcogspur64ARMv8linuxht # armv8.N-a all fail in signalSemaphoreWithIndex for N in 1,2,3,4,5 +# tpr - add enablementisation of fast bitblt ben avison code MACHINE="-march=armv8-a -mtune=cortex-a72" OPT="-g3 -O1 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -DDEBUGVM=0" @@ -21,6 +22,7 @@ test -f plugins.ext || (test -f ../plugins.ext && cp -p ../plugins.ext . 
|| cp - test -f config.h || ../../../../platforms/unix/config/configure \ --with-vmversion=5.0 --with-src=src/spur64.cog \ --without-vm-display-fbdev --without-npsqueak \ + --enable-fast-bitblt \ CFLAGS="$MACHINE $OPT -DCOGMTVM=0 -DDUAL_MAPPED_CODE_ZONE=1" \ LIBS="-lrt" diff --git a/building/linux64ARMv8/squeak.cog.spur/build.debug/mvm b/building/linux64ARMv8/squeak.cog.spur/build.debug/mvm index fd2e5a943a..01e7e51b10 100755 --- a/building/linux64ARMv8/squeak.cog.spur/build.debug/mvm +++ b/building/linux64ARMv8/squeak.cog.spur/build.debug/mvm @@ -3,6 +3,7 @@ set -e # debug Cog Spur VM with VM profiler and threaded heartbeat INSTALLDIR=debug/sqcogspur64ARMv8linuxht # armv8.N-a all fail in signalSemaphoreWithIndex for N in 1,2,3,4,5 +# tpr - add enablementisation of fast bitblt ben avison code MACHINE="-march=armv8-a -mtune=cortex-a72" OPT="-g3 -O0 -DDEBUGVM=1 -DAIO_DEBUG=1" @@ -21,6 +22,7 @@ test -f plugins.ext || (test -f ../plugins.ext && cp -p ../plugins.ext . || cp - test -f config.h || ../../../../platforms/unix/config/configure \ --with-vmversion=5.0 --with-src=src/spur64.cog \ --without-vm-display-fbdev --without-npsqueak \ + --enable-fast-bitblt \ CFLAGS="$MACHINE $OPT -DCOGMTVM=0 -DDUAL_MAPPED_CODE_ZONE=1" \ LIBS="-lrt"