Menu

#985 Z80 an optimization of some simple returns

open
nobody
None
5
2025-12-14
2025-02-03
No

The following code:

/* Globals read or written by the test functions in this report. */
unsigned char t, x, y, z;

/* Declared only; no definitions are shown in this report. */
unsigned char f( void );
unsigned char g( unsigned char c );

/* Returns 1 when f(), g(x), g(y) and g(z) all return non-zero, otherwise 0.
 * The && operators short-circuit, so the remaining calls are skipped after
 * the first call that returns zero.
 * NOTE(review): the assembly listing and the text below refer to this
 * function as myf, so the name here is presumably a typo -- confirm. */
unsigned char my( void ) {
    /* NOTE(review): the listing below shows no store to t -- confirm whether
     * this assignment was part of the test case that was actually compiled. */
    t = 4;
    return f()
        && g( x )
        && g( y )
        && g( z );
}

produces using:

SDCC : mcs51/z80/z180/r2k/r2ka/r3ka/sm83/tlcs90/ez80_z80/z80n/r800/ds390/pic16/pic14/TININative/ds400/hc08/s08/stm8/pdk13/pdk14/pdk15/mos6502/mos65c02/f8 TD- 4.5.0 #15240 (MINGW64)

sdcc -mz80 --opt-code-speed --no-c-code-in-asm      --max-allocs-per-node20000000   -c   -o zout/check-speed.rel     check.c 

the following code:

_myf::
    call    _f
    or  a, a
    jr  Z, 00103$
    ld  a, (_x)
    call    _g
    or  a, a
    jr  Z, 00103$
    ld  a, (_y)
    call    _g
    or  a, a
    jr  Z, 00103$
    ld  a, (_z)
    call    _g
    or  a, a
    jr  NZ, 00104$
00103$:
    xor a, a
    ret
00104$:
    ld  a, #0x01
    ret

However, for the functionally equivalent but clumsier:

/* Early-return form of the same check: returns 0 as soon as one of f(),
 * g(x), g(y), g(z) returns zero (later calls are not made), and 1 when all
 * four return non-zero. */
unsigned char myf2( void ) {
    if ( !f() )
        return 0;
    if ( !g( x ) )
        return 0;
    if ( !g( y ) )
        return 0;
    if ( !g( z ) )
        return 0;
    return 1;
}

surprisingly, the compiler already knows how to emit ret Z when A is already zero (!):

_myf2::
    call    _f
    or  a,a
    ret Z
    ld  a, (_x)
    call    _g
    or  a,a
    ret Z
    ld  a, (_y)
    call    _g
    or  a,a
    ret Z
    ld  a, (_z)
    call    _g
    or  a,a
    ret Z
    ld  a, #0x01
    ret

In the original myf, the only peephole rule applied to the conditional jumps was:

; common peephole 163 changed absolute to relative conditional jump.

and in myf2 there were four (!):

    or  a,a
; common peephole 154a removed redundant zeroing of a (which has just been tested to be #0x00).
;ic:7:  __iffalse_0($2) :
;   genLabel
    ret Z
; common peephole 161 replaced jump by return.
; common peephole 81 removed jp by using inverse jump logic
; common peephole 159 removed unused label 00102$.

And this is what it looks like with --no-peep:

    call    _g
;   genMove_o size 1 result type 2 source type 2 hl_dead 1
;ic:10:     if iTemp1 [k6 lr8:9 so:0]{ ia0 a2p0 re0 rm0 nos0 ru0 dp0}{unsigned-char fixed}[a ] != 0 goto __iffalse_1($4)
;   genIfx
    or  a, a
    jp  NZ, 00104$
;check.c:40: return 0;
;ic:13:     ret 0x0 {unsigned-char literal}
;   genRet
;   genMove_o size 1 result type 2 source type 1 hl_dead 1
    xor a, a
    jp  00109$
;ic:14:  __iffalse_1($4) :
;   genLabel
00104$:

where the label 00109$ is followed by just a ret.

My guess is that for such an optimization to always happen, it would have to happen in some earlier stages.

Discussion


Log in to post a comment.