Hi Warren,
I've always tried to avoid x86 and friends in favour of ARM, so I may be
wrong, but the run-up to the first of the two memcpy() calls looks the
same to me.  Here's the assembler, annotated with the register values,
given an RBP of 100, and the stack contents.  Good version first, bad
second.
rbp = 100
L29:
    movq -8(%rbp),%rax    rax = *92
    pushq %rax                                *92
    movq 16(%rbp),%rax    rax = *116
    pushq %rax                                *92 *116
    movq $64,%rax         rax = 64
    pushq %rax                                *92 *116 64
    movq 32(%rbp),%rax    rax = *132
    popq %rcx             rcx = 64            *92 *116
    addq %rcx,%rax        rax = 64+*132
    movq (%rax),%rax      rax = *(64+*132)
    pushq %rax                                *92 *116 *(64+*132)
    movq $40,%rax         rax = 40
    pushq %rax                                *92 *116 *(64+*132) 40
    movq 32(%rbp),%rax    rax = *132
    popq %rcx             rcx = 40            *92 *116 *(64+*132)
    addq %rcx,%rax        rax = 40+*132
    movq (%rax),%rax      rax = *(40+*132)
    popq %rcx             rcx = *(64+*132)    *92 *116
    addq %rcx,%rax        rax = *(64+*132)+*(40+*132)
    pushq %rax                                *92 *116 *(64+*132)+*(40+*132)
    call Cmemcpy
rbp = 100
L29:
    movq -8(%rbp),%r8     r8 = *92
    pushq %r8                                 *92
    movq 16(%rbp),%r8     r8 = *116
    pushq %r8                                 *92 *116
    movq $64,%r8          r8 = 64
    movq 32(%rbp),%r9     r9 = *132
    addq %r9,%r8          r8 = *132+64
    movq (%r8),%r8        r8 = *(*132+64)
    movq $40,%r9          r9 = 40
    movq 32(%rbp),%r10    r10 = *132
    addq %r10,%r9         r9 = *132+40
    movq (%r9),%r9        r9 = *(*132+40)
    addq %r9,%r8          r8 = *(*132+64)+*(*132+40)
    pushq %r8                                 *92 *116 *(*132+64)+*(*132+40)
    call Cmemcpy
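
If it helps to see it in C, here's my guess at what both sequences
compute.  Every name below is invented, and the argument order is only
inferred from the push order, but the offsets match the annotations
above: two quad fields at offsets 40 and 64 off the pointer at
32(%rbp), summed to give the length.

    #include <string.h>

    /* All names invented.  "s" stands in for the pointer at 32(%rbp);
     * "dst" and "src" stand in for the words at -8(%rbp) and 16(%rbp). */
    struct blob {
        char pad1[40];
        long n1;              /* read as *(40+*132) */
        char pad2[16];        /* 40 + 8 + 16 = 64 */
        long n2;              /* read as *(64+*132) */
    };

    void copy(void *dst, const void *src, const struct blob *s)
    {
        memcpy(dst, src, s->n1 + s->n2);
    }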
At a glance, the second memcpy() call looks equivalent too.
So perhaps it's not the calculation of memcpy()'s parameters that's
wrong, but the inputs to those calculations that are faulty?  I'd use
gdb(1) to break at particular instructions, examine memory, and so on,
working backwards through the bad version until spotting where good
data becomes bad.
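
Something along these lines, say.  ./bad is a stand-in for whatever
the miscompiled binary is called, and which slot holds the rot is
exactly what you'd be hunting for:

    $ gdb ./bad
    (gdb) break *Cmemcpy      # stop at the entry, before any prologue
    (gdb) run
    (gdb) x/3gx $rsp+8        # the three pushed words, in ascending
                              # address order: the length, *116, *92
    (gdb) x/gx $rbp-8         # caller's rbp is still live at the entry,
                              # so this is its -8(%rbp) slot
    (gdb) watch -l *(long *)($rbp - 8)    # trap whatever writes it next
    (gdb) continue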
--
Cheers, Ralph.