Skip to content

Instantly share code, notes, and snippets.

@jackyyf
Created July 20, 2014 05:56
Show Gist options
  • Save jackyyf/f6a7e9a7b54293f2bde5 to your computer and use it in GitHub Desktop.
Save jackyyf/f6a7e9a7b54293f2bde5 to your computer and use it in GitHub Desktop.
Gist by paste.py @ 2014-07-20 13:56:54.026013
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len ints to dst.
# Return the number of positive ints (>0) contained in src.
#
# Include your name and ID here.
#
# 1. DO NOT CREATE A NEW STACK.
# 2. Use a loop of 16 x 4.
# 3. Use two branches to save one jump.
##################################################################
# Do not modify this portion
# Function prologue.
ncopy:
# ----------------------------------------------------------------
# ncopy(src, dst, len): copy len 4-byte words from src to dst and
# return in %eax how many of the copied words are > 0.
# A len <= 0 copies nothing and returns 0.
#
# No frame pointer is set up; the arguments are read relative to
# %esp, and after the two pushes below they sit at 12/16/20(%esp).
#
# Register roles:
#   %ebx  src cursor (callee-save, restored at Done)
#   %ecx  dst cursor
#   %edx  elements still to copy; turned into a JTable byte offset
#         once fewer than 16 remain
#   %esi  element in flight, then the dispatch target
#         (callee-save, restored at Done)
#   %eax  result: starts at len and loses 1 for every element <= 0
#
# Shape: a 16-way unrolled main loop (LEFT16 .. EFT1) runs while at
# least 16 elements remain. The last 0..15 elements are handled by
# jumping into the LEFT15 .. LEFT1 chain through JTable. Each chain
# walks its elements from the highest offset down to offset 0.
# ----------------------------------------------------------------
	pushl	%esi			# Save callee-save regs
	pushl	%ebx
	mrmovl	12(%esp), %ebx		# src
	mrmovl	20(%esp), %edx		# len
	mrmovl	16(%esp), %ecx		# dst
##################################################################
# You can modify this portion
# Loop header
	rrmovl	%edx, %eax		# count = len; corrected downward below
# Reject a negative len before it can be used as a JTable index.
# jge is used (rather than jl to an error path) so that the common
# case, len >= 0, is the taken branch; the stock CS:APP PIPE design
# predicts conditional jumps as taken.
	andl	%edx, %edx
	jge	FRONT
	xorl	%eax, %eax		# len < 0: copy nothing, return 0
	jmp	Done
FRONT:
	iaddl	$-16, %edx		# At least 16 elements left?
	jge	LEFT16			# Yes: copy a full block of 16.
	iaddl	$16, %edx		# No: %edx = remainder, 0..15
	addl	%edx, %edx
	addl	%edx, %edx		# %edx x 4 = byte offset into JTable
	mrmovl	JTable(%edx), %esi
	pushl	%esi			# Indirect jump: push the target and
	ret				# "return" to it. Stack stays balanced.

# ---- Main loop body: 16 elements, offsets 0x3c down to 0x00 ----
# Pattern for every element: load, store, test the sign; if the
# value is not > 0, take it back out of the count in %eax.
LEFT16:
	mrmovl	0x3c(%ebx), %esi
	rmmovl	%esi, 0x3c(%ecx)
	andl	%esi, %esi
	jg	EFT15
	iaddl	$-1, %eax
EFT15:
	mrmovl	0x38(%ebx), %esi
	rmmovl	%esi, 0x38(%ecx)
	andl	%esi, %esi
	jg	EFT14
	iaddl	$-1, %eax
EFT14:
	mrmovl	0x34(%ebx), %esi
	rmmovl	%esi, 0x34(%ecx)
	andl	%esi, %esi
	jg	EFT13
	iaddl	$-1, %eax
EFT13:
	mrmovl	0x30(%ebx), %esi
	rmmovl	%esi, 0x30(%ecx)
	andl	%esi, %esi
	jg	EFT12
	iaddl	$-1, %eax
EFT12:
	mrmovl	0x2c(%ebx), %esi
	rmmovl	%esi, 0x2c(%ecx)
	andl	%esi, %esi
	jg	EFT11
	iaddl	$-1, %eax
EFT11:
	mrmovl	0x28(%ebx), %esi
	rmmovl	%esi, 0x28(%ecx)
	andl	%esi, %esi
	jg	EFT10
	iaddl	$-1, %eax
EFT10:
	mrmovl	0x24(%ebx), %esi
	rmmovl	%esi, 0x24(%ecx)
	andl	%esi, %esi
	jg	EFT9
	iaddl	$-1, %eax
EFT9:
	mrmovl	0x20(%ebx), %esi
	rmmovl	%esi, 0x20(%ecx)
	andl	%esi, %esi
	jg	EFT8
	iaddl	$-1, %eax
EFT8:
	mrmovl	0x1c(%ebx), %esi
	rmmovl	%esi, 0x1c(%ecx)
	andl	%esi, %esi
	jg	EFT7
	iaddl	$-1, %eax
EFT7:
	mrmovl	0x18(%ebx), %esi
	rmmovl	%esi, 0x18(%ecx)
	andl	%esi, %esi
	jg	EFT6
	iaddl	$-1, %eax
EFT6:
	mrmovl	0x14(%ebx), %esi
	rmmovl	%esi, 0x14(%ecx)
	andl	%esi, %esi
	jg	EFT5
	iaddl	$-1, %eax
EFT5:
	mrmovl	0x10(%ebx), %esi
	rmmovl	%esi, 0x10(%ecx)
	andl	%esi, %esi
	jg	EFT4
	iaddl	$-1, %eax
EFT4:
	mrmovl	0xc(%ebx), %esi
	rmmovl	%esi, 0xc(%ecx)
	andl	%esi, %esi
	jg	EFT3
	iaddl	$-1, %eax
EFT3:
	mrmovl	0x8(%ebx), %esi
	rmmovl	%esi, 0x8(%ecx)
	andl	%esi, %esi
	jg	EFT2
	iaddl	$-1, %eax
EFT2:
	mrmovl	0x4(%ebx), %esi
	rmmovl	%esi, 0x4(%ecx)
	andl	%esi, %esi
	jg	EFT1
	iaddl	$-1, %eax
EFT1:
	mrmovl	(%ebx), %esi
	rmmovl	%esi, (%ecx)
	iaddl	$0x40, %ebx		# Advance both cursors by 16 words
	iaddl	$0x40, %ecx
	andl	%esi, %esi		# Must come after the iaddls: they set
					# the condition codes too.
# Two exits back to FRONT instead of "jg skip / iaddl / skip: jmp":
# a positive element returns via the conditional jump alone.
	jg	FRONT
	iaddl	$-1, %eax
	jmp	FRONT

# ---- Remainder chain: entered through JTable at LEFTn to copy ----
# ---- the last n elements (offsets 4*(n-1) down to 0).         ----
LEFT15:
	mrmovl	0x38(%ebx), %esi
	rmmovl	%esi, 0x38(%ecx)
	andl	%esi, %esi
	jg	LEFT14
	iaddl	$-1, %eax
LEFT14:
	mrmovl	0x34(%ebx), %esi
	rmmovl	%esi, 0x34(%ecx)
	andl	%esi, %esi
	jg	LEFT13
	iaddl	$-1, %eax
LEFT13:
	mrmovl	0x30(%ebx), %esi
	rmmovl	%esi, 0x30(%ecx)
	andl	%esi, %esi
	jg	LEFT12
	iaddl	$-1, %eax
LEFT12:
	mrmovl	0x2c(%ebx), %esi
	rmmovl	%esi, 0x2c(%ecx)
	andl	%esi, %esi
	jg	LEFT11
	iaddl	$-1, %eax
LEFT11:
	mrmovl	0x28(%ebx), %esi
	rmmovl	%esi, 0x28(%ecx)
	andl	%esi, %esi
	jg	LEFT10
	iaddl	$-1, %eax
LEFT10:
	mrmovl	0x24(%ebx), %esi
	rmmovl	%esi, 0x24(%ecx)
	andl	%esi, %esi
	jg	LEFT9
	iaddl	$-1, %eax
LEFT9:
	mrmovl	0x20(%ebx), %esi
	rmmovl	%esi, 0x20(%ecx)
	andl	%esi, %esi
	jg	LEFT8
	iaddl	$-1, %eax
LEFT8:
	mrmovl	0x1c(%ebx), %esi
	rmmovl	%esi, 0x1c(%ecx)
	andl	%esi, %esi
	jg	LEFT7
	iaddl	$-1, %eax
LEFT7:
	mrmovl	0x18(%ebx), %esi
	rmmovl	%esi, 0x18(%ecx)
	andl	%esi, %esi
	jg	LEFT6
	iaddl	$-1, %eax
LEFT6:
	mrmovl	0x14(%ebx), %esi
	rmmovl	%esi, 0x14(%ecx)
	andl	%esi, %esi
	jg	LEFT5
	iaddl	$-1, %eax
LEFT5:
	mrmovl	0x10(%ebx), %esi
	rmmovl	%esi, 0x10(%ecx)
	andl	%esi, %esi
	jg	LEFT4
	iaddl	$-1, %eax
LEFT4:
	mrmovl	0xc(%ebx), %esi
	rmmovl	%esi, 0xc(%ecx)
	andl	%esi, %esi
	jg	LEFT3
	iaddl	$-1, %eax
LEFT3:
	mrmovl	0x8(%ebx), %esi
	rmmovl	%esi, 0x8(%ecx)
	andl	%esi, %esi
	jg	LEFT2
	iaddl	$-1, %eax
LEFT2:
	mrmovl	0x4(%ebx), %esi
	rmmovl	%esi, 0x4(%ecx)
	andl	%esi, %esi
	jg	LEFT1
	iaddl	$-1, %eax
LEFT1:
# Last element of the whole copy. The cursors are deliberately not
# advanced here: %ebx is overwritten by the popl at Done and %ecx is
# not read again, so bumping them would only cost cycles.
	mrmovl	(%ebx), %esi
	rmmovl	%esi, (%ecx)
	andl	%esi, %esi
	jg	Done
	iaddl	$-1, %eax
Done:
	popl	%ebx			# Restore callee-save registers
	popl	%esi
	ret
##################################################################
# Keep the following label at the end of your function
#
# Dispatch table for the final partial block of ncopy.
# The remainder path at FRONT multiplies the number of elements still
# to copy (0..15) by 4, loads the word at JTable + that offset and
# jumps to it. Entry r is therefore the code address that copies
# exactly r more elements; entry 0 goes straight to the epilogue.
# The entries must stay in this order, one 4-byte word each.
.align 4
JTable:
# 0 elements left: nothing to copy
.long Done
# 1..15 elements left: enter the remainder chain at LEFTr
.long LEFT1
.long LEFT2
.long LEFT3
.long LEFT4
.long LEFT5
.long LEFT6
.long LEFT7
.long LEFT8
.long LEFT9
.long LEFT10
.long LEFT11
.long LEFT12
.long LEFT13
.long LEFT14
.long LEFT15
# End-of-function marker required by the lab template.
# NOTE(review): because the table sits before End, its 64 bytes are
# presumably included in any size measured as End - ncopy; confirm
# against the grader's size limit.
End:
#/* $end ncopy-ys */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment