Created
July 20, 2014 05:56
-
-
Save jackyyf/f6a7e9a7b54293f2bde5 to your computer and use it in GitHub Desktop.
Gist by paste.py @ 2014-07-20 13:56:54.026013
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len ints to dst.
# Return the number of positive ints (>0) contained in src.
#
# Include your name and ID here.
#
# 1. Do not create a new stack frame.
# 2. Use a loop unrolled 16 times (16 ints x 4 bytes per pass).
# 3. Use two branches back to the loop head to save one jump.
##################################################################
# Do not modify this portion
# Function prologue.
ncopy:
	# ncopy(src, dst, len): copy len 4-byte words from src to dst and
	# return in %eax how many of the copied words are > 0.
	# For len <= 0 nothing is copied and 0 is returned.
	#
	# Register roles:
	#   %ebx = src cursor        %ecx = dst cursor
	#   %edx = words remaining   %esi = word being copied
	#   %eax = result; starts at len and loses 1 per non-positive word
	#
	# No frame pointer is set up.  After the two pushes below the return
	# address sits at 8(%esp), so the arguments are at 12/16/20(%esp).
	pushl %esi			# Save callee-save regs
	pushl %ebx
	mrmovl 12(%esp),%ebx		# src
	mrmovl 20(%esp),%edx		# len
	mrmovl 16(%esp),%ecx		# dst
##################################################################
# You can modify this portion
	rrmovl %edx, %eax		# count = len (assume all positive)
	# Guard: a negative len would otherwise be scaled and used as a
	# negative offset into JTable, jumping to a garbage address.
	andl %edx, %edx
	jge FRONT			# normal case: len >= 0
	xorl %eax, %eax			# len < 0: copy nothing, return 0
	jmp Done

# Loop head: while at least 16 words remain, copy a full block;
# otherwise dispatch on the remainder (0..15) through JTable.
FRONT:
	iaddl $-16, %edx		# at least 16 words left?
	jge LEFT16			# yes: copy a full 16-word block
	iaddl $16, %edx			# no: restore the remainder (0..15)
	addl %edx, %edx
	addl %edx, %edx			# remainder * 4 = byte offset in JTable
	mrmovl JTable(%edx), %esi	# address of the matching tail entry
	pushl %esi			# Y86 has no indirect jump: push the
	ret				# target and "return" to it

# Full block: copy words 15..0 of the current 16-word window.
# Each step copies one word and, if it is not positive, removes it
# from the count.  BlkN = N words of this block still to copy.
LEFT16:
	mrmovl 0x3c(%ebx), %esi
	rmmovl %esi, 0x3c(%ecx)
	andl %esi, %esi
	jg Blk15
	iaddl $-1, %eax
Blk15:
	mrmovl 0x38(%ebx), %esi
	rmmovl %esi, 0x38(%ecx)
	andl %esi, %esi
	jg Blk14
	iaddl $-1, %eax
Blk14:
	mrmovl 0x34(%ebx), %esi
	rmmovl %esi, 0x34(%ecx)
	andl %esi, %esi
	jg Blk13
	iaddl $-1, %eax
Blk13:
	mrmovl 0x30(%ebx), %esi
	rmmovl %esi, 0x30(%ecx)
	andl %esi, %esi
	jg Blk12
	iaddl $-1, %eax
Blk12:
	mrmovl 0x2c(%ebx), %esi
	rmmovl %esi, 0x2c(%ecx)
	andl %esi, %esi
	jg Blk11
	iaddl $-1, %eax
Blk11:
	mrmovl 0x28(%ebx), %esi
	rmmovl %esi, 0x28(%ecx)
	andl %esi, %esi
	jg Blk10
	iaddl $-1, %eax
Blk10:
	mrmovl 0x24(%ebx), %esi
	rmmovl %esi, 0x24(%ecx)
	andl %esi, %esi
	jg Blk9
	iaddl $-1, %eax
Blk9:
	mrmovl 0x20(%ebx), %esi
	rmmovl %esi, 0x20(%ecx)
	andl %esi, %esi
	jg Blk8
	iaddl $-1, %eax
Blk8:
	mrmovl 0x1c(%ebx), %esi
	rmmovl %esi, 0x1c(%ecx)
	andl %esi, %esi
	jg Blk7
	iaddl $-1, %eax
Blk7:
	mrmovl 0x18(%ebx), %esi
	rmmovl %esi, 0x18(%ecx)
	andl %esi, %esi
	jg Blk6
	iaddl $-1, %eax
Blk6:
	mrmovl 0x14(%ebx), %esi
	rmmovl %esi, 0x14(%ecx)
	andl %esi, %esi
	jg Blk5
	iaddl $-1, %eax
Blk5:
	mrmovl 0x10(%ebx), %esi
	rmmovl %esi, 0x10(%ecx)
	andl %esi, %esi
	jg Blk4
	iaddl $-1, %eax
Blk4:
	mrmovl 0xc(%ebx), %esi
	rmmovl %esi, 0xc(%ecx)
	andl %esi, %esi
	jg Blk3
	iaddl $-1, %eax
Blk3:
	mrmovl 0x8(%ebx), %esi
	rmmovl %esi, 0x8(%ecx)
	andl %esi, %esi
	jg Blk2
	iaddl $-1, %eax
Blk2:
	mrmovl 0x4(%ebx), %esi
	rmmovl %esi, 0x4(%ecx)
	andl %esi, %esi
	jg Blk1
	iaddl $-1, %eax
Blk1:
	mrmovl (%ebx), %esi
	rmmovl %esi, (%ecx)
	iaddl $0x40, %ebx		# advance both cursors by 16 words
	iaddl $0x40, %ecx
	andl %esi, %esi			# tested last so the iaddl's above
					# cannot overwrite the flags
	jg FRONT			# positive: straight back to the head
	iaddl $-1, %eax
	jmp FRONT

# Tail: entered through JTable at LEFTn when n (1..15) words remain.
# Copies words n-1..0 of the final partial window and falls into Done.
LEFT15:
	mrmovl 0x38(%ebx), %esi
	rmmovl %esi, 0x38(%ecx)
	andl %esi, %esi
	jg LEFT14
	iaddl $-1, %eax
LEFT14:
	mrmovl 0x34(%ebx), %esi
	rmmovl %esi, 0x34(%ecx)
	andl %esi, %esi
	jg LEFT13
	iaddl $-1, %eax
LEFT13:
	mrmovl 0x30(%ebx), %esi
	rmmovl %esi, 0x30(%ecx)
	andl %esi, %esi
	jg LEFT12
	iaddl $-1, %eax
LEFT12:
	mrmovl 0x2c(%ebx), %esi
	rmmovl %esi, 0x2c(%ecx)
	andl %esi, %esi
	jg LEFT11
	iaddl $-1, %eax
LEFT11:
	mrmovl 0x28(%ebx), %esi
	rmmovl %esi, 0x28(%ecx)
	andl %esi, %esi
	jg LEFT10
	iaddl $-1, %eax
LEFT10:
	mrmovl 0x24(%ebx), %esi
	rmmovl %esi, 0x24(%ecx)
	andl %esi, %esi
	jg LEFT9
	iaddl $-1, %eax
LEFT9:
	mrmovl 0x20(%ebx), %esi
	rmmovl %esi, 0x20(%ecx)
	andl %esi, %esi
	jg LEFT8
	iaddl $-1, %eax
LEFT8:
	mrmovl 0x1c(%ebx), %esi
	rmmovl %esi, 0x1c(%ecx)
	andl %esi, %esi
	jg LEFT7
	iaddl $-1, %eax
LEFT7:
	mrmovl 0x18(%ebx), %esi
	rmmovl %esi, 0x18(%ecx)
	andl %esi, %esi
	jg LEFT6
	iaddl $-1, %eax
LEFT6:
	mrmovl 0x14(%ebx), %esi
	rmmovl %esi, 0x14(%ecx)
	andl %esi, %esi
	jg LEFT5
	iaddl $-1, %eax
LEFT5:
	mrmovl 0x10(%ebx), %esi
	rmmovl %esi, 0x10(%ecx)
	andl %esi, %esi
	jg LEFT4
	iaddl $-1, %eax
LEFT4:
	mrmovl 0xc(%ebx), %esi
	rmmovl %esi, 0xc(%ecx)
	andl %esi, %esi
	jg LEFT3
	iaddl $-1, %eax
LEFT3:
	mrmovl 0x8(%ebx), %esi
	rmmovl %esi, 0x8(%ecx)
	andl %esi, %esi
	jg LEFT2
	iaddl $-1, %eax
LEFT2:
	mrmovl 0x4(%ebx), %esi
	rmmovl %esi, 0x4(%ecx)
	andl %esi, %esi
	jg LEFT1
	iaddl $-1, %eax
LEFT1:
	# Last word of the call: the cursors are not advanced here because
	# nothing reads %ebx/%ecx again before the epilogue.
	mrmovl (%ebx), %esi
	rmmovl %esi, (%ecx)
	andl %esi, %esi
	jg Done
	iaddl $-1, %eax
Done:
	popl %ebx			# Restore callee-save registers
	popl %esi			# (reverse order of the pushes)
	ret
##################################################################
# Keep the following label at the end of your function
#
# JTable[n] is the entry point for a remainder of n words (0..15).
	.align 4
JTable:
	.long Done
	.long LEFT1
	.long LEFT2
	.long LEFT3
	.long LEFT4
	.long LEFT5
	.long LEFT6
	.long LEFT7
	.long LEFT8
	.long LEFT9
	.long LEFT10
	.long LEFT11
	.long LEFT12
	.long LEFT13
	.long LEFT14
	.long LEFT15
End:
#/* $end ncopy-ys */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment