Skip to content

Instantly share code, notes, and snippets.

@dnaroma
Last active February 22, 2024 05:32
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dnaroma/1bfc901d95f777a340fcb615d6a96bd3 to your computer and use it in GitHub Desktop.
Save dnaroma/1bfc901d95f777a340fcb615d6a96bd3 to your computer and use it in GitHub Desktop.
Gen Proto BanG Dream

This is a Python 3 rewrite of esterTion's original PHP code. It only works with blobs from an Android APK!

Two versions of genProto.py are provided: the first is for old versions, the second is for new versions with arm64 blobs.

from struct import unpack
import re
import sys
# --- Shared state used by every function in this script -------------------
# dump.cs is read fully into memory; readClass searches this text.
# The explicit encoding keeps decoding independent of the platform's default
# locale, and the with-block closes the handle as soon as the text is loaded.
with open('dump.cs', encoding='utf8') as dumpFile:
    dumpcs = dumpFile.read()
# The binary stays open for the whole run: getTag seeks into it on demand.
prog = open('libil2cpp.so', 'rb')
# Names of the classes readClass has already visited.
definedClass = []
targetClass = sys.argv[1] #'SuiteMasterGetResponse' # change to get different classes
outputPath = './{}.proto'.format(targetClass)
outputFile = open(outputPath, 'w')
# write first line
outputFile.write('syntax = "proto2";\n')
# C# type name -> protobuf scalar type name.
typeMap = {
    'uint': 'uint32',
    'string': 'string',
    'ulong': 'uint64',
    'float': 'float',
    'int': 'int32',
    'double': 'double',
    'bool': 'bool',
    'long': 'int64'
}
def getTag(address):
    """Return the protobuf tag id loaded by an ARMv7 attribute stub.

    ``address`` is the offset that dump.cs reports for a
    ``ProtoMemberAttribute``; only its low 32 bits are used as the file
    position inside ``prog``. The stub found there loads the tag id with a
    ``MOV R1, #imm`` instruction. Two stub layouts are recognised by their
    first instruction word:

    * ``0xe5900004`` - the MOV is the next instruction (offset + 4).
    * ``0xe92d4c10`` - the MOV is the fourth instruction (offset + 12).

    Both layouts are decoded the same way: when the third byte of the MOV is
    ``0xA0`` the low 12 bits are an ARM "modified immediate" (an 8-bit value
    rotated right by twice the 4-bit rotation field, e.g. ``0xF4B`` -> 300);
    otherwise the low 12 bits are returned unchanged.

    Returns the tag id as an int, or ``None`` after printing the unknown
    instruction word and the address when the stub is not recognised.
    """
    offset = address & 0xFFFFFFFF
    prog.seek(offset)
    first = int.from_bytes(prog.read(4), byteorder='little', signed=False)
    if first == 0xe5900004:
        mov_offset = offset + 4
    elif first == 0xe92d4c10:
        mov_offset = offset + 12
    else:
        print(hex(first), hex(address))
        return None

    prog.seek(mov_offset)
    imm12 = int.from_bytes(prog.read(2), 'little', signed=False) & 0xfff
    rotate_flag = int.from_bytes(prog.read(1), 'little', signed=False)
    if rotate_flag != 0xA0:
        # Not the rotated-immediate form: the 12 bits are taken literally.
        return imm12

    # 4-bit rotation field counts in steps of two bits; 8-bit payload.
    rotation = 2 * (imm12 >> 8)
    imm8 = imm12 & 0xFF
    # 32-bit rotate right; with rotation == 0 this is just imm8.
    return ((imm8 >> rotation) | (imm8 << (32 - rotation))) & 0xFFFFFFFF
def rotr(num, bits):
    """Rotate ``num`` right by one position inside a ``bits``-wide word.

    ``num`` is first truncated to its low ``bits`` bits; the bit that drops
    off on the right comes back in as the most significant bit.
    """
    word = num & (2**bits - 1)
    carried_out = word & 1
    shifted = word >> 1
    return shifted | (1 << (bits - 1)) if carried_out else shifted
def writeMessage(target, message):
    """Write one ``message`` definition to ``outputFile``.

    ``target`` is the message name. ``message`` maps each field name to a
    four-item list ``[type, tag, hint, comment]``:

    * ``tag`` is either used as-is, or - when it is a string - parsed as a
      hexadecimal address and resolved through ``getTag``;
    * ``hint`` is the field label; for ``'map'`` the label is written
      directly in front of the type (``map<K, V>``), otherwise a space
      separates them;
    * the fourth item is not written.

    A field whose tag is ``None`` (``getTag`` could not decode it) is written
    as a ``//`` comment line instead of ``= None;`` so that the generated
    file stays parseable.
    """
    outputFile.write('message {} {{\n'.format(target))
    for item, info in message.items():
        typ, raw_tag, hint = info[0], info[1], info[2]
        tag = getTag(int(raw_tag, 16)) if isinstance(raw_tag, str) else raw_tag
        # For 'map' the type already starts with '<', so label and type fuse.
        declaration = '{}{} {}' if hint == 'map' else '{} {} {}'
        declaration = declaration.format(hint, typ, item)
        if tag is None:
            outputFile.write(' // {} = ?; (tag not resolved)\n'.format(declaration))
        elif hint == 'map':
            outputFile.write(' {}{} {} = {};\n'.format(hint, typ, item, tag))
        else:
            outputFile.write(' {} {} {} = {};\n'.format(hint, typ, item, tag))
    outputFile.write('}\n')
def readClass(level, target):
    """Emit the proto message for class ``target`` and the classes it uses.

    Searches ``dumpcs`` for the ``[ProtoContractAttribute]`` class named
    ``target``, converts each ``[ProtoMemberAttribute]`` member with
    ``jumpTyp`` (which calls back into this function for class-typed
    members) and writes the collected fields with ``writeMessage``. A class
    that is not found is reported on stdout and produces no message.

    ``level`` is only forwarded to ``jumpTyp``.

    Every class is processed once: a name already present in
    ``definedClass`` returns immediately. This also keeps classes that
    reference themselves (directly or through each other) from recursing
    without end.
    """
    if target in definedClass:
        return
    definedClass.append(target)

    # Raw strings keep the regex escapes out of Python's own string-escape
    # handling; re.escape keeps special characters in the name literal.
    classDef = re.search(
        r'\[ProtoContractAttribute\].*?\n.*?class ' + re.escape(target)
        + r' [^\{\}]*?\{((.*\n)*?)?\s+(//\s+Properties(.*\n)*?)?\s+(//\s+Methods(.*\n)*?)?\}\s*?',
        dumpcs)
    if not classDef:
        print('{} not found'.format(target))
        return

    # Groups used below: [1] attribute offset (hex), [2] member type,
    # [5] member name. Anything after the offset on the attribute line is
    # skipped, and the member line may be indented with tabs or spaces.
    propList = re.findall(
        r'(\[ProtoMemberAttribute\] //.*Offset: 0x([0-9A-F]+)[^\n]*\n[ \t]+\w+ ([^\ \<]+(\<(.*?)\>)?) ([^\ ;]+))',
        classDef[0])
    message = {}
    for prop in propList:
        typ = jumpTyp(level, prop[2], prop[5])
        message[typ[0]] = [typ[1], prop[1], typ[2], typ[3]]
    # Written after the loop so that referenced messages come out first.
    writeMessage(target, message)
def jumpTyp(level, typ, name):
    """Translate a C# member type into a proto field description.

    Returns ``[name, proto_type, label, kind]`` where ``label`` is
    ``'optional'``, ``'repeated'`` or ``'map'`` and ``kind`` is a note
    (``'array'``, ``'list'``, ``'dictionary'``, ``'nullable'``,
    ``'normal type'`` or ``'sub class'``).

    Types listed in ``typeMap`` are scalars. Any other type is treated as a
    class and passed to ``readClass`` so its message is generated too; this
    includes the value type of a dictionary, but only when that value type
    is not a scalar.

    ``level`` is only passed along on recursion.
    """
    if typ.endswith('[]'):
        element = jumpTyp(level + 2, typ[:-2], 'entry')
        return [name, element[1], 'repeated', 'array']

    if typ.startswith('Dictionary`'):
        subType = re.search(r'<(\w+), (\w+)>', typ)
        key_typ, value_typ = subType[1], subType[2]
        if value_typ not in typeMap:
            # Only class-typed values have a message of their own to emit.
            readClass(level + 1, value_typ)
        return [name, '<{}, {}>'.format(typeMap.get(key_typ, key_typ), typeMap.get(value_typ, value_typ)), 'map', 'dictionary']

    if typ.startswith('List`'):
        subType = re.search(r'<(\w+)>', typ)
        element = jumpTyp(level + 1, subType[1], 'entry')
        return [name, element[1], 'repeated', 'list']

    if typ.startswith('Nullable`'):
        subType = re.search(r'<(\w+)>', typ)
        # Same field as the wrapped type; only the note changes.
        inner = jumpTyp(level, subType[1], name)
        inner[3] = 'nullable'
        return inner

    if typ in typeMap:
        return [name, typeMap[typ], 'optional', 'normal type']

    readClass(level + 1, typ)
    return [name, typ, 'optional', 'sub class']
# Generate the .proto for the requested class, then close both files so the
# output is flushed to disk even when generation stops with an error.
try:
    readClass(0, targetClass)
finally:
    outputFile.close()
    prog.close()
from struct import unpack
import re
import sys
# --- Shared state used by every function in this script -------------------
# Whole text of dump.cs; readClass searches it.
with open('dump.cs', encoding="utf8") as dumpFile:
    dumpcs = dumpFile.read()
# Kept open for the whole run: getTag seeks into it on demand.
prog = open('libil2cpp.so', 'rb')
# Class names readClass has already come across.
definedClass = list()
# Class to export, e.g. 'SuiteMasterGetResponse'; change to get different classes
targetClass = sys.argv[1]
outputPath = './{}.proto'.format(targetClass)
outputFile = open(outputPath, 'w')
# The syntax declaration goes on the first line of the output.
outputFile.write('syntax = "proto2";\n')
# outputFile.write('package bang;\n')
# C# type name -> protobuf scalar type name.
# NOTE(review): 'ulong' and 'long' are mapped to 32-bit proto types here;
# values wider than 32 bits would presumably not survive - confirm that this
# is intentional.
typeMap = {
    'uint': 'uint32',
    'string': 'string',
    'ulong': 'uint32',
    'float': 'float',
    'int': 'int32',
    'double': 'double',
    'bool': 'bool',
    'long': 'int32',
}
def _decodeBitmaskImm(immr, imms):
    """Decode a 32-bit AArch64 logical (bitmask) immediate with N == 0.

    Returns the 32-bit value, or ``None`` for a reserved encoding.
    """
    # The position of the highest zero bit in imms selects the element size.
    length = (~imms & 0x3F).bit_length() - 1
    if length < 1:
        return None
    esize = 1 << length
    levels = esize - 1
    ones = imms & levels
    if ones == levels:
        # An element of all ones is not encodable.
        return None
    rotation = immr & levels
    element = (1 << (ones + 1)) - 1
    # Rotate right inside the element, then repeat it across 32 bits.
    element = ((element >> rotation) | (element << (esize - rotation))) & ((1 << esize) - 1)
    value = 0
    for shift in range(0, 32, esize):
        value |= element << shift
    return value


def getTag(address):
    """Return the protobuf tag id loaded by an arm64 attribute stub.

    ``address`` is the offset that dump.cs reports for a
    ``ProtoMemberAttribute``; only its low 32 bits are used as the file
    position inside ``prog``. Two stub layouts are recognised by their first
    instruction word:

    * ``0xf9400408`` - the tag is loaded by the next instruction
      (offset + 4).
    * ``0xf81e0ff3`` - the tag is loaded by the fifth instruction
      (offset + 16).

    The loading instruction may be ``MOVZ Wd, #imm16{, LSL #16}`` or
    ``ORR Wd, Wn, #bitmask`` (top byte ``0x32``). The bitmask immediate is
    decoded in full: element size, rotation inside the element and
    replication across the 32-bit register.

    Returns the tag id as an int. Returns ``None`` after printing the
    offending instruction word and the address when either the stub or the
    loading instruction is not recognised.
    """
    offset = address & 0xFFFFFFFF
    prog.seek(offset)
    first = int.from_bytes(prog.read(4), byteorder='little', signed=False)
    if first == 0xf9400408:
        mov_offset = offset + 4
    elif first == 0xf81e0ff3:
        mov_offset = offset + 16
    else:
        print(hex(first), hex(address))
        return None

    prog.seek(mov_offset)
    inst = int.from_bytes(prog.read(4), 'little', signed=False)
    tag = None
    if inst & 0xFFC00000 == 0x52800000:
        # MOVZ (32-bit): imm16 in bits 5-20, bit 21 selects LSL #16.
        tag = ((inst >> 5) & 0xFFFF) << (16 * ((inst >> 21) & 0x1))
    elif inst >> 24 == 0x32:
        # ORR (immediate, 32-bit): immr in bits 16-21, imms in bits 10-15.
        tag = _decodeBitmaskImm((inst >> 16) & 0x3F, (inst >> 10) & 0x3F)
    if tag is None:
        print(hex(inst), hex(address))
    return tag
def rotr(num, bits):
    """Return ``num`` rotated right by one bit within ``bits`` bits.

    Bits of ``num`` above the word width are discarded before rotating; the
    lowest bit wraps around to the top position.
    """
    num &= (2**bits - 1)
    low_bit, rest = num & 1, num >> 1
    if not low_bit:
        return rest
    return rest | (1 << (bits - 1))
def writeMessage(target, message):
    """Write one ``message`` definition to ``outputFile``.

    ``target`` is the message name. ``message`` maps each field name to a
    four-item list ``[type, tag, hint, comment]``:

    * ``tag`` is either used as-is, or - when it is a string - parsed as a
      hexadecimal address and resolved through ``getTag``;
    * ``hint`` is the field label; for ``'map'`` the label is written
      directly in front of the type (``map<K, V>``), otherwise a space
      separates them;
    * the fourth item is not written.

    A field whose tag is ``None`` (``getTag`` could not decode it) is written
    as a ``//`` comment line instead of ``= None;`` so that the generated
    file stays parseable.
    """
    outputFile.write('message {} {{\n'.format(target))
    for item, info in message.items():
        typ, raw_tag, hint = info[0], info[1], info[2]
        tag = getTag(int(raw_tag, 16)) if isinstance(raw_tag, str) else raw_tag
        # For 'map' the type already starts with '<', so label and type fuse.
        declaration = '{}{} {}' if hint == 'map' else '{} {} {}'
        declaration = declaration.format(hint, typ, item)
        if tag is None:
            outputFile.write(' // {} = ?; (tag not resolved)\n'.format(declaration))
        elif hint == 'map':
            outputFile.write(' {}{} {} = {};\n'.format(hint, typ, item, tag))
        else:
            outputFile.write(' {} {} {} = {};\n'.format(hint, typ, item, tag))
    outputFile.write('}\n')
def readClass(level, target):
    """Emit the proto message for class ``target`` and the classes it uses.

    Searches ``dumpcs`` for the ``[ProtoContractAttribute]`` class named
    ``target``, converts each ``[ProtoMemberAttribute]`` member with
    ``jumpTyp`` (which calls back into this function for class-typed
    members) and writes the collected fields with ``writeMessage``. A class
    that is not found is reported on stdout and produces no message.

    ``level`` is only forwarded to ``jumpTyp``.

    Every class is processed once: a name already present in
    ``definedClass`` returns immediately. This also keeps classes that
    reference themselves (directly or through each other) from recursing
    without end.
    """
    if target in definedClass:
        return
    definedClass.append(target)

    # Raw strings keep the regex escapes out of Python's own string-escape
    # handling; re.escape keeps special characters in the name literal.
    classDef = re.search(
        r'\[ProtoContractAttribute\].*?\n.*?class ' + re.escape(target)
        + r' [^\{\}]*?\{((.*\n)*?)?\s+(//\s+Properties(.*\n)*?)?\s+(//\s+Methods(.*\n)*?)?\}\s*?',
        dumpcs)
    if not classDef:
        print('{} not found'.format(target))
        return

    # Groups used below: [1] attribute offset (hex), [2] member type,
    # [5] member name. The rest of the attribute line after the offset is
    # optional, so the offset's last hex digit is never given up to it; the
    # member line may be indented with tabs or spaces.
    propList = re.findall(
        r'(\[ProtoMemberAttribute\] //.*Offset: 0x([0-9A-F]+)[^\n]*\n[ \t]+\w+ ([^\ \<]+(\<(.*?)\>)?) ([^\ ;]+))',
        classDef[0])
    message = {}
    for prop in propList:
        typ = jumpTyp(level, prop[2], prop[5])
        message[typ[0]] = [typ[1], prop[1], typ[2], typ[3]]
    # Written after the loop so that referenced messages come out first.
    writeMessage(target, message)
def jumpTyp(level, typ, name):
    """Translate a C# member type into a proto field description.

    Returns ``[name, proto_type, label, kind]`` where ``label`` is
    ``'optional'``, ``'repeated'`` or ``'map'`` and ``kind`` is a note
    (``'array'``, ``'list'``, ``'dictionary'``, ``'nullable'``,
    ``'normal type'`` or ``'sub class'``).

    Types listed in ``typeMap`` are scalars. Any other type is treated as a
    class and passed to ``readClass`` so its message is generated too; this
    includes the value type of a dictionary, but only when that value type
    is not a scalar.

    A ``Dictionary``/``List``/``Nullable`` prefix only counts as a generic
    when the ``<...>`` argument list is present as well, so a plain class
    whose name merely starts with one of these words is handled as a class.

    ``level`` is only passed along on recursion.
    """
    if typ.endswith('[]'):
        element = jumpTyp(level + 2, typ[:-2], 'entry')
        return [name, element[1], 'repeated', 'array']

    if typ.startswith('Dictionary'):
        subType = re.search(r'<(\w+), (\w+)>', typ)
        if subType:
            key_typ, value_typ = subType[1], subType[2]
            if value_typ not in typeMap:
                # Only class-typed values have a message of their own.
                readClass(level + 1, value_typ)
            return [name, '<{}, {}>'.format(typeMap.get(key_typ, key_typ), typeMap.get(value_typ, value_typ)), 'map', 'dictionary']

    if typ.startswith('List'):
        subType = re.search(r'<(\w+)>', typ)
        if subType:
            element = jumpTyp(level + 1, subType[1], 'entry')
            return [name, element[1], 'repeated', 'list']

    if typ.startswith('Nullable'):
        subType = re.search(r'<(\w+)>', typ)
        if subType:
            # Same field as the wrapped type; only the note changes.
            inner = jumpTyp(level, subType[1], name)
            inner[3] = 'nullable'
            return inner

    if typ in typeMap:
        return [name, typeMap[typ], 'optional', 'normal type']

    readClass(level + 1, typ)
    return [name, typ, 'optional', 'sub class']
# Generate the .proto for the requested class, then close both files so the
# output is flushed to disk even when generation stops with an error.
try:
    readClass(0, targetClass)
finally:
    outputFile.close()
    prog.close()
@esterTion
Copy link

esterTion commented Sep 18, 2018

Great analysis & rewrite! I just found the article by googling.
And yes, my code deals with the ARM64 Mach-O binary; for the Android ARM ELF binary, I just dropped an .so into IDA.
04 00 90 E5 function looks as

.text:01BD5348 sub_1BD5348
.text:01BD5348                 LDR             R0, [R0,#4]
.text:01BD534C                 MOV             R1, #1                        ; tag id
.text:01BD5350                 MOV             R2, #0
.text:01BD5354                 LDR             R0, [R0]
.text:01BD5358                 B               ProtoMemberAttribute$$.ctor
.text:01BD5358 ; End of function sub_1BD5348

compared to ios arm64

__text:0000000101320B5C sub_101320B5C                           ; DATA XREF: __const:000000010256B948↓o
__text:0000000101320B5C LDR             X8, [X0,#8]
__text:0000000101320B60 LDR             X0, [X8]
__text:0000000101320B64 MOV             W1, #1                    ; tag id
__text:0000000101320B68 MOV             X2, #0
__text:0000000101320B6C B               ProtoMemberAttribute$$.ctor
__text:0000000101320B6C ; End of function sub_101320B5C

And 10 4C 2D E9 function looks as

.text:01BAD68C sub_1BAD68C
.text:01BAD68C                 STMFD           SP!, {R4,R10,R11,LR}
.text:01BAD690                 ADD             R11, SP, #8
.text:01BAD694                 LDR             R0, [R0,#4]
.text:01BAD698                 MOV             R1, #4              ; tag id
.text:01BAD69C                 MOV             R2, #0
.text:01BAD6A0                 LDR             R4, [R0]
.text:01BAD6A4                 MOV             R0, R4
.text:01BAD6A8                 BL              ProtoMemberAttribute$$.ctor
.text:01BAD6AC                 MOV             R0, R4
.text:01BAD6B0                 MOV             R1, #1
.text:01BAD6B4                 MOV             R2, #0
.text:01BAD6B8                 LDMFD           SP!, {R4,R10,R11,LR}
.text:01BAD6BC                 B               ProtoMemberAttribute$$set_IsRequired
.text:01BAD6BC ; End of function sub_1BAD68C

compared to ios arm64

__text:00000001012DC8B8 sub_1012DC8B8                           ; DATA XREF: __const:0000000102557B50↓o
__text:00000001012DC8B8                 STP             X20, X19, [SP,#-0x10+var_10]!
__text:00000001012DC8BC                 STP             X29, X30, [SP,#0x10+var_s0]
__text:00000001012DC8C0                 ADD             X29, SP, #0x10
__text:00000001012DC8C4                 LDR             X8, [X0,#8]
__text:00000001012DC8C8                 LDR             X19, [X8]
__text:00000001012DC8CC                 MOV             W1, #3               ; tag id
__text:00000001012DC8D0                 MOV             X0, X19
__text:00000001012DC8D4                 MOV             X2, #0
__text:00000001012DC8D8                 BL              ProtoMemberAttribute$$.ctor
__text:00000001012DC8DC                 MOV             W1, #1
__text:00000001012DC8E0                 MOV             X0, X19
__text:00000001012DC8E4                 MOV             X2, #0
__text:00000001012DC8E8                 LDP             X29, X30, [SP,#0x10+var_s0]
__text:00000001012DC8EC                 LDP             X20, X19, [SP+0x10+var_10],#0x20
__text:00000001012DC8F0                 B               ProtoMemberAttribute$$set_IsRequired
__text:00000001012DC8F0 ; End of function sub_1012DC8B8

Also, about that 300 (0x12c) encoding problem: it's because of how ARM encodes immediate values.
As found in this article, those 12 bits are not just a straight immediate value, but are encoded as a 4-bit rotation and an 8-bit value.

In this 300 encoding case, the last 12 bits are 1111 0100 1011, which means 00000000 00000000 00000000 01001011 rotated to the right 0xf * 2 times (or to the left 2 times). That becomes 00000000 00000000 00000001 00101100, which is exactly 0x12c (300).

(I actually found this article when I was searching for arm64 immediate values, but it didn't help me, as arm64 uses a completely different encoding system from armv7, so I managed to find this.)


And another thing: I didn't know about the map syntax in proto. That's pretty new to me; otherwise I wouldn't have split the dict like that. But changing it seems like it would make my current code a lot messier (and it is already a mess), so I decided not to bother with it :P.

@dnaroma
Copy link
Author

dnaroma commented Oct 11, 2018

@esterTion I'm glad to hear from you!

I didn't do much analysis of the ARM code; I was just wondering how I could make a quick adaptation. Thanks for your reply, now I know why the tag id is there and how to get it from the rotated value.

As for the map syntax, it is a new syntax for proto2. It can make the decoded file somewhat cleaner. Your code is amazing, don't bother changing it :-P

@david8557
Copy link

david8557 commented Apr 25, 2021

@dnaroma I am using the latest Il2cppDumper 6.6.2 and your Python script does not work anymore. Can you please check it? Here are the dump.cs and libil2cpp files: https://www57.zippyshare.com/v/wA9CXUS0/file.html
I think problem is in
propList = re.findall('(\[ProtoMemberAttribute\] //.*Offset: 0x([0-9A-F]+)\n \w+ ([^\ \<]+(\<(.*?)\>)?) ([^\ ;]+))', classDef[0])

Or if you can tell me which version of Il2cppDumper you are using, that would help too. Thank you!

@dnaroma
Copy link
Author

dnaroma commented Dec 2, 2021

Hi @david8557 I can't remember the version number but from the revision history I can confirm it works with versions from late 2019 or early 2020. Make sure you have enabled everything in config.json.

@david8557
Copy link

Hi @dnaroma, thank you for your reply, I will try it out. Anyway, can you please show me how to edit the script to make it work with Google protobuf when you are free? It seems more popular than protobuf-net. Thanks a lot!

@dnaroma
Copy link
Author

dnaroma commented Dec 2, 2021

You can read the output dump.cs to know how the structure was defined and use disassembler to know how to get tag id from binary.

@david8557
Copy link

david8557 commented Dec 2, 2021

I have example code like this. Written manually, the proto would be as shown below. But I don't know the regex to find and extract the fields from this. Can you please help me?
proto

message ResGetPlayerInfos{
 UserPlayerInfo userPlayerInfos_=1;
 bool isLinkUUID_=2;
 repeated int32 chargeGoldList_=3;
}

This is from dump.cs

public sealed class ResGetPlayerInfos : IMessage<ResGetPlayerInfos>, IMessage, IEquatable<ResGetPlayerInfos>, IDeepCloneable<ResGetPlayerInfos> // TypeDefIndex: 10759
{
	// Fields
	private static readonly MessageParser<ResGetPlayerInfos> _parser; // 0x0
	private UnknownFieldSet _unknownFields; // 0x10
	public const int UserPlayerInfosFieldNumber = 1;
	private static readonly FieldCodec<UserPlayerInfo> _repeated_userPlayerInfos_codec; // 0x8
	private readonly RepeatedField<UserPlayerInfo> userPlayerInfos_; // 0x18
	public const int IsLinkUUIDFieldNumber = 2;
	private bool isLinkUUID_; // 0x20
	public const int ChargeGoldListFieldNumber = 3;
	private static readonly FieldCodec<int> _repeated_chargeGoldList_codec; // 0x10
	private readonly RepeatedField<int> chargeGoldList_; // 0x28

@dnaroma
Copy link
Author

dnaroma commented Dec 2, 2021

You can learn and test regex with regex101. It won't be much different with protobuf-net ones.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment