Skip to content

Instantly share code, notes, and snippets.

@dcondrey
Created March 23, 2024 23:57
Show Gist options
  • Save dcondrey/7fb4ee9d84cd41c7a745de5949ed4c1b to your computer and use it in GitHub Desktop.
Save dcondrey/7fb4ee9d84cd41c7a745de5949ed4c1b to your computer and use it in GitHub Desktop.
Identify repeating patterns in a .dat file
import struct
import re
from collections import Counter
class DatPatterns:
    """Find and display byte sequences that repeat inside a binary file.

    Typical use is ``DatPatterns(path).run()``: the file is read into memory,
    every fixed-size window of bytes is counted, and each window that repeats
    often enough is printed together with the bytes surrounding each of its
    occurrences.
    """

    def __init__(self, filepath):
        """Remember *filepath*; the file is not opened until it is needed."""
        self.filepath = filepath
        # Raw file bytes; stays None until read_file() has been called.
        self.content = None

    def read_file(self):
        """Load the whole file at ``self.filepath`` into ``self.content``.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(self.filepath, 'rb') as file:
            self.content = file.read()

    def _ensure_loaded(self):
        """Read the file on first use so the analysis methods work standalone."""
        if self.content is None:
            self.read_file()

    def find_repeating_patterns(self, pattern_size=4, threshold=3):
        """Count every ``pattern_size``-byte window and keep the frequent ones.

        Windows overlap (the scan advances one byte at a time). Windows made
        up entirely of NUL bytes are ignored, since runs of zero padding would
        otherwise dominate the result.

        Args:
            pattern_size: Length in bytes of each window; must be at least 1.
            threshold: A window is reported only when it occurs strictly more
                than this many times.

        Returns:
            A dict mapping each reported window (``bytes``) to its number of
            occurrences, in order of first appearance in the file.

        Raises:
            ValueError: if ``pattern_size`` is smaller than 1.
        """
        if pattern_size < 1:
            raise ValueError("pattern_size must be a positive integer")
        self._ensure_loaded()
        data = self.content

        # "+ 1" so the final window, which ends on the last byte of the data,
        # is counted too; the range is empty when the data is shorter than
        # one window.
        window_count = len(data) - pattern_size + 1
        windows = (data[start:start + pattern_size] for start in range(window_count))

        # any() is False only when every byte in the window is zero, so this
        # drops pure-NUL windows while keeping ones that merely start with NUL.
        pattern_counts = Counter(window for window in windows if any(window))

        # Only consider patterns that repeat more than a threshold to indicate
        # potential structure.
        return {
            pattern: count
            for pattern, count in pattern_counts.items()
            if count > threshold
        }

    def analyze_patterns(self, patterns, context_size=10):
        """Print every occurrence of each pattern with its surrounding bytes.

        Args:
            patterns: Mapping of pattern (``bytes``) to occurrence count, as
                returned by ``find_repeating_patterns``.
            context_size: Number of bytes shown before and after each
                occurrence (fewer near the start or end of the data).
        """
        self._ensure_loaded()
        data = self.content

        for pattern, count in patterns.items():
            print(f"\nAnalyzing Pattern {pattern.hex()} (occurrences: {count}):")
            pattern_pos = data.find(pattern)
            while pattern_pos != -1:
                pattern_end = pattern_pos + len(pattern)
                # max() keeps the slice start from going negative, which
                # would otherwise wrap around to the end of the data.
                context_before = data[max(0, pattern_pos - context_size):pattern_pos]
                context_after = data[pattern_end:pattern_end + context_size]
                print(f"At offset {pattern_pos}: {context_before.hex()} [{pattern.hex()}] {context_after.hex()}")
                # Advance by one byte (not by len(pattern)) so overlapping
                # occurrences are listed, matching how they were counted.
                pattern_pos = data.find(pattern, pattern_pos + 1)

    def run(self):
        """Read the file, find repeating patterns and print their contexts."""
        self.read_file()
        patterns = self.find_repeating_patterns()
        self.analyze_patterns(patterns)
if __name__ == "__main__":
    import sys

    # Analyse the file named by the first command-line argument; with no
    # argument, fall back to the macOS CoreEmoji data file this script was
    # originally written to inspect.
    default_path = "/System/Library/PrivateFrameworks/CoreEmoji.framework/Versions/A/Resources/emoji.dat"
    decoder = DatPatterns(sys.argv[1] if len(sys.argv) > 1 else default_path)
    decoder.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment