Skip to content

Instantly share code, notes, and snippets.

@5ec1cff
Created April 16, 2024 16:15
Show Gist options
  • Save 5ec1cff/bb723ade25ee53bae579756a827b7b38 to your computer and use it in GitHub Desktop.
Save 5ec1cff/bb723ade25ee53bae579756a827b7b38 to your computer and use it in GitHub Desktop.
Find zip entry by offset
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <utility>
// Result record returned by findEntryByOffset().
// A default-constructed value (empty name, offset -1, size 0) is what that
// function returns when it bails out early.
struct EntryInfo {
    // Entry name as stored in the central directory.
    std::string name{};
    // File offset at which the entry's data starts; -1 when no entry was found.
    off_t offset{-1};
    // Size taken from the central directory's uncompressed-size field.
    size_t size{0};
};
// Minimal printf-style logging helpers; everything goes to stdout.
// LOGD/LOGE print the formatted message followed by a newline.
#define LOGD(fmt, ...) printf(fmt "\n" __VA_OPT__(,) __VA_ARGS__)
#define LOGE LOGD
// PLOGE prints the formatted message followed by " failed with <errno> <strerror(errno)>".
// It terminates the line like LOGD/LOGE do, so consecutive log lines never run together.
#define PLOGE(fmt, ...) printf(fmt " failed with %d %s\n", __VA_ARGS__ __VA_OPT__(,) errno, strerror(errno))
// Move-only owner of a file descriptor: the descriptor is closed when the
// owner is destroyed or when it is overwritten by move assignment.
// A value of -1 (the default) means "owns nothing".
class UniqueFd {
    using Fd = int;

public:
    UniqueFd() = default;

    // Intentionally implicit so that `UniqueFd fd = open(...);` works.
    UniqueFd(Fd fd) noexcept : fd_(fd) {}

    ~UniqueFd() { closeOwned(); }

    // Disallow copy: two owners would close the same descriptor twice.
    UniqueFd(const UniqueFd &) = delete;
    UniqueFd &operator=(const UniqueFd &) = delete;

    // Allow move: the source is left owning nothing (-1).
    UniqueFd(UniqueFd &&other) noexcept : fd_(std::exchange(other.fd_, -1)) {}

    UniqueFd &operator=(UniqueFd &&other) noexcept {
        if (this != &other) {
            // Release our current descriptor right away instead of parking it
            // in the moved-from object until that object is destroyed.
            closeOwned();
            fd_ = std::exchange(other.fd_, -1);
        }
        return *this;
    }

    // Implicit cast to Fd, so the object can be passed directly to syscalls.
    operator const Fd &() const { return fd_; }

private:
    // Closes the held descriptor, if any, and marks this object as empty.
    void closeOwned() noexcept {
        if (fd_ >= 0) {
            ::close(fd_);
            fd_ = -1;
        }
    }

    Fd fd_ = -1;
};
// Fixed-size leading part of a zip local file header, as read from the
// position given by CDFH::offset_of_local_header. The struct is packed so it
// can be filled directly from file bytes; only the fields this tool needs are
// named.
struct [[gnu::packed]] LFH {
    uint32_t signature;
    // 22 header bytes this tool does not interpret.
    uint8_t unused1[22];
    uint16_t file_name_len;
    uint16_t extra_field_len;

    // True when the signature field holds the expected magic number.
    bool valid() const {
        constexpr uint32_t kMagic = 0x04034b50;
        return kMagic == signature;
    }

    // Fixed header size plus the variable-length name and extra field that
    // follow it.
    uint32_t total_size() const {
        uint32_t total = sizeof(LFH);
        total += file_name_len;
        total += extra_field_len;
        return total;
    }
};
// Fixed-size leading part of a zip central directory file header. The struct
// is packed so it can be overlaid directly on file bytes; the file name (and
// then the extra field and comment) follow immediately after it in memory.
// Only the fields this tool needs are named.
struct [[gnu::packed]] CDFH {
    uint32_t signature;
    // 16 header bytes this tool does not interpret.
    uint8_t unused1[16];
    uint32_t comp_size;
    uint32_t uncomp_size;
    uint16_t file_name_len;
    uint16_t extra_field_len;
    uint16_t file_comment_len;
    // 8 header bytes this tool does not interpret.
    uint8_t unused2[8];
    uint32_t offset_of_local_header;

    // Size of this record including the variable-length name, extra field and
    // comment; adding it to the record's address yields the next record.
    uint32_t total_size() const {
        return sizeof(CDFH) + file_name_len + extra_field_len + file_comment_len;
    }

    // Returns the entry name stored right after the fixed header.
    // The caller must guarantee that file_name_len bytes are readable there.
    // The result has exactly file_name_len characters (no trailing NUL is
    // embedded in the string), so size() and comparisons behave as expected.
    std::string file_name() const {
        const char *name = reinterpret_cast<const char *>(this) + sizeof(CDFH);
        std::string s(name, file_name_len);
        LOGD("file name len %d s=%s", file_name_len, s.c_str());
        return s;
    }

    // True when the signature field holds the expected magic number.
    bool valid() const {
        return signature == 0x02014b50;
    }
};
// Reads exactly `len` bytes from `fd` at absolute position `pos` into `buf`,
// retrying on EINTR and on short reads. Returns false on a read error (errno
// is left as set by pread) or when the file ends early (errno is set to EIO).
static bool preadFully(int fd, void *buf, size_t len, off_t pos) {
    auto *out = static_cast<uint8_t *>(buf);
    while (len > 0) {
        const ssize_t got = pread(fd, out, len, pos);
        if (got == -1) {
            if (errno == EINTR) continue;
            return false;
        }
        if (got == 0) {
            errno = EIO;  // premature end of file
            return false;
        }
        out += got;
        pos += got;
        len -= static_cast<size_t>(got);
    }
    return true;
}

// Finds the zip entry whose stored (compressed) data covers file offset
// `offset` in the archive at `path`.
//
// The function locates the end-of-central-directory (EOCD) record by scanning
// backwards over every possible archive-comment length, maps the central
// directory, and walks its records. For each record it reads the local file
// header to compute where the entry's data starts.
//
// Returns {name, data start offset, uncompressed size} for the matching entry.
// If no entry matches, `offset` in the result is -1. On any I/O or format
// error a default-constructed EntryInfo is returned. Progress and errors are
// logged through LOGD/LOGE/PLOGE.
//
// Only the classic 32-bit zip fields are interpreted; ZIP64 records are not
// handled.
EntryInfo findEntryByOffset(const char *path, off_t offset) {
    // EOCD record: 22 fixed bytes followed by `comment_len` bytes of comment.
    struct [[gnu::packed]] Eocd {
        uint32_t signature;
        uint8_t unused[6];
        uint16_t cdfhs_count;
        uint32_t cdfhs_size;
        uint32_t cdfhs_off;
        uint16_t comment_len;
    };
    static_assert(sizeof(Eocd) == 22, "EOCD record must be 22 bytes");
    constexpr uint32_t kEocdSignature = 0x06054b50;
    constexpr off_t kMaxCommentLen = 0xFFFF;

    UniqueFd fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd == -1) {
        PLOGE("failed to open");
        return {};
    }
    const off_t file_len = lseek(fd, 0, SEEK_END);
    if (file_len == -1) {
        PLOGE("seek");
        return {};
    }

    // Try every comment length that fits in the file. A candidate is accepted
    // only if both the signature and the recorded comment length agree with
    // its position. The counter is wider than 16 bits so the loop terminates.
    const off_t eocd_size = static_cast<off_t>(sizeof(Eocd));
    off_t max_comment = file_len - eocd_size;  // negative when the file is too small
    if (max_comment > kMaxCommentLen) max_comment = kMaxCommentLen;
    Eocd eocd{};
    off_t eocd_off = -1;
    for (off_t comment_len = 0; comment_len <= max_comment; comment_len++) {
        const off_t candidate = file_len - eocd_size - comment_len;
        if (!preadFully(fd, &eocd, sizeof(eocd), candidate)) {
            PLOGE("failed to read");
            return {};
        }
        if (eocd.signature == kEocdSignature && eocd.comment_len == comment_len) {
            LOGD("found EOCD at offset -%d", static_cast<int>(comment_len));
            eocd_off = candidate;
            break;
        }
    }
    if (eocd_off == -1) {
        LOGE("cdfhs not found");
        return {};
    }

    // The central directory must lie entirely before the EOCD record;
    // otherwise touching the mapping below could read garbage or fault.
    const uint64_t cd_off = eocd.cdfhs_off;
    const uint64_t cd_size = eocd.cdfhs_size;
    if (cd_off + cd_size > static_cast<uint64_t>(eocd_off)) {
        LOGE("central directory (off=%u size=%u) does not fit before EOCD",
             eocd.cdfhs_off, eocd.cdfhs_size);
        return {};
    }
    if (eocd.cdfhs_count == 0 || cd_size == 0) {
        // Empty archive: nothing to map (mmap rejects a zero length).
        LOGE("no file found at file offset %lld", static_cast<long long>(offset));
        return {};
    }

    // mmap offsets must be page aligned: map from the enclosing page boundary
    // and skip `bias` bytes to reach the first record.
    const long page_size = sysconf(_SC_PAGE_SIZE);
    if (page_size <= 0) {
        PLOGE("sysconf");
        return {};
    }
    const uint64_t real_off = cd_off / static_cast<uint64_t>(page_size) *
                              static_cast<uint64_t>(page_size);
    const size_t bias = static_cast<size_t>(cd_off - real_off);
    const size_t real_size = static_cast<size_t>(cd_size) + bias;
    LOGD("cdfh count=%u size=%u off=%u real_off=%lu real_size=%zu",
         static_cast<unsigned>(eocd.cdfhs_count), eocd.cdfhs_size, eocd.cdfhs_off,
         static_cast<unsigned long>(real_off), real_size);
    void *const mapping = mmap(nullptr, real_size, PROT_READ, MAP_PRIVATE, fd,
                               static_cast<off_t>(real_off));
    if (mapping == MAP_FAILED) {
        PLOGE("mmap");
        return {};
    }
    const uint8_t *const cd_begin = static_cast<const uint8_t *>(mapping) + bias;
    const uint8_t *const cd_end = cd_begin + cd_size;
    LOGD("mmap addr %p, cdfh addr %p", mapping, static_cast<const void *>(cd_begin));

    std::string file_name;
    off_t file_off = -1;
    size_t file_size = 0;
    const uint8_t *cursor = cd_begin;
    // No early returns from here on: every exit path reaches munmap below.
    for (int i = 0; i < eocd.cdfhs_count; i++) {
        // Bounds are tracked in bytes; check them before touching the record.
        const size_t remaining = static_cast<size_t>(cd_end - cursor);
        if (remaining < sizeof(CDFH)) {
            LOGE("cdfh %i at %p is truncated!", i, static_cast<const void *>(cursor));
            break;
        }
        const auto *current = reinterpret_cast<const CDFH *>(cursor);
        if (!current->valid()) {
            LOGE("cdfh %i at %p is invalid (signature=%d)!", i,
                 static_cast<const void *>(current), static_cast<int>(current->signature));
            // pause();
            break;
        }
        const size_t entry_size = current->total_size();
        if (entry_size > remaining) {
            LOGE("cdfh %i at %p is truncated!", i, static_cast<const void *>(cursor));
            break;
        }
        LFH lfh;
        if (!preadFully(fd, &lfh, sizeof(lfh),
                        static_cast<off_t>(current->offset_of_local_header))) {
            PLOGE("read lfh");
            break;
        }
        if (!lfh.valid()) {
            LOGE("lfh %i at %u is invalid!", i, current->offset_of_local_header);
            break;
        }
        // The entry's data starts after the local header and its name/extra
        // field. 64-bit arithmetic keeps large offsets from wrapping.
        const uint64_t data_off =
                static_cast<uint64_t>(current->offset_of_local_header) + lfh.total_size();
        const uint64_t data_end = data_off + current->comp_size;
        if (offset >= 0 && static_cast<uint64_t>(offset) >= data_off &&
            static_cast<uint64_t>(offset) < data_end) {
            file_name = current->file_name();
            file_off = static_cast<off_t>(data_off);
            file_size = current->uncomp_size;
            LOGD("found at entry %i name %s offset=%lld size=%zu comp_size=%u", i,
                 file_name.c_str(), static_cast<long long>(file_off), file_size,
                 current->comp_size);
            break;
        }
        cursor += entry_size;
    }
    if (file_off == -1) {
        LOGE("no file found at file offset %lld", static_cast<long long>(offset));
    }
    if (munmap(mapping, real_size) == -1) {
        PLOGE("munmap");
    }
    LOGD("name=%s off=%lld size=%zu", file_name.c_str(), static_cast<long long>(file_off),
         file_size);
    return {file_name, file_off, file_size};
}
int main(int argc, char **argv) {
if (argc < 3) {
printf("usage: <file> <off>\n");
return 1;
}
auto file = argv[1];
auto off = strtol(argv[2], nullptr, 0);
findEntryByOffset(file, off);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment