Skip to content

Instantly share code, notes, and snippets.

@MaskRay
Last active December 26, 2020 23:32
Show Gist options
  • Save MaskRay/9fb642b7d4d903ecd2a9dd0a773fdc70 to your computer and use it in GitHub Desktop.
Save MaskRay/9fb642b7d4d903ecd2a9dd0a773fdc70 to your computer and use it in GitHub Desktop.
musl security features
% Slide deck "musl security features" (beamer).
\documentclass{beamer}
% Look and feel.
\usetheme{Boadilla}
\usecolortheme{rose}
\useoutertheme{tree}
\usefonttheme[onlylarge]{structurebold}
\setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries}
\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{blocks}[rounded][shadow=true]
% Only top-level sections are listed in the table of contents.
\setcounter{tocdepth}{1}
% Package options belong in the optional argument BEFORE the package name.
% The optional argument AFTER the name is a release date, so the previous
% \usepackage{minted}[outputdir=out/] never set outputdir.
\usepackage[outputdir=out/]{minted}
\usepackage{hyperref}
% \myhref[<color>]{<url>}{<text>}: hyperlink with coloured text (blue by default).
\newcommand{\myhref}[3][blue]{\href{#2}{\textcolor{#1}{#3}}}
% Bibliography: biblatex with the bibtex backend and an empty heading.
\usepackage[backend=bibtex]{biblatex}
\defbibheading{bibliography}{}
\addbibresource{refs.bib}
% Title page data.
\title{musl security features}
\author{MaskRay}
\institute{https://maskray.me}
\date{}
% \image{<file>}: emit a frame that contains only the given graphic, scaled to
% fit within the text width and 80% of the text height, keeping its aspect ratio.
% Line ends are commented out so the macro body contributes no stray spaces.
\newcommand{\image}[1]{%
  \begin{frame}%
    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{#1}%
  \end{frame}%
}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\tableofcontents
\end{frame}
\section{Introduction}
\begin{frame}
\begin{block}{musl}
\begin{itemize}
\item A libc, an implementation of the user-space side of standard C/POSIX functions with Linux extensions.
\item General-purpose, not specific to the embedded domain
\item Launched in 2011. Milestone 1.0 released in 2014.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Distributions using musl}
\begin{itemize}
\item \href{https://alpinelinux.org/}{Alpine Linux}, Dragora, sabotage, \href{https://github.com/talos-systems/talos}{Talos}, \href{https://voidlinux.org/}{Void Linux}
\item Shipping musl as an optional package: \href{https://gitweb.gentoo.org/proj/musl.git}{Gentoo Linux}, \href{https://openwrt.org}{OpenWrt}
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Core Principles}
\begin{itemize}
\item Extracted from Rich Felker's talk \textit{Transitioning from uclibc to musl for embedded development}
\item Simplicity as the core approach to size, performance, security, and maintainability
\item Factoring for minimal code duplication
\item Ease of navigating and understanding code
\item Robustness/fail-safety
\item Not depending on fancy compiler/toolchain features
\item First-class status for UTF-8, non-ASCII characters
\end{itemize}
\end{block}
\end{frame}
\section{Miscellaneous}
\begin{frame}
\begin{block}{Miscellaneous}
\begin{itemize}
\item v1.0.0: \texttt{PT\_GNU\_RELRO}: some sections are readonly after relocation resolving. \texttt{mprotect} such sections.
\item v1.1.10: static pie (earlier than \myhref{http://sourceware.org/PR19574}{glibc})
\item v1.1.20: \texttt{explicit\_bzero}: \texttt{bzero} with a compiler barrier
\item v1.1.24: \texttt{secure\_getenv}: behaves like \texttt{getenv}, but returns \texttt{NULL} in secure mode (set-user-ID or set-group-ID)
\end{itemize}
\end{block}
\end{frame}
\section{Stack Smashing Protector}
\begin{frame}
\begin{block}{Stack Smashing Protector}
\begin{itemize}
\item \texttt{-fstack-protector\{-explicit,,-strong,-all\}}
\item Add a secret value (canary) after local variables (before the return address) on the stack.
\item On return, check whether the canary stays the same.
\item \texttt{-mstack-protector-guard=} decides where the secret is stored.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[fragile]{}
\tiny
\begin{minted}{c}
void foo(const char *a, const char *b) {
char buf[16];
strcpy(buf, a); strcat(buf, b); puts(buf);
}
\end{minted}
\begin{center}
\begin{minipage}{0.8\textwidth}
\begin{verbatim}
foo: # @foo
# Save callee-saved registers.
pushq %r14; pushq %rbx
# Allocate local variables and canary.
subq $24, %rsp
# Copy arguments to callee-saved registers.
movq %rsi, %r14; movq %rdi, %rsi
# Set canary.
movq %fs:40, %rax # -mstack-protector-guard=tls
movq __stack_chk_guard(%rip), %rax # -mstack-protector-guard=global
movq %rax, 16(%rsp)
movq %rsp, %rbx
# Main body.
movq %rbx, %rdi; callq strcpy
movq %rbx, %rdi; movq %r14, %rsi; callq strcat
movq %rbx, %rdi; callq puts
# Check canary.
movq %fs:40, %rax # -mstack-protector-guard=tls
movq __stack_chk_guard(%rip), %rax # -mstack-protector-guard=global
cmpq 16(%rsp), %rax; jne .LBB0_2
# Epilogue
addq $24, %rsp; popq %rbx; popq %r14
retq
.LBB0_2:
callq __stack_chk_fail
.Lfunc_end0:
\end{verbatim}
\end{minipage}
\end{center}
\end{frame}
\begin{frame}
\begin{block}{Stack Smashing Protector in libc}
\begin{itemize}
\item libc has to initialize (tls) \texttt{\%fs:40} or (global) \texttt{\_\_stack\_chk\_guard}.
\item In glibc, only one is supported (arch-specific configure-time decision). \myhref{https://sourceware.org/bugzilla/show_bug.cgi?id=26817}{PR glibc/26817} (tls may be less secure due to lack of guard page before static TLS block)
\item musl supports \texttt{-mstack-protector-guard=tls} and \texttt{-mstack-protector-guard=global} at the same time.
\item v1.1.9: Allow libc itself to be built with \texttt{-fstack-protector} (earlier than glibc)
\end{itemize}
\end{block}
\end{frame}
\section{Secure mode}
\begin{frame}
\begin{block}{Secure mode}
\begin{itemize}
\item set-user-ID and set-group-ID (\texttt{chmod u+s} and \texttt{chmod g+s})
\item ld.so: disallow \texttt{LD\_LIBRARY\_PATH}/\texttt{LD\_PRELOAD}, non-absolute \texttt{\$ORIGIN}, obtaining \texttt{\$ORIGIN} from main executable \texttt{/proc/self/exe}
\item date/time functions: disallow \texttt{TZ} file other than /etc/localtime (\texttt{TZ=:/usr/local/etc/localtime})
\item \texttt{catopen}: disallow \texttt{NLSPATH}
\item Ensure fd 0/1/2 are open: a badly written set-UID program may open files (occupying fd 0/1/2) and clobber these files when writing to stdout/stderr.
\end{itemize}
\end{block}
\end{frame}
\section{malloc}
\begin{frame}
\begin{block}{mallocng}
\begin{itemize}
\item v1.2.1: ``Strong hardening against memory usage errors by the caller, including detection of overflows, double-free, and use-after-free, and does not admit corruption of allocator state via these errors.''
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{Corruption of allocator state}
\begin{itemize}
\item glibc (as of 2.31): \texttt{free(): double free detected in tcache 2}; \texttt{double free or corruption (fasttop)}
\item musl: crash without diagnostic
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\begin{block}{malloc implementations}
\begin{itemize}
\item dlmalloc (Doug Lea), ptmalloc (pthread malloc), glibc, jemalloc, tcmalloc
\item dlmalloc family: metadata (fd/bk/prev\_size/size) in a free chunk overlay user data in an in-use chunk and metadata can be forged.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\small
\begin{block}{Characteristics}
\begin{itemize}
\item malloc after free: first-fit effects, similar to glibc (LIFO in tcache/fastbin; FIFO in unsorted bin). However, randomness can be trivially implemented by changing the current least significant bit heuristic. For a request of $n$, if the slot has $n+\mathit{UNIT}\cdot(k-1)$ bytes, \texttt{enframe} can cycle the allocation in $k$ places, i.e.\ a slot can be reused without handing out an identical resource identifier. This property is more useful for detecting double-free bugs than hardening.
\item Main metadata is stored separately. Out-of-bounds write forges the header of the next slot, but it can hardly do harm.
\item double free/invalid free: guaranteed crash due to rigorous verification. In glibc, double free of non-top chunk in fastbin cannot be detected (\texttt{double free or corruption (fasttop)}, other bins have more checks).
\item No \texttt{malloc\_set\_zero\_contents/malloc\_set\_pattern\_fill\_contents} (scudo). Data leak from freed memory is possible.
\item No \texttt{\_\_malloc\_hook} or \texttt{\_\_free\_hook}.
\end{itemize}
\end{block}
\end{frame}
\section{mallocng data structures}
\begin{frame}[fragile]
\tiny
\begin{minted}{c}
// Singleton: ctx
struct malloc_context {
uint64_t secret;
#ifndef PAGESIZE
size_t pagesize;
#endif
// When alloc_meta is invoked for the first time, initialize secret and pagesize.
int init_done;
unsigned mmap_counter;
struct meta *free_meta_head;
// [avail_meta,avail_meta_count) are available meta objs.
struct meta *avail_meta;
// When a new page is allocated, (4096-sizeof(meta_area))/sizeof(meta) meta objs are newly available.
size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift;
struct meta_area *meta_area_head, *meta_area_tail;
// [avail_meta_areas,avail_meta_areas+avail_meta_area_count) are available meta areas.
unsigned char *avail_meta_areas;
// Doubly linked meta list by size class,
struct meta *active[48];
size_t usage_by_class[48];
uint8_t unmap_seq[32], bounces[32];
uint8_t seq;
// [initial brk(0)+pagesize, brk) stores available meta areas.
uintptr_t brk;
};
\end{minted}
\end{frame}
\begin{frame}[fragile]
\tiny
A group contains a header (of minimum metadata) and $1\sim 32$ slots for allocation. Each group is paired with a meta object which references back to the group.
The slot size (\texttt{stride}) is decided by the size class. The number of slots is decided by the size class and its usage.
\begin{minted}{c}
struct meta_area {
uint64_t check; // ctx.secret
struct meta_area *next;
int nslots; // (4096-sizeof(meta_area))/sizeof(meta)
struct meta slots[];
};
struct meta {
struct meta *prev, *next;
struct group *mem;
// Bitmask of unused slots and bitmask of freed slots. They have no intersection.
volatile int avail_mask, freed_mask;
// The index of the last slot, i.e. the number of slots minus 1.
uintptr_t last_idx:5;
uintptr_t freeable:1;
uintptr_t sizeclass:6;
uintptr_t maplen:8*sizeof(uintptr_t)-12;
};
struct group {
struct meta *meta;
unsigned char active_idx:5;
char pad[UNIT - sizeof(struct meta *) - 1];
// N slots. A slot starts at offset (1+stride*slot_index)*UNIT.
unsigned char storage[];
};
\end{minted}
\end{frame}
\begin{frame}[fragile]
\tiny
Each chunk is user data preceded by a 4-byte header ($\mathit{IB} = 4$). For a requested size $n$, the chunk size is $n+4$. The slot size needs to be at least as large as $n+4$.
The canonical placement starts the user data at the slot start (offset: 0) and makes its header overlap the last 4 bytes of the previous slot. (The last 4 bytes of the current slot are reserved by the next chunk.) This placement can be used for an unaligned allocation.
A chunk may have some trailing spare bytes, named \texttt{reserved}.
If $\mathit{reserved} \geq 5$, use the last 5 bytes to store a zero canary byte and \texttt{reserved}, verified by \texttt{get\_nominal\_size} (called by malloc and free). This can detect out-of-bounds writes.
\begin{minted}{c}
struct chunk {
uint8_t zero;
uint8_t idx; // slot_index | (min(reserved,5)<<5); if nested in a larger chunk, slot_index | 6<<5
uint16_t offset; // (p-g->mem->storage) / UNIT
// If reserved bytes >= 5, end[-5] is 0 and *(uint32_t*)(end-4) stores reserved.
char p[stride-IB];
};
struct chunk32 {
uint32_t offset; // (p-g->mem->storage) / UNIT
uint8_t non_zero;
uint8_t idx; // slot_index | (min(reserved,5)<<5)
uint16_t zero;
// If reserved bytes >= 5, end[-5] is 0 and *(uint32_t*)(end-4) stores reserved.
char p[stride-IB];
};
\end{minted}
\end{frame}
\section{glibc}
\begin{frame}
\begin{block}{Features available in glibc but unavailable in musl}
\begin{itemize}
\item Memory protection keys (\texttt{pkey\_*})
\item AArch64 Branch Target Identification and Pointer Authentication (assembly annotation and ld.so support (\texttt{mprotect PROT\_BTI}))
\item AArch64 Memory Tagging Extension (including heap tagging in its malloc implementation)
\item x86 IBT and SHSTK (assembly annotation and ld.so support)
\item Defense in depth: \texttt{setjmp/longjmp/\_\_cxa\_atexit} address mangling
\end{itemize}
\end{block}
\end{frame}
\section{References}
\begin{frame}
\begin{block}{References}
\begin{itemize}
\item Transitioning From uclibc to musl for Embedded Development, Rich Felker
\item \url{https://dustri.org/b/security-features-of-musl.html}, Julien Voisin
\end{itemize}
\end{block}
\end{frame}
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment