Skip to content

Instantly share code, notes, and snippets.

@kindlychung
Created September 16, 2016 14:11
Show Gist options
  • Save kindlychung/ae0bcf1af0d03b04a956bc431038a0da to your computer and use it in GitHub Desktop.
Save kindlychung/ae0bcf1af0d03b04a956bc431038a0da to your computer and use it in GitHub Desktop.
//! A simple parser for a tiny subset of CSS.
//!
//! To support more CSS syntax, it would probably be easiest to replace this
//! hand-rolled parser with one based on a library or parser generator.
use std::ascii::AsciiExt; // for `to_ascii_lowercase`
// Data structures:
/// A parsed stylesheet: the rule sets in the order they appeared in the
/// source text.
#[derive(Debug)]
pub struct Stylesheet {
    /// Every rule set found in the input, in source order.
    pub rules: Vec<Rule>,
}
/// One rule set: `<selectors> { <declarations> }`.
#[derive(Debug)]
pub struct Rule {
    /// The comma-separated selectors of the rule. The parser in this file
    /// stores them sorted with the most specific selector first.
    pub selectors: Vec<Selector>,
    /// The `name: value;` pairs from the rule's `{ ... }` block, in source
    /// order.
    pub declarations: Vec<Declaration>,
}
/// A CSS selector. Only simple selectors are supported by this parser.
#[derive(Debug)]
pub enum Selector {
    /// A simple selector such as `type#id.class1.class2`.
    Simple(SimpleSelector),
}
/// The parts of a simple selector such as `type#id.class1.class2`.
///
/// `Default` yields the empty selector (no tag, no id, no classes), which is
/// also what the parser starts from before reading any selector characters.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SimpleSelector {
    /// Element (tag) name, or `None` when the selector names no element
    /// type (for example `*` or `.class` on its own).
    pub tag_name: Option<String>,
    /// The identifier following `#`, if any.
    pub id: Option<String>,
    /// Every identifier following a `.`, in source order.
    pub class: Vec<String>,
}
/// One `<property>: <value>;` declaration from a rule's `{ ... }` block.
///
/// Derives `Clone` and `PartialEq` (both already available on [`Value`]) so
/// declarations can be copied into other structures and compared directly.
#[derive(Debug, Clone, PartialEq)]
pub struct Declaration {
    /// The property name exactly as it was written in the source.
    pub name: String,
    /// The parsed value of the property.
    pub value: Value,
}
/// The value of a declaration.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// A bare identifier such as `none` or `block`.
    Keyword(String),
    /// A number together with its unit, e.g. `10px`.
    Length(f32, Unit),
    /// A colour, e.g. one written as `#rrggbb`.
    ColorValue(Color),
}
/// The unit attached to a [`Value::Length`].
///
/// This is a fieldless enum, so it is `Copy` and `Eq` in addition to the
/// traits it already had; existing `.clone()` calls keep working.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Unit {
    /// Pixels (`px`), the only unit the parser recognises.
    Px,
}
/// A colour with four 8-bit channels.
///
/// `Copy` is derived together with the other traits instead of being
/// implemented by hand, and `Eq` is added because every field is a `u8`.
/// `Default` is all zeroes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Color {
    /// Red channel.
    pub r: u8,
    /// Green channel.
    pub g: u8,
    /// Blue channel.
    pub b: u8,
    /// Fourth channel (alpha); colours parsed from `#rrggbb` always get 255.
    pub a: u8,
}
/// Selector specificity as returned by `Selector::specificity`: the tuple
/// `(a, b, c)` where `a` counts the id, `b` the classes and `c` the tag name.
/// Tuples compare lexicographically, so an earlier component outweighs any
/// later one.
pub type Specificity = (usize, usize, usize);
impl Selector {
    /// Compute the specificity of this selector.
    ///
    /// Returns `(a, b, c)` where `a` is 1 if the selector has an id (else 0),
    /// `b` is the number of classes and `c` is 1 if it has a tag name
    /// (else 0).
    pub fn specificity(&self) -> Specificity {
        // http://www.w3.org/TR/selectors/#specificity
        // Borrow the inner selector in place: `Selector` is not `Copy`, so it
        // cannot be moved out from behind `&self`.
        let Selector::Simple(ref simple) = *self;
        let a = usize::from(simple.id.is_some());
        let b = simple.class.len();
        let c = usize::from(simple.tag_name.is_some());
        (a, b, c)
    }
}
impl Value {
    /// Size of this value in pixels.
    ///
    /// A pixel length yields its number; keywords and colours yield `0.0`.
    pub fn to_px(&self) -> f32 {
        match self {
            Value::Length(px, Unit::Px) => *px,
            Value::Keyword(_) | Value::ColorValue(_) => 0.0,
        }
    }
}
/// Parse `source` as a complete CSS stylesheet.
///
/// The text is handed to a fresh `Parser` positioned at offset 0 and the
/// rules it produces are wrapped in a [`Stylesheet`].
pub fn parse(source: String) -> Stylesheet {
    let rules = Parser {
        input: source,
        pos: 0,
    }
    .parse_rules();
    Stylesheet { rules }
}
/// Parser state: the text being parsed and how far into it we have read.
struct Parser {
    /// The complete source text.
    input: String,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
}
/// Recursive-descent parsing methods.
///
/// The parser has no error recovery: any input outside the supported subset
/// of CSS makes the method that encounters it panic.
impl Parser {
    /// Parse a list of rule sets, separated by optional whitespace.
    ///
    /// Keeps parsing rules until the whole input has been consumed.
    fn parse_rules(&mut self) -> Vec<Rule> {
        let mut rules = Vec::new();
        loop {
            self.consume_whitespace();
            if self.eof() {
                break;
            }
            rules.push(self.parse_rule());
        }
        rules
    }

    /// Parse a rule set: `<selectors> { <declarations> }`.
    fn parse_rule(&mut self) -> Rule {
        // The selectors must be consumed before the declaration block.
        let selectors = self.parse_selectors();
        let declarations = self.parse_declarations();
        Rule {
            selectors,
            declarations,
        }
    }

    /// Parse a comma-separated list of selectors.
    ///
    /// Stops in front of the `{` that opens the declaration block (without
    /// consuming it) and returns the selectors sorted by descending
    /// specificity.
    ///
    /// # Panics
    ///
    /// Panics if a selector is followed by anything other than `,` or `{`,
    /// or if the input ends first.
    fn parse_selectors(&mut self) -> Vec<Selector> {
        let mut selectors = Vec::new();
        loop {
            selectors.push(Selector::Simple(self.parse_simple_selector()));
            self.consume_whitespace();
            match self.next_char() {
                ',' => {
                    self.consume_char();
                    self.consume_whitespace();
                }
                '{' => break,
                c => panic!("Unexpected character {} in selector list", c),
            }
        }
        // Return selectors with highest specificity first, for use in matching.
        selectors.sort_by(|a, b| b.specificity().cmp(&a.specificity()));
        selectors
    }

    /// Parse one simple selector, e.g.: `type#id.class1.class2.class3`
    ///
    /// Stops at the first character that cannot continue the selector, or at
    /// the end of the input. A repeated `#id` or tag name overwrites the
    /// earlier one; every `.class` is appended.
    fn parse_simple_selector(&mut self) -> SimpleSelector {
        let mut selector = SimpleSelector {
            tag_name: None,
            id: None,
            class: Vec::new(),
        };
        while !self.eof() {
            match self.next_char() {
                '#' => {
                    self.consume_char();
                    selector.id = Some(self.parse_identifier());
                }
                '.' => {
                    self.consume_char();
                    selector.class.push(self.parse_identifier());
                }
                '*' => {
                    // universal selector
                    self.consume_char();
                }
                c if valid_identifier_char(c) => {
                    selector.tag_name = Some(self.parse_identifier());
                }
                _ => break,
            }
        }
        selector
    }

    /// Parse a list of declarations enclosed in `{ ... }`.
    ///
    /// # Panics
    ///
    /// Panics if the next character is not `{`, if a declaration is
    /// malformed, or if the input ends before the closing `}`.
    fn parse_declarations(&mut self) -> Vec<Declaration> {
        assert_eq!(self.consume_char(), '{');
        let mut declarations = Vec::new();
        loop {
            self.consume_whitespace();
            if self.next_char() == '}' {
                self.consume_char();
                break;
            }
            declarations.push(self.parse_declaration());
        }
        declarations
    }

    /// Parse one `<property>: <value>;` declaration.
    ///
    /// # Panics
    ///
    /// Panics if the `:` or the terminating `;` is missing.
    fn parse_declaration(&mut self) -> Declaration {
        let name = self.parse_identifier();
        self.consume_whitespace();
        assert_eq!(self.consume_char(), ':');
        self.consume_whitespace();
        let value = self.parse_value();
        self.consume_whitespace();
        assert_eq!(self.consume_char(), ';');
        Declaration { name, value }
    }

    // Methods for parsing values:

    /// Parse a declaration value, choosing its kind from the first
    /// character: a digit starts a length, `#` starts a colour and anything
    /// else is read as a keyword.
    fn parse_value(&mut self) -> Value {
        match self.next_char() {
            '0'..='9' => self.parse_length(),
            '#' => self.parse_color(),
            _ => Value::Keyword(self.parse_identifier()),
        }
    }

    /// Parse a number immediately followed by a unit, e.g. `10px`.
    fn parse_length(&mut self) -> Value {
        // The number has to be consumed before the unit.
        let number = self.parse_float();
        let unit = self.parse_unit();
        Value::Length(number, unit)
    }

    /// Parse a run of ASCII digits and `.` as an `f32`.
    ///
    /// # Panics
    ///
    /// Panics if the run is not a valid number (e.g. empty, or `1.2.3`).
    fn parse_float(&mut self) -> f32 {
        let digits = self.consume_while(|c| c.is_ascii_digit() || c == '.');
        digits
            .parse()
            .unwrap_or_else(|_| panic!("invalid number {:?}", digits))
    }

    /// Parse a unit name, ignoring ASCII case.
    ///
    /// # Panics
    ///
    /// Panics if the unit is anything other than `px`.
    fn parse_unit(&mut self) -> Unit {
        match self.parse_identifier().to_ascii_lowercase().as_str() {
            "px" => Unit::Px,
            _ => panic!("unrecognized unit"),
        }
    }

    /// Parse a `#rrggbb` colour. The fourth channel is always set to 255.
    ///
    /// # Panics
    ///
    /// Panics if the next character is not `#` or if fewer than six
    /// hexadecimal digits follow.
    fn parse_color(&mut self) -> Value {
        assert_eq!(self.consume_char(), '#');
        let r = self.parse_hex_pair();
        let g = self.parse_hex_pair();
        let b = self.parse_hex_pair();
        Value::ColorValue(Color { r, g, b, a: 255 })
    }

    /// Parse two hexadecimal digits.
    ///
    /// # Panics
    ///
    /// Panics if the next two bytes of the input are not both hexadecimal
    /// digits (including when the input ends early).
    fn parse_hex_pair(&mut self) -> u8 {
        let end = self.pos + 2;
        let byte = self
            .input
            // `get` returns `None` instead of panicking when the range is
            // out of bounds or splits a multi-byte character.
            .get(self.pos..end)
            // `from_str_radix` alone would also accept a leading `+`.
            .filter(|pair| pair.chars().all(|c| c.is_digit(16)))
            .and_then(|pair| u8::from_str_radix(pair, 16).ok())
            .expect("expected two hexadecimal digits");
        self.pos = end;
        byte
    }

    /// Parse a property name or keyword.
    ///
    /// Returns an empty string if the next character cannot be part of an
    /// identifier.
    fn parse_identifier(&mut self) -> String {
        self.consume_while(valid_identifier_char)
    }

    /// Consume and discard zero or more whitespace characters.
    fn consume_whitespace(&mut self) {
        self.consume_while(char::is_whitespace);
    }

    /// Consume characters until `test` returns false or the input ends, and
    /// return the consumed characters.
    fn consume_while<F>(&mut self, test: F) -> String
    where
        F: Fn(char) -> bool,
    {
        let mut result = String::new();
        while !self.eof() && test(self.next_char()) {
            result.push(self.consume_char());
        }
        result
    }

    /// Return the current character, and advance self.pos to the next character.
    ///
    /// # Panics
    ///
    /// Panics if the input is exhausted.
    fn consume_char(&mut self) -> char {
        let cur_char = self.next_char();
        // Advance by the character's encoded width so `pos` always stays on
        // a character boundary, including after the last character.
        self.pos += cur_char.len_utf8();
        cur_char
    }

    /// Read the current character without consuming it.
    ///
    /// # Panics
    ///
    /// Panics if the input is exhausted.
    fn next_char(&self) -> char {
        self.input[self.pos..]
            .chars()
            .next()
            .expect("unexpected end of input")
    }

    /// Return true if all input is consumed.
    fn eof(&self) -> bool {
        self.pos >= self.input.len()
    }
}
/// Return true if `c` may appear in an identifier (a tag name, id, class,
/// property name, keyword or unit): an ASCII letter, an ASCII digit, `-` or
/// `_`.
fn valid_identifier_char(c: char) -> bool {
    // TODO: Include U+00A0 and higher.
    c.is_ascii_alphanumeric() || c == '-' || c == '_'
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment