Skip to content

Instantly share code, notes, and snippets.

@olebedev
Forked from bouk/parse-yarn-lock.nix
Created May 22, 2023 05:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save olebedev/63319bc7831a596ec4b39b93c4bd6a5d to your computer and use it in GitHub Desktop.
Save olebedev/63319bc7831a596ec4b39b93c4bd6a5d to your computer and use it in GitHub Desktop.
Parse a v1 yarn.lock into a nix expression. Try it with `nix eval -f parse-yarn-lock.nix`
# Parse a yarn.lock file using pure Nix
# A v1 yarn.lock is essentially YAML without arrays, extended so that several comma-separated keys in a map can share a single value.
# Inspired by https://github.com/yarnpkg/yarn/blob/158d96dce95313d9a00218302631cd263877d164/src/lockfile/parse.js
with builtins;
let
# Pair every element of `list` with its position: the result is
# [ { i = 0; e = <first element>; } { i = 1; e = <second element>; } ... ].
enumerate = list:
  let
    count = length list;
    tag = i: { inherit i; e = elemAt list i; };
  in
    genList tag count;
# Build a token record from a token `type` and the `value` attached to it.
mkToken = type: value: { type = type; value = value; };
# Parse the text of a yarn.lock file (`str`) into a nested attribute set.
# Works in two phases: `str` is first split into tokens with one combined regex,
# then `parse` turns the token stream into attribute sets.
parseLockfile = str: let
# Regexes that tokenize a yarn lockfile, one per token type.
# Every pattern is wrapped in a capture group, so the position of the group that
# matched identifies the kind of token that was found.
newlineRe = "(\r?\n)";
# `*` (not `+`) so that a bare `#`, i.e. an empty comment, is still a comment.
commentRe = "#([^\n]*)";
# Used for any kind of whitespace and also indentation in an object
indentRe = "( +)";
# Matches JSON strings, which is the format used for quoted values.
# Note that this contains an inner group for the repetition, so every group after
# the string group is offset by one.
# `*` (not `+`) so that the empty string `""` is accepted as well.
stringRe = "(\"([^\"\\\\]|\\\\[\\\"\\\\/bfnrt]|\\\\u[0-9a-f]{4})*\")";
numberRe = "([0-9]+)";
booleanRe = "(true|false)";
colonRe = "(:)";
commaRe = "(,)";
# A symbol is a string without quotes: one start character followed by any number
# of characters up to the next `:`, space, line break or `,`.
# `*` (not `+`) so that one-character symbols such as the package name `q` match.
symbolRe = "([a-zA-Z\\/.-][^: \n\r,]*)";
tokenizeRe = "${newlineRe}|${commentRe}|${indentRe}|${stringRe}|${numberRe}|${booleanRe}|${colonRe}|${commaRe}|${symbolRe}";
# Split a string on every token match. The result alternates between the text
# between matches (plain strings) and the capture groups of each match (lists).
tokenize = split tokenizeRe;
# Convert one element of the `tokenize` output into a token record.
# A plain string is text that matched none of the token patterns, which is an error.
# Otherwise `token` is the list of capture groups of a single match, and the first
# non-null group decides the token type. Group 4 is the inner repetition group of
# the string pattern and is therefore skipped.
convert = token:
  let
    group = elemAt token;
  in
  if isString token then abort "Invalid token ${token}"
  else if group 0 != null then mkToken "newline" null
  else if group 1 != null then mkToken "comment" (group 1)
  # The value of an indent token is its width in spaces.
  else if group 2 != null then mkToken "indent" (stringLength (group 2))
  else if group 3 != null then mkToken "string" (fromJSON (group 3))
  else if group 5 != null then mkToken "number" (fromJSON (group 5))
  # The comparison has to be parenthesized: function application binds tighter
  # than `==`, so without the parentheses the whole token record would be
  # compared with "true" and the result would be the bare value `false`.
  else if group 6 != null then mkToken "boolean" (group 6 == "true")
  else if group 7 != null then mkToken "colon" null
  else if group 8 != null then mkToken "comma" null
  # Unquoted symbols are treated as strings from here on.
  else if group 9 != null then mkToken "string" (group 9)
  else abort "unreachable";
# Raw token stream. `split` leaves empty strings between adjacent matches; those
# are dropped before conversion.
unprocessedTokens = map convert (filter (e: e != "") (tokenize str));
# Filter out comments, and spaces that don't follow a newline (those only separate
# tokens; spaces right after a newline are indentation and are kept).
tokens =
  let
    keep = { i, e }:
      if e.type == "comment" then
        # Check if this is the right version lockfile: a version comment that
        # names anything other than v1 aborts evaluation.
        if (match "[[:space:]]*yarn lockfile v[0-9]+[[:space:]]*" e.value) != null
          && (match "[[:space:]]*yarn lockfile v1[[:space:]]*" e.value) == null
        then abort "Unsupported lockfile: ${e.value}"
        else false
      else
        # The token at index 0 has no predecessor, so it cannot follow a newline;
        # checking `i == 0` first avoids calling elemAt with index -1.
        !(e.type == "indent"
          && (i == 0 || (elemAt unprocessedTokens (i - 1)).type != "newline"));
  in
    map ({ i, e }: e) (filter keep (enumerate unprocessedTokens));
# Token at position `index`, or an "eof" marker once `index` reaches the end of `tokens`.
get = index:
  if index >= length tokens
  then { type = "eof"; }
  else elemAt tokens index;
# Collect the value of the token at `index` plus the values of all further string
# tokens that are chained to it by commas ("a, b, c" gives three keys).
takeKeys = index:
  let
    hasMore = (get (index + 1)).type == "comma" && (get (index + 2)).type == "string";
    rest = if hasMore then takeKeys (index + 2) else [];
  in
    [ (get index).value ] ++ rest;
# Consume the tokens of a single object.
# `start` is the index of the first token to look at and `indent` the indentation
# width that the lines of this object must have (0 for the top-level object).
# Returns 'value', the attribute set built from the object's entries, and 'index',
# the token index at which parsing of this object stopped.
parse = start: indent:
  let
    # One parser step. `key` is the index of the token to examine; the returned
    # list is either empty (this object is finished) or holds the single next step
    # together with the name/value pairs produced on the way to it.
    step = { key, ... }:
      let
        current = get key;
        following = get (key + 1);
        stop = [];
        advance = [ { key = key + 1; values = []; } ];
      in
      if current.type == "eof" then stop
      else if current.type == "newline" then
        # At the top level every line belongs to the object. Deeper down the
        # next line must be indented by exactly `indent`, otherwise we stop here.
        if indent == 0 || (following.type == "indent" && following.value == indent)
        then advance
        else stop
      else if current.type == "indent" then
        if current.value == indent then advance else stop
      # String means this is a key value pair
      else if current.type == "string" then
        let
          names = takeKeys key;
          # Number of tokens taken up by the keys and the commas between them.
          width = 2 * (length names) - 1;
          after = get (key + width);
        in
        # If the key is followed by a colon then this is a nested object
        if after.type == "colon" then
          let
            # Parse the nested object; it tells us where to continue.
            nested = parse (key + width + 1) (indent + 2);
          in
          [ {
            key = nested.index;
            values = map (name: { inherit name; value = nested.value; }) names;
          } ]
        # The only valid values
        else if elem after.type [ "string" "number" "boolean" ] then
          [ {
            key = key + width + 1;
            values = map (name: { inherit name; value = after.value; }) names;
          } ]
        else abort "Invalid token ${after.type}"
      else abort "Invalid token ${current.type}";
    # genericClosure drives the steps as an iteration instead of recursing once
    # per token, which would be too slow in Nix. A fold is not an option because
    # nested maps have to be parsed recursively and the tokens they consumed
    # have to be skipped.
    steps = genericClosure {
      startSet = [ { key = start; values = []; } ];
      operator = step;
    };
    lastStep = elemAt steps ((length steps) - 1);
  in
  {
    value = listToAttrs (concatMap (s: s.values) steps);
    index = lastStep.key;
  };
in
# The whole file is one top-level object: start at token 0 with indentation 0.
(parse 0 0).value;
in
# The file evaluates to the parsed contents of the yarn.lock located next to it.
parseLockfile (readFile ./yarn.lock)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment