Skip to content

Instantly share code, notes, and snippets.

@whizzter
Created June 27, 2021 00:37
Show Gist options
  • Save whizzter/58135363ef611ef1a45dcd06ecfc9019 to your computer and use it in GitHub Desktop.
Save whizzter/58135363ef611ef1a45dcd06ecfc9019 to your computer and use it in GitHub Desktop.
Esprima.NET SourceMap adjustment
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Esprima;
using Esprima.Ast;
// The 2-clause BSD licence applies.
namespace EsprimaSourceMap
{
// partial impl for https://sourcemaps.info/spec.html
public class EsprimaSourceMapTransformer
{
// Regexp to match the sourceMappingURL comment inside a JS file.
// - accepts both the current "//#" and the legacy "//@" comment prefix,
// - captures only non-whitespace characters so that the '\r' of a CRLF line ending
//   (which '.' would match and '$' would not strip) never ends up inside the URL.
static readonly Regex sourceMappingUrlRe = new Regex("^\\s*//[#@]\\s*sourceMappingURL=(\\S+)\\s*$", RegexOptions.Multiline);
// The public interface of this class.
// Parses sourceData as a script and, when the text contains a sourceMappingURL comment,
// rewrites the node locations of the resulting tree through the referenced source map.
//   sourceData              - the JavaScript text to parse.
//   sourceName              - name handed to ParserOptions for the parsed file.
//   externalSourceMapLoader - optional callback that receives a non-"data:" source map URI and
//                             returns the source map JSON text (or null when it cannot be loaded).
// Returns the parsed (and possibly relocated) script tree.
// Timing information for both phases is written to the console.
public static Esprima.Ast.Script ParseScriptWithSourcemap(string sourceData,string sourceName, Func<string, string> externalSourceMapLoader = null)
{
    // Comment should be true here once JavaScriptParser supports comments.
    // Stopwatch is monotonic, unlike wall-clock DateTime differences.
    var parseTimer = System.Diagnostics.Stopwatch.StartNew();
    var scriptTree = new Esprima.JavaScriptParser(sourceData, new ParserOptions(sourceName) { Comment = false }).ParseScript();
    parseTimer.Stop();
    System.Console.WriteLine("Parsing application time:" + parseTimer.Elapsed.TotalMilliseconds);

    // Temporary hack to get the sourcemap (should be moved to comment extraction inside ApplySourceMap)
    var sourceMapMatch = sourceMappingUrlRe.Match(sourceData);
    if (sourceMapMatch.Success) // Regex.Match never returns null, so Success is the only check needed
    {
        // Trim: with CRLF line endings the capture can carry a trailing '\r' that would
        // otherwise be passed on to the external loader as part of the file name.
        string sourceMapURI = sourceMapMatch.Groups[1].Value.Trim();
        if (sourceMapURI.Length > 0)
        {
            var remapTimer = System.Diagnostics.Stopwatch.StartNew();
            ApplySourceMap(scriptTree, sourceMapURI, externalSourceMapLoader); // Uri handling should be handled by reading the fileTree
            remapTimer.Stop();
            System.Console.WriteLine("Source-map application time:" + remapTimer.Elapsed.TotalMilliseconds);
        }
    }
    return scriptTree;
}
// Loads the source map referenced by sourceMapURI and rewrites the Location of every node in
// userScript so that it points into the original source files.
// This method should be public and take the tree only, but since Esprima seems to discard
// comments right now we hide it for the time being and take in the URI instead.
//   userScript              - the parsed tree whose node locations are rewritten in place.
//   sourceMapURI            - either a "data:" URI with the embedded map or a reference for the loader.
//   externalSourceMapLoader - optional callback that returns the JSON text for a non-"data:" URI.
// When the map cannot be obtained, or contains no mappings, the tree is left untouched.
// Throws ArgumentException when the mappings string cannot be decoded.
private static void ApplySourceMap(Esprima.Ast.Script userScript, string sourceMapURI, Func<string, string> externalSourceMapLoader = null)
{
    string sourceMapData = null;
    if (sourceMapURI.StartsWith("data:", StringComparison.Ordinal))
    {
        // data-protocol: "data:[<mediatype>][;base64],<payload>"
        int commaIndex = sourceMapURI.IndexOf(',');
        if (commaIndex < 0)
            return; // malformed data URI, nothing to apply
        string header = sourceMapURI.Substring(0, commaIndex);
        string payload = sourceMapURI.Substring(commaIndex + 1);
        sourceMapData = header.EndsWith(";base64", StringComparison.OrdinalIgnoreCase)
            ? System.Text.Encoding.UTF8.GetString(Convert.FromBase64String(payload))
            : Uri.UnescapeDataString(payload); // non-base64 data URIs are percent-encoded
    }
    else if (externalSourceMapLoader != null)
    {
        // not embedded, so pass the loading to the loader.
        sourceMapData = externalSourceMapLoader(sourceMapURI);
    }
    if (sourceMapData == null)
        return; // could not apply it! (hard-fail here?)

    // We use the Esprima JS parser to parse the JSON (since it's available)
    var esprimaJSON = new Esprima.JavaScriptParser(sourceMapData).ParseExpression();
    SourceMapData sourceMapObject = ToObject(esprimaJSON, typeof(SourceMapData)) as SourceMapData;

    // for the mapping data we keep the text and an index into it as the state.
    var mappingDataText = sourceMapObject?.Mappings;
    if (string.IsNullOrEmpty(mappingDataText))
        return; // no "mappings" field (or an empty one): nothing to relocate
    int mappingDataIndex = 0;

    // current location (most of the source-mapping data is relative to previous entries)
    var currentRemappingEntry = new SourceMapRemappingEntry { Line = 0, Column = 0, Source = 0, SourceLine = 0, SourceColumn = 0 };
    // copies (the entry is a struct) are pushed onto the remapping list.
    var remappings = new List<SourceMapRemappingEntry>();

    // check-tok is used to check for the existence of special tokens (where index might overflow)
    char checkTok()
    {
        // extract a check-token (we transform end-of-string to a group-end symbol)
        return mappingDataIndex >= mappingDataText.Length ? ';' : mappingDataText[mappingDataIndex];
    }

    // this function is called to check for (and process) end-of-group or end-of-segment states.
    // doSeg is true when a segment has just been decoded and must be recorded; it is false when
    // we are only probing for an empty group before decoding anything.
    bool processedSegmentOrGroup(bool doSeg)
    {
        char tok = checkTok();
        if (doSeg && tok == ',')
        {
            // push remapping for segment.
            remappings.Add(currentRemappingEntry);
            // go to next character
            mappingDataIndex++;
            return true;
        }
        if (tok == ';')
        {
            // push remapping for the segment that ends this group. An empty group (doSeg == false)
            // describes a generated line without any mapping, so nothing must be recorded for it;
            // recording the stale state would remap tokens of that line to unrelated source positions.
            if (doSeg)
                remappings.Add(currentRemappingEntry);
            // manipulate the location state according to the spec.
            currentRemappingEntry.Line++;
            currentRemappingEntry.Column = 0;
            // go to next character
            mappingDataIndex++;
            return true;
        }
        return false;
    }

    // main loop of reading mapping-data
    while (mappingDataIndex < mappingDataText.Length)
    {
        if (processedSegmentOrGroup(false))
            continue; // empty-line
        currentRemappingEntry.Column += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup(true))
            continue; // what was parsed was a simple column-adjusting entry
        // NOTE(review): a 1-field segment is recorded above with the previous source fields still set.
        currentRemappingEntry.Source += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        currentRemappingEntry.SourceLine += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        currentRemappingEntry.SourceColumn += base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup(true))
            continue; // a more regular entry that can update more than just the column was found.
        // TODO: handling this would enable name-demangling. The value is decoded only to advance the index.
        base64VLQDecode(mappingDataText, ref mappingDataIndex);
        if (processedSegmentOrGroup(true))
            continue; // a name-remapping entry was found (we don't do anything with these right now)
        throw new ArgumentException("Decoding problem"); // should this be a soft-fault?
    }

    // Reading the source-data is done at this point, a couple of remapping functions follows and then we just need to invoke them on the tree.

    // This function locates the appropriate remapping entry for a Position token.
    // Returns null when no entry on the same generated line starts at or before the position.
    SourceMapRemappingEntry? getMapping(Position esPos)
    {
        if (remappings.Count == 0)
            return null; // e.g. a mappings string consisting only of ';' separators

        int mappingLine = esPos.Line - 1; // the source-map format is 0-indexed for lines
        int mappingCol = esPos.Column; // both Esprima and the source-map format is 0-indexed for columns

        // do a binary search in our data to find a starting point for our entries
        int idx = remappings.BinarySearch(new SourceMapRemappingEntry { Line = mappingLine, Column = mappingCol }, lineColumnComparer);
        if (idx >= 0)
        {
            // in the "lucky" case then the symbol matches exactly for a location and we can just return it directly.
            return remappings[idx];
        }

        // In many cases a mapping-segment might span multiple tokens/nodes; a negative result is the
        // bitwise complement of the index of the nearest "larger" entry.
        idx = ~idx;
        // If it's out of bounds (end of file) then just pick the last entry
        if (idx >= remappings.Count)
            idx = remappings.Count - 1;
        // since BinarySearch returned the "larger than closest" entry we might need to "back-step" slightly in the array to find the correct one.
        while (idx > 0 && (remappings[idx].Line > mappingLine || (remappings[idx].Line == mappingLine && remappings[idx].Column > mappingCol)))
        {
            // the index's line was larger OR same and the column was larger, then step back
            idx--;
        }
        // however some tokens might be generated and not match up to real source positions.
        if (remappings[idx].Line != mappingLine || remappings[idx].Column > mappingCol)
            return null;
        // this will be the closest preceding entry compared to the tokens position
        return remappings[idx];
    }

    // Projects a single generated position back into the original source; unmapped positions are returned as-is.
    Position repositionPosition(Position pos)
    {
        var mapping = getMapping(pos);
        if (!mapping.HasValue)
            return pos;
        // if we have a useful mapping then find the offset within it and reproject it to the original source file.
        int insideMappingOffset = pos.Column - mapping.Value.Column;
        return new Position(mapping.Value.SourceLine + 1, mapping.Value.SourceColumn + insideMappingOffset);
    }

    // Projects a whole location; the source name is taken from the mapping of the start position.
    Location repositionLocation(Location esLoc)
    {
        var mapping = getMapping(esLoc.Start);
        if (!mapping.HasValue)
            return esLoc;
        var sources = sourceMapObject.Sources;
        int sourceIndex = mapping.Value.Source;
        if (sources == null || sourceIndex < 0 || sourceIndex >= sources.Count)
            return esLoc; // the map references a source it does not declare: keep the generated location
        return new Location(repositionPosition(esLoc.Start), repositionPosition(esLoc.End), sources[sourceIndex]);
    }

    // this is a recursive node visitor that goes through the source-tree and relocates the source locations.
    void relocate(Node node)
    {
        if (node == null)
            return;
        node.Location = repositionLocation(node.Location);
        foreach (var child in node.ChildNodes)
        {
            relocate(child);
        }
    }

    // Finally initiate the tree-relocation process.
    relocate(userScript);
}
// This class mimics the JSON data structure of sourcemaps.
// ToObject populates it through reflection, looking up each JSON key as a public instance
// property with a case-insensitive name match, so the property names below must keep
// matching the JSON field names.
public class SourceMapData
{
// JSON "version" field (not read by the visible code).
public double Version { get; set; }
// JSON "file" field (not read by the visible code).
public string File { get; set; }
// JSON "sourceRoot" field (not read by the visible code).
public string SourceRoot { get; set; }
// JSON "sources" field; ApplySourceMap indexes it with a remapping entry's Source value
// to obtain the source name of a relocated Location.
public List<string> Sources { get; set; }
// JSON "sourcesContent" field (not read by the visible code).
public List<string> SourcesContent { get; set; }
// JSON "names" field (not read by the visible code; name remapping is a TODO in ApplySourceMap).
public List<string> Names { get; set; }
// JSON "mappings" field; the Base64-VLQ encoded text that ApplySourceMap decodes.
public string Mappings { get; set; }
}
// These entries are the internal result of parsing groups/segments of the SourceMap JSON mappings field.
// This is a struct on purpose: ApplySourceMap keeps mutating one running entry and adds it to the
// remapping list after each segment, which stores a copy of the current state.
private struct SourceMapRemappingEntry
{
// line in the mapped file (0-based; ApplySourceMap subtracts 1 from Esprima's line before comparing)
public int Line;
// column in the mapped file
public int Column;
// source file name index for this entry (index into SourceMapData.Sources)
public int Source;
// line in the original source file (0-based; ApplySourceMap adds 1 when building a Position)
public int SourceLine;
// column in the original source file
public int SourceColumn;
}
// when searching the remapping entries we're only interested in the line/column part of the data.
// Orders by Line first and by Column second. CompareTo is used instead of subtraction so that
// extreme values (e.g. from a corrupt map) cannot overflow and flip the sign of the result.
static readonly Comparer<SourceMapRemappingEntry> lineColumnComparer = Comparer<SourceMapRemappingEntry>.Create((a, b) =>
{
    int byLine = a.Line.CompareTo(b.Line);
    return byLine != 0 ? byLine : a.Column.CompareTo(b.Column);
});
// useful description on https://medium.com/@trungutt/yet-another-explanation-on-sourcemap-669797e418ce
// Decodes one Base64-VLQ number from src, starting at idx, and advances idx past the digits consumed.
//   src - the text holding the encoded value.
//   idx - position of the first digit; on return it points just behind the last digit of the value.
// Returns the decoded signed value.
// Throws ArgumentException when the text ends in the middle of a value, when a character is not
// part of the Base64 alphabet, or when the value needs more than 32 bits.
private static int base64VLQDecode(string src, ref int idx)
{
    uint tempValue = 0;
    // data is read from least significant bits to most, shift keeps track of it.
    int shift = 0;
    bool continuation;
    do
    {
        if (idx < 0 || idx >= src.Length)
            throw new ArgumentException("Truncated B64-VLQ value");
        // we get a value between 0-63 here.
        uint decodedCharVal = b64v(src[idx++]);
        // bit 5 is the continuation flag
        continuation = 0 != (decodedCharVal & 0x20);
        // bits 0-4 is the actual data that is shifted into place
        uint payload = decodedCharVal & 0x1f;
        // C# masks the shift count to 5 bits, so shifting by 32 or more would silently wrap and
        // corrupt the value; at shift 30 only the two lowest payload bits still fit.
        if (shift > 30 || (shift == 30 && payload > 0x3))
            throw new ArgumentException("B64-VLQ value does not fit in 32 bits");
        tempValue |= payload << shift;
        shift += 5;
    } while (continuation);
    // the least-significant bit carries the sign, the remaining bits the magnitude.
    return 0 != (tempValue & 1)
        ? -(int)(tempValue >> 1)
        : (int)(tempValue >> 1);
}
// decode based on the Base64 alphabet: 'A'-'Z' => 0-25, 'a'-'z' => 26-51, '0'-'9' => 52-61, '+' => 62, '/' => 63.
// Throws ArgumentException for any other character.
private static uint b64v(char c)
{
    if (c >= 'A' && c <= 'Z')
        return (uint)(c - 'A');
    if (c >= 'a' && c <= 'z')
        return (uint)(c - 'a' + 26);
    if (c >= '0' && c <= '9')
        return (uint)(c - '0' + 52);
    if (c == '+')
        return 62;
    if (c == '/')
        return 63;
    throw new ArgumentException("Invalid data in B64-VLQ " + c);
}
// A small routine to de-serialize the JSON data parsed into an Esprima AST into "regular" .NET objects.
//   expr   - the expression to convert: an object, array or literal expression.
//   target - optional type guiding the conversion. For objects an instance of it is created and its
//            public instance properties are filled (JSON keys are matched case-insensitively); for
//            arrays an instance of it is created and used as the list. Without a target, objects
//            become Dictionary<string, object> and arrays become List<object>.
// Returns the converted value (for literals, the literal's Value).
// Throws ArgumentException for unsupported expression kinds, non-literal property keys and array
// targets that are not lists.
private static object ToObject(Esprima.Ast.Expression expr, Type target = null)
{
    if (expr is Esprima.Ast.ObjectExpression objectExpression)
    {
        var reflectedOutputValue = target != null ? Activator.CreateInstance(target) : null;
        var dictionaryOutputValue = target != null ? null : new Dictionary<string, object>();
        foreach (var testProperty in objectExpression.Properties)
        {
            if (!(testProperty is Esprima.Ast.Property prop))
                throw new ArgumentException(testProperty.GetType().Name);
            // JSON keys are always string literals; anything else is reported instead of
            // failing with a NullReferenceException.
            if (!(prop.Key is Esprima.Ast.Literal keyLiteral) || keyLiteral.StringValue == null)
                throw new ArgumentException(prop.Key.GetType().Name);
            var key = keyLiteral.StringValue;

            if (target == null)
            {
                dictionaryOutputValue[key] = ToObject(prop.Value);
                continue;
            }

            // get the target property
            var objProperty = target.GetProperty(key, System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.IgnoreCase | System.Reflection.BindingFlags.Instance);
            // Source maps routinely carry fields the target type does not declare
            // (e.g. "ignoreList", "x_google_ignoreList"); those are skipped.
            if (objProperty == null || !objProperty.CanWrite)
                continue;
            // and set the property via reflection to a value generated by a guided ToObject invocation.
            objProperty.SetValue(reflectedOutputValue, ToObject(prop.Value, objProperty.PropertyType));
        }
        return target != null ? reflectedOutputValue : dictionaryOutputValue;
    }
    else if (expr is Esprima.Ast.ArrayExpression arrayExpression)
    {
        // for a generic target (e.g. List<string>) the first type argument guides the element conversion.
        var listElementType = target != null && target.IsGenericType ? target.GenericTypeArguments[0] : null;
        var outputList = (target != null ? Activator.CreateInstance(target) : new List<object>()) as System.Collections.IList;
        if (outputList == null)
            throw new ArgumentException(target.Name); // the target cannot hold array elements
        foreach (var iexp in arrayExpression.Elements)
        {
            outputList.Add(ToObject(iexp, listElementType));
        }
        return outputList;
    }
    else if (expr is Esprima.Ast.Literal literalExpression)
    {
        return literalExpression.Value;
    }
    else throw new ArgumentException(expr.GetType().Name);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment