Skip to content

Instantly share code, notes, and snippets.

@CodaFi
Last active February 3, 2018 23:09
Show Gist options
  • Save CodaFi/e34e1461e97c01d8c75db0b5643b8dce to your computer and use it in GitHub Desktop.
Save CodaFi/e34e1461e97c01d8c75db0b5643b8dce to your computer and use it in GitHub Desktop.
A self-contained port of FileCheck to Swift
/// Playground - noun: a place where people can play
/// FileCheck yourself before you wreck yourself
import Foundation
#if os(Linux)
import Glibc
typealias NSRegularExpression = RegularExpression
#else
import Darwin
#endif
// Example invocation: the closure's standard output is captured and verified
// against the check directive written as a comment inside the closure. The
// directive is read back from this source file at run time (the `against`
// parameter defaults to `#file`), so the comment's text is part of the
// program's behavior and must stay in sync with what the closure prints.
let result = fileCheckOutput(withPrefixes: ["CUSTOM"]) {
// CUSTOM: It Works
print("It Works!")
}
/// A set of flags that tune how the file check verification behaves.
///
/// Flags combine with the usual `OptionSet` syntax, for example
/// `[.matchFullLines, .strictWhitespace]`.
public struct FileCheckOptions : OptionSet {
  /// The bit pattern backing this option set.
  public let rawValue : UInt64

  /// Creates an option set from a raw bit pattern. This never fails.
  public init(rawValue bits : UInt64) {
    self.rawValue = bits
  }

  /// Horizontal whitespace is significant: runs of spaces and tabs are not
  /// treated as interchangeable.
  public static let strictWhitespace = FileCheckOptions(rawValue: 0b0001)

  /// Attach an implicit negative check with this pattern to every positive
  /// check, so that no occurrence of the pattern can slip through without
  /// being matched by a positive pattern.
  public static let implicitCheckNot = FileCheckOptions(rawValue: 0b0010)

  /// Accept an empty input. Useful for checks asserting that some error
  /// message is never produced.
  public static let allowEmptyInput = FileCheckOptions(rawValue: 0b0100)

  /// Every positive match must span a whole input line. Leading and trailing
  /// whitespace is tolerated unless `.strictWhitespace` is also set.
  public static let matchFullLines = FileCheckOptions(rawValue: 0b1000)
}
/// The output streams whose contents `FileCheck` is able to intercept and
/// collect.
public enum FileCheckFD {
  /// The process's standard output stream.
  case stdout
  /// The process's standard error stream.
  case stderr
  /// A caller-supplied stream, given as a descriptor together with its
  /// `FILE` pointer.
  case custom(fileno: Int32, ptr: UnsafeMutablePointer<FILE>)

  /// The file descriptor number belonging to this stream.
  var fileno : Int32 {
    switch self {
    case .custom(let descriptor, _):
      return descriptor
    case .stderr:
      return STDERR_FILENO
    case .stdout:
      return STDOUT_FILENO
    }
  }

  /// The C `FILE` pointer belonging to this stream.
  ///
  /// The standard streams are module-qualified because the bare names
  /// `stdout` and `stderr` refer to this enum's own cases in this scope.
  var filePtr : UnsafeMutablePointer<FILE>! {
    switch self {
    case .custom(_, let stream):
      return stream
    case .stderr:
      #if os(Linux)
        return Glibc.stderr
      #else
        return Darwin.stderr
      #endif
    case .stdout:
      #if os(Linux)
        return Glibc.stdout
      #else
        return Darwin.stdout
      #endif
    }
  }
}
/// Captures everything `block` writes to the given output stream and verifies
/// it against the check directives found in `file`.
///
/// FileCheck requires total access to whatever stream is being used. As such
/// it overrides printing to that stream until the given block has finished
/// executing.
///
/// - parameter FD: The stream to override and read from.
/// - parameter prefixes: One or more prefixes that introduce a check
///   directive. Defaults to "CHECK".
/// - parameter file: The file containing the check directives. Defaults to the
///   file containing the call to `fileCheckOutput`.
/// - parameter options: Optional flags that modify the behavior of the check.
/// - parameter block: The block whose output to the given stream is verified.
/// - returns: `true` when every check directive was satisfied. `false` when a
///   prefix is invalid, the captured output is empty (and `.allowEmptyInput`
///   is not set), the check file cannot be read, no usable check directives
///   were found, or a check failed.
public func fileCheckOutput(of FD : FileCheckFD = .stdout, withPrefixes prefixes : [String] = ["CHECK"], against file : String = #file, options: FileCheckOptions = [], block : () -> ()) -> Bool {
  guard let validPrefixes = validateCheckPrefixes(prefixes) else {
    print("Supplied check-prefix is invalid! Prefixes must be unique and " +
          "start with a letter and contain only alphanumeric characters, " +
          "hyphens and underscores")
    return false
  }
  guard let PrefixRE = try? NSRegularExpression(pattern: validPrefixes.joined(separator: "|"), options: []) else {
    print("Unable to combine check-prefix strings into a prefix regular " +
          "expression! This is likely a bug in FileCheck's verification of " +
          "the check-prefix strings. Regular expression parsing failed.")
    return false
  }

  let input = overrideFDAndCollectOutput(file: FD, of: block)
  if input.isEmpty && !options.contains(.allowEmptyInput) {
    print("FileCheck error: input from file descriptor \(FD) is empty.\n")
    return false
  }

  guard let contents = try? String(contentsOfFile: file, encoding: .utf8) else {
    print("FileCheck error: unable to read check file '\(file)'.")
    return false
  }
  guard let cString = contents.cString(using: .utf8) else {
    return false
  }

  // The parsed check strings keep raw pointers into this buffer for their
  // diagnostics, so the verification has to run while the buffer is still
  // alive, i.e. inside the closure.
  return cString.withUnsafeBufferPointer { buffer -> Bool in
    let checkStrings = readCheckStrings(in: buffer, withPrefixes: validPrefixes, options: options, PrefixRE)
    // An empty result is how `readCheckStrings` reports a failure (it has
    // already printed the reason); it must not be treated as "all checks
    // passed".
    if checkStrings.isEmpty {
      return false
    }
    return check(input: input, against: checkStrings)
  }
}
/// Redirects the given stream into a temporary file while `block` runs and
/// returns everything that was written to it.
///
/// The stream is flushed before and after the block so that only the block's
/// own output is collected, and the original descriptor is restored before
/// returning.
///
/// - parameter file: The stream to redirect.
/// - parameter block: The code whose output should be collected.
/// - returns: The captured output decoded as UTF-8, or an empty string when
///   the redirection could not be set up (in which case `block` is not run)
///   or the captured bytes could not be read back.
private func overrideFDAndCollectOutput(file : FileCheckFD, of block : () -> ()) -> String {
  fflush(file.filePtr)

  // Without a saved copy of the descriptor the stream could never be
  // restored, so give up before redirecting anything.
  let oldFd = dup(file.fileno)
  guard oldFd != -1 else {
    return ""
  }

  // `mkstemp` rewrites the template in place, so it needs a mutable,
  // NUL-terminated copy of the path.
  let template = URL(fileURLWithPath: NSTemporaryDirectory()).appendingPathComponent("output.XXXXXX")
  var path : [CChar] = template.withUnsafeFileSystemRepresentation { rep -> [CChar] in
    guard let rep = rep else {
      return []
    }
    return Array(UnsafeBufferPointer(start: rep, count: strlen(rep) + 1))
  }
  guard !path.isEmpty else {
    close(oldFd)
    return ""
  }

  let newFd = mkstemp(&path)
  guard newFd != -1 else {
    close(oldFd)
    return ""
  }
  // The capture file is only needed until its contents have been read back.
  defer { unlink(path) }

  dup2(newFd, file.fileno)
  block()
  close(newFd)
  // Push anything still sitting in the stdio buffer into the capture file
  // before the original descriptor is put back.
  fflush(file.filePtr)
  dup2(oldFd, file.fileno)
  close(oldFd)

  let url = URL(fileURLWithFileSystemRepresentation: path, isDirectory: false, relativeTo: nil)
  guard let captured = try? String(contentsOf: url, encoding: .utf8) else {
    return ""
  }
  return captured
}
/// Validates the user-supplied check prefixes.
///
/// A prefix is accepted when it is non-empty and consists solely of ASCII
/// letters, digits, hyphens and underscores.
///
/// - parameter prefixes: The prefixes to validate.
/// - returns: The distinct prefixes, longest first (ties in lexicographic
///   order), or `nil` if any prefix is invalid. Longer prefixes come first so
///   that, when the result is joined into a regular-expression alternation, a
///   short prefix cannot shadow a longer prefix that begins with it.
func validateCheckPrefixes(_ prefixes : [String]) -> [String]? {
  let validator = try! NSRegularExpression(pattern: "^[a-zA-Z0-9_-]*$", options: [])
  var seen = Set<String>()
  var unique = [String]()
  for prefix in prefixes {
    // Reject empty prefixes.
    if prefix.isEmpty {
      return nil
    }
    // NSRange is measured in UTF-16 code units, not in Characters.
    let range = NSRange(location: 0, length: prefix.utf16.count)
    guard let match = validator.firstMatch(in: prefix, options: [], range: range) else {
      return nil
    }
    // `$` also matches just before a trailing line terminator, so insist that
    // the match really covers the whole prefix.
    if match.range.length != range.length {
      return nil
    }
    if seen.insert(prefix).inserted {
      unique.append(prefix)
    }
  }
  return unique.sorted { lhs, rhs in
    let lhsLength = lhs.utf16.count
    let rhsLength = rhs.utf16.count
    if lhsLength != rhsLength {
      return lhsLength > rhsLength
    }
    return lhs < rhs
  }
}
extension CChar {
  /// Whether this byte can be part of a check-prefix-like word: an ASCII
  /// letter, an ASCII digit, "-" or "_".
  fileprivate var isPartOfWord : Bool {
    // Classify the unsigned byte value. Bytes at or above 0x80 (pieces of
    // multi-byte UTF-8 sequences) are negative when CChar is signed, and
    // negative values must not be handed to the C character classification
    // functions.
    switch Int32(self) & 0xFF {
    case 0x30...0x39, 0x41...0x5A, 0x61...0x7A: // 0-9, A-Z, a-z
      return true
    case 0x2D, 0x5F: // "-" and "_"
      return true
    default:
      return false
    }
  }
}
extension Character {
  /// The first UTF-8 code unit of this character, as a C `char`.
  ///
  /// For ASCII characters this is the character's code; for any other
  /// character it is only the lead byte of its UTF-8 encoding.
  var utf8CodePoint : CChar {
    // `utf8CString` always ends with a NUL terminator, so index 0 exists.
    return String(self).utf8CString[0]
  }

  /// Whether this character can be part of a check-prefix-like word: an ASCII
  /// letter, an ASCII digit, "-" or "_".
  fileprivate var isPartOfWord : Bool {
    // Classify the unsigned byte value; the lead byte of a non-ASCII
    // character is negative when CChar is signed and must not be handed to
    // the C character classification functions.
    switch Int32(self.utf8CodePoint) & 0xFF {
    case 0x30...0x39, 0x41...0x5A, 0x61...0x7A: // 0-9, A-Z, a-z
      return true
    default:
      return self == "-" || self == "_"
    }
  }
}
/// Classifies the check directive that starts at the beginning of `buf`.
///
/// - parameter buf: A buffer positioned on the first byte of a matched prefix.
/// - parameter prefix: The prefix that was matched at the start of `buf`.
/// - returns: `.plain` when the prefix is directly followed by ":", the
///   matching type when it is followed by "-" and a known suffix, `.badNot`
///   for an unsupported combination involving NOT, and `.none` otherwise.
private func findCheckType(in buf : UnsafeBufferPointer<CChar>, with prefix : String) -> CheckType {
  let prefixLength = prefix.utf8.count
  let follower = buf[prefixLength]

  // A bare prefix must be followed directly by ":".
  if follower == (":" as Character).utf8CodePoint {
    return .plain
  }
  // Anything other than "-" cannot introduce a suffix.
  guard follower == ("-" as Character).utf8CodePoint else {
    return .none
  }

  // View everything after the "-" as text so the suffix can be inspected.
  let suffixStart = prefixLength + 1
  let rest = String(
    bytesNoCopy: UnsafeMutableRawPointer(
      mutating: buf.baseAddress!.advanced(by: suffixStart)
    ),
    length: buf.count - suffixStart,
    encoding: .utf8,
    freeWhenDone: false
  )!

  let knownSuffixes : [(String, CheckType)] = [
    ("NEXT:", .next),
    ("SAME:", .same),
    ("NOT:", .not),
    ("DAG:", .dag),
    ("LABEL:", .label),
  ]
  for (suffix, type) in knownSuffixes where rest.hasPrefix(suffix) {
    return type
  }

  // NOT cannot be combined with another suffix.
  let badNotPrefixes = [
    "DAG-NOT:",
    "NOT-DAG:",
    "NEXT-NOT:",
    "NOT-NEXT:",
    "SAME-NOT:",
    "NOT-SAME:",
  ]
  for combo in badNotPrefixes where rest.hasPrefix(combo) {
    return .badNot
  }
  return .none
}
extension UnsafeBufferPointer {
  /// Returns a view of `size` elements that begins `start` elements into this
  /// buffer. The requested range must lie within the buffer.
  fileprivate func substr(_ start : Int, _ size : Int) -> UnsafeBufferPointer<Element> {
    precondition(start >= 0 && size >= 0 && start <= self.count - size, "substr range is out of bounds")
    guard let base = self.baseAddress else {
      // Only an empty buffer has no base address; its only slice is empty.
      return UnsafeBufferPointer<Element>(start: nil, count: 0)
    }
    return UnsafeBufferPointer<Element>(start: base.advanced(by: start), count: size)
  }

  /// Returns a view of this buffer without its first `n` elements. Dropping
  /// every element is allowed and yields an empty buffer.
  fileprivate func dropFront(_ n : Int) -> UnsafeBufferPointer<Element> {
    precondition(n >= 0 && n <= self.count, "cannot drop more elements than the buffer holds")
    guard let base = self.baseAddress else {
      return self
    }
    return UnsafeBufferPointer<Element>(start: base.advanced(by: n), count: self.count - n)
  }
}
/// Decodes a byte range of `buffer` as UTF-8 text.
///
/// The bytes are copied, so the returned string does not depend on the
/// buffer's lifetime. If the range does not hold valid UTF-8 (for example
/// because it cuts a multi-byte sequence in half) the invalid bytes are
/// replaced rather than trapping.
///
/// - parameter buffer: The bytes to read from.
/// - parameter range: The byte range to decode; it must lie within `buffer`.
/// - returns: The decoded text.
func substring(in buffer : UnsafeBufferPointer<CChar>, with range : NSRange) -> String {
  precondition(range.location >= 0 && range.length >= 0, "substring range must not be negative")
  precondition(range.location <= buffer.count && range.length <= buffer.count - range.location)
  guard range.length > 0, let base = buffer.baseAddress else {
    return ""
  }
  let data = Data(bytes: UnsafeRawPointer(base.advanced(by: range.location)), count: range.length)
  if let text = String(data: data, encoding: .utf8) {
    return text
  }
  // Lossy fallback: repair invalid sequences instead of crashing.
  var bytes = [UInt8](data)
  bytes.append(0)
  return String.decodeCString(bytes, as: UTF8.self, repairingInvalidCodeUnits: true)?.result ?? ""
}
/// Scans `inbuffer` for the first usable check directive.
///
/// - parameter inbuffer: The UTF-8 bytes of the check file still to be
///   scanned.
/// - parameter prefixes: The valid check prefixes (not consulted directly;
///   `RE` already encodes them).
/// - parameter RE: A regular expression matching any of the check prefixes.
/// - parameter startLine: The line number of the first byte of `inbuffer`.
/// - returns: The matched prefix, the directive's type, the line number the
///   prefix was found on, and the buffer positioned at the start of the
///   prefix. When nothing is found the prefix is empty and the type is
///   `.none`.
private func findFirstMatch(in inbuffer : UnsafeBufferPointer<CChar>, among prefixes : [String], with RE : NSRegularExpression, startingAt startLine: Int) -> (String, CheckType, Int, UnsafeBufferPointer<CChar>) {
  var lineNumber = startLine
  var buffer = inbuffer
  while !buffer.isEmpty {
    guard let str = String(bytesNoCopy: UnsafeMutableRawPointer(mutating: buffer.baseAddress!), length: buffer.count, encoding: .utf8, freeWhenDone: false) else {
      return ("", .none, lineNumber, buffer)
    }

    // The regular expression engine reports positions in UTF-16 code units,
    // so slice through an NSString view and convert to a byte offset via the
    // UTF-8 length of the skipped text. Treating those positions as Character
    // or byte offsets is only correct for pure ASCII input.
    let utf16Text = NSString(string: str)
    let searchRange = NSRange(location: 0, length: utf16Text.length)
    guard let prefix = RE.firstMatch(in: str, options: [], range: searchRange), prefix.range.length > 0 else {
      // No match at all, or an empty match that could never make progress.
      return ("", .none, lineNumber, buffer)
    }
    let skippedPrefix = utf16Text.substring(to: prefix.range.location)
    let prefixStr = utf16Text.substring(with: prefix.range)

    buffer = buffer.dropFront(skippedPrefix.utf8.count)
    // Count line feeds scalar by scalar: a "\r\n" pair is a single Character
    // that does not compare equal to "\n".
    lineNumber += skippedPrefix.unicodeScalars.filter({ c in c == "\n" }).count

    // Check that the matched prefix isn't a suffix of some other check-like
    // word.
    // FIXME: This is a very ad-hoc check. it would be better handled in some
    // other way. Among other things it seems hard to distinguish between
    // intentional and unintentional uses of this feature.
    let followsWordCharacter : Bool
    if let last = skippedPrefix.unicodeScalars.last {
      followsWordCharacter = Character(last).isPartOfWord
    } else {
      followsWordCharacter = false
    }
    if !followsWordCharacter {
      // Now extract the type.
      let checkTy = findCheckType(in: buffer, with: prefixStr)
      // If we've found a valid check type for this prefix, we're done.
      if checkTy != .none {
        return (prefixStr, checkTy, lineNumber, buffer)
      }
    }

    // If we didn't successfully find a prefix, we need to skip this invalid
    // prefix and continue scanning. We directly skip the prefix that was
    // matched and any additional parts of that check-like word.
    var loc = prefixStr.utf8.count
    while loc < buffer.count && buffer[loc].isPartOfWord {
      loc += 1
    }
    if loc >= buffer.count {
      // The word runs to the very end of the buffer; nothing left to scan.
      return ("", .none, lineNumber, buffer)
    }
    buffer = buffer.dropFront(loc)
  }
  return ("", .none, lineNumber, buffer)
}
/// Parses every check directive in the check file.
///
/// - parameter buf: The UTF-8 bytes of the check file.
/// - parameter prefixes: The valid check prefixes.
/// - parameter options: Flags that modify parsing.
/// - parameter RE: A regular expression matching any of the check prefixes.
/// - returns: The parsed check strings in file order, or an empty array when
///   a directive is malformed or no directive was found (a diagnostic is
///   emitted in both cases).
private func readCheckStrings(in buf : UnsafeBufferPointer<CChar>, withPrefixes prefixes : [String], options: FileCheckOptions, _ RE : NSRegularExpression) -> [CheckString] {
  let space = (" " as Character).utf8CodePoint
  let tab = ("\t" as Character).utf8CodePoint
  let lineFeed = ("\n" as Character).utf8CodePoint
  let carriageReturn = ("\r" as Character).utf8CodePoint

  // Keeps track of the line on which CheckPrefix instances are found.
  var lineNumber = 1
  // std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks
  // NOTE(review): these patterns are collected but never attached to a check
  // string below (the corresponding swaps are still commented out).
  var dagNotMatches = [Pattern]()
  var contents = [CheckString]()
  var buffer = buf
  while true {
    // See if a prefix occurs in the memory buffer.
    let (usedPrefix, checkTy, ln, newBuffer) = findFirstMatch(in: buffer, among: prefixes, with: RE, startingAt: lineNumber)
    if usedPrefix.isEmpty {
      break
    }
    lineNumber = ln

    // Skip past the prefix and its suffix.
    buffer = newBuffer.dropFront(usedPrefix.utf8.count + checkTy.size)

    // Complain about useful-looking but unsupported suffixes.
    if checkTy == .badNot {
      let loc = CheckLoc.inBuffer(buffer.baseAddress!, buf)
      diagnose(.error, loc, "unsupported -NOT combo on prefix '\(usedPrefix)'")
      return []
    }

    // Okay, we found the prefix, yay. Remember the rest of the line, but
    // ignore leading whitespace.
    if !options.contains(.strictWhitespace) || !options.contains(.matchFullLines) {
      guard let idx = buffer.index(where: { c in c != space && c != tab }) else {
        return []
      }
      buffer = buffer.dropFront(idx)
    }

    // Scan ahead to the end of line: the first "\n" or "\r", whichever comes
    // first. A final line without a line terminator ends at the buffer's NUL
    // terminator, or at the end of the buffer if there is none.
    let EOL : Int = buffer.index(where: { c in c == lineFeed || c == carriageReturn })
      ?? buffer.index(of: 0)
      ?? buffer.count

    // Remember the location of the start of the pattern, for diagnostics.
    let patternLoc = CheckLoc.inBuffer(buffer.baseAddress!, buf)

    // Parse the pattern.
    let pat : Pattern = Pattern(checking: checkTy)
    let subBuffer = UnsafeBufferPointer<CChar>(start: buffer.baseAddress, count: EOL)
    if pat.parse(in: buf, pattern: subBuffer, withPrefix: usedPrefix, at: lineNumber, options: options) {
      return []
    }

    // Verify that CHECK-LABEL lines do not define or use variables
    if (checkTy == .label) && pat.hasVariable {
      diagnose(.error, patternLoc, "found '\(usedPrefix)-LABEL:' with variable definition or use")
      return []
    }

    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    if (checkTy == .next || checkTy == .same) && contents.isEmpty {
      let type = (checkTy == .next) ? "NEXT" : "SAME"
      let loc = CheckLoc.inBuffer(buffer.baseAddress!, buf)
      diagnose(.error, loc, "found '\(usedPrefix)-\(type)' without previous '\(usedPrefix):' line")
      return []
    }

    buffer = UnsafeBufferPointer<CChar>(
      start: buffer.baseAddress!.advanced(by: EOL),
      count: buffer.count - EOL
    )

    // Handle CHECK-DAG/-NOT.
    if checkTy == .dag || checkTy == .not {
      dagNotMatches.append(pat)
      continue
    }

    // Okay, add the string we captured to the output vector and move on.
    contents.append(CheckString(pattern: pat, prefix: usedPrefix, loc: patternLoc))
    // std::swap(DagNotMatches, CheckStrings.back().DagNotStrings)
    // DagNotMatches = ImplicitNegativeChecks
  }

  // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
  // prefix as a filler for the error message.
  // if !DagNotMatches.isEmpty {
  //   CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
  //   SMLoc::getFromPointer(Buffer.data()))
  //   std::swap(DagNotMatches, CheckStrings.back().DagNotStrings)
  // }

  if contents.isEmpty {
    // Singular or plural depends on how many prefixes were searched for.
    print("error: no check strings found with prefix\(prefixes.count == 1 ? " " : "es ")")
    for prefix in prefixes {
      print("\(prefix):")
    }
    return []
  }
  return contents
}
/// A reference-typed string-to-string table, so that a single set of
/// variable bindings can be shared and updated by everything it is passed to.
private final class BoxedTable {
  /// The underlying bindings.
  var table : [String:String] = [:]

  init() {}

  /// Reads or writes the binding for `i`. Assigning `nil` removes the binding.
  subscript(_ i : String) -> String? {
    set {
      self.table[i] = newValue
    }
    get {
      return self.table[i]
    }
  }
}
/// Checks the collected input against the check strings read from the check
/// file.
///
/// The input is processed in regions delimited by CHECK-LABEL matches: the
/// labels are located first, then every check string belonging to a region
/// is matched, in order, within that region.
///
/// - parameter b: The input to verify.
/// - parameter checkStrings: The check strings, in file order.
/// - returns: `false` if the input fails to satisfy the checks.
private func check(input b : String, against checkStrings : [CheckString]) -> Bool {
  var buffer = b
  var failedChecks = false
  // This holds all the current filecheck variables.
  let variableTable = BoxedTable()
  var i = 0
  var j = 0
  let e = checkStrings.count
  while true {
    var checkRegion : String
    if j == e {
      checkRegion = buffer
    } else {
      let checkStr = checkStrings[j]
      if checkStr.pattern.type != .label {
        j += 1
        continue
      }
      // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
      guard let (matchLabelPos, matchLabelLen) = checkStr.check(buffer, true, variableTable) else {
        // Immediately bail if CHECK-LABEL fails, nothing else we can do.
        return false
      }
      let regionEnd = buffer.index(buffer.startIndex, offsetBy: matchLabelPos + matchLabelLen)
      checkRegion = buffer.substring(to: regionEnd)
      buffer = buffer.substring(from: regionEnd)
      j += 1
    }

    while i != j {
      // Check each string within the scanned region, including a second check
      // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
      guard let (matchPos, matchLen) = checkStrings[i].check(checkRegion, false, variableTable) else {
        failedChecks = true
        // Abandon the rest of this region and resume with the next one. The
        // index must land exactly on `j`; advancing past it would skip the
        // first check string of the following region.
        i = j
        break
      }
      checkRegion = checkRegion.substring(from: checkRegion.index(checkRegion.startIndex, offsetBy: matchPos + matchLen))
      i += 1
    }

    if j == e {
      break
    }
  }
  // Success if no checks failed.
  return !failedChecks
}
/// A location used when emitting diagnostics.
private enum CheckLoc {
  /// A position inside a buffer: the pointer of interest and the buffer that
  /// contains it. The buffer must still be alive whenever `message` is read.
  case inBuffer(UnsafePointer<CChar>, UnsafeBufferPointer<CChar>)
  /// A location that is described directly by a string.
  case string(String)

  /// The text to show for this location. For a buffer position this is the
  /// line containing the position, without its line terminator.
  var message : String {
    switch self {
    case let .inBuffer(ptr, buf):
      let newline = ("\n" as Character).utf8CodePoint
      let bufferStart = buf.baseAddress!
      let bufferEnd = bufferStart.advanced(by: buf.count)

      // Walk back to the byte just after the previous newline.
      var startPtr = ptr
      while startPtr != bufferStart && startPtr.predecessor().pointee != newline {
        startPtr = startPtr.predecessor()
      }

      // Walk forward to the newline ending this line. The pointer is compared
      // against the end before it is dereferenced so nothing past the buffer
      // is ever read; a NUL terminator also ends the line.
      var endPtr = ptr
      while endPtr != bufferEnd && endPtr.pointee != newline && endPtr.pointee != 0 {
        endPtr = endPtr.successor()
      }
      return substring(in: buf, with: NSMakeRange(bufferStart.distance(to: startPtr), startPtr.distance(to: endPtr)))
    case let .string(s):
      return s
    }
  }
}
/// The kinds of check directive a prefix can introduce.
enum CheckType {
  /// No directive was recognized.
  case none
  /// The bare prefix followed by a colon.
  case plain
  /// The `-NEXT` suffix.
  case next
  /// The `-SAME` suffix.
  case same
  /// The `-NOT` suffix.
  case not
  /// The `-DAG` suffix.
  case dag
  /// The `-LABEL` suffix.
  case label
  /// An unsupported combination of `-NOT` with another suffix.
  case badNot
  /// A pattern of this type only matches the end of file. This is used for
  /// trailing CHECK-NOTs.
  case EOF

  /// The number of UTF-8 bytes that follow the prefix for this directive
  /// (the suffix including its colon). Must not be asked of `.EOF`.
  var size : Int {
    let suffix : String
    switch self {
    case .none, .badNot:
      return 0
    case .EOF:
      fatalError("Should not be using EOF size")
    case .plain:
      suffix = ":"
    case .next:
      suffix = "-NEXT:"
    case .same:
      suffix = "-SAME:"
    case .not:
      suffix = "-NOT:"
    case .dag:
      suffix = "-DAG:"
    case .label:
      suffix = "-LABEL:"
    }
    return suffix.utf8.count
  }
}
/// A single parsed check pattern: either a fixed string, or a regular
/// expression assembled from fixed text, `{{regex}}` pieces and
/// `[[variable]]` uses and definitions.
private class Pattern {
  /// Where this pattern was found, for diagnostics.
  var patternLoc : CheckLoc = CheckLoc.string("")
  /// The kind of check directive this pattern belongs to.
  let type : CheckType
  /// If non-empty, this pattern is a fixed string match with the specified
  /// fixed string.
  var fixedString : String = ""
  /// If non-empty, this is a regex pattern.
  var regExPattern : String = ""
  /// The number of the line this pattern is on.
  var lineNumber : Int = 0
  /// Entries in this vector map to uses of a variable in the pattern, e.g.
  /// "foo[[bar]]baz". In this case, the regExPattern will contain "foobaz"
  /// and we'll get an entry in this vector that tells us to insert the value
  /// of bar at offset 3. Offsets count Unicode scalars of `regExPattern`.
  var variableUses : Array<(String, Int)> = []
  /// Maps definitions of variables to their parenthesized capture numbers.
  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
  var variableDefs : Dictionary<String, Int> = [:]

  /// Whether this pattern defines or uses any variable.
  var hasVariable : Bool {
    return !(variableUses.isEmpty && self.variableDefs.isEmpty)
  }

  init(checking ty : CheckType) {
    self.type = ty
  }

  /// The number of Characters in `s`.
  private func characterCount(_ s : String) -> Int {
    return s.distance(from: s.startIndex, to: s.endIndex)
  }

  /// Appends a numbered back-reference (`\1` ... `\9`) to the regex.
  private func addBackrefToRegEx(_ backRef : Int) {
    assert(backRef >= 1 && backRef <= 9, "Invalid backref number")
    let Backref = "\\\(backRef)"
    self.regExPattern += Backref
  }

  /// Evaluates a `@LINE`, `@LINE+n` or `@LINE-n` expression relative to the
  /// line this pattern is on.
  ///
  /// - returns: Returns a value on success or nil on a syntax error.
  private func evaluateExpression(_ e : String) -> String? {
    var expr = e
    // The only supported expression is @LINE([\+-]\d+)?
    if !expr.hasPrefix("@LINE") {
      return nil
    }
    expr = expr.substring(from: expr.index(expr.startIndex, offsetBy: "@LINE".utf8.count))
    guard let firstC = expr.unicodeScalars.first else {
      return "\(self.lineNumber)"
    }
    if firstC == "+" {
      expr = expr.substring(from: expr.index(after: expr.startIndex))
    } else if firstC != "-" {
      return nil
    }
    guard let offset = Int(expr, radix: 10) else {
      return nil
    }
    return "\(self.lineNumber + offset)"
  }

  /// Matches the pattern string against the input buffer.
  ///
  /// The variable table provides the current values of filecheck variables
  /// and is updated if this match defines new values.
  ///
  /// - returns: The position of the match and its length, both counted in
  ///   Characters of `buffer`, or `nil` if there is no match.
  func match(_ buffer : String, _ variableTable : BoxedTable) -> (Int, Int)? {
    // If this is the EOF pattern, match it immediately.
    if self.type == .EOF {
      return (characterCount(buffer), 0)
    }

    // If this is a fixed string pattern, just match it now.
    if !self.fixedString.isEmpty {
      guard let found = buffer.range(of: self.fixedString) else {
        return nil
      }
      return (
        buffer.distance(from: buffer.startIndex, to: found.lowerBound),
        buffer.distance(from: found.lowerBound, to: found.upperBound)
      )
    }

    // Regex match.
    // If there are variable uses, we need to create a temporary string with the
    // actual value.
    var regExToMatch = self.regExPattern
    if !self.variableUses.isEmpty {
      let template = self.regExPattern.unicodeScalars
      var expanded = String.UnicodeScalarView()
      var cursor = template.startIndex
      var consumed = 0
      for (v, offset) in self.variableUses {
        let value : String
        if let c = v.unicodeScalars.first, c == "@" {
          guard let evaluated = self.evaluateExpression(v) else {
            return nil
          }
          value = evaluated
        } else {
          guard let val = variableTable[v] else {
            return nil
          }
          // Look up the value and escape it so that we can put it into the regex.
          value = NSRegularExpression.escapedPattern(for: val)
        }
        // Copy the template up to the recorded offset, then plop the value in.
        let next = template.index(cursor, offsetBy: offset - consumed)
        expanded.append(contentsOf: template[cursor..<next])
        expanded.append(contentsOf: value.unicodeScalars)
        cursor = next
        consumed = offset
      }
      expanded.append(contentsOf: template[cursor..<template.endIndex])
      regExToMatch = String(expanded)
    }

    // Match the newly constructed regex. Anchors apply per line, which is
    // what full-line matching relies on.
    guard let r = try? NSRegularExpression(pattern: regExToMatch, options: [.anchorsMatchLines]) else {
      return nil
    }
    // Regex ranges are expressed in UTF-16 code units.
    let utf16Buffer = NSString(string: buffer)
    let wholeBuffer = NSRange(location: 0, length: utf16Buffer.length)
    guard let fullMatch = r.firstMatch(in: buffer, options: [], range: wholeBuffer) else {
      // No match is an ordinary check failure, not a programming error.
      return nil
    }

    // If this defines any variables, remember their values.
    for (v, index) in self.variableDefs {
      assert(index < fullMatch.numberOfRanges, "Internal paren error")
      #if swift(>=4.0) || os(Linux)
        let r = fullMatch.range(at: index)
      #else
        let r = fullMatch.rangeAt(index)
      #endif
      // A group that did not take part in the match has no text to record.
      if r.location == NSNotFound {
        continue
      }
      variableTable[v] = utf16Buffer.substring(with: r)
    }

    // Report the match in Characters, the unit callers use to slice `buffer`.
    let matchPos = characterCount(utf16Buffer.substring(to: fullMatch.range.location))
    let matchLen = characterCount(utf16Buffer.substring(with: fullMatch.range))
    return (matchPos, matchLen)
  }

  /// Finds the closing sequence of a regex variable usage or definition.
  ///
  /// `regVar` has to begin right after the opening sequence. Returns the
  /// index of the closing sequence within `regVar`, or `nil` if it was not
  /// found.
  private func findRegexVarEnd(_ regVar : String) -> String.Index? {
    var string = regVar
    // Offset keeps track of the current offset within the input Str
    var offset = regVar.startIndex
    // [...] Nesting depth
    var bracketDepth = 0

    while !string.isEmpty {
      let firstChar = string[string.startIndex]
      if string.hasPrefix("]]") && bracketDepth == 0 {
        return offset
      }
      if firstChar == "\\" {
        // Backslash escapes the next char within regexes, so skip them both.
        // A lone trailing backslash has nothing left to escape.
        guard let next = string.index(string.startIndex, offsetBy: 2, limitedBy: string.endIndex) else {
          return nil
        }
        string = string.substring(from: next)
        offset = regVar.index(offset, offsetBy: 2)
      } else {
        switch firstChar {
        case "[":
          bracketDepth += 1
        case "]":
          if bracketDepth == 0 {
            diagnose(.error, .string(regVar), "missing closing \"]\" for regex variable")
            return nil
          }
          bracketDepth -= 1
        default:
          break
        }
        string = string.substring(from: string.index(after: string.startIndex))
        offset = regVar.index(after: offset)
      }
    }
    return nil
  }

  /// Validates `RS` as a regular expression and appends it to the pattern.
  ///
  /// - returns: `true` on error together with the unchanged paren count, or
  ///   `false` together with the paren count advanced by the number of
  ///   capture groups in `RS`.
  private func addRegExToRegEx(_ RS : String, _ cur : Int) -> (Bool, Int) {
    do {
      let r = try NSRegularExpression(pattern: RS, options: [])
      self.regExPattern += RS
      return (false, cur + r.numberOfCaptureGroups)
    } catch let e {
      diagnose(.error, self.patternLoc, "invalid regex: \(e)")
      return (true, cur)
    }
  }

  /// Parses the given string into the Pattern.
  ///
  /// - parameter buf: The whole check file, used for diagnostics.
  /// - parameter pattern: The bytes of the pattern text.
  /// - parameter prefix: The prefix that introduced the pattern.
  /// - parameter lineNumber: The line of the check file the pattern is on.
  /// - parameter options: Flags that modify parsing.
  /// - returns: `true` in case of an error, `false` otherwise.
  func parse(in buf : UnsafeBufferPointer<CChar>, pattern : UnsafeBufferPointer<CChar>, withPrefix prefix : String, at lineNumber : Int, options: FileCheckOptions) -> Bool {
    // The smaller of two optional indices, ignoring missing ones.
    func mino(_ l : String.Index?, _ r : String.Index?) -> String.Index? {
      guard let l = l else {
        return r
      }
      guard let r = r else {
        return l
      }
      return min(l, r)
    }

    self.lineNumber = lineNumber
    var patternStr = substring(in: pattern, with: NSRange(location: 0, length: pattern.count))
    self.patternLoc = CheckLoc.inBuffer(pattern.baseAddress!, buf)

    // Check that there is something on the line.
    if patternStr.isEmpty {
      diagnose(.error, self.patternLoc, "found empty check string with prefix '\(prefix):'")
      return true
    }

    // Check to see if this is a fixed string, or if it has regex pieces.
    if !options.contains(.matchFullLines) &&
      (patternStr.utf8.count < 2 ||
        (patternStr.range(of: "{{") == nil
          &&
          patternStr.range(of: "[[") == nil))
    {
      self.fixedString = patternStr
      return false
    }

    if options.contains(.matchFullLines) {
      regExPattern += "^"
      if !options.contains(.strictWhitespace) {
        regExPattern += " *"
      }
    }

    // Paren value #0 is for the fully matched string. Any new
    // parenthesized values add from there.
    var curParen = 1

    // Otherwise, there is at least one regex piece. Build up the regex pattern
    // by escaping scary characters in fixed strings, building up one big regex.
    while !patternStr.isEmpty {
      // RegEx matches.
      if patternStr.range(of: "{{")?.lowerBound == patternStr.startIndex {
        // This is the start of a regex match. Scan for the }}.
        guard let End = patternStr.range(of: "}}") else {
          let loc = CheckLoc.inBuffer(pattern.baseAddress!, buf)
          diagnose(.error, loc, "found start of regex string with no end '}}'")
          return true
        }

        // Enclose {{}} patterns in parens just like [[]] even though we're not
        // capturing the result for any purpose. This is required in case the
        // expression contains an alternation like: CHECK: abc{{x|z}}def. We
        // want this to turn into: "abc(x|z)def" not "abcx|zdef".
        regExPattern += "("
        curParen += 1

        let substr = patternStr.substring(
          with: Range<String.Index>(
            uncheckedBounds: (
              patternStr.index(patternStr.startIndex, offsetBy: 2),
              End.lowerBound
            )
          )
        )
        let (res, paren) = self.addRegExToRegEx(substr, curParen)
        curParen = paren
        if res {
          return true
        }
        regExPattern += ")"

        patternStr = patternStr.substring(from: End.upperBound)
        continue
      }

      // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
      // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
      // second form is [[foo]] which is a reference to foo. The variable name
      // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
      // it. This is to catch some common errors.
      if patternStr.hasPrefix("[[") {
        // Find the closing bracket pair ending the match. End is going to be an
        // offset relative to the beginning of the match string.
        let regVar = patternStr.substring(from: patternStr.index(patternStr.startIndex, offsetBy: 2))
        guard let end = self.findRegexVarEnd(regVar) else {
          let loc = CheckLoc.inBuffer(pattern.baseAddress!, buf)
          diagnose(.error, loc, "invalid named regex reference, no ]] found")
          return true
        }

        let matchStr = regVar.substring(to: end)
        // `end` is an index into `regVar`, so translate it into a distance
        // before applying it to `patternStr`: skip "[[", the body and "]]".
        let bodyLength = regVar.distance(from: regVar.startIndex, to: end)
        patternStr = patternStr.substring(from: patternStr.index(patternStr.startIndex, offsetBy: bodyLength + 4))

        // Get the regex name (e.g. "foo").
        let nameEnd = matchStr.range(of: ":")
        let name : String
        if let end = nameEnd?.lowerBound {
          name = matchStr.substring(to: end)
        } else {
          name = matchStr
        }

        if name.isEmpty {
          let loc = CheckLoc.inBuffer(pattern.baseAddress!, buf)
          diagnose(.error, loc, "invalid name in named regex: empty name")
          return true
        }

        // Verify that the name/expression is well formed. FileCheck currently
        // supports @LINE, @LINE+number, @LINE-number expressions. The check here
        // is relaxed, more strict check is performed in \c EvaluateExpression.
        var isExpression = false
        let diagLoc = CheckLoc.inBuffer(pattern.baseAddress!, buf)
        for (i, c) in name.unicodeScalars.enumerated() {
          if i == 0 && c == "@" {
            // An expression can only be used, never defined.
            if nameEnd != nil {
              diagnose(.error, diagLoc, "invalid name in named regex definition")
              return true
            }
            isExpression = true
            continue
          }
          let isNameScalar = c == "_" || (c.isASCII && isalnum(Int32(c.value)) != 0)
          if !isNameScalar && (!isExpression || (c != "+" && c != "-")) {
            diagnose(.error, diagLoc, "invalid name in named regex")
            return true
          }
        }

        // Name can't start with a digit.
        if isdigit(Int32(name.utf8.first!)) != 0 {
          diagnose(.error, diagLoc, "invalid name in named regex")
          return true
        }

        // Handle [[foo]].
        guard let ne = nameEnd else {
          // Handle variables that were defined earlier on the same line by
          // emitting a backreference.
          if let varParenNum = self.variableDefs[name] {
            if varParenNum < 1 || varParenNum > 9 {
              diagnose(.error, diagLoc, "Can't back-reference more than 9 variables")
              return true
            }
            self.addBackrefToRegEx(varParenNum)
          } else {
            variableUses.append((name, regExPattern.unicodeScalars.count))
          }
          continue
        }

        // Handle [[foo:.*]].
        self.variableDefs[name] = curParen
        regExPattern += "("
        curParen += 1

        let (res, paren) = self.addRegExToRegEx(matchStr.substring(from: matchStr.index(after: ne.lowerBound)), curParen)
        curParen = paren
        if res {
          return true
        }

        regExPattern += ")"
      }

      // Handle fixed string matches.
      // Find the end, which is the start of the next regex; when there is no
      // further regex piece the fixed text runs to the end of the pattern and
      // still has to be added.
      let fixedMatchEnd = mino(patternStr.range(of: "{{")?.lowerBound, patternStr.range(of: "[[")?.lowerBound) ?? patternStr.endIndex
      self.regExPattern += NSRegularExpression.escapedPattern(for: patternStr.substring(to: fixedMatchEnd))
      patternStr = patternStr.substring(from: fixedMatchEnd)
    }

    if options.contains(.matchFullLines) {
      if !options.contains(.strictWhitespace) {
        regExPattern += " *"
      }
      // The end anchor is required whether or not whitespace is strict.
      regExPattern += "$"
    }

    return false
  }
}
/// Counts the line breaks in the given text.
///
/// "\n" and "\r" each count as a line break; a "\r\n" or "\n\r" pair counts
/// as a single one.
///
/// - parameter r: The text to scan.
/// - returns: The number of line breaks, and the index in `r` just past the
///   first line break (`nil` when there is none).
func countNumNewlinesBetween(_ r : String) -> (Int, String.Index?) {
  let scalars = r.unicodeScalars
  var numNewLines = 0
  var firstNewLine : String.Index? = nil
  var cursor = scalars.startIndex
  while cursor != scalars.endIndex {
    let current = scalars[cursor]
    var next = scalars.index(after: cursor)
    if current == "\n" || current == "\r" {
      // Handle \n\r and \r\n as a single newline.
      if next != scalars.endIndex {
        let following = scalars[next]
        if (following == "\n" || following == "\r") && following != current {
          next = scalars.index(after: next)
        }
      }
      numNewLines += 1
      if numNewLines == 1 {
        // Report the position in terms of the caller's string.
        firstNewLine = next.samePosition(in: r)
      }
    }
    cursor = next
  }
  return (numNewLines, firstNewLine)
}
/// CheckString - This is a check that we found in the input file, together
/// with the CHECK-DAG and CHECK-NOT patterns that were specified ahead of it.
private struct CheckString {
  /// Pat - The pattern to match.
  let pattern : Pattern
  /// Prefix - Which prefix name this check matched.
  let prefix : String
  /// Loc - The location in the match file that the check string was specified.
  let loc : CheckLoc
  /// DagNotStrings - The CHECK-DAG and CHECK-NOT patterns that apply between
  /// the previous match string (or start of file) and this one.
  let dagNotStrings : [Pattern]

  /// Creates a check string.
  ///
  /// `dagNotStrings` defaults to empty so call sites that only supply
  /// `pattern`, `prefix` and `loc` keep working. Previously the property was a
  /// `let` with an inline `[]` default, which made it impossible to populate
  /// and left the CHECK-DAG / CHECK-NOT handling below unreachable.
  init(pattern : Pattern, prefix : String, loc : CheckLoc, dagNotStrings : [Pattern] = []) {
    self.pattern = pattern
    self.prefix = prefix
    self.loc = loc
    self.dagNotStrings = dagNotStrings
  }

  /// Match check string and its "not strings" and/or "dag strings".
  ///
  /// - Parameters:
  ///   - buffer: The input text to search.
  ///   - isLabelScanMode: When true, only the pattern itself is matched; the
  ///     DAG, NEXT, SAME and NOT verification steps are skipped.
  ///   - variableTable: Forwarded unchanged to every `Pattern.match` call.
  /// - Returns: The match position (offset by the position reached while
  ///   matching CHECK-DAGs) and the match length, or `nil` if any step failed
  ///   (a diagnostic is printed in that case, except for a missing CHECK-DAG).
  func check(_ buffer : String, _ isLabelScanMode : Bool, _ variableTable : BoxedTable) -> (Int, Int)? {
    var lastPos = 0
    var pendingNots = [Pattern]()
    // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
    // bounds; we have not processed variable definitions within the bounded
    // block yet so cannot handle any final CHECK-DAG yet. This is handled when
    // going over the block again (including the last CHECK-LABEL) in normal
    // mode.
    if !isLabelScanMode {
      // Match "dag strings" (with mixed "not strings" if any).
      guard let dag = self.matchDAG(buffer, variableTable) else {
        return nil
      }
      lastPos = dag.lastPos
      pendingNots = dag.pendingNots
    }
    // Match itself from the last position after matching CHECK-DAG.
    let searchStart = buffer.index(buffer.startIndex, offsetBy: lastPos)
    let matchBuffer = buffer.substring(from: searchStart)
    guard let (matchPos, matchLen) = self.pattern.match(matchBuffer, variableTable) else {
      diagnose(.error, self.loc, self.prefix + ": could not find '\(self.pattern.fixedString)' in input")
      return nil
    }
    // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
    // or CHECK-NOT
    if !isLabelScanMode {
      // `matchPos` is relative to `matchBuffer`, which itself begins at
      // `lastPos`, so the match starts `matchPos` past `searchStart` rather
      // than `matchPos` past the start of `buffer`.
      // NOTE(review): offsets are treated as character offsets here, as in the
      // rest of this type - confirm against `Pattern.match`.
      let matchStart = buffer.index(searchStart, offsetBy: matchPos)
      let skippedRegion = buffer.substring(with: searchStart..<matchStart)
      let rest = buffer.substring(from: matchStart)
      // If this check is a "CHECK-NEXT", verify that the previous match was on
      // the previous line (i.e. that there is one newline between them).
      if self.checkNext(skippedRegion, rest) {
        return nil
      }
      // If this check is a "CHECK-SAME", verify that the previous match was on
      // the same line (i.e. that there is no newline between them).
      if self.checkSame(skippedRegion, rest) {
        return nil
      }
      // If this match had "not strings" left over after the last CHECK-DAG,
      // verify that they don't exist in the skipped region.
      if self.checkNot(skippedRegion, pendingNots, variableTable) {
        return nil
      }
    }
    return (lastPos + matchPos, matchLen)
  }

  /// Emits the pair of location notes shared by the CHECK-SAME and CHECK-NEXT
  /// diagnostics: one at the start of `rest`, one at the start of `buffer`.
  private func noteMatchLocations(_ buffer : String, _ rest : String) {
    rest.cString(using: .utf8)?.withUnsafeBufferPointer { buf in
      guard let base = buf.baseAddress else {
        return
      }
      diagnose(.note, CheckLoc.inBuffer(base, buf), "'next' match was here")
    }
    buffer.cString(using: .utf8)?.withUnsafeBufferPointer { buf in
      guard let base = buf.baseAddress else {
        return
      }
      diagnose(.note, CheckLoc.inBuffer(base, buf), "previous match ended here")
    }
  }

  /// Verify there is no newline in the given buffer.
  ///
  /// - Returns: `true` if this is a CHECK-SAME and the check FAILED (a
  ///   diagnostic has been printed); `false` otherwise.
  private func checkSame(_ buffer : String, _ rest : String) -> Bool {
    if self.pattern.type != .same {
      return false
    }
    // Count the number of newlines between the previous match and this one.
    let (numNewLines, _ /*firstNewLine*/) = countNumNewlinesBetween(buffer)
    if numNewLines == 0 {
      return false
    }
    diagnose(.error, self.loc, self.prefix + "-SAME: is not on the same line as the previous match")
    self.noteMatchLocations(buffer, rest)
    return true
  }

  /// Verify there is a single line in the given buffer.
  ///
  /// - Returns: `true` if this is a CHECK-NEXT and the check FAILED (a
  ///   diagnostic has been printed); `false` otherwise.
  private func checkNext(_ buffer : String, _ rest : String) -> Bool {
    if self.pattern.type != .next {
      return false
    }
    // Count the number of newlines between the previous match and this one.
    let (numNewLines, firstNewLine) = countNumNewlinesBetween(buffer)
    if numNewLines == 1 {
      return false
    }
    if numNewLines == 0 {
      diagnose(.error, self.loc, self.prefix + "-NEXT: is on the same line as previous match")
      self.noteMatchLocations(buffer, rest)
      return true
    }
    diagnose(.error, self.loc, self.prefix + "-NEXT: is not on the line after the previous match")
    self.noteMatchLocations(buffer, rest)
    if let fnl = firstNewLine {
      buffer.cString(using: .utf8)?.withUnsafeBufferPointer { buf in
        guard let base = buf.baseAddress else {
          return
        }
        // NOTE(review): this advances a UTF-8 byte pointer by a character
        // distance, which only lines up for single-byte text - confirm.
        let offset = buffer.distance(from: buffer.startIndex, to: fnl)
        let noteLoc = CheckLoc.inBuffer(base.advanced(by: offset), buf)
        diagnose(.note, noteLoc, "non-matching line after previous match is here")
      }
    }
    return true
  }

  /// Verify there's no "not strings" in the given buffer.
  ///
  /// - Returns: `true` if one of `notStrings` matched inside `buffer` (a
  ///   diagnostic has been printed); `false` if none matched.
  private func checkNot(_ buffer : String, _ notStrings : [Pattern], _ variableTable : BoxedTable) -> Bool {
    for pat in notStrings {
      assert(pat.type == .not, "Expect CHECK-NOT!")
      guard let (matchPos, _) = pat.match(buffer, variableTable) else {
        continue
      }
      buffer.cString(using: .utf8)?.withUnsafeBufferPointer { buf in
        guard let base = buf.baseAddress else {
          return
        }
        let loc = CheckLoc.inBuffer(base.advanced(by: matchPos), buf)
        diagnose(.error, loc, self.prefix + "-NOT: string occurred!")
      }
      diagnose(.note, pat.patternLoc, self.prefix + "-NOT: pattern specified here")
      return true
    }
    return false
  }

  /// Match "dag strings" and their mixed "not strings".
  ///
  /// - Returns: The farthest end position reached by any CHECK-DAG match (0 if
  ///   there are none), or `nil` if a CHECK-DAG failed to match, was reordered
  ///   across a CHECK-NOT, or an interleaved CHECK-NOT matched.
  func checkDAG(_ buffer : String, _ variableTable : BoxedTable) -> Int? {
    return self.matchDAG(buffer, variableTable)?.lastPos
  }

  /// Does the work of `checkDAG`, and additionally hands back the CHECK-NOT
  /// patterns that were seen after the final CHECK-DAG so that the caller can
  /// verify them against the region skipped before the main pattern.
  private func matchDAG(_ buffer : String, _ variableTable : BoxedTable) -> (lastPos : Int, pendingNots : [Pattern])? {
    if self.dagNotStrings.isEmpty {
      return (lastPos: 0, pendingNots: [])
    }
    var notStrings = [Pattern]()
    var lastPos = 0
    var startPos = lastPos
    for pat in self.dagNotStrings {
      assert((pat.type == .dag || pat.type == .not), "Invalid CHECK-DAG or CHECK-NOT!")
      if pat.type == .not {
        notStrings.append(pat)
        continue
      }
      assert((pat.type == .dag), "Expect CHECK-DAG!")
      // CHECK-DAG always matches from the start.
      let matchBuffer = buffer.substring(from: buffer.index(buffer.startIndex, offsetBy: startPos))
      // With a group of CHECK-DAGs, a single mismatching means the match on
      // that group of CHECK-DAGs fails immediately.
      guard let found = pat.match(matchBuffer, variableTable) else {
        return nil
      }
      // Re-calc the position as the offset relative to the start of the
      // original string.
      let matchPos = found.0 + startPos
      let matchLen = found.1
      if !notStrings.isEmpty {
        if matchPos < lastPos {
          // Reordered?
          buffer.cString(using: .utf8)?.withUnsafeBufferPointer { buf in
            guard let base = buf.baseAddress else {
              return
            }
            let loc1 = CheckLoc.inBuffer(base.advanced(by: matchPos), buf)
            diagnose(.error, loc1, self.prefix + "-DAG: found a match of CHECK-DAG reordering across a CHECK-NOT")
            let loc2 = CheckLoc.inBuffer(base.advanced(by: lastPos), buf)
            diagnose(.note, loc2, self.prefix + "-DAG: the farthest match of CHECK-DAG is found here")
          }
          diagnose(.note, notStrings[0].patternLoc, self.prefix + "-NOT: the crossed pattern specified here")
          diagnose(.note, pat.patternLoc, self.prefix + "-DAG: the reordered pattern specified here")
          return nil
        }
        // All subsequent CHECK-DAGs should be matched from the farthest
        // position of all precedent CHECK-DAGs (including this one.)
        startPos = lastPos
        // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
        // CHECK-DAG, verify that there's no 'not' strings occurred in that
        // region. `matchPos >= lastPos` holds here because of the reorder
        // check above, so the range is well-formed.
        let regionStart = buffer.index(buffer.startIndex, offsetBy: lastPos)
        let regionEnd = buffer.index(buffer.startIndex, offsetBy: matchPos)
        let skippedRegion = buffer.substring(with: regionStart..<regionEnd)
        if self.checkNot(skippedRegion, notStrings, variableTable) {
          return nil
        }
        // Clear "not strings".
        notStrings.removeAll()
      }
      // Update the last position with CHECK-DAG matches.
      lastPos = max(matchPos + matchLen, lastPos)
    }
    return (lastPos: lastPos, pendingNots: notStrings)
  }
}
/// The severity passed as the first argument to `diagnose`.
private enum DiagnosticKind {
  case error, warning, note
}
/// Prints `message`, followed by the location's own message when that message
/// is non-empty.
///
/// - Parameters:
///   - kind: The severity of the diagnostic. Currently not reflected in the
///     printed output.
///   - loc: The location whose `message` is printed after the diagnostic text.
///   - message: The diagnostic text.
private func diagnose(_ kind : DiagnosticKind, _ loc : CheckLoc, _ message : String) {
  print(message)
  let locationText = loc.message
  guard !locationText.isEmpty else {
    return
  }
  print(locationText)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment