Skip to content

Instantly share code, notes, and snippets.

@michael-simons
Created May 1, 2024 17:27
Show Gist options
  • Save michael-simons/277ceb4c5df241f532ebffa71a24b5b2 to your computer and use it in GitHub Desktop.
Save michael-simons/277ceb4c5df241f532ebffa71a24b5b2 to your computer and use it in GitHub Desktop.
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 17
//DEPS info.picocli:picocli:4.7.5
//DEPS info.picocli:picocli-codegen:4.7.5
//DEPS org.neo4j:neo4j-cypher-dsl-parser:2023.9.7
//DEPS com.opencsv:opencsv:5.9
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.neo4j.cypherdsl.core.StatementCatalog;
import org.neo4j.cypherdsl.parser.CypherParser;
import com.opencsv.CSVReaderBuilder;
import picocli.CommandLine;
/**
 * Checks Cypher queries against the schema of the database each query targets and reports
 * every node label, relationship type and property a query uses that the schema does not define.
 * <p>
 * Two CSV files are read as UTF-8; the first row of each is skipped as a header:
 * <ul>
 * <li>{@code --schemas}: column 0 is the database name, column 1 the textual schema description
 * understood by {@code SchemaParser}.</li>
 * <li>{@code --queries}: column 1 is the Cypher statement and column 3 the database name. Column 0
 * is echoed in every report line (presumably the question the query was generated for).</li>
 * </ul>
 * Findings are printed to standard out, problems with the input data (rows with too few columns,
 * queries whose database has no schema) to standard error.
 * <p>
 * Run with
 * <pre>
 * {@code
 * jbang compare_query_and_schema.java \
 * --queries ~/tmp/text2cypher_gpt4turbo.csv \
 * --schemas ~/tmp/text2cypher_schemas.csv
 * }
 * </pre>
 */
@CommandLine.Command(mixinStandardHelpOptions = true)
public class compare_query_and_schema implements Callable<Integer> {

    /** Column of the schemas CSV holding the database name. */
    private static final int SCHEMA_DATABASE_COLUMN = 0;

    /** Column of the schemas CSV holding the textual schema description. */
    private static final int SCHEMA_TEXT_COLUMN = 1;

    /** Column of the queries CSV that is echoed in the report lines. */
    private static final int QUERY_QUESTION_COLUMN = 0;

    /** Column of the queries CSV holding the Cypher statement. */
    private static final int QUERY_CYPHER_COLUMN = 1;

    /** Column of the queries CSV holding the database name; also the highest column index read. */
    private static final int QUERY_DATABASE_COLUMN = 3;

    @CommandLine.Option(names = "--schemas", required = true)
    private Path schemaPath;

    @CommandLine.Option(names = "--queries", required = true)
    private Path queries;

    public static void main(String... args) {
        int exitCode = new CommandLine(new compare_query_and_schema()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Parsed schema of one database.
     *
     * @param nodeProperties         property names per node label
     * @param relationshipProperties property names per relationship type
     */
    record Schema(Map<String, Set<String>> nodeProperties, Map<String, Set<String>> relationshipProperties) {
    }

    /**
     * Parser for the line based schema description. It recognises the section headers
     * {@code Node properties:}, {@code Relationship properties:} and {@code The relationships:},
     * entity lines starting with {@code - **}, property lines starting with {@code - `} and
     * relationship patterns such as {@code (:A)-[:TYPE]->(:B)}.
     * <p>
     * The parser keeps no state between calls, so one instance can be reused for many schemas.
     */
    static class SchemaParser {

        /** Matches a relationship pattern line; group 1 is the relationship type. */
        private static final Pattern RELATIONSHIP_PATTERN = Pattern.compile("\\(.*\\)<?-\\[:(.*)]->?\\(.*\\)");

        /**
         * Parses one schema description.
         *
         * @param in the complete schema text, one statement per line
         * @return the labels and relationship types found, each with its set of property names;
         * relationship types that only appear in a relationship pattern get an empty set
         */
        Schema parse(String in) {
            var nodeProperties = new HashMap<String, Set<String>>();
            var relationshipProperties = new HashMap<String, Set<String>>();

            // Section that entity lines currently belong to; null outside of a property section.
            Map<String, Set<String>> target = null;
            // Property set of the entity declared last; null until an entity line was seen.
            Set<String> currentProperties = null;

            try (var scanner = new Scanner(in)) {
                while (scanner.hasNextLine()) {
                    var line = scanner.nextLine();
                    Matcher relMatcher;
                    if ("Node properties:".equals(line)) {
                        target = nodeProperties;
                        currentProperties = null;
                    } else if ("Relationship properties:".equals(line)) {
                        target = relationshipProperties;
                        currentProperties = null;
                    } else if ("The relationships:".equals(line)) {
                        target = null;
                        currentProperties = null;
                    } else if (line.startsWith("- **") && target != null) {
                        currentProperties = null;
                        int end = line.lastIndexOf("**");
                        // A line without closing "**" is malformed; its properties are ignored.
                        if (end >= 4) {
                            // Registered right away so the last entity of a schema is never lost.
                            currentProperties = new HashSet<>();
                            target.put(line.substring(4, end), currentProperties);
                        }
                    } else if (line.stripLeading().startsWith("- `") && currentProperties != null) {
                        int start = line.indexOf('`');
                        // The name ends at the first colon, which works whether the closing
                        // backtick comes before the colon or only after the type.
                        int end = line.indexOf(':');
                        if (end > start) {
                            currentProperties.add(line.substring(start, end).replace("`", ""));
                        }
                    } else if ((relMatcher = RELATIONSHIP_PATTERN.matcher(line)).matches()) {
                        relationshipProperties.putIfAbsent(relMatcher.group(1), new HashSet<>());
                    }
                }
            }
            return new Schema(nodeProperties, relationshipProperties);
        }
    }

    /**
     * Loads all schemas, then checks every query and prints the findings.
     *
     * @return always 0
     * @throws Exception if one of the CSV files cannot be read
     */
    @Override
    public Integer call() throws Exception {
        var schemas = loadSchemas();

        // Counts data records (header excluded), not physical lines of the file.
        int currentLine = 0;
        try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(queries), StandardCharsets.UTF_8)).withSkipLines(1).build()) {
            String[] nextRecord;
            while ((nextRecord = csvReader.readNext()) != null) {
                ++currentLine;
                if (nextRecord.length <= QUERY_DATABASE_COLUMN) {
                    // Typically a blank line; skipped instead of failing the whole run.
                    System.err.printf("Line %d, skipped, expected at least %d columns but found %d%n", currentLine, QUERY_DATABASE_COLUMN + 1, nextRecord.length);
                    continue;
                }
                var database = nextRecord[QUERY_DATABASE_COLUMN];
                checkQuery(currentLine, nextRecord[QUERY_QUESTION_COLUMN], nextRecord[QUERY_CYPHER_COLUMN], database, schemas.get(database));
            }
        }
        return 0;
    }

    /**
     * Reads the schemas CSV and parses the schema text of every record.
     *
     * @return the parsed schemas keyed by database name
     * @throws Exception if the file cannot be read
     */
    private Map<String, Schema> loadSchemas() throws Exception {
        var schemas = new HashMap<String, Schema>();
        var schemaParser = new SchemaParser();
        int currentRecord = 0;
        try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(schemaPath), StandardCharsets.UTF_8)).withSkipLines(1).build()) {
            String[] nextRecord;
            while ((nextRecord = csvReader.readNext()) != null) {
                ++currentRecord;
                if (nextRecord.length <= SCHEMA_TEXT_COLUMN) {
                    System.err.printf("Schema record %d skipped, expected at least %d columns but found %d%n", currentRecord, SCHEMA_TEXT_COLUMN + 1, nextRecord.length);
                    continue;
                }
                schemas.put(nextRecord[SCHEMA_DATABASE_COLUMN], schemaParser.parse(nextRecord[SCHEMA_TEXT_COLUMN]));
            }
        }
        return schemas;
    }

    /**
     * Parses one query and prints a line for each kind of element it uses that is missing
     * from the schema.
     *
     * @param currentLine number of the query record, used as prefix of every message
     * @param question    value echoed in the messages to identify the query
     * @param cypher      the Cypher statement to check
     * @param database    name of the database the query targets
     * @param schema      schema of that database, or {@code null} if none was loaded
     */
    private static void checkQuery(int currentLine, String question, String cypher, String database, Schema schema) {
        try {
            var catalog = CypherParser.parse(cypher).getCatalog();
            if (schema == null) {
                // Without this check the lookup below fails with a NullPointerException
                // and the query would be reported as invalid Cypher.
                System.err.printf("Line %d, database %s, no schema available to check '%s' against%n", currentLine, database, question);
                return;
            }

            var labelsNotInSchema = catalog.getNodeLabels().stream()
                .map(StatementCatalog.Token::value)
                .filter(Predicate.not(schema.nodeProperties()::containsKey))
                .toList();
            var typesNotInSchema = catalog.getRelationshipTypes().stream()
                .map(StatementCatalog.Token::value)
                .filter(Predicate.not(schema.relationshipProperties()::containsKey))
                .toList();

            var propertiesNotInSchema = new HashSet<String>();
            catalog.getProperties().forEach(property -> {
                property.owningToken().forEach(token -> {
                    // Unknown labels and types have no properties, so all of theirs are reported.
                    Set<String> properties = switch (token.type()) {
                        case NODE_LABEL -> schema.nodeProperties().getOrDefault(token.value(), Set.of());
                        case RELATIONSHIP_TYPE ->
                            schema.relationshipProperties().getOrDefault(token.value(), Set.of());
                    };
                    if (!properties.contains(property.name())) {
                        propertiesNotInSchema.add(token.value() + "." + property.name());
                    }
                });
            });

            if (!labelsNotInSchema.isEmpty()) {
                System.out.printf("Line %d, database %s, additional labels generated for '%s': %s%n", currentLine, database, question, labelsNotInSchema);
            }
            if (!typesNotInSchema.isEmpty()) {
                System.out.printf("Line %d, database %s, additional types generated for '%s': %s%n", currentLine, database, question, typesNotInSchema);
            }
            if (!propertiesNotInSchema.isEmpty()) {
                System.out.printf("Line %d, database %s, additional properties generated for '%s': %s%n", currentLine, database, question, propertiesNotInSchema);
            }
        } catch (Exception ignored) {
            // A statement that cannot be processed is itself the finding; the offending
            // statement is printed instead of the parser's error message.
            System.out.printf("Line %d, database %s, invalid Cypher generated for '%s': %s%n", currentLine, database, question, cypher);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment