michael-simons/compare_query_and_schema.java

## compare_query_and_schema.java
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 17
//DEPS info.picocli:picocli:4.7.5
//DEPS info.picocli:picocli-codegen:4.7.5
//DEPS org.neo4j:neo4j-cypher-dsl-parser:2023.9.7
//DEPS com.opencsv:opencsv:5.9

import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.neo4j.cypherdsl.core.StatementCatalog;
import org.neo4j.cypherdsl.parser.CypherParser;

import com.opencsv.CSVReaderBuilder;
import picocli.CommandLine;

/**
 * Checks generated Cypher statements against the schema of the database they target and prints
 * every node label, relationship type and property that a statement uses but the schema does
 * not declare.
 * <p>
 * Two CSV files are read; the first row of each is skipped as a header:
 * <ul>
 * <li>{@code --schemas}: column 0 is the key under which a schema is stored, column 1 is the
 * textual schema handed to {@link SchemaParser}.</li>
 * <li>{@code --queries}: column 1 is the Cypher statement and column 3 the database whose schema
 * is looked up; column 0 is echoed in every message (presumably the question the statement was
 * generated for).</li>
 * </ul>
 * Run with
 * <pre>
 * {@code
 *    jbang compare_query_and_schema.java \
 *      --queries ~/tmp/text2cypher_gpt4turbo.csv \
 *      --schemas ~/tmp/text2cypher_schemas.csv
 * }
 * </pre>
 */
@CommandLine.Command(mixinStandardHelpOptions = true)
public class compare_query_and_schema implements Callable<Integer> {

	/** CSV file holding one textual schema per record. */
	@CommandLine.Option(names = "--schemas", required = true)
	private Path schemaPath;

	/** CSV file holding the generated Cypher statements to check. */
	@CommandLine.Option(names = "--queries", required = true)
	private Path queries;

	/**
	 * Hands the arguments to picocli and exits the JVM with the resulting exit code.
	 *
	 * @param args the command line arguments
	 */
	public static void main(String... args) {

		int exitCode = new CommandLine(new compare_query_and_schema()).execute(args);
		System.exit(exitCode);
	}

	/**
	 * Parsed schema of one database.
	 *
	 * @param nodeProperties         property names keyed by node label
	 * @param relationshipProperties property names keyed by relationship type
	 */
	record Schema(Map<String, Set<String>> nodeProperties, Map<String, Set<String>> relationshipProperties) {
	}

	/**
	 * Line based parser for the textual schema description. It reacts to the headings
	 * {@code Node properties:}, {@code Relationship properties:} and {@code The relationships:},
	 * to entity lines starting with {@code "- **"}, to property lines starting with
	 * {@code "  - `"} and to relationship patterns such as {@code (:A)-[:TYPE]->(:B)}; all other
	 * lines are ignored.
	 * <p>
	 * An instance keeps mutable state while parsing and must not be used by several threads at
	 * once. It may be reused for any number of sequential {@link #parse(String)} calls.
	 */
	static class SchemaParser {

		/** Matches a relationship pattern line and captures the relationship type in group 1. */
		private static final Pattern REL_PATTERN = Pattern.compile("\\(.*\\)<?-\\[:(.*)]->?\\(.*\\)");

		/** Map of the section currently being read, {@code null} outside a property section. */
		private Map<String, Set<String>> target = null;
		private String currentEntity = null;
		private Set<String> currentProperties = null;

		/** Stores the entity collected so far (if any) in the current section and clears it. */
		private void checkEntityState() {
			if (currentEntity == null || currentProperties == null) {
				return;
			}

			target.put(currentEntity, currentProperties);
			currentProperties = null;
			currentEntity = null;
		}

		/**
		 * Parses one textual schema.
		 *
		 * @param in the schema text, one statement per line
		 * @return the labels and relationship types found, each with its property names;
		 * relationship types that only occur in a pattern line get an empty property set
		 */
		Schema parse(String in) {
			var nodeProperties = new HashMap<String, Set<String>>();
			var relationshipProperties = new HashMap<String, Set<String>>();

			// The parser instance is reused for every schema, so leftovers of a previous
			// call must not leak into this one.
			target = null;
			currentEntity = null;
			currentProperties = null;

			try (var scanner = new Scanner(in)) {
				while (scanner.hasNextLine()) {
					var line = scanner.nextLine();
					Matcher relMatcher;
					if ("Node properties:".equals(line)) {
						checkEntityState();
						target = nodeProperties;
					} else if ("Relationship properties:".equals(line)) {
						checkEntityState();
						target = relationshipProperties;
					} else if ("The relationships:".equals(line)) {
						checkEntityState();
						target = null;
					} else if (line.startsWith("- **") && target != null) {
						checkEntityState();
						// Tolerate a missing closing "**" instead of failing with an index error.
						int end = line.lastIndexOf("**");
						currentEntity = line.substring(4, end < 4 ? line.length() : end);
						currentProperties = new HashSet<>();
					} else if (line.startsWith("  - `") && currentProperties != null) {
						// The name is everything from the first backtick up to the first colon;
						// without a colon the rest of the line is taken.
						int start = line.indexOf('`');
						int end = line.indexOf(':', start);
						currentProperties.add(line.substring(start, end < 0 ? line.length() : end).replace("`", ""));
					} else if ((relMatcher = REL_PATTERN.matcher(line)).matches()) {
						relationshipProperties.computeIfAbsent(relMatcher.group(1), type -> new HashSet<>());
					}
				}
			}
			// The last entity is not followed by another heading or entity line, flush it here.
			checkEntityState();
			return new Schema(nodeProperties, relationshipProperties);
		}
	}


	/**
	 * Loads all schemas, then checks every query record and prints one line per finding to
	 * standard out. Records that are too short, or whose database has no schema, are reported
	 * and skipped.
	 *
	 * @return always {@code 0}
	 * @throws Exception if one of the CSV files cannot be read
	 */
	@Override
	public Integer call() throws Exception {

		var schemas = new HashMap<String, Schema>();
		var schemaParser = new SchemaParser();

		// Decode explicitly as UTF-8 rather than with the platform default charset.
		try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(schemaPath), java.nio.charset.StandardCharsets.UTF_8)).withSkipLines(1).build()) {
			String[] nextRecord;
			while ((nextRecord = csvReader.readNext()) != null) {
				if (nextRecord.length < 2) {
					// Not a name/schema pair (for example a blank line), nothing to store.
					continue;
				}
				schemas.put(nextRecord[0], schemaParser.parse(nextRecord[1]));
			}
		}

		// Counts data records (the header is not counted), not physical lines of the file.
		int currentLine = 0;
		try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(queries), java.nio.charset.StandardCharsets.UTF_8)).withSkipLines(1).build()) {
			String[] nextRecord;
			while ((nextRecord = csvReader.readNext()) != null) {
				++currentLine;
				if (nextRecord.length < 4) {
					System.out.printf("Line %d, skipped record with %d column(s), at least 4 are required%n", currentLine, nextRecord.length);
					continue;
				}
				var generatedFor = nextRecord[0];
				var cypher = nextRecord[1];
				var database = nextRecord[3];
				var schema = schemas.get(database);
				if (schema == null) {
					// Must be handled before the try block, otherwise the resulting
					// NullPointerException would be reported as invalid Cypher.
					System.out.printf("Line %d, database %s, no schema available for '%s'%n", currentLine, database, generatedFor);
					continue;
				}
				try {
					var catalog = CypherParser.parse(cypher).getCatalog();

					var labelsNotInSchema = catalog.getNodeLabels().stream()
						.map(StatementCatalog.Token::value)
						.filter(Predicate.not(schema.nodeProperties::containsKey))
						.toList();
					var typesNotInSchema = catalog.getRelationshipTypes().stream()
						.map(StatementCatalog.Token::value)
						.filter(Predicate.not(schema.relationshipProperties::containsKey))
						.toList();

					var propertiesNotInSchema = new HashSet<String>();
					catalog.getProperties().forEach(property -> {
						property.owningToken().forEach(token -> {
							// Unknown labels and types have no properties at all, so each of
							// their properties is reported as well.
							Set<String> properties = switch (token.type()) {
								case NODE_LABEL -> schema.nodeProperties.getOrDefault(token.value(), Set.of());
								case RELATIONSHIP_TYPE ->
									schema.relationshipProperties.getOrDefault(token.value(), Set.of());
							};
							if (!properties.contains(property.name())) {
								propertiesNotInSchema.add(token.value() + "." + property.name());
							}
						});
					});

					if (!labelsNotInSchema.isEmpty()) {
						System.out.printf("Line %d, database %s, additional labels generated for '%s': %s%n", currentLine, database, generatedFor, labelsNotInSchema);
					}
					if (!typesNotInSchema.isEmpty()) {
						System.out.printf("Line %d, database %s, additional types generated for '%s': %s%n", currentLine, database, generatedFor, typesNotInSchema);
					}
					if (!propertiesNotInSchema.isEmpty()) {
						System.out.printf("Line %d, database %s, additional properties generated for '%s': %s%n", currentLine, database, generatedFor, propertiesNotInSchema);
					}
				} catch (Exception e) {
					// Best effort: a statement that cannot be analysed is reported and the
					// run continues with the next record.
					System.out.printf("Line %d, database %s, invalid Cypher generated for '%s': %s%n", currentLine, database, generatedFor, cypher);
				}
			}
		}

		return 0;
	}
}
	///usr/bin/env jbang "$0" "$@" ; exit $?
	//JAVA 17
	//DEPS info.picocli:picocli:4.7.5
	//DEPS info.picocli:picocli-codegen:4.7.5
	//DEPS org.neo4j:neo4j-cypher-dsl-parser:2023.9.7
	//DEPS com.opencsv:opencsv:5.9

	import java.io.InputStreamReader;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Scanner;
	import java.util.Set;
	import java.util.concurrent.Callable;
	import java.util.function.Predicate;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import org.neo4j.cypherdsl.core.StatementCatalog;
	import org.neo4j.cypherdsl.parser.CypherParser;

	import com.opencsv.CSVReaderBuilder;
	import picocli.CommandLine;

	/**
	* Run with
	* <pre>
	* {@code
	* jbang compare_query_and_schema.java \
	* --queries ~/tmp/text2cypher_gpt4turbo.csv \
	* --schemas ~/tmp/text2cypher_schemas.csv
	* }
	* </pre>
	*/
	@CommandLine.Command(mixinStandardHelpOptions = true)
	public class compare_query_and_schema implements Callable<Integer> {

	@CommandLine.Option(names = "--schemas", required = true)
	private Path schemaPath;

	@CommandLine.Option(names = "--queries", required = true)
	private Path queries;

	public static void main(String... args) {

	int exitCode = new CommandLine(new compare_query_and_schema()).execute(args);
	System.exit(exitCode);
	}

	record Schema(Map<String, Set<String>> nodeProperties, Map<String, Set<String>> relationshipProperties) {
	}

	static class SchemaParser {

	private Map<String, Set<String>> target = null;
	private String currentEntity = null;
	private Set<String> currentProperties = null;

	private void checkEntityState() {
	if (currentEntity == null \|\| currentProperties == null) {
	return;
	}

	target.put(currentEntity, currentProperties);
	currentProperties = null;
	currentEntity = null;
	}

	Schema parse(String in) {
	var nodeProperties = new HashMap<String, Set<String>>();
	var relationshipProperties = new HashMap<String, Set<String>>();
	var relPattern = Pattern.compile("\\(.\\)<?-\\[:(.)]->?\\(.*\\)");
	Matcher relMatcher;

	try (var scanner = new Scanner(in)) {
	while (scanner.hasNextLine()) {
	var line = scanner.nextLine();
	if ("Node properties:".equals(line)) {
	checkEntityState();
	target = nodeProperties;
	} else if ("Relationship properties:".equals(line)) {
	checkEntityState();
	target = relationshipProperties;
	} else if ("The relationships:".equals(line)) {
	checkEntityState();
	target = null;
	} else if (line.startsWith("- **") && target != null) {
	checkEntityState();
	currentEntity = line.substring(4, line.lastIndexOf("**"));
	currentProperties = new HashSet<>();
	} else if (line.startsWith(" - `") && currentProperties != null) {
	currentProperties.add(line.substring(line.indexOf("`"), line.indexOf(":")).replace("`", ""));
	} else if ((relMatcher = relPattern.matcher(line)).matches()) {
	if (!relationshipProperties.containsKey(relMatcher.group(1))) {
	relationshipProperties.put(relMatcher.group(1), new HashSet<>());
	}
	}
	}
	}
	return new Schema(nodeProperties, relationshipProperties);
	}
	}


	@Override
	public Integer call() throws Exception {

	var schemas = new HashMap<String, Schema>();
	var schemaParser = new SchemaParser();

	try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(schemaPath))).withSkipLines(1).build()) {
	String[] nextRecord;
	while ((nextRecord = csvReader.readNext()) != null) {
	schemas.put(nextRecord[0], schemaParser.parse(nextRecord[1]));
	}
	}

	int currentLine = 0;
	try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(queries))).withSkipLines(1).build()) {
	String[] nextRecord;
	while ((nextRecord = csvReader.readNext()) != null) {
	++currentLine;
	var database = nextRecord[3];
	var cypher = nextRecord[1];
	var schema = schemas.get(database);
	try {
	var catalog = CypherParser.parse(nextRecord[1]).getCatalog();

	var allLabels = catalog.getNodeLabels().stream().map(StatementCatalog.Token::value);
	var allTypes = catalog.getRelationshipTypes().stream().map(StatementCatalog.Token::value);

	var labelsNotInSchema = allLabels
	.filter(Predicate.not(schema.nodeProperties::containsKey))
	.toList();
	var typesNotInSchema = allTypes
	.filter(Predicate.not(schema.relationshipProperties::containsKey))
	.toList();

	var propertiesNotInSchema = new HashSet<String>();
	catalog.getProperties().forEach(property -> {
	property.owningToken().forEach(token -> {
	Set<String> properties = switch (token.type()) {
	case NODE_LABEL -> schema.nodeProperties.getOrDefault(token.value(), Set.of());
	case RELATIONSHIP_TYPE ->
	schema.relationshipProperties.getOrDefault(token.value(), Set.of());
	};
	if (!properties.contains(property.name())) {
	propertiesNotInSchema.add(token.value() + "." + property.name());
	}
	});
	});

	if (!labelsNotInSchema.isEmpty()) {
	System.out.printf("Line %d, database %s, additional labels generated for '%s': %s%n", currentLine, database, nextRecord[0], labelsNotInSchema);
	}
	if (!typesNotInSchema.isEmpty()) {
	System.out.printf("Line %d, database %s, additional types generated for '%s': %s%n", currentLine, database, nextRecord[0], typesNotInSchema);
	}
	if (!propertiesNotInSchema.isEmpty()) {
	System.out.printf("Line %d, database %s, additional properties generated for '%s': %s%n", currentLine, database, nextRecord[0], propertiesNotInSchema);
	}
	} catch (Exception e) {
	System.out.printf("Line %d, database %s, invalid Cypher generated for '%s': %s%n", currentLine, database, nextRecord[0], cypher);
	}
	}
	}

	return 0;
	}
	}