Skip to content

Instantly share code, notes, and snippets.

@lotkowskim
lotkowskim / gist:76e8ff265493efd0b2b7175446805a82
Created November 25, 2019 11:31
Proposed InMemoryFileIndex changes
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
index dc5c2ff927..9e5511edd5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
@@ -91,7 +91,12 @@ class InMemoryFileIndex(
val files = listLeafFiles(rootPaths)
cachedLeafFiles =
new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f)
- cachedLeafDirToChildrenFiles = files.toArray.groupBy(_.getPath.getParent)
+ val fullyQualifiedRoots = rootPaths.map(rootPath => rootPath.getFileSystem(hadoopConf).makeQualified(rootPath))