Last active
January 23, 2024 16:48
-
-
Save yorek/3a39fa663a6ea74a7fe108100881fa75 to your computer and use it in GitHub Desktop.
Read FVECS and IVECS files and bulk-load them into MSSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static void LoadFiles(string test) | |
{ | |
LoadFile(LoadFVECS, $"c:\\Temp\\vector\\{test}\\{test}_base.fvecs", $"{test}_base", num:1000000, dim:128); | |
LoadFile(LoadFVECS, $"c:\\Temp\\vector\\{test}\\{test}_query.fvecs", $"{test}_query", num:10000, dim:128); | |
LoadFile(LoadIVECS, $"c:\\Temp\\vector\\{test}\\{test}_groundtruth.ivecs", $"{test}_groundtruth", num:10000, dim:100); | |
} | |
private static void LoadFile(Func<BinaryReader, int, string> FileLoader, string file, string tablename, int num, int dim) | |
{ | |
Console.WriteLine($"Reading file {file}..."); | |
Console.WriteLine($"num: {num}, dim: {dim}"); | |
using var conn = new SqlConnection(_connectionString); | |
conn.Open(); | |
using var bulk = new SqlBulkCopy(conn); | |
bulk.DestinationTableName = tablename; | |
var dt = CreateSiftDataTable(); | |
using FileStream fs = new(file, FileMode.Open); | |
using BinaryReader br = new(fs); | |
for (int i = 1; i <= num; i++) | |
{ | |
var d = br.ReadInt32(); | |
if (d != dim) | |
throw new Exception($"Dimension mismatch: {d} != {dim}"); | |
var data = FileLoader(br, dim); | |
dt.Rows.Add(i, data); | |
if (i % 1000 == 0) | |
{ | |
Console.WriteLine($"Writing {i} rows..."); | |
bulk.WriteToServer(dt); | |
dt.Clear(); | |
} | |
} | |
if (dt.Rows.Count > 0) | |
{ | |
Console.WriteLine($"Writing {dt.Rows.Count} rows..."); | |
bulk.WriteToServer(dt); | |
dt.Clear(); | |
} | |
fs.Close(); | |
conn.Close(); | |
Console.WriteLine("Done!"); | |
} | |
private static DataTable CreateSiftDataTable() | |
{ | |
DataTable dt = new(); | |
dt.Columns.Add("id", typeof(int)); | |
dt.Columns.Add("jsonvector", typeof(string)); | |
return dt; | |
} | |
private static string LoadFVECS(BinaryReader br, int dim) | |
{ | |
var data = new float[dim]; | |
for (int j = 0; j < dim; j++) | |
{ | |
data[j] = br.ReadSingle(); | |
} | |
return "[" + string.Join(",", data) + "]"; | |
} | |
private static string LoadIVECS(BinaryReader br, int dim) | |
{ | |
var data = new int[dim]; | |
for (int j = 0; j < dim; j++) | |
{ | |
data[j] = br.ReadInt32(); | |
} | |
return "[" + string.Join(",", data) + "]"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment