Skip to content

Instantly share code, notes, and snippets.

@yorek
Last active January 23, 2024 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yorek/3a39fa663a6ea74a7fe108100881fa75 to your computer and use it in GitHub Desktop.
Save yorek/3a39fa663a6ea74a7fe108100881fa75 to your computer and use it in GitHub Desktop.
Read FVECS and IVECS files and bulk load them into MSSQL
/// <summary>
/// Loads the three files of a dataset (base vectors, query vectors and ground truth)
/// into the tables <c>{test}_base</c>, <c>{test}_query</c> and <c>{test}_groundtruth</c>.
/// </summary>
/// <param name="test">Dataset name; used both as the sub-folder name and as the file/table name prefix.</param>
private static void LoadFiles(string test)
{
    // Source files, all located in the dataset's own folder.
    string baseFile = $"c:\\Temp\\vector\\{test}\\{test}_base.fvecs";
    string queryFile = $"c:\\Temp\\vector\\{test}\\{test}_query.fvecs";
    string groundTruthFile = $"c:\\Temp\\vector\\{test}\\{test}_groundtruth.ivecs";

    // Destination tables.
    string baseTable = $"{test}_base";
    string queryTable = $"{test}_query";
    string groundTruthTable = $"{test}_groundtruth";

    // Arguments: loader, file, table, number of vectors, dimension of each vector.
    LoadFile(LoadFVECS, baseFile, baseTable, 1000000, 128);
    LoadFile(LoadFVECS, queryFile, queryTable, 10000, 128);
    LoadFile(LoadIVECS, groundTruthFile, groundTruthTable, 10000, 100);
}
/// <summary>
/// Reads <paramref name="num"/> vectors from a binary vector file and bulk-copies them
/// into the SQL table <paramref name="tablename"/>, flushing to the server every 1000 rows.
/// </summary>
/// <param name="FileLoader">
/// Reads the components of one vector from the reader (given the dimension) and returns
/// the text stored in the <c>jsonvector</c> column.
/// </param>
/// <param name="file">Path of the file to read.</param>
/// <param name="tablename">Destination table name for the bulk copy.</param>
/// <param name="num">Number of vectors to read from the file.</param>
/// <param name="dim">Expected dimension of every vector in the file.</param>
/// <exception cref="InvalidDataException">
/// A vector's leading dimension field does not match <paramref name="dim"/>.
/// </exception>
private static void LoadFile(Func<BinaryReader, int, string> FileLoader, string file, string tablename, int num, int dim)
{
    Console.WriteLine($"Reading file {file}...");
    Console.WriteLine($"num: {num}, dim: {dim}");

    using var conn = new SqlConnection(_connectionString);
    conn.Open();

    using var bulk = new SqlBulkCopy(conn);
    bulk.DestinationTableName = tablename;

    using var dt = CreateSiftDataTable();

    // The file is only read, so request read-only access: the two-argument FileStream
    // constructor asks for read/write access and fails on read-only files.
    using FileStream fs = new(file, FileMode.Open, FileAccess.Read);
    using BinaryReader br = new(fs);

    // Row ids are 1-based.
    for (int i = 1; i <= num; i++)
    {
        // Every vector is preceded by an Int32 holding its dimension.
        var d = br.ReadInt32();
        if (d != dim)
        {
            throw new InvalidDataException($"Dimension mismatch: {d} != {dim}");
        }

        var data = FileLoader(br, dim);
        dt.Rows.Add(i, data);

        // Flush in batches of 1000 rows so the in-memory table stays small.
        if (i % 1000 == 0)
        {
            Console.WriteLine($"Writing {i} rows...");
            bulk.WriteToServer(dt);
            dt.Clear();
        }
    }

    // Write whatever is left when num is not a multiple of the batch size.
    if (dt.Rows.Count > 0)
    {
        Console.WriteLine($"Writing {dt.Rows.Count} rows...");
        bulk.WriteToServer(dt);
        dt.Clear();
    }

    // The reader, stream, bulk copy and connection are closed by their using declarations.
    Console.WriteLine("Done!");
}
/// <summary>
/// Builds the empty in-memory table used as the bulk-copy buffer.
/// </summary>
/// <returns>
/// A <see cref="DataTable"/> with an <c>int</c> column named <c>id</c> followed by a
/// <c>string</c> column named <c>jsonvector</c>.
/// </returns>
private static DataTable CreateSiftDataTable()
{
    var idColumn = new DataColumn("id", typeof(int));
    var vectorColumn = new DataColumn("jsonvector", typeof(string));

    var table = new DataTable();
    table.Columns.Add(idColumn);
    table.Columns.Add(vectorColumn);
    return table;
}
/// <summary>
/// Reads <paramref name="dim"/> single-precision floats from the reader and returns them
/// as a bracketed, comma-separated list such as <c>[1.5,-2.25,0]</c>.
/// </summary>
/// <param name="br">Reader positioned at the first component of the vector.</param>
/// <param name="dim">Number of components to read.</param>
/// <returns>The components formatted as a bracketed, comma-separated list.</returns>
private static string LoadFVECS(BinaryReader br, int dim)
{
    // Format with the invariant culture: under a culture whose decimal separator is a
    // comma, the default float formatting would yield "1,5" and corrupt the list.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var data = new string[dim];
    for (int j = 0; j < dim; j++)
    {
        data[j] = br.ReadSingle().ToString(invariant);
    }

    return "[" + string.Join(",", data) + "]";
}
/// <summary>
/// Reads <paramref name="dim"/> 32-bit integers from the reader and returns them
/// as a bracketed, comma-separated list such as <c>[1,2,3]</c>.
/// </summary>
/// <param name="br">Reader positioned at the first component of the vector.</param>
/// <param name="dim">Number of components to read.</param>
/// <returns>The components formatted as a bracketed, comma-separated list.</returns>
private static string LoadIVECS(BinaryReader br, int dim)
{
    int[] components = new int[dim];

    int index = 0;
    while (index < dim)
    {
        components[index] = br.ReadInt32();
        index++;
    }

    string joined = string.Join(",", components);
    return "[" + joined + "]";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment