Skip to content

Instantly share code, notes, and snippets.

@thomasjslone
Last active July 25, 2024 16:33
Show Gist options
  • Save thomasjslone/784062d2716e07842aa224eff9402595 to your computer and use it in GitHub Desktop.
Save thomasjslone/784062d2716e07842aa224eff9402595 to your computer and use it in GitHub Desktop.
redid my old dictionary database with efficiency in mind
#@# dictionarydatabank.rb - version 1.0 tested on 2024.07.26 - thomas j slone
## A simple database for dictionary builders: it stores billions of words by splitting massive arrays into bank files of limited size.
## final todos
##
## enforce add string size limit
## add a config file for banks, so that bank and string sizes only have to be given when a bank is first created, not on every reload
## add methods for getting and setting size limits, and possibly for renaming a bank (though renaming requires moving the entire directory)
## Bank - a directory of "bank" files, each holding separator-joined strings.
##
## One bank at a time is loaded into memory (@bank / @bank_data). Entries are
## added to the loaded bank until it is full, then the next bank with free space
## is loaded (or a new one is created). Bank files are named with plain numbers
## and no extension ("0", "1", ...).
##
## Sizes are measured in bytes. The size of the loaded bank (see #size) is the
## sum of its entries and does not count separators; file sizes do.
##
## All failures are raised as RuntimeError with a descriptive message.
##
## Known limitations:
## * entries containing the separator ("\n") are split into several entries
##   when the bank is reloaded
## * the entry size limit is stored but not enforced yet
## * #load_bank and #unload_bank discard unsaved changes of the loaded bank,
##   call #save_bank first if they matter
class Bank
  DEFAULT_BANK_SIZE_LIMIT = 1_000_000 ## bytes
  DEFAULT_ENTRY_SIZE_LIMIT = 200
  DEFAULT_SAVE_FREQUENCY = 1000

  ## Open (and create if needed) the bank directory dir/name.
  ##
  ## args: dir, name, banksize, entrysize, save_frequency
  ##   dir            - parent directory, must already exist
  ##   name           - name of the bank directory inside dir
  ##   banksize       - optional Integer >= 99, byte limit of one bank file
  ##   entrysize      - optional Integer >= 1, size limit of one entry
  ##   save_frequency - optional Integer >= 1, the loaded bank is saved
  ##                    automatically after this many unsaved changes
  ## Each optional argument may be omitted (or nil) to get its default.
  ## Raises RuntimeError on invalid arguments or if the directory can not be made.
  def initialize(*args)
    dir, name, bank_size, entry_size, save_frequency = args

    raise "ARGUEMENT ERROR: args[0] dir is not a valid string." if dir.to_s.empty?
    raise "ARGUEMENT ERROR: args[1] name is not a valid string." if name.to_s.empty?
    check_integer_arg(bank_size, 99,
                      "ARGUEMENT ERROR: args[2]: banksize is not an integer.",
                      "ARGUEMENT ERROR: args[2] banksize: must be larger than 98.")
    check_integer_arg(entry_size, 1,
                      "ARGUEMENT ERROR: args[3]: entrysize is not an integer.",
                      "ARGUEMENT ERROR: args[3] entrysize: must be larger than 0.")
    check_integer_arg(save_frequency, 1,
                      "ARGUEMENT ERROR: args[4]: savefrequency must be an integer.",
                      "ARGUEMENT ERROR: args[4] must be larger than 0.")

    @dir = dir.to_s + "/" + name.to_s
    unless File.directory?(@dir)
      begin
        Dir.mkdir(@dir)
      rescue
        raise "ERROR: UNABLE TO MAKE DIRECTORY: " + @dir
      end
    end

    @bank_size_limit = bank_size || DEFAULT_BANK_SIZE_LIMIT
    @entry_size_limit = entry_size || DEFAULT_ENTRY_SIZE_LIMIT
    @save_frequency = save_frequency || DEFAULT_SAVE_FREQUENCY
    @bank_seperator = "\n"

    @bank = nil
    @bank_data = []
    @bank_size = 0
    @unsaved_changes = 0
    ## no file extensions are used, so a bank name is simply its number
    @banks = bank_files
  end

  ## Add a string to the loaded bank.
  ## If no bank is loaded, or the entry does not fit into the loaded bank, the
  ## loaded bank is saved and a bank with enough free space is loaded first.
  ## An entry larger than the bank size limit is stored alone in an empty bank.
  ## Returns true.
  def add(string)
    entry = string.to_s
    load_open_bank(entry.bytesize) if @bank.nil? || !room_for?(@bank_size, entry.bytesize)
    @bank_data << entry
    @bank_size += entry.bytesize
    record_change
    true
  end

  ## Remove every occurrence of string from the loaded bank.
  ## Returns true if something was removed, false if the string was not found.
  def remove(string)
    entry = string.to_s
    occurrences = @bank_data.count(entry)
    return false if occurrences.zero?

    @bank_data.delete(entry)
    @bank_size -= occurrences * entry.bytesize
    record_change
    true
  end

  ## Add a string to the specified bank without loading it.
  ## Returns true if the string was added, false if the bank has no room for it.
  ## Raises RuntimeError if the bank does not exist or can not be accessed.
  def bank_add(bank, string)
    name = bank.to_s
    raise "ARGUEMENT ERROR: no such bank: " + name unless @banks.include?(name)

    entry = string.to_s
    if name == @bank ## the loaded bank lives in memory, keep it authoritative
      return false unless room_for?(@bank_size, entry.bytesize)

      @bank_data << entry
      @bank_size += entry.bytesize
      record_change
      return true
    end

    begin
      path = bank_path(name)
      used = File.size(path)
      return false unless room_for?(used, entry.bytesize)

      ## an empty file gets the bare entry, otherwise the separator comes first
      File.open(path, "a") { |f| f.write(used.zero? ? entry : @bank_seperator + entry) }
      true
    rescue
      raise "ERROR while accessing bank file: " + name
    end
  end

  ## Remove every occurrence of string from the specified bank.
  ## Returns true if something was removed, false if the string was not found.
  ## Raises RuntimeError if the bank does not exist or can not be accessed.
  def bank_remove(bank, string)
    name = bank.to_s
    raise "ARGUEMENT ERROR: no such bank: " + name unless @banks.include?(name)

    if name == @bank ## edit memory and save, so memory and file stay in sync
      return false unless remove(string)

      save_bank if @unsaved_changes > 0
      return true
    end

    begin
      entries = File.read(bank_path(name)).split(@bank_seperator)
      return false if entries.delete(string.to_s).nil?

      File.write(bank_path(name), entries.join(@bank_seperator))
      true
    rescue
      raise "ERROR accessing bank file: " + name
    end
  end

  ## Save the loaded bank to its file. Returns true.
  ## Raises RuntimeError if no bank is loaded or the file can not be written.
  def save_bank
    raise "Unable to save bank." if @bank.nil?

    begin
      File.write(bank_path(@bank), @bank_data.join(@bank_seperator))
    rescue
      raise "ERROR occured while saving bank: " + @bank.to_s
    end
    @unsaved_changes = 0
    true
  end

  ## Load the specified bank into memory, replacing the loaded one.
  ## Unsaved changes of the previously loaded bank are discarded.
  ## Returns the bank name, or false if there is no such bank file.
  ## Raises RuntimeError if the file can not be read.
  def load_bank(bank)
    name = bank.to_s
    path = bank_path(name)
    return false unless File.file?(path)

    begin
      entries = File.read(path).split(@bank_seperator)
    rescue
      raise "ERROR: Unable to load bank data: " + name
    end

    ## state is only replaced after the file was read successfully
    @bank = name
    @bank_data = entries
    @bank_size = entries.sum(&:bytesize)
    @unsaved_changes = 0
    @banks << name unless @banks.include?(name)
    @bank
  end

  ## Unload the current bank from memory, use if working with multiple massive
  ## banks. Unsaved changes are discarded. Returns true.
  def unload_bank
    @bank = nil
    @bank_data = []
    @bank_size = 0
    @unsaved_changes = 0
    true
  end

  ## Find a bank that is not full and load it, create one if all are full or
  ## none exist. Unsaved changes of the loaded bank are saved first.
  ##   needed - optional number of bytes the bank must still have room for
  ## Returns the name of the bank that was loaded.
  def load_open_bank(needed = 0)
    save_bank if @bank && @unsaved_changes > 0

    ## skip the loaded bank when it is the one that ran out of space
    candidates = bank_files.reject { |name| name == @bank && !room_for?(@bank_size, needed) }
    available = candidates.find { |name| room_for?(File.size(bank_path(name)), needed) }
    load_bank(available || create_bank_file)
  end

  ## Create a new empty bank file without loading it.
  ## Returns the number of the new bank as an Integer.
  def new_bank
    create_bank_file.to_i
  end

  ## Delete the specified bank file, unloading it first if it is loaded.
  ## Returns true. Raises RuntimeError if the bank does not exist or can not be deleted.
  def delete_bank(bank)
    name = bank.to_s
    unless @banks.include?(name) && File.file?(bank_path(name))
      raise "ARGUEMENT ERROR: No such bank: " + name
    end

    begin
      File.delete(bank_path(name))
    rescue
      raise "ERROR: Unable to delete bank file: " + name
    end
    unload_bank if @bank == name
    @banks.delete(name)
    true
  end

  ## Delete everything inside a bank but keep the bank file. Returns true.
  ## Raises RuntimeError if the bank does not exist or can not be written.
  def empty_bank(bank)
    name = bank.to_s
    if @bank == name ## empty the loaded bank and save
      @bank_data = []
      @bank_size = 0
      save_bank
      return true
    end
    raise "ARGUEMENT ERROR: No such bank: " + name unless @banks.include?(name)

    begin
      File.write(bank_path(name), "")
    rescue
      raise "ERROR accessing bank file: " + name
    end
    true
  end

  ## Check the loaded bank for string. Returns true or false.
  def include(string)
    @bank_data.include?(string.to_s)
  end

  ## Check the specified bank for string. Returns true or false.
  ## Raises RuntimeError if the bank does not exist or can not be read.
  def bank_include(bank, string)
    name = bank.to_s
    raise "ARGUEMENT ERROR, no such bank: " + name unless @banks.include?(name)

    begin
      entries_of(name).include?(string.to_s)
    rescue
      raise "ERROR accessing bank: " + name
    end
  end

  ## Check all banks for string, the loaded bank first.
  ## Returns an array of the bank names it was found in, or false if none.
  def banks_include(string)
    found = banks_loaded_first.select { |name| bank_include(name, string) }
    found.empty? ? false : found
  end

  ## Search the loaded bank for string.
  ## Returns an array with the index of every occurrence, or false if none.
  def search(string)
    found = positions_in(@bank_data, string.to_s)
    found.empty? ? false : found
  end

  ## Search the specified bank for string.
  ## Returns an array with the index of every occurrence, or false if none.
  ## Raises RuntimeError if there is no such bank file.
  def search_bank(bank, string)
    name = bank.to_s
    raise "No such bank file: " + name unless File.file?(bank_path(name))

    found = positions_in(entries_of(name), string.to_s)
    found.empty? ? false : found
  end

  ## Search all banks for string, the loaded bank first.
  ## Returns an array of [bank, index] pairs for every occurrence, or false if none.
  def search_banks(string)
    target = string.to_s
    found = banks_loaded_first.flat_map do |name|
      positions_in(entries_of(name), target).map { |index| [name, index] }
    end
    found.empty? ? false : found
  end

  ## Size of the loaded bank in bytes. Separators are not counted, so the bank
  ## file will be slightly larger.
  def size
    @bank_size
  end

  ## Size of a bank file in bytes, separators included.
  ## Raises RuntimeError if the bank does not exist.
  def bank_size(bank)
    name = bank.to_s
    raise "ARGUEMENT ERROR no such bank: " + name unless @banks.include?(name)

    File.size(bank_path(name))
  end

  ## Total size of all banks in bytes. The loaded bank is counted by its size in
  ## memory (see #size), every other bank by its file size.
  def banks_size
    @banks.sum do |name|
      if name == @bank
        @bank_size
      else
        begin
          File.size(bank_path(name))
        rescue
          raise "ERROR occured while accessing bank file: " + name
        end
      end
    end
  end

  private

  ## Validate an optional Integer argument, nil means "not provided".
  def check_integer_arg(value, minimum, type_message, range_message)
    return if value.nil?
    raise type_message unless value.is_a?(Integer)
    raise range_message if value < minimum
  end

  ## Full path of a bank file.
  def bank_path(name)
    @dir + "/" + name.to_s
  end

  ## Names of all bank files on disk, in numeric order.
  def bank_files
    names = Dir.entries(@dir).select { |entry| File.file?(bank_path(entry)) }
    names.sort_by { |entry| [entry.to_i, entry] }
  end

  ## True if a bank holding `used` bytes can take `needed` more bytes.
  ## An empty bank always qualifies, so oversized entries still have a home.
  def room_for?(used, needed)
    used.zero? || used + needed < @bank_size_limit
  end

  ## Count a change to the loaded bank and save once enough have piled up.
  def record_change
    @unsaved_changes += 1
    save_bank if @bank && @unsaved_changes >= @save_frequency
  end

  ## Create the next numbered empty bank file and register it. Returns its name.
  def create_bank_file
    existing = bank_files | @banks
    ## number past the highest existing bank so no file is ever overwritten
    number = existing.empty? ? 0 : existing.map(&:to_i).max + 1
    name = number.to_s
    File.write(bank_path(name), "")
    @banks << name unless @banks.include?(name)
    name
  end

  ## Entries of a bank: from memory for the loaded bank, otherwise from its file.
  def entries_of(name)
    return @bank_data if name == @bank

    File.read(bank_path(name)).split(@bank_seperator)
  end

  ## All known banks with the loaded one first. Returns a new array.
  def banks_loaded_first
    loaded, others = @banks.partition { |name| name == @bank }
    loaded + others
  end

  ## Index of every element of entries equal to target.
  def positions_in(entries, target)
    entries.each_index.select { |index| entries[index] == target }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment