Dev notes 2024 (Ruby)

Parse query string params

# Rack::Utils.parse_query URI("http://example.com?par=hello&par2=bye").query
params = Rack::Utils.parse_query(q).with_indifferent_access

args = ActiveSupport::HashWithIndifferentAccess.new({"job_params" => {}})

ActiveRecord

model.find_in_batches(batch_size: 2_000) {|objs| };

objs.pluck(:field1)
ap model.group(:field1).count

# output as raw object
obj.yaml_field1_before_type_cast

arr.each_with_object({}) {|obj, h| h[obj.id] = {} }

sp.limit(10_000).select(:id).find_in_batches {|objs| print '?'; ids = objs.map(&:id); foo(); sleep(1.5)}; p Time.now.utc

Strong parameters

params = ActionController::Parameters.new JSON.parse j

params = ActionController::Parameters.new({
  "user_id" => "123",
  "ref_id" => "foo",
})
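
A quick sketch of filtering these params with the standard require/permit API (same hypothetical keys as above):

permitted = params.permit(:user_id, :ref_id) # ActionController::Parameters with permitted = true
permitted.to_h                               # => {"user_id"=>"123", "ref_id"=>"foo"}
params.require(:user_id)                     # raises ActionController::ParameterMissing if absent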

NLP

# Ruby NLP: detecting language:
NLP.detect_language(text)

# Geo-location (uses Google API) and extracts radius from distance between bounding-box coordinates
NLP.find_location(location_string) # 'Sydney'

Ruby misc

require_dependency 'some_lib'
require_relative 'pipeline/sp_dbg'

Rails.logger.level = :debug

Timeout.timeout(5) { do_work } # do_work is a placeholder; raises Timeout::Error after 5 s

indexed_docs = docs.index_by(&:status_id)

Unicode

text.unicode_normalize(:nfkd)
text.codepoints.map {|cp| 'U+'+cp.to_s(16)} # => ["U+7b", "U+22", "U+1f620", "U+22", "U+3a", "U+20", "U+22", "U+22", "U+7d"]

Mecab / Japanese

MeCab::Model.new.createTagger

Social media

Facebook-messenger

https://developers.facebook.com/docs/messenger-platform/getting-started/quick-start
https://github.com/jgorset/facebook-messenger
https://githubmemory.com/repo/jgorset/facebook-messenger/issues?cursor=Y3Vyc29yOnYyOpK5MjAxOS0wNi0yOFQxNjozNjo0OSswODowMM4biGJm&pagination=prev&page=1
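
A minimal echo-bot sketch with the jgorset/facebook-messenger gem, following its README (token/provider setup omitted):

require 'facebook/messenger'
include Facebook::Messenger

Bot.on :message do |message|
  message.reply(text: "You said: #{message.text}")
end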

HTTParty

args = {foo: 'bar'}
response = HTTParty.get(url, query: args)

Text Embeddings (TE)

response = OpenAI::Client.new.embeddings(parameters: { model: 'text-embedding-3-small', input: ['lazy fox jumps over the brown dog'] })

Regex

text = '@aa @bb Hello W!'
Redact.redact(text, :multiple_handles, :multiple_hashtags).strip

text.scan(regex) { |occurrence| }

IPv4 regex

(?:[0-9]{1,3}\.){3}[0-9]{1,3}
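
Using it from Ruby (note: the pattern also matches out-of-range octets like 999):

IPV4 = /(?:[0-9]{1,3}\.){3}[0-9]{1,3}/
'host 10.0.0.1 and 192.168.1.20'.scan(IPV4) # => ["10.0.0.1", "192.168.1.20"]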

Replace ENV[] with ENV.fetch()

ENV\["(.+?)"\] -> ENV.fetch("$1")

Concurrency

def break?; Rails.cache.fetch('foo-manual', expires_in: 1.minute) { REDIS.llen('foo') < 10 }; end
loop { SomeService.call; puts Time.now.utc; break if break?; sleep 1 }; Rails.cache.delete('foo-manual'); exit

task = Concurrent::TimerTask.new(execution_interval: 0.5) do |t|
  next t.shutdown if break? # `break` inside this block would raise LocalJumpError
  SomeService.call
  puts Time.now.utc
end; task.execute
task.shutdown


def run
  loop do
    SomeService.call
    puts Time.now.utc
    break if break?
    sleep 1
  end
end
task = Concurrent::Future.execute { run }; p task.state
yy = <<YYY
---
run_now: true
run_once: true
priority: 33
job_params:
  klass: SomeModel
job_id: 0a04f026-108c-40d9-893e-f02223ec698d
time: '2021-08-04T05:19:38.651+00:00'
YYY

y = ActiveSupport::HashWithIndifferentAccess.new(YAML.load(yy)); y.class
y = nil; File.open('large_job.yaml', 'rb') {|f| y = ActiveSupport::HashWithIndifferentAccess.new(YAML.load(f.read)) }; y.class

Benchmark

t = Time.now
# to do something...
p(Time.now - t)
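
Or with the stdlib Benchmark module (do_something is a placeholder):

require 'benchmark'
elapsed = Benchmark.realtime { do_something }
puts "took #{elapsed.round(3)} s"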

Redis

REDIS = Redis.new(url: ENV['REDIS_URL'], driver: :hiredis, ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE}); REDIS.ping
REDIS.hgetall(key)

REDIS.llen('list')
REDIS.ltrim('list', 0, 200.pred) # keep only the first 200 items in the queue, delete the rest
messages = REDIS.lrange 'list', 1_000, 1_000 + 2
m = Oj.load(REDIS.lrange('list', 0, 1).first, symbolize_names: true)
REDIS.rpush('list', msg.to_json)

REDIS.copy('list', 'list' + '-bak') # backup
# REDIS.copy('list' + '-bak', 'list') # restore

# REDIS.sscan_each('set-dbg') {|id| p id; 1/0}

# receiving from Redis (blocking pop)
_, m = REDIS.blpop 'list'; # JSON
msg_rcv = Oj.load m
ap msg_rcv

def self.listen
  Signal.trap("INT") { throw :terminate }
  Signal.trap("TERM") { throw :terminate }

  catch :terminate do
    loop do
      channel, message = REDIS.blpop(CHANNEL) # blocking operation
      raise StandardError, "channel is not #{CHANNEL}" if channel != CHANNEL

      hash = JSON.parse(message)
      callback(hash)
    end
  end

  log 'Gracefully shutdown a listener'
end

Sidekiq Redis

redis = Redis.new(url: ENV['SIDEKIQ_REDIS_URL'], driver: :hiredis, ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE}); redis.ping
# copy HS jobs from backup queue to regular
loop { m = redis.lpop('my_namespace:queue:que-bak'); break if m.nil?; redis.rpush('my_namespace:queue:que', m); sleep(0.005) }

# Rails cache
cache_redis = Redis.new(
  driver: :hiredis,
  url: ENV['REDIS_CACHE_URL'],
  namespace: 'cache',
  pool_size: Integer(ENV['RAILS_MAX_THREADS'] || 5),
  pool_timeout: 5
); cache_redis.ping

cache_key = 'cache:'+key
#puts "expires at: #{cache_redis.ttl(cache_key).seconds.from_now}"
puts "expires at: #{Time.now + cache_redis.ttl(cache_key).seconds}"

# clean foo cache
cache_redis.scan_each(:match => "cache:foo:*") {|key| cache_redis.unlink key }; p Time.now.utc

cache_redis.scan_each(:match => "cache:foo*") {|key| puts key if key =~ /bar/ }
cache_redis.exists 'cache:'+['foo', digest].join('/')

# redis.scan_each(:match => "*") {|key| puts key unless key.start_with? 'profiles:' }


# Sidekiq jobs
redis = Redis.new(
  url: ENV.fetch('SIDEKIQ_REDIS_URL', ENV.fetch('REDIS_URL')),
  namespace: "my_namespace",
  driver: :hiredis,
); redis.ping
redis.llen 'my_namespace:queue:ig'

# other keys
REDIS.info(:memory)
REDIS.memory :usage, 'testkey'
namespaces = Set.new;  REDIS.scan_each(:match => "*") {|key| ns = key.split(':').first; namespaces << ns }; namespaces.size

REDIS.scan_each(match: 'user:*') do |resume_key_name|
  resume_key_name #=> "user:12"
end

REDIS.scan_each(:match => "bp_metrics:*") {|key| puts key; puts REDIS.get key}

# delete keys
pattern = "recorder:*"
cc = 0; REDIS.scan_each(match: pattern){|e| REDIS.del(e); puts e; cc += 1 }; cc

# migrate some keys from one instance to another
redis = Redis.new url: ENV['REDIS_URL'], ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE} # source Redis
pattern = '["socialscrape",1'
redis.scan_each(:match => "*") {|key| puts(key); if key.starts_with?(pattern); val = redis.get(key); REDIS.setex(key, 1.day, val); redis.del(key); end }; :ok

REDIS.scan_each(match: "template*") {|key| p(key) }

def store_file_content(redis_conn, file_pattern)
  Dir.glob("#{file_pattern}*").each do |file|
    content = File.read(file)
    redis_conn.hset('files', file, content)
  end
end

store_file_content(REDIS, 'filename_prefix_'); REDIS.type 'files' #=> "hash"
REDIS.hscan_each('files') {|key, val| p key}
# REDIS.hscan_each('files') {|key, val| File.write("_data/#{key}", val)}

require_relative 'ruby/redis_helper'

CSV

def dump_csv(rows, csv_name)
  CSV.open(csv_name, "wb") do |csv|
    rows.each do |row|
      csv << Array(row)
    end
  end
  :ok
end

def call
  CSV.open(@path, 'ab', write_headers: false, force_quotes: true) do |csv|
    csv << %w[column1 column2]
    row = [123, 'foo']
    csv << row
  end
end

Datadog tracing APM

https://docs.datadoghq.com/tracing/setup_overview/setup/ruby/#resque
DataDog/dd-trace-rb#1710

# StatsD - DataDog
# https://docs.datadoghq.com/events/guides/dogstatsd/
# https://docs.datadoghq.com/tracing/runtime_metrics/ruby/
require 'datadog/statsd'
statsd = Datadog::Statsd.new('localhost', 8125)
statsd.event('An error occurred', "Error message", alert_type: 'error', tags: ['env:dev'])

Reset password manually

# by email
acc = Account.find_by(email: email)
acc.reset_perishable_token!
mail(to: acc.email, subject: 'Password reset instructions').deliver_now # `mail` is available inside an ActionMailer action

# just set a password manually
acc.password = 'secret'
acc.password_confirmation = 'secret'
acc.save!

Misc

dd = Base64.encode64 Zlib::Deflate.deflate obj.to_json # pack
obj = JSON.parse Zlib::Inflate.inflate Base64.decode64 dd # unpack

ActionController::Base.helpers.strip_tags(html)

klass.constantize

"chains:#{chain_name.underscore}".constantize.perform_bulk(ids)

model.select(:id).find_each{|obj| SomeWorker.perform_async(obj.id); sleep(2) }

# try to update in bigger chunks, then smaller
[50, 5].each {|i| model.in_batches(of: i) {|rel| rel.update(stream_id: new_stream_id) rescue nil } }
# then rest update 1 by 1
model.find_each {|sp| sp.update(stream_id: new_stream_id) rescue nil }

CSV.open(name, "wb") do |csv|
  RequestRecorder.get_many(status_id).each{|r| csv << [Time.at(r['time']).to_s(:db), r['payload']['col1'],r['payload']['col2']] }
end

def write_as_log(file_name)
  log = File.open(file_name, 'ab')
  log.sync = true
  begin
    obj = yield
    if obj.is_a?(String)
      log.write(obj)
      log.write("\n")
    else
      obj.each do |item|
        data = item.is_a?(String) ? item : item.to_json
        log.write(data)
        log.write("\n")
      end
    end
  ensure
    log.close
  end
end


class ApplicationRecord
  def self.execute_sql(*sql_array)
    connection.execute(send(:sanitize_sql_array, sql_array))
  end
end
res = ActiveRecord::Base.connection_pool.with_connection { |con| con.exec_query( "SELECT 1;" ) }

Book.find_by_sql(sql)

def measure(log_msg = 'Processed')
  t = Time.current
  res = yield
  Rails.logger.info "#{log_msg} in #{(Time.current - t).round(3)} s"
  res
end

Runner

require_relative 'report'
bin/rails runner report.rb  > report-`date +'%F-%H%M'`.txt

Twitter

Add -is:retweet -is:reply to exclude irrelevant tweets.
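
For example, against the v2 recent-search endpoint (bearer token and query are placeholders):

query = 'from:SomeAccount -is:retweet -is:reply'
response = HTTParty.get(
  'https://api.twitter.com/2/tweets/search/recent',
  query: { query: query, max_results: 10 },
  headers: { 'Authorization' => "Bearer #{ENV.fetch('TWITTER_BEARER_TOKEN')}" }
)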

Deserialize/parse/evaluate raw class instance object

raw_object = "#<Facebook::Messenger::Incoming::Message:0x00007f7393cce328 @messaging={\"timestamp\"=>\"1682899233201\", \"sender\"=>{\"id\"=>\"2216381368490362\"}, \"recipient\"=>{\"id\"=>\"178711405567023\"}, \"message\"=>{\"mid\"=>\"m_jXJjmZaz7r0GNJoy9NR1QZjXP7SP3wnJe4JhyMRXnqPJQyK4FbpiNeMar6duIx9y7ufL6a0Ps-EDNzy2XcVqCw\", \"text\"=>\"🎁 PICK A 🎁\", \"quick_reply\"=>{\"payload\"=>\"{\\\"type\\\": \\\"postback\\\", \\\"ts\\\": 1682899227.618464, \\\"return_to_node\\\": false, \\\"silent_ignore\\\": true, \\\"current_blackbox\\\": \\\"4edad8a4-a20e-4eb4-a4e7-3b2e462b3414\\\", \\\"current_context\\\": \\\"c:91d6d85d-4b10-40b5-8bff-984d6c43837b:2023-04-30\\\", \\\"payload\\\": {\\\"payload\\\": \\\"giftCampaigns:93cc46aa-c28d-4287-a729-94af7b8c60ff:1\\\"}}\"}}}>"
=> "#<Facebook::Messenger::Incoming::Message:0x00007f7393cce328 @messaging={\"timestamp\"=>\"1682899233201\", \"sender\"=>{\"id\"=>\"2216381368490362\"}, \"recipient\"=>{\"id\"=>\"178711405567023\"}, \"message\"=>{\"mid\"=>\"m_jXJjmZaz7r0GNJoy9NR1QZjXP7SP3wnJe4JhyMRXnqPJQyK4FbpiNeMar6duIx9y7ufL6a0Ps-EDNzy2XcVqCw\", \"text\"=>\"🎁 PICK A 🎁\", \"quick_reply\"=>{\"payload\"=>\"{\\\"type\\\": \\\"postback\\\", \\\"ts\\\": 1682899227.618464, \\\"return_to_node\\\": false, \\\"silent_ignore\\\": true, \\\"current_blackbox\\\": \\\"4edad8a4-a20e-4eb4-a4e7-3b2e462b3414\\\", \\\"current_context\\\": \\\"c:91d6d85d-4b10-40b5-8bff-984d6c43837b:2023-04-30\\\", \\\"payload\\\": {\\\"payload\\\": \\\"giftCampaigns:93cc46aa-c28d-4287-a729-94af7b8c60ff:1\\\"}}\"}}}>"
# eval of an untrusted string is dangerous; only for one-off console debugging
hash = eval raw_object[raw_object.index('=') + 1..-2]

Build collection from log file

def build_collection_from_log(log_file)
  Enumerator.new do |yielder|
    File.open(log_file, 'rb') do |f|
      f.each_line do |line|
        yielder.yield(line.strip)
      end
    end
  end
end

@coll = build_collection_from_log('index-docs-1.jsonl')
@coll.next # fetch the next item (raises StopIteration when exhausted)

Write to file

File.open("log_file.jsonl", 'wb') do |f|
  loop do
    f.write(payload)
    f.write("\n")
    break
  end
end

Redis Helper

require_relative 'lib/maintenance/redis/helper'
coll = Maintenance::Redis::Helper.redis_set_values_collection(REDIS, 'rule:non_hidden')
status_id = coll.first

coll = Maintenance::Redis::Helper.redis_pop_list(REDIS, 'ios-jobs-tmp') { |obj| obj }
coll.each { |item|
  args = Oj.load(item, symbolize_names: true).last; p(args)
  Reviews::Ios::UpdateStreamWorker.new.perform(*args)
  sleep 3
}
# coll = ss.map {|obj| args = Oj.load(Oj.load(obj).last.first, symbolize_names: true).last }; coll.first

Replace in column

model.in_batches(of: 500) {|rel| print '.'; rel.update_all("photo_url = REPLACE(photo_url, 'old_host', 'new_host')"); sleep(.05); print '?'}; puts

heroku ps:scale processing_grp2_worker=1 -a pre-prod-affogata

Slack notifications

# curl -X POST --data-urlencode "payload={\"channel\": \"#slack-alerts\", \"username\": \"reporter\", \"text\": \"This is posted to #slack-alerts and comes from a bot named reporter.\", \"icon_emoji\": \":ghost:\"}" https://hooks.slack.com/services/...

def send_to_slack_channel(url:, channel:, username:, text:)
  payload = {
    channel: channel,
    username: username,
    text: text,
    icon_emoji: ":ghost:"
  }

  HTTParty.post(
    url,
    body: { payload: JSON.dump(payload) },
    headers: { 'Content-Type' => 'application/x-www-form-urlencoded' }
  )
end

res = send_to_slack_channel(
  url: "https://hooks.slack.com/services/...",
  channel: "#slack-alerts",
  username: "reporter",
  text: 'Aloha!',
)

Search in ElasticSearch

# ElasticSearch client
client = Elastic::Client.new
client.perform_request('GET', "_cat/indices", {}, {})
client.perform_request('GET', "foo-000001/_mapping", {}, {})
client.perform_request('GET', 'foo-000001/_analyze?pretty', {}, { text: "foo bar, baz, a @fun @and joy", })

res = client.client.perform_request('GET', "#{idx}/_mapping")
puts(idx) if res.body.dig(idx, 'mappings', 'properties', 'embedding_v2').nil?

opts = {service: 'es', region: ENV['AWS_REGION'], access_key_id: ENV['AWS_ACCESS_KEY_ID'], secret_access_key: ENV['AWS_SECRET_ACCESS_KEY']};
@client = Elasticsearch::Client.new(url: ENV.fetch('ELASTICSEARCH_DATA_URL'), log: true, logger: Logger.new($stdout)) {|c| c.request :aws_sigv4, opts};
document_id = "2-17913554644973943"; document_index = 'ig-000001';
@client.get({ index: document_index, id: document_id })

body = { docs: [{_index: document_index, _id: document_id }] }.with_indifferent_access
doc_props = %w[id text language];
body = { docs: [{_index: document_index, _id: document_id, _source: doc_props }] }.with_indifferent_access
# res = @client.mget(body: body)
# res.dig('docs', 0, '_source')
# Query DSL (Kibana Dev Tools syntax):
GET /fb/_search?size=2
{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "created_at": {
              "lte": "2022-11-10T14:34:00.000Z"
            }
          }
        },
        {
          "range": {
            "created_at": {
              "gte": "2022-11-06T16:25:00.000Z",
              "lte": "2022-11-10T17:25:00.000Z"
            }
          }
        },
        {
          "script": {
            "script": "doc['some_ids'].length > 1"
          }
        }
      ],
      "must_not": [
        {
          "term": {
            "is_deleted": true
          }
        }
      ]
    }
  }
}

Logging to Elastic

client = LogClient.instance;
client.client.transport.logger = Logger.new($stdout)
resp = client.log!(event_type: 'foo', data: { arg1: 'val1' })

ELK

ES KQL

not trace: "LinkedIn" and not trace: "Tiktok" and not trace: "AndroidpublisherV3"

Resque

See https://stackoverflow.com/questions/21904445/how-to-view-a-resque-job

payload = Resque.data_store.pop_from_queue '14';
parsed = Oj.load payload, symbolize_names: true
klass = parsed[:class].safe_constantize
args = parsed.dig(:args, 0)
job_params = args.fetch(:job_params)
klass.run(foo, args) # foo is a placeholder for a caller-specific argument

MySQL

mysql -h host -u root -p

CREATE USER 'appuser'@'%' IDENTIFIED BY 'topsecret';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, INDEX, ALTER, CREATE TEMPORARY TABLES, LOCK TABLES ON `mydb`.* TO 'appuser'@'%';
FLUSH PRIVILEGES;
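
From Ruby, a quick connectivity-check sketch with the mysql2 gem (credentials mirror the grant above):

require 'mysql2'
client = Mysql2::Client.new(host: 'host', username: 'appuser', password: 'topsecret', database: 'mydb')
client.query('SELECT 1').to_a # => [{"1"=>1}]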

Sidekiq

bundle exec sidekiq -C config/sidekiq.yml
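
A minimal job sketch for reference (Sidekiq 6.3+ naming; older versions include Sidekiq::Worker instead):

class ExampleJob
  include Sidekiq::Job

  def perform(record_id)
    puts "processing #{record_id}"
  end
end

ExampleJob.perform_async(123)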

Marshal

# dump
File.open('obj.dump', 'wb') {|f| f.write Marshal.dump(obj)}
# load
obj = File.open('obj.dump', 'rb') {|f| Marshal.load f}; obj.class

Loading class

module Rd; end
Rd::Status = Class.new(ActiveRecord::Base) { self.table_name = 'rd_statuses'; self.primary_keys = :id, :directly_related; serialize :full_object }
Rd::Status.take

Control YAML serialization in Ruby

def to_yaml_properties
  [:@id]
end

or

def to_yaml_properties
  instance_variables - [:@some, :@another]
end
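
Effect, roughly (Psych still honors this legacy hook; encode_with/init_with is the modern replacement):

require 'yaml'

class Ticket
  def initialize; @id = 1; @cache = { big: 'blob' }; end
  def to_yaml_properties; [:@id]; end
end

puts YAML.dump(Ticket.new)
# --- !ruby/object:Ticket
# id: 1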

Credits: https://coderwall.com/p/wav_tg/control-yaml-serialization-in-ruby

See also:

Yaml/memoization/class loading

Print YAML object:

puts obj.full_object_before_type_cast

Psych ~/.local/asdf/installs/ruby/2.7.8/lib/ruby/2.7.0/psych.rb

node = result.children[0].children.find {|c| c.class.name == 'Psych::Nodes::Scalar' && c.value == 'api'}

On deserialization issues:

https://github.com/affogata/communit_360/pull/2463/files

# https://gist.github.com/romiras/2a50b2368b348da6e25ccb5788067245
module MemoizedCacheSanitizer
  REMOVE_INSTANCE_VARIABLE = :@_memoized_method_cache

  def self.sanitize(obj)
    return obj if obj.nil? || obj.instance_of?(Hash) || obj.instance_of?(Array)

    attributes = obj.instance_variables.each_with_object({}) do |iv, h|
      attr_name = iv.to_s[1..-1].to_sym
      h[attr_name] = obj.send(attr_name) unless REMOVE_INSTANCE_VARIABLE == iv
    end

    attrs = attributes.delete(:attrs).with_indifferent_access

    # build the new object with the sanitized attributes
    obj.class.new(attrs).tap do |o|
      attributes.each do |k, v|
        o.instance_variable_set(:"@#{k}", v) # set the ivar directly, bypassing any memoizing setter
      end
    end
  end
end

# FILE: config/initializers/fix_yaml_column.rb

# serializes the columns that break because of memoized cache
#
# alternatively, we could have just removed the string for the memoized cache, but that sounded too dangerous

# hard-require to load the class
require 'active_record/coders/yaml_column'

class ActiveRecord::Coders::YAMLColumn
  private

  def yaml_load(payload)
    ["ThreadSafe::Cache", "Monitor", "Thread::Mutex"].each do |thing_to_remove|
      payload.gsub!("!ruby/object:#{thing_to_remove}", "")
    end

    # make the main object a hash
    ["Memoizable::Memory"].each do |thing|
      payload.gsub!(thing, "Hash")
    end

    # remove the memoized_method_cache so that the memoized gem doesn't try to use it as its cache
    #
    # use a trailing : so that we don't have repeating if we gsub again
    payload.gsub!("_memoized_method_cache:", "_memoized_method_cache_hack:")

    if !ActiveRecord::Base.use_yaml_unsafe_load
      YAML.safe_load(payload, permitted_classes: ActiveRecord::Base.yaml_column_permitted_classes, aliases: true)
    else
      if YAML.respond_to?(:unsafe_load)
        YAML.unsafe_load(payload)
      else
        YAML.load(payload)
      end
    end
  end
end

# https://gist.github.com/romiras/587c5cefd1e58bd88376a178ac48ebf7
# incorrect draft, just to show a direction
  def full_object
    self[:full_object]
  rescue ArgumentError => e
    raw = full_object_before_type_cast

    binding.pry

    raw.gsub!(%r{!ruby/object:Koala::Instagram::API}, '!ruby/object:Koala::Instagram::Api')

    Psych.load(raw, symbolize_names: true)
  end

Zeitwerk/class loading

https://guides.rubyonrails.org/classic_to_zeitwerk_howto.html

bin/rails zeitwerk:check

ActiveSupport::Inflector.inflections => config/initializers/inflections.rb
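
Typical contents of that initializer (standard Rails API; the acronyms are examples):

# config/initializers/inflections.rb
ActiveSupport::Inflector.inflections(:en) do |inflect|
  inflect.acronym 'API'
  inflect.acronym 'HTML'
end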

Graceful break

def graceful_loop(&block)
  quit = false
  trap('INT') { quit = true }

  loop do
    break if quit
    block.call
  end
ensure
  # Reset the trap to its original state
  trap('INT', 'DEFAULT')
end

# draft
def graceful
  Signal.trap("INT") { throw :exit_loop }
  catch(:exit_loop) { yield }
ensure
  Signal.trap('INT', 'DEFAULT')
  puts "Loop exited!"
end

def foo
  puts 'foo'
  p Benchmark.realtime { sleep 1 }
  puts '/foo'
end

loop { break if graceful { foo } }
# process_hs_pro
graceful { process_hs }