# Rack::Utils.parse_query URI("http://example.com?par=hello&par2=bye").query
params = Rack::Utils.parse_query(q).with_indifferent_access
# Fixed: the inner hash was missing its closing brace (syntax error).
args = ActiveSupport::HashWithIndifferentAccess.new({ "job_params" => {} })
model.find_in_batches(batch_size: 2_000) {|objs| };
objs.pluck(:field1)
ap model.group(:field1).count
# output as raw object
obj.yaml_field1_before_type_cast
arr.each_with_object({}) {|obj, h| h[obj.id] = {} }
sp.limit(10_000).select(:id).find_in_batches {|objs| print '?'; ids = objs.map(&:id); foo(); sleep(1.5)}; p Time.now.utc
params = ActionController::Parameters.new JSON.parse j
params = ActionController::Parameters.new({
"user_id" => "123",
"ref_id" => "foo",
})
# Ruby NLP: detecting language:
NLP.detect_language(text)
# Geo-location (uses Google API) and extracts radius from distance between bounding-box coordinates
NLP.find_location(location_string) # 'Sydney'
require_dependency 'some_lib'
require_relative 'pipeline/sp_dbg'
Rails.logger.level = :debug
Timeout.timeout(5)
indexed_docs = docs.index_by(&:status_id)
text.unicode_normalize(:nfkd)
text.codepoints.map {|cp| 'U+'+cp.to_s(16)} # => ["U+7b", "U+22", "U+1f620", "U+22", "U+3a", "U+20", "U+22", "U+22", "U+7d"]
MeCab::Model.new.createTagging
https://developers.facebook.com/docs/messenger-platform/getting-started/quick-start https://github.com/jgorset/facebook-messenger https://githubmemory.com/repo/jgorset/facebook-messenger/issues?cursor=Y3Vyc29yOnYyOpK5MjAxOS0wNi0yOFQxNjozNjo0OSswODowMM4biGJm&pagination=prev&page=1
args = {foo: 'bar'}
response = HTTParty.get(url, query: args)
response = OpenAI::Client.new.embeddings(parameters: { model: 'text-embedding-3-small', input: ['lazy fox jumps over the brown dog'] })
text = '@aa @bb Hello W!'
Redact.redact(text, :multiple_handles, :multiple_hashtags).strip
text.scan(regex) { |occurrence| }
(?:[0-9]{1,3}\.){3}[0-9]{1,3}
ENV\["(.+?)"\] -> ENV.fetch("$1")
- https://stackoverflow.com/questions/1080993/pure-ruby-concurrent-hash/40598360#40598360
- https://stackoverflow.com/questions/34589715/runtimeerror-cant-add-a-new-key-into-hash-during-iteration
- ruby-concurrency/concurrent-ruby#594
- rails/rails#24627
# True when the Redis list 'foo' holds fewer than 10 items; the answer
# is cached under 'foo-manual' for one minute to avoid hammering Redis.
def break?
  Rails.cache.fetch('foo-manual', expires_in: 1.minute) { REDIS.llen('foo') < 10 }
end
loop { SomeService.call; puts Time.now.utc; break if break?; sleep 1 }; Rails.cache.delete('foo-manual'); exit
task = Concurrent::TimerTask.new(execution_interval: 0.5.seconds, timeout_interval: 60) do
break if break?
SomeService.call
puts Time.now.utc
end; task.execute
task.shutdown
# Polls SomeService: invoke it, print a UTC timestamp, and repeat every
# second until break? reports true. Always runs at least once.
def run
  finished = false
  until finished
    SomeService.call
    puts Time.now.utc
    finished = break?
    sleep 1 unless finished
  end
end
task = Concurrent::Future.execute { run }; p task.state
yy=<<YYY
---
run_now: true
run_once: true
priority: 33
job_params:
klass: SomeModel
job_id: 0a04f026-108c-40d9-893e-f02223ec698d
time: '2021-08-04T05:19:38.651+00:00'
YYY
y = ActiveSupport::HashWithIndifferentAccess.new YAML.load (yy); y.class
y = nil; File.open('large_job.yaml', 'rb') {|f| y = ActiveSupport::HashWithIndifferentAccess.new YAML.load (f.read) }; y.class
t = Time.now
# to do something...
p (Time.now-t)
REDIS = Redis.new(url: ENV['REDIS_URL'], driver: :hiredis, ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE}); REDIS.ping
REDIS.hgetall(key)
REDIS.llen('list')
REDIS.ltrim('list', 0, 200.pred) # leave only first 200 items in a queue, delete a rest
messages = REDIS.lrange 'list', 1_000, 1_000 + 2
m = Oj.load(REDIS.lrange('list', 0, 1).first, symbolize_names: true)
REDIS.rpush('list', msg.to_json)
REDIS.copy('list', 'list' + '-bak') # backup
# REDIS.copy('list' + '-bak', 'list') # restore
# REDIS.sscan_each('set-dbg') {|id| p id; 1/0}
# receiving from Redis (blocking pop)
_, m = REDIS.blpop 'list'; # JSON
msg_rcv = Oj.load m
ap msg_rcv
# Blocking consumer loop: pops JSON payloads off the CHANNEL Redis list
# and hands each parsed hash to +callback+.
#
# INT/TERM handlers throw :terminate, unwinding the loop out of the
# blocking blpop so the shutdown message below is logged.
# NOTE(review): CHANNEL, REDIS, callback and log are defined elsewhere.
def self.listen
Signal.trap("INT") { throw :terminate }
Signal.trap("TERM") { throw :terminate }
catch :terminate do
loop do
channel, message = REDIS.blpop(CHANNEL) # blocking operation
# blpop returns [list_name, element]; guard against a mismatched key
raise StandardError, "channel is not #{CHANNEL}" if channel != CHANNEL
hash = JSON.parse(message)
callback(hash)
end
end
log 'Gracefully shutdown a listener'
end
redis = Redis.new(url: ENV['SIDEKIQ_REDIS_URL'], driver: :hiredis, ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE}); redis.ping
# copy HS jobs from backup queue to regular
loop { m = redis.lpop('my_namespace:queue:que-bak'); break if m.nil?; redis.rpush('my_namespace:queue:que', m); sleep(0.005) }
# Rails cache
cache_redis = Redis.new(redis_config = {
driver: :hiredis,
url: ENV['REDIS_CACHE_URL'],
namespace: 'cache',
pool_size: Integer(ENV['RAILS_MAX_THREADS'] || 5),
pool_timeout: 5
}); cache_redis.ping
cache_key = 'cache:'+key
#puts "expires at: #{cache_redis.ttl(cache_key).seconds.from_now}"
puts "expires at: #{Time.now + cache_redis.ttl(cache_key).seconds}"
# clean foo cache
cache_redis.scan_each(:match => "cache:foo:*") {|key| cache_redis.unlink key }; p Time.now.utc
cache_redis.scan_each(:match => "cache:foo*") {|key| puts key if key =~ /bar/ }
cache_redis.exists 'cache:'+['foo', digest].join('/')
# redis.scan_each(:match => "*") {|key| puts key unless key.start_with? 'profiles:' }
# Sidekiq jobs
redis = Redis.new(redis_config = {
url: ENV.fetch('SIDEKIQ_REDIS_URL', ENV.fetch('REDIS_URL')),
namespace: "my_namespace",
driver: :hiredis,
}); redis.ping
redis.llen 'my_namespace:queue:ig'
# other keys
REDIS.info(:memory)
REDIS.memory :usage, 'testkey'
namespaces = Set.new; REDIS.scan_each(:match => "*") {|key| ns = key.split(':').first; namespaces << ns }; namespaces.size
REDIS.scan_each(match: 'user:*') do |resume_key_name|
resume_key_name #=> "user:12"
end
REDIS.scan_each(:match => "bp_metrics:*") {|key| puts key; puts REDIS.get key}
# delete keys
pattern = "recorder:*"
cc = 0; REDIS.scan_each(match: pattern){|e| REDIS.del(e); puts e; cc += 1 }; cc
# migrate some keys from one instance to another
redis = Redis.new url: ENV['REDIS_URL'], ssl_params: {verify_mode: OpenSSL::SSL::VERIFY_NONE} # source Redis
pattern = '["socialscrape",1'
redis.scan_each(:match => "*") {|key| puts(key); if key.starts_with?(pattern); val = redis.get(key); REDIS.setex(key, 1.day, val); redis.del(key); end }; :ok
REDIS.scan_each(match: "template*") {|key| p(key) }
# Loads every file whose path starts with +file_pattern+ into the Redis
# hash 'files': field = file path, value = raw file content.
def store_file_content(redis_conn, file_pattern)
  matches = Dir.glob("#{file_pattern}*")
  matches.each do |path|
    redis_conn.hset('files', path, File.read(path))
  end
end
store_file_content(REDIS, 'filename_prefix_'); REDIS.type 'files' #=> "hash"
REDIS.hscan_each('files') {|key, val| p key}
# REDIS.hscan_each('files') {|key, val| File.write("_data/#{key}", val)}
require_relative 'ruby/redis_helper'
# Writes +rows+ to a new CSV file at +csv_name+, coercing each row to
# an Array. Returns :ok.
def dump_csv(rows, csv_name)
  CSV.open(csv_name, "wb") do |out|
    rows.each { |record| out << Array(record) }
  end
  :ok
end
# Appends a header row followed by one data row to the CSV at @path,
# quoting every field.
#
# NOTE(review): the file is opened in append mode ('ab') yet the
# %w[column1 column2] header is written on every invocation — repeated
# calls interleave header rows with data; confirm that is intended.
def call
CSV.open(@path, 'ab', write_headers: false, force_quotes: true) do |csv|
csv << %w[column1 column2]
row = [123, 'foo']
csv << row
end
end
https://docs.datadoghq.com/tracing/setup_overview/setup/ruby/#resque DataDog/dd-trace-rb#1710
# StatsD - DataDog
# https://docs.datadoghq.com/events/guides/dogstatsd/
# https://docs.datadoghq.com/tracing/runtime_metrics/ruby/
require 'datadog/statsd'
statsd = Datadog::Statsd.new('localhost', 8125)
statsd.event('An error occurred', "Error message", alert_type: 'error', tags: ['env:dev'])
# by email
acc = Account.find_by(email: email)
acc.reset_perishable_token!
mail(to: acc.email, subject: 'Password reset instructions').deliver_now
# just set a password manually
acc.password = 'secret'
acc.password_confirmation = 'secret'
acc.save!
dd = Base64.encode64 Zlib::Deflate.deflate obj.to_json # pack
obj = JSON.parse Zlib::Inflate.inflate Base64.decode64 dd # unpack
ActionController::Base.helpers.strip_tags(html)
klass.constantize
"chains:#{chain_name.underscore}".constantize.perform_bulk(ids)
model.select(:id).find_each{|obj| SomeWorker.perform_async(obj.id); sleep(2) }
# try to update in bigger chunks, then smaller
[50, 5].each {|i| model.in_batches(of: i) {|rel| rel.update(stream_id: new_stream_id) rescue nil } }
# then rest update 1 by 1
model.find_each {|sp| sp.update(stream_id: new_stream_id) rescue nil }
CSV.open(name, "wb") do |csv|
RequestRecorder.get_many(status_id).each{|r| csv << [Time.at(r['time']).to_s(:db), r['payload']['col1'],r['payload']['col2']] }
end
# Appends the block's result to +file_name+ as newline-delimited entries.
#
# The block may return a String (written verbatim on its own line) or a
# collection whose items are written one per line, JSON-encoding any
# non-String item. A nil result writes nothing (previously raised
# NoMethodError). The file handle is always closed.
def write_as_log(file_name)
  log = File.open(file_name, 'ab') # append: existing entries are kept
  log.sync = true                  # flush each write immediately
  begin
    obj = yield
    # Array() normalizes: String -> [String], nil -> [], Hash -> pairs,
    # other collections pass through — same per-item handling for all.
    Array(obj).each do |item|
      data = item.is_a?(String) ? item : item.to_json
      log.write(data)
      log.write("\n")
    end
  ensure
    log.close
  end
end
# Reopens ApplicationRecord to add a sanitized raw-SQL helper.
class ApplicationRecord
  # Runs a raw SQL statement after passing it through ActiveRecord's
  # sanitize_sql_array (placeholder interpolation), e.g.
  #   execute_sql("SELECT * FROM t WHERE id = ?", 5)
  def self.execute_sql(*sql_array)
    statement = send(:sanitize_sql_array, sql_array)
    connection.execute(statement)
  end
end
res = ActiveRecord::Base.connection_pool.with_connection { |con| con.exec_query( "SELECT 1;" ) }
Book.find_by_sql(sql)
# Measures the duration of the block, logs it, and returns the block's
# result.
#
# @param log_msg [String] prefix for the log line
# @return the block's return value
def measure(log_msg = 'Processed')
  # Monotonic clock: immune to NTP/DST adjustments, unlike Time.current.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  res = yield
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  Rails.logger.info "#{log_msg} in #{elapsed.round(3)} s"
  res
end
require_relative 'report'
bin/rails runner report.rb > report-`date +'%F-%H%M'`.txt
Add -is:retweet -is:reply
to exclude irrelevant tweets.
raw_object = "#<Facebook::Messenger::Incoming::Message:0x00007f7393cce328 @messaging={\"timestamp\"=>\"1682899233201\", \"sender\"=>{\"id\"=>\"2216381368490362\"}, \"recipient\"=>{\"id\"=>\"178711405567023\"}, \"message\"=>{\"mid\"=>\"m_jXJjmZaz7r0GNJoy9NR1QZjXP7SP3wnJe4JhyMRXnqPJQyK4FbpiNeMar6duIx9y7ufL6a0Ps-EDNzy2XcVqCw\", \"text\"=>\"🎁 PICK A 🎁\", \"quick_reply\"=>{\"payload\"=>\"{\\\"type\\\": \\\"postback\\\", \\\"ts\\\": 1682899227.618464, \\\"return_to_node\\\": false, \\\"silent_ignore\\\": true, \\\"current_blackbox\\\": \\\"4edad8a4-a20e-4eb4-a4e7-3b2e462b3414\\\", \\\"current_context\\\": \\\"c:91d6d85d-4b10-40b5-8bff-984d6c43837b:2023-04-30\\\", \\\"payload\\\": {\\\"payload\\\": \\\"giftCampaigns:93cc46aa-c28d-4287-a729-94af7b8c60ff:1\\\"}}\"}}}>"
=> "#<Facebook::Messenger::Incoming::Message:0x00007f7393cce328 @messaging={\"timestamp\"=>\"1682899233201\", \"sender\"=>{\"id\"=>\"2216381368490362\"}, \"recipient\"=>{\"id\"=>\"178711405567023\"}, \"message\"=>{\"mid\"=>\"m_jXJjmZaz7r0GNJoy9NR1QZjXP7SP3wnJe4JhyMRXnqPJQyK4FbpiNeMar6duIx9y7ufL6a0Ps-EDNzy2XcVqCw\", \"text\"=>\"🎁 PICK A 🎁\", \"quick_reply\"=>{\"payload\"=>\"{\\\"type\\\": \\\"postback\\\", \\\"ts\\\": 1682899227.618464, \\\"return_to_node\\\": false, \\\"silent_ignore\\\": true, \\\"current_blackbox\\\": \\\"4edad8a4-a20e-4eb4-a4e7-3b2e462b3414\\\", \\\"current_context\\\": \\\"c:91d6d85d-4b10-40b5-8bff-984d6c43837b:2023-04-30\\\", \\\"payload\\\": {\\\"payload\\\": \\\"giftCampaigns:93cc46aa-c28d-4287-a729-94af7b8c60ff:1\\\"}}\"}}}>"
hash = eval raw_object[raw_object.index('=')+1..-2]
# Wraps a line-oriented log file in a lazy Enumerator that yields each
# line stripped of surrounding whitespace. The file is (re)opened on
# each full enumeration.
def build_collection_from_log(log_file)
  Enumerator.new do |yielder|
    File.open(log_file, 'rb') do |io|
      io.each_line { |row| yielder << row.strip }
    end
  end
end
@coll = build_collection_from_log('index-docs-1.jsonl')
@coll.next # pop item
File.open("log_file.jsonl", 'wb') do |f|
loop do
f.write(payload)
f.write("\n")
break
end
end
require_relative 'lib/maintenance/redis/helper'
coll = Maintenance::Redis::Helper.redis_set_values_collection(REDIS, 'rule:non_hidden')
status_id = coll.first
coll = Maintenance::Redis::Helper.redis_pop_list(REDIS, 'ios-jobs-tmp') { |obj| obj }
coll.each { |item|
args = Oj.load(item, symbolize_names: true).last; p(args)
Reviews::Ios::UpdateStreamWorker.new.perform(*args)
sleep 3
}
# coll = ss.map {|obj| args = Oj.load(Oj.load(obj).last.first, symbolize_names: true).last }; coll.first
model.in_batches(of: 500) {|rel| print '.'; rel.update_all("photo_url = REPLACE(photo_url, 'old_host', 'new_host')"); sleep(.05); print '?'}; puts
heroku ps:scale processing_grp2_worker=1 -a pre-prod-affogata
# curl -X POST --data-urlencode "payload={\"channel\": \"#slack-alerts\", \"username\": \"reporter\", \"text\": \"This is posted to #slack-alerts and comes from a bot named reporter.\", \"icon_emoji\": \":ghost:\"}" https://hooks.slack.com/services/...
# Posts a message to a Slack incoming webhook.
#
# @param url [String] webhook endpoint
# @param channel [String] target channel (e.g. "#slack-alerts")
# @param username [String] display name of the bot
# @param text [String] message body
# @return the HTTParty response
def send_to_slack_channel(url:, channel:, username:, text:)
  message = {
    channel: channel,
    username: username,
    text: text,
    icon_emoji: ":ghost:"
  }
  # Slack webhooks expect a form-encoded `payload` field holding JSON.
  HTTParty.post(
    url,
    body: { payload: JSON.dump(message) },
    headers: { 'Content-Type' => 'application/x-www-form-urlencoded' }
  )
end
res = send_to_slack_channel(
url: "https://hooks.slack.com/services/...",
channel: "#slack-alerts",
username: "reporter",
text: 'Aloha!',
)
# ElasticSearch client
client = Elastic::Client.new
client.perform_request('GET', "_cat/indices", {}, {})
client.perform_request('GET', "foo-000001/_mapping", {}, {})
client.perform_request('GET', 'foo-000001/_analyze?pretty', {}, { text: "foo bar, baz, a @fun @and joy", })
res = client.client.perform_request('GET', "#{idx}/_mapping")
puts(idx) if res.body.dig(idx, 'mappings', 'properties', 'embedding_v2').nil?
opts = {service: 'es', region: ENV['AWS_REGION'], access_key_id: ENV['AWS_ACCESS_KEY_ID'], secret_access_key: ENV['AWS_SECRET_ACCESS_KEY']};
@client = Elasticsearch::Client.new(url: ENV.fetch('ELASTICSEARCH_DATA_URL'), log: true, logger: Logger.new($stdout)) {|c| c.request :aws_sigv4, opts};
document_id = "2-17913554644973943"; document_index = 'ig-000001';
@client.get({ index: document_index, id: document_id })
body = { docs: [{_index: document_index, _id: document_id }] }.with_indifferent_access
doc_props = %w[id text language];
body = { docs: [{_index: document_index, _id: document_id, _source: doc_props }] }.with_indifferent_access
# res = @client.mget(body: body)
# res.dig('docs', 0, '_source')
GET /fb/_search?size=2
{
"query": {
"bool": {
"filter": [
{
"range": {
"created_at": {
"lte": "2022-11-10T14:34:00.000Z"
}
}
},
{
"range": {
"created_at": {
"gte": "2022-11-06T16:25:00.000Z",
"lte": "2022-11-10T17:25:00.000Z"
}
}
},
{
"script": {
"script": "doc['some_ids'].length > 1"
}
}
],
"must_not": [
{
"term": {
"is_deleted": true
}
}
]
}
}
}
client = LogClient.instance;
client.client.transport.logger = Logger.new($stdout)
resp = client.log!(event_type: 'foo', data: { arg1: 'val1' })
not trace: "LinkedIn" and not trace: "Tiktok" and not trace: "AndroidpublisherV3"
See https://stackoverflow.com/questions/21904445/how-to-view-a-resque-job
payload = Resque.data_store.pop_from_queue '14';
parsed = Oj.load payload, symbolize_names: true
klass = parsed[:class].safe_constantize
args = parsed.dig(:args, 0)
job_params = args.fetch(:job_params)
klass.run(foo, args)
mysql -h host -u root -p
CREATE USER 'appuser'@'%' IDENTIFIED BY 'topsecret';
GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, INDEX, ALTER, CREATE TEMPORARY TABLES, LOCK TABLES ON `mydb`.* TO 'appuser'@'%';
FLUSH privileges;
bundle exec sidekiq -C config/sidekiq.yml
# dump
File.open('obj.dump', 'wb') {|f| f.write Marshal.dump(obj)}
# load
obj = File.open('obj.dump', 'rb') {|f| Marshal.load f}; obj.class
module Rd; end
Rd::Status = Class.new(ActiveRecord::Base) { self.table_name = 'rd_statuses'; self.primary_keys = :id, :directly_related; serialize :full_object }
Rd::Status.take
# Restrict YAML serialization of this object to the @id ivar only.
def to_yaml_properties
[:@id]
end
or
# Alternative: serialize every ivar except the ones listed.
def to_yaml_properties
instance_variables - [:@some, :@another]
end
Credits: https://coderwall.com/p/wav_tg/control-yaml-serialization-in-ruby
See also:
- https://binarysolo.blog/how-to-generate-yaml-from-ruby-objects-without-type-annotations/
- https://staaldraad.github.io/post/2021-01-09-universal-rce-ruby-yaml-load-updated/
- https://book.jorianwoltjer.com/languages/yaml
Print a YAML object:
puts obj.full_object_before_type_cast
Psych ~/.local/asdf/installs/ruby/2.7.8/lib/ruby/2.7.0/psych.rb
node = result.children[0].children.find {|c| c.class.name == 'Psych::Nodes::Scalar' && c.value == 'api'}
On deserialization issues:
https://github.com/affogata/communit_360/pull/2463/files
# https://gist.github.com/romiras/2a50b2368b348da6e25ccb5788067245
# Rebuilds an object without its memoized-method cache so it can be
# serialized safely (the cache ivar may hold unserializable state).
module MemoizedCacheSanitizer
# ivar injected by the memoization layer; stripped during sanitize
REMOVE_INSTANCE_VARIABLE = :@_memoized_method_cache
# Returns a fresh instance of obj's class carrying the same attribute
# values minus the memoized cache. nil, Hash and Array pass through
# untouched.
# NOTE(review): assumes every remaining ivar has a same-named reader
# and that obj.class.new accepts the :attrs hash — confirm for each
# sanitized class.
def self.sanitize(obj)
return obj if obj.nil? || obj.instance_of?(Hash) || obj.instance_of?(Array)
# snapshot every ivar (except the cache) via its reader method
attributes = obj.instance_variables.each_with_object({}) do |iv, h|
attr_name = iv.to_s[1..-1].to_sym
h[attr_name] = obj.send(attr_name) unless REMOVE_INSTANCE_VARIABLE == iv
end
attrs = attributes.delete(:attrs).with_indifferent_access
# build the new object with the sanitized attributes
obj.class.new(attrs).tap do |o|
attributes.each do |k, v|
o.instance_variable_set(:"@#{k}", v) # set the ivar directly; bypassing setters avoids re-memoization
end
end
end
end
# FILE: config/initializers/fix_yaml_column.rb
# serializes the columns that break because of memoized cache
#
# alternatively, we could have just removed the string for the memoized cache, but that sounded too dangerous
# hard-require to load the class
require 'active_record/coders/yaml_column'
# Monkey-patches Rails' YAML column coder so legacy payloads containing
# unserializable wrapper objects (thread-safe caches, mutexes, the
# memoized-method cache) can still be loaded.
class ActiveRecord::Coders::YAMLColumn
private
# Scrubs known-bad tags/keys from the raw YAML text, then delegates to
# the loader matching the app's unsafe-load configuration.
def yaml_load(payload)
# strip object tags for wrappers that cannot (and need not) be revived
["ThreadSafe::Cache", "Monitor", "Thread::Mutex"].each do |thing_to_remove|
payload.gsub!("!ruby/object:#{thing_to_remove}", "")
end
# make the main object a hash
["Memoizable::Memory"].each do |thing|
payload.gsub!(thing, "Hash")
end
# remove the memoized_method_cache so that the memoized gem doesn't try to use it as its cache
#
# use a trailing : so that we don't have repeating if we gsub again
payload.gsub!("_memoized_method_cache:", "_memoized_method_cache_hack:")
if !ActiveRecord::Base.use_yaml_unsafe_load
# safe path: only classes explicitly permitted by the app config
YAML.safe_load(payload, permitted_classes: ActiveRecord::Base.yaml_column_permitted_classes, aliases: true)
else
if YAML.respond_to?(:unsafe_load)
YAML.unsafe_load(payload)
else
# Psych < 4: plain load is already the unsafe variant
YAML.load(payload)
end
end
end
end
# https://gist.github.com/romiras/587c5cefd1e58bd88376a178ac48ebf7
# incorrect draft, just to show a direction
# Reads the serialized full_object column; when deserialization fails
# with ArgumentError (a renamed class in the YAML tag), patches the raw
# payload and retries via Psych directly.
#
# Fixed: removed a leftover `binding.pry` debugger call and the unused
# rescue variable.
# NOTE(review): Psych.load may still reject !ruby/object tags on
# Psych >= 4 — confirm unsafe_load/permitted_classes is appropriate.
def full_object
  self[:full_object]
rescue ArgumentError
  raw = full_object_before_type_cast
  # repair the renamed constant inside the serialized payload
  raw.gsub!(%r{!ruby/object:Koala::Instagram::API}, '!ruby/object:Koala::Instagram::Api')
  Psych.load(raw, symbolize_names: true)
end
https://guides.rubyonrails.org/classic_to_zeitwerk_howto.html
bin/rails zeitwerk:check
ActiveSupport::Inflector.inflections
=> config/initializers/inflections.rb
# Runs +block+ repeatedly until the process receives SIGINT, then
# returns, restoring the default INT disposition.
def graceful_loop(&block)
  interrupted = false
  trap('INT') { interrupted = true }
  block.call until interrupted
ensure
  # hand SIGINT handling back to the default behavior
  trap('INT', 'DEFAULT')
end
# draft
# Runs the block once, letting SIGINT abort it via throw/catch.
# Always restores the default INT handler and announces the exit;
# returns the block's value when it completes normally.
def graceful
  Signal.trap('INT') { throw :exit_loop }
  catch(:exit_loop) do
    yield
  end
ensure
  Signal.trap('INT', 'DEFAULT')
  puts "Loop exited!"
end
# Demo workload: prints entry/exit markers around a timed 1-second
# sleep, echoing the measured duration in between.
def foo
  puts 'foo'
  elapsed = Benchmark.realtime { sleep 1 }
  p elapsed
  puts '/foo'
end
loop { break if graceful { foo } }
# process_hs_pro
graceful { process_hs }