-
-
Save aetherknight/3d0af086a92bac8f3ee9 to your computer and use it in GitHub Desktop.
# Small polling helper: repeatedly evaluates a block until it returns a
# truthy value or a timeout elapses.
#
# NOTE: the original gist defined this module as WhileWhile, but the
# reconnect! patch below calls WaitWhile.wait_while — define the name the
# caller uses and keep the old name as a backward-compatible alias.
module WaitWhile
  # Polls +block+ every +retry_interval+ seconds until it returns truthy
  # or +timeout+ seconds have passed.
  #
  # timeout        - maximum seconds to keep polling (default 2)
  # retry_interval - seconds to sleep between attempts (default 0.1)
  #
  # Returns true if the block eventually returned truthy, false if the
  # timeout was reached first.
  def wait_while(timeout = 2, retry_interval = 0.1, &block)
    start = Time.now
    while (result = !block.call)
      # Compare the elapsed Float directly; the original used .to_i,
      # which truncated the elapsed time and broke sub-second timeouts.
      break if (Time.now - start) >= timeout
      sleep(retry_interval)
    end
    !result
  end
  module_function :wait_while
end

# Backward-compatible alias for code that referenced the old module name.
WhileWhile = WaitWhile
require 'active_record/connection_adapters/postgresql_adapter' | |
# Monkey-patch the PostgreSQL adapter so that liveness checks and
# reconnection behave robustly when the database (or a proxy in front of
# it) briefly goes away.
class ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter
  # Reverting to Rails 3.x behavior.
  #
  # Returns true if the underlying PG connection can still execute a
  # trivial query, false otherwise. Issuing a real query detects dead
  # connections that a cached status flag would miss.
  # NOTE(review): PGError is the legacy alias for PG::Error — confirm it
  # covers everything @connection.query can raise with this pg gem version.
  def active?
    @connection.query 'SELECT 1'
    true
  rescue PGError
    false
  end
  #### v
  # How long (seconds) reconnect! keeps retrying, and how long it sleeps
  # between attempts.
  RECONNECT_RETRY_TIMEOUT = 2
  RECONNECT_RETRY_INTERVAL = 0.25
  #### ^
  # Close then reopen the connection.
  #
  # Additionally, retry reconnecting (in case of a temporary outage), and
  # properly detect whether @connection.reset actually reconnected.
  def reconnect!
    super
    #### v
    # super resets the connection, but the reset can silently fail; keep
    # retrying until the connection reports CONNECTION_OK, or give up and
    # raise so the caller knows the connection is still dead.
    raise PG::ConnectionBad, "Failed to reconnect after #{RECONNECT_RETRY_TIMEOUT} seconds" unless WaitWhile.wait_while(RECONNECT_RETRY_TIMEOUT, RECONNECT_RETRY_INTERVAL) do
      Rails.logger.warn("Reconnect attempt")
      @connection.reset
      @connection.status == PGconn::CONNECTION_OK
    end
    #### ^
    configure_connection
  end
end
require 'active_record/query_cache' | |
# Monkey-patch the query-cache middleware to log full error details when a
# request blows up, instead of losing the context.
class ::ActiveRecord::QueryCache
  # Rack middleware entry point. Enables the query cache for the duration
  # of the request and restores the previous setting once the response
  # body has been served.
  #
  # env - the Rack environment hash
  #
  # Returns the Rack response triple.
  def call(env)
    enabled = ActiveRecord::Base.connection.query_cache_enabled
    connection_id = ActiveRecord::Base.connection_id
    ActiveRecord::Base.connection.enable_query_cache!
    response = @app.call(env)
    # Defer restoring the cache settings until the body is fully served.
    response[2] = Rack::BodyProxy.new(response[2]) do
      restore_query_cache_settings(connection_id, enabled)
    end
    response
  rescue Exception => e
    #### v
    # The only change: log class, message, and full backtrace before
    # re-raising, so failures inside this middleware are diagnosable.
    Rails.logger.fatal("Caught exception in QueryCache:\n#{e.class} (#{e.message}):\n #{e.backtrace.join("\n ")}\n\n")
    #### ^
    # Rescuing Exception (not just StandardError) is deliberate: the error
    # is always re-raised after cleanup.
    restore_query_cache_settings(connection_id, enabled)
    raise e
  end
end
require 'active_record/connection_adapters/abstract/connection_pool' | |
# If we have an error while trying to checkout a connection, we really ought
# to clean it up. Otherwise, we don't ever recover the connection and the
# connection pool gets exhausted. ConnectionTimeoutErrors are the one
# exception to this --- they occur when the connection pool is already
# exhausted, meaning that a connection was never checked out.
# Monkey-patch the connection pool so a connection that errors during
# checkout is removed and disconnected instead of leaked.
class ::ActiveRecord::ConnectionAdapters::ConnectionPool
  # Check out a connection from the pool, cleaning up after failures so
  # the pool is not slowly exhausted by half-checked-out connections.
  def checkout
    synchronize do
      #### v
      begin
      #### ^
        conn = acquire_connection
        conn.lease
        checkout_and_verify(conn)
      #### v
      rescue
        # If acquire_connection succeeded but lease/verify raised, the
        # connection would otherwise never be returned to the pool.
        # conn is nil when acquire_connection itself raised (e.g. a
        # ConnectionTimeoutError when the pool is already exhausted), in
        # which case nothing was checked out and there is nothing to clean.
        unless conn.nil?
          Rails.logger.fatal("Error after checking out a connection --- cleaning it up to avoid exhausting the connection pool.")
          remove conn
          conn.disconnect!
        end
        raise
      end
      #### ^
    end
  end
end
require 'active_record/connection_adapters/abstract_adapter' | |
# Adding logging of a backtrace to AbstractAdapter, because we are getting the
# error logged, but not the full backtrace.
# Monkey-patch the abstract adapter's query logger to include the full
# backtrace of the original error, not just its message.
class ::ActiveRecord::ConnectionAdapters::AbstractAdapter
  protected
  # Wraps query execution in ActiveSupport instrumentation and logs
  # failures.
  #
  # sql   - the SQL string being executed
  # name  - a human-readable label for the query (default "SQL")
  # binds - bind parameter values for prepared statements
  #
  # Raises the adapter-specific translated exception, with the original
  # backtrace preserved.
  def log(sql, name = "SQL", binds = [])
    @instrumenter.instrument(
      "sql.active_record",
      :sql => sql,
      :name => name,
      :connection_id => object_id,
      :binds => binds) { yield }
  rescue => e
    message = "#{e.class.name}: #{e.message}: #{sql}"
    @logger.error message if @logger
    #### v
    # The only change: also log the full backtrace, since the message
    # alone does not show where the failing query originated.
    @logger.error "extra logging: #{e.class.name}: #{e.message}: #{sql}:\n #{e.backtrace.join("\n ")}" if @logger
    #### ^
    exception = translate_exception(e, message)
    exception.set_backtrace e.backtrace
    raise exception
  end
end
Fortifying the reconnect and checkout pieces hasn't been folded into a stable release yet. Is there any reason it shouldn't be? We're running into similar issues, and there are lots of good fixes floating around that attack the problem in different ways, so we're trying to shepherd them into a proper release we can use if possible.
I am on Rails 3.2.19 using ActiveRecord 3.2.19 and seem to be facing similar issues. I have a component that connects to PostgreSQL with a pool size of 100. When I restart my HAProxy instance, which proxies the PG databases, I run into errors with the app. The app also does not seem to be able to recover, even after HAProxy has restarted and the PostgreSQL database is available again. It seems to me like some sort of connection caching is going on, but I cannot figure out where. The patches you wrote above appear to already be present in my version of Rails, judging by the "checkout" and "active?" methods. My version of the pg gem is 0.14.1.
The changes to abstract_adapter.rb and query_cache.rb are just for working around a lack of error information.
Our setup:
We still get spurious
PG::ConnectionBad: PQconsumeInput() SSL SYSCALL error: EOF detected
and PG::ConnectionBad: PQsocket() can't get a socket descriptor
errors, but they only affect single connections and don't hose the Rails process by exhausting the ConnectionPool. They might be related to our proxy, although I haven't had a chance to investigate further.