
@rdp
Last active July 12, 2020 05:31
Avoid using all RAM for large SELECT queries in Postgres (example code)
import java.sql.*;

/**
 * Example code showing how to use raw Postgres cursors to read results in chunks,
 * converted to Java from the psql source.
 * <p>
 * To run the demo:
 * $ psql
 * CREATE TABLE DOCUMENT_TEMPLATE(
 *   ID INTEGER NOT NULL,
 *   NAME TEXT,
 *   SHORT_DESCRIPTION TEXT,
 *   AUTHOR TEXT,
 *   DESCRIPTION TEXT,
 *   CONTENT TEXT,
 *   LAST_UPDATED DATE,
 *   CREATED DATE
 * );
 * INSERT INTO DOCUMENT_TEMPLATE(id, name, short_description, author,
 *                               description, content, last_updated, created)
 * SELECT id, 'name', md5(random()::text), 'name2'
 *   , md5(random()::text), md5(random()::text)
 *   , NOW() - '1 day'::INTERVAL * (RANDOM() * 100)::int
 *   , NOW() - '1 day'::INTERVAL * ((RANDOM() * 100)::int + 100)
 * FROM generate_series(1, 6000000) id;
 */
public class Temp {
    public static void main(String[] args) throws SQLException {
        String url = "jdbc:postgresql://localhost/postgres";
        Connection conn = DriverManager.getConnection(url);
        String originalQuery = "select name, short_description from document_template";
        long start = System.currentTimeMillis();
        ConsumerWithException<ResultSet, SQLException> consumer = rs -> {
            String shortDescription = rs.getString("short_description");
            // uncomment this to get "first row" timing info [if it's low, that means chunking is working]:
            // throw new SQLException("first row took=" + (System.currentTimeMillis() - start));
        };
        defaultWay(conn, originalQuery, consumer);
        System.out.println("old original way took=" + (System.currentTimeMillis() - start));
        long startDefault2 = System.currentTimeMillis();
        defaultWay(conn, originalQuery, consumer);
        System.out.println("old original way2 took=" + (System.currentTimeMillis() - startDefault2)); // sometimes it goes faster the second time? huh?
        defaultWay(conn, originalQuery, consumer);
        defaultWay(conn, originalQuery, consumer);
        int fetchCount = 1000;
        long startChunked = System.currentTimeMillis();
        readLargeQueryInChunksManualCursor(conn, originalQuery, fetchCount, consumer);
        System.out.println("cursor chunked way took=" + (System.currentTimeMillis() - startChunked));
        long startJdbc = System.currentTimeMillis();
        readLargeQueryInChunksJdbcWay(conn, originalQuery, fetchCount, consumer);
        System.out.println("jdbc chunked way took=" + (System.currentTimeMillis() - startJdbc));
        // results (6M rows):
        //   time to first row, default way: 5.2s
        //   time to first row, cursor/jdbc way: 0.066s
        //   total time, default way: 6.5s
        //   total time, cursor way: 3.8s
        //   total time, jdbc chunked way: 3.0s
    }
    @FunctionalInterface
    public interface ConsumerWithException<T, E extends Exception> {
        void accept(T t) throws E;
    }
    static void readLargeQueryInChunksJdbcWay(Connection conn, String originalQuery, int fetchCount,
                                              ConsumerWithException<ResultSet, SQLException> consumer) throws SQLException {
        boolean originalAutoCommit = conn.getAutoCommit();
        if (originalAutoCommit) {
            conn.setAutoCommit(false); // start temp transaction
        }
        try (Statement statement = conn.createStatement()) {
            statement.setFetchSize(fetchCount);
            try (ResultSet rs = statement.executeQuery(originalQuery)) {
                while (rs.next()) {
                    consumer.accept(rs); // or just do your work here
                }
            }
        } finally {
            if (originalAutoCommit) {
                conn.setAutoCommit(true); // reset it; also ends (commits) the temp transaction
            }
        }
    }
    // The purpose of this method is to read in chunks so that "huge number of rows" selects
    // against Postgres don't run out of RAM, while leaving the connection in its default
    // autoCommit=true mode. It turns out it also lowers total wall time even for "normal"
    // selects, woot! This is based on the source of the psql command line client, which
    // "wraps" its selects in a mini transaction when the FETCH_COUNT variable is set,
    // in order to get the RAM savings.
    static void readLargeQueryInChunksManualCursor(Connection conn, String originalQuery, int fetchCount,
                                                   ConsumerWithException<ResultSet, SQLException> consumer) throws SQLException {
        try (Statement statement = conn.createStatement()) {
            if (conn.getAutoCommit()) {
                // a transaction is required to use a cursor, and we're not "in a transaction"
                // presently, so BEGIN a short-lived, select-only transaction:
                statement.execute("BEGIN");
                // could instead do conn.setAutoCommit(false) here ... if you remember to turn it back on when you're done :)
            } // else it's already part of some other larger transaction, so don't modify it [postgres doesn't support nested transactions]
            try {
                statement.execute("DECLARE _read_chunks_cursor NO SCROLL CURSOR FOR " + originalQuery);
                int nRows;
                do {
                    // use fetchCount regardless of the connection's default fetch size:
                    ResultSet rs = statement.executeQuery("FETCH FORWARD " + fetchCount + " FROM _read_chunks_cursor");
                    nRows = 0;
                    while (rs.next()) {
                        nRows++;
                        consumer.accept(rs);
                    }
                } while (nRows == fetchCount);
                // close the cursor so callers within a larger transaction can call this select method twice:
                statement.execute("CLOSE _read_chunks_cursor");
                if (conn.getAutoCommit()) {
                    statement.execute("COMMIT"); // end our short-lived internal transaction
                }
            } catch (SQLException failure) {
                if (conn.getAutoCommit()) {
                    statement.execute("ROLLBACK");
                }
                throw failure;
            }
        }
    }
    static void defaultWay(Connection conn, String query, ConsumerWithException<ResultSet, SQLException> consumer) throws SQLException {
        try (Statement statement = conn.createStatement();
             ResultSet rs = statement.executeQuery(query)) {
            while (rs.next()) {
                consumer.accept(rs);
            }
        }
    }
}
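The termination condition of the manual-cursor loop (`nRows == fetchCount`) can be sanity-checked without a database. Here is a minimal sketch that substitutes a `List` for the server-side cursor; the class and method names are hypothetical, not part of the gist:

```java
import java.util.ArrayList;
import java.util.List;

// Stand-in for the FETCH FORWARD loop above: "fetch" slices of a list
// until a short (or empty) batch signals the cursor is exhausted.
public class ChunkLoopDemo {
    static int countChunks(List<String> rows, int fetchCount) {
        int chunks = 0;
        int offset = 0;
        int nRows;
        do {
            // simulate "FETCH FORWARD fetchCount": take up to fetchCount rows
            int end = Math.min(offset + fetchCount, rows.size());
            nRows = end - offset;
            offset = end;
            chunks++;
        } while (nRows == fetchCount); // a short batch means the cursor is done
        return chunks;
    }

    public static void main(String[] args) {
        List<String> rows = new ArrayList<>();
        for (int i = 0; i < 2500; i++) rows.add("row" + i);
        System.out.println(countChunks(rows, 1000)); // 3 fetches: 1000, 1000, 500
    }
}
```

Note that when the row count is an exact multiple of `fetchCount`, the loop issues one final empty fetch before stopping, just as the real method issues one extra `FETCH FORWARD` that returns zero rows.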
@rdp

rdp commented Nov 27, 2017

@ghYura

ghYura commented Jul 9, 2020

Are you sure this cursor workaround actually works in terms of MEMORY savings?
The example shown demonstrates a PERFORMANCE (time) improvement, but when I run a JVM profiler against the cursor-based JDBC (native database cursor) mechanism, it shows a byte[] inside the ResultSet object growing unstoppably, even while values are being collected into a LinkedHashMap, until I disconnect.
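One way to investigate a claim like this is to sample used heap every N rows while iterating. This is a rough sketch; the class name and sampling scheme are mine, not from the gist:

```java
// Rough heap sampler: call usedHeapBytes() periodically while iterating a
// ResultSet. If used heap climbs steadily across many chunks, rows are likely
// being retained somewhere: by a collection such as the LinkedHashMap mentioned
// above (which holds every row read so far by design), or by driver-side
// buffering (the PostgreSQL JDBC driver only honors setFetchSize for cursor
// reads when autoCommit is false and the ResultSet is forward-only).
public class HeapSampler {
    public static long usedHeapBytes() {
        Runtime rt = Runtime.getRuntime();
        return rt.totalMemory() - rt.freeMemory();
    }
}
```

A flat (or sawtooth, GC-driven) heap profile across chunks would suggest the cursor approach is streaming; an unbounded climb would point at something retaining the rows.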

@rdp

rdp commented Jul 12, 2020

Nope not certain on anything :)
