@ramv-dailymotion
Created March 12, 2016 00:09
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration;
import com.google.cloud.hadoop.io.bigquery.GsonBigQueryInputFormat;
import com.google.gson.JsonObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

// LOGGER is assumed to be declared in the enclosing class (e.g. an SLF4J Logger).
public static JavaPairRDD<LongWritable, JsonObject> fetchBigQueryRDD(JavaSparkContext jsc,
        String projectId,
        String fullyQualifiedInputTableId,
        String bucket,
        int numPartitions,
        double sampleSize) throws Exception {
    Configuration hadoopConfiguration = jsc.hadoopConfiguration();
    // Set the job-level projectId.
    hadoopConfiguration.set(BigQueryConfiguration.PROJECT_ID_KEY, projectId);
    // Use this bucket for the temporary BigQuery export data written by the InputFormat.
    hadoopConfiguration.set(BigQueryConfiguration.GCS_BUCKET_KEY, bucket);
    // Configure the input table for BigQuery access.
    BigQueryConfiguration.configureBigQueryInput(hadoopConfiguration, fullyQualifiedInputTableId);
    LOGGER.debug(hadoopConfiguration.get("fs.gs.system.bucket"));
    // Each record is a (byte offset, JSON row) pair produced by the BigQuery connector.
    JavaPairRDD<LongWritable, JsonObject> tableData = jsc.newAPIHadoopRDD(
            hadoopConfiguration,
            GsonBigQueryInputFormat.class,
            LongWritable.class,
            JsonObject.class);
    // Note: count() forces a full read of the export before the repartition below.
    LOGGER.info("Number of rows in the table {}", tableData.count());
    tableData = tableData.repartition(numPartitions)
            .persist(StorageLevel.MEMORY_AND_DISK_SER_2());
    return tableData;
}
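For context, a driver that calls this helper might look like the sketch below. This is an assumption-laden illustration, not part of the gist: the project ID, bucket, table, and the "word" column are placeholders, fetchBigQueryRDD is assumed to be in the same class, and the Spark application must have the BigQuery Hadoop connector and Gson on its classpath.

```java
import com.google.gson.JsonObject;
import org.apache.hadoop.io.LongWritable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class BigQueryFetchExample {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("bigquery-fetch-example");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // All identifiers below are placeholders for a real project/bucket/table.
            JavaPairRDD<LongWritable, JsonObject> rows = fetchBigQueryRDD(
                    jsc,
                    "my-project-id",
                    "publicdata:samples.shakespeare", // project:dataset.table
                    "my-temp-bucket",
                    100,   // numPartitions
                    1.0);  // sampleSize (unused by the helper)

            // Keep only the JSON payloads and pull out a single column.
            JavaRDD<String> words = rows.values()
                    .map(json -> json.get("word").getAsString());
            words.take(10).forEach(System.out::println);
        }
    }
}
```

Running this requires a live Spark cluster with GCP credentials, so it is a shape-of-the-code sketch rather than a standalone test.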