BigQuery by example
package main;
// Read a CSV file and load its contents into a BigQuery table
import com.google.cloud.bigquery.*;
import org.apache.commons.csv.*;
import java.io.*;
import java.util.*;
public class App {
public static void main(String... args) throws Exception {
// Step 1: Initialize BigQuery service
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Step 2: Define the dataset and table IDs
String datasetName = "tu_dataset";
String tableName = "tu_tabla";
// Step 3: Read the CSV file and load its data into BigQuery
String csvFilePath = "ruta/al/archivo.csv";
try (Reader reader = new FileReader(csvFilePath);
CSVParser csvParser = CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(reader)) {
TableId tableId = TableId.of(datasetName, tableName);
TableDefinition tableDefinition = StandardTableDefinition.of(getSchema(csvParser.getHeaderMap()));
// Step 4: Create the table if it doesn't exist (getTable returns null for a missing table)
if (bigquery.getTable(tableId) == null) {
bigquery.create(TableInfo.newBuilder(tableId, tableDefinition).build());
}
// Step 5: Insert rows into the table
InsertAllRequest.Builder insertRequestBuilder = InsertAllRequest.newBuilder(tableId);
for (CSVRecord record : csvParser) {
insertRequestBuilder.addRow(toRow(record));
}
InsertAllResponse insertResponse = bigquery.insertAll(insertRequestBuilder.build());
if (insertResponse.hasErrors()) {
throw new RuntimeException("Error inserting rows into BigQuery table: " + insertResponse.getInsertErrors());
} else {
System.out.println("Rows inserted successfully into BigQuery table.");
}
}
}
// Helper method: build a BigQuery schema (all STRING columns) from the CSV header map
private static Schema getSchema(Map<String, Integer> headerMap) {
List<Field> fields = new ArrayList<>();
for (String columnName : headerMap.keySet()) {
fields.add(Field.of(columnName, LegacySQLTypeName.STRING));
}
return Schema.of(FieldList.of(fields));
}
// Helper method: convert a CSV record to a column-name -> value map for insertAll
private static Map<String, Object> toRow(CSVRecord record) {
return new HashMap<>(record.toMap());
}
}
package main;
// Deleting those two rows
import com.google.cloud.bigquery.*;
public class AppFour {
public static void main(String... args) throws Exception {
// Step 1: Initialize BigQuery service
BigQuery bigquery = BigQueryOptions.newBuilder().setProjectId("sample-project-330313")
.build().getService();
// Step 2: Prepare query job to delete the rows
final String DELETE_VEGETABLES =
"DELETE FROM `sample-project-330313.sample_dataset.vegetables` WHERE id IN (1, 2)";
QueryJobConfiguration queryConfig =
QueryJobConfiguration.newBuilder(DELETE_VEGETABLES).build();
// Step 3: Run the delete job on BigQuery
Job deleteJob = bigquery.create(JobInfo.newBuilder(queryConfig).build());
deleteJob = deleteJob.waitFor();
if (deleteJob == null) {
throw new Exception("Job no longer exists");
}
// once the job is done, check if any error occurred
if (deleteJob.getStatus().getError() != null) {
throw new Exception(deleteJob.getStatus().getError().toString());
}
// Step 4: Display results
// Here, we will print the total number of rows that were deleted
JobStatistics.QueryStatistics stats = deleteJob.getStatistics();
Long rowsDeleted = stats.getDmlStats().getDeletedRowCount();
System.out.printf("%d rows deleted\n", rowsDeleted);
}
}
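// A hedged variant of AppFour: the same DELETE, but with the ids passed as a query
// parameter instead of spliced into the SQL string. A minimal sketch against the same
// sample table; the class name AppFourParameterized is illustrative, not from the gist.
package main;
import com.google.cloud.bigquery.*;
public class AppFourParameterized {
public static void main(String... args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Pass the ids as an ARRAY<INT64> named parameter
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(
"DELETE FROM `sample-project-330313.sample_dataset.vegetables` WHERE id IN UNNEST(@ids)")
.addNamedParameter("ids", QueryParameterValue.array(new Long[] {1L, 2L}, StandardSQLTypeName.INT64))
.build();
Job deleteJob = bigquery.create(JobInfo.of(queryConfig)).waitFor();
if (deleteJob == null || deleteJob.getStatus().getError() != null) {
throw new RuntimeException("Delete job failed");
}
System.out.println("Parameterized delete completed.");
}
}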
package main;
// Updating those two rows.
import com.google.cloud.bigquery.*;
public class AppThree {
public static void main(String... args) throws Exception {
// Step 1: Initialize BigQuery service
BigQuery bigquery = BigQueryOptions.newBuilder().setProjectId("sample-project-330313")
.build().getService();
// Step 2: Build a query that returns the full table with the two rows updated
// (the original name column is dropped via EXCEPT so it is not duplicated)
String updateDataQuery =
"SELECT * EXCEPT(name), IF(id IN (1, 2), 'updated_' || name, name) AS name "
+ "FROM `sample-project-330313.sample_dataset.vegetables`";
// Step 3: Write the query results to a staging table
TableId stagingTableId = TableId.of("sample-project-330313", "sample_dataset", "updated_vegetables");
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(updateDataQuery)
.setDestinationTable(stagingTableId)
.setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
.setWriteDisposition(JobInfo.WriteDisposition.WRITE_TRUNCATE)
.build();
Job queryJob = bigquery.create(JobInfo.of(queryConfig));
queryJob = queryJob.waitFor();
// Step 4: Check for errors in the job
if (queryJob == null) {
throw new Exception("Job no longer exists");
}
if (queryJob.getStatus().getError() != null) {
throw new Exception(queryJob.getStatus().getError().toString());
}
// Step 5: Copy the staging table over the original, then drop the staging table
TableId originalTableId = TableId.of("sample-project-330313", "sample_dataset", "vegetables");
CopyJobConfiguration copyConfig = CopyJobConfiguration.newBuilder(originalTableId, stagingTableId)
.setWriteDisposition(JobInfo.WriteDisposition.WRITE_TRUNCATE)
.build();
Job copyJob = bigquery.create(JobInfo.of(copyConfig));
copyJob = copyJob.waitFor();
if (copyJob == null || copyJob.getStatus().getError() != null) {
throw new Exception("Copy job failed");
}
bigquery.delete(stagingTableId);
System.out.println("Rows updated successfully.");
}
}
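// A simpler alternative to the staging-table approach above: a single DML UPDATE
// statement. A minimal sketch against the same sample table; AppThreeDml is an
// illustrative name, not part of the original gist.
package main;
import com.google.cloud.bigquery.*;
public class AppThreeDml {
public static void main(String... args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(
"UPDATE `sample-project-330313.sample_dataset.vegetables` "
+ "SET name = CONCAT('updated_', name) WHERE id IN (1, 2)").build();
Job updateJob = bigquery.create(JobInfo.of(queryConfig)).waitFor();
if (updateJob == null || updateJob.getStatus().getError() != null) {
throw new RuntimeException("Update job failed");
}
// DML statistics report how many rows the statement touched
JobStatistics.QueryStatistics stats = updateJob.getStatistics();
System.out.printf("%d rows updated%n", stats.getDmlStats().getUpdatedRowCount());
}
}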
package main;
// Example of loading data from a CSV file into BigQuery and querying it in Java:
import com.google.cloud.bigquery.*;
public class AppTwo {
public static void main(String... args) throws Exception {
// Step 1: Initialize BigQuery service
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Step 2: Define the dataset, table, and source file for the CSV load
String datasetName = "tu_dataset";
String tableName = "tu_tabla";
String bucketName = "tu_bucket";
String fileName = "archivo.csv";
String fileUri = "gs://" + bucketName + "/" + fileName;
TableId tableId = TableId.of(datasetName, tableName);
LoadJobConfiguration loadConfig = LoadJobConfiguration.newBuilder(tableId, fileUri)
.setFormatOptions(FormatOptions.csv())
.setAutodetect(true)
.build();
// Step 3: Create and run the load job
Job loadJob = bigquery.create(JobInfo.of(loadConfig));
loadJob = loadJob.waitFor();
// Step 4: Verify that the job completed successfully
if (loadJob == null) {
throw new Exception("Job no longer exists");
}
if (loadJob.getStatus().getError() != null) {
throw new Exception(loadJob.getStatus().getError().toString());
}
System.out.println("CSV file loaded into BigQuery successfully.");
// Step 5: Query the data in BigQuery
String query = "SELECT * FROM " + datasetName + "." + tableName;
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
// Run the query
TableResult result = bigquery.query(queryConfig);
// Step 6: Print the results
for (FieldValueList row : result.iterateAll()) {
String fruta = row.get("fruta").getStringValue();
String color = row.get("color").getStringValue();
double precio = row.get("precio").getDoubleValue();
System.out.printf("Fruta: %s, Color: %s, Precio: %.2f%n", fruta, color, precio);
}
}
}
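// Optional sanity check after a load job like AppTwo's: LoadStatistics on the completed
// job report how many rows BigQuery actually ingested. A minimal helper sketch, assuming
// it is handed a finished load job; LoadJobReport is an illustrative name.
package main;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobStatistics;
class LoadJobReport {
static void print(Job completedLoadJob) {
JobStatistics.LoadStatistics stats = completedLoadJob.getStatistics();
System.out.printf("Loaded %d rows (%d bad records)%n", stats.getOutputRows(), stats.getBadRecords());
}
}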
package main;
import com.google.cloud.bigquery.*;
public class BigQueryFunction {
public static double calcularExponente(double p1, double p2) {
// Create a BigQuery client
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Define the SQL query, passing the operands as named parameters instead of
// formatting them into the string (String.format("%f", ...) is locale-dependent
// and can produce SQL that does not parse)
String query = "SELECT POW(@p1, @p2) AS resultado";
// Run the query
try {
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
.addNamedParameter("p1", QueryParameterValue.float64(p1))
.addNamedParameter("p2", QueryParameterValue.float64(p2))
.build();
TableResult result = bigquery.query(queryConfig);
// Read the query result and return the computed value
for (FieldValueList row : result.iterateAll()) {
return row.get("resultado").getDoubleValue();
}
} catch (BigQueryException e) {
System.out.println("Error running the query: " + e.getMessage());
e.printStackTrace();
} catch (InterruptedException e) {
// query() blocks on the job and can be interrupted
Thread.currentThread().interrupt();
}
// On error, or if the query returned no rows, return 0
return 0.0;
}
public static void main(String[] args) {
// Example use of the function
double resultado = calcularExponente(2.0, 3.0);
System.out.println("The result of the power calculation is: " + resultado);
}
}
package main;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import java.util.UUID;
public class BigQueryFunctionInvoker {
public static void main(String[] args) {
// BigQuery client instance
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Project and dataset parameters
String myProject = "myproject";
String myDataset = "mydataset";
// Build the SQL query from the parameters
String query = String.format("SELECT %s.%s.get_count() AS total_count", myProject, myDataset);
// Query job configuration
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
// Create a unique JobId for the query
JobId jobId = JobId.of(UUID.randomUUID().toString());
// Run the query
Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
// Wait for the job to finish
try {
queryJob = queryJob.waitFor();
if (queryJob == null) {
throw new RuntimeException("Job not found.");
} else if (queryJob.getStatus().getError() != null) {
throw new RuntimeException(queryJob.getStatus().getError().toString());
}
// Fetch and handle the query results
TableResult result = queryJob.getQueryResults();
result.iterateAll().forEach(row -> {
long totalCount = row.get("total_count").getLongValue();
System.out.println("Total count: " + totalCount);
});
System.out.println("La función fue invocada con éxito.");
} catch (InterruptedException e) {
System.out.println("Error: " + e.getMessage());
Thread.currentThread().interrupt();
}
}
}
package main;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import java.util.UUID;
public class BigQueryFunctionInvokerTwo {
public static void main(String[] args) {
// BigQuery client instance
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// SQL query that invokes the function
String query = "SELECT myproject.mydataset.get_count() AS total_count";
// Query job configuration
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
// Create a unique JobId for the query
JobId jobId = JobId.of(UUID.randomUUID().toString());
// Run the query
Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
// Wait for the job to finish
try {
queryJob = queryJob.waitFor();
if (queryJob == null) {
throw new RuntimeException("Job not found.");
} else if (queryJob.getStatus().getError() != null) {
throw new RuntimeException(queryJob.getStatus().getError().toString());
}
// Fetch and handle the query results
TableResult result = queryJob.getQueryResults();
result.iterateAll().forEach(row -> {
long totalCount = row.get("total_count").getLongValue();
System.out.println("Total count: " + totalCount);
});
System.out.println("La función fue invocada con éxito.");
} catch (InterruptedException e) {
System.out.println("Error: " + e.getMessage());
Thread.currentThread().interrupt();
}
}
}
package main;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import java.util.UUID;
public class BigQueryProcedureInvoker {
public static void main(String[] args) {
// BigQuery client instance
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Parameter for the stored procedure's routine-name argument
String nameStored = "nameStored";
// SQL query that calls the stored procedure
String query = String.format("CALL your_dataset.get_routine_details('%s');", nameStored);
// Query job configuration
QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
// Create a unique JobId for the query
JobId jobId = JobId.of(UUID.randomUUID().toString());
// Run the query
Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
// Wait for the job to finish
try {
queryJob = queryJob.waitFor();
if (queryJob == null) {
throw new RuntimeException("Job not found.");
} else if (queryJob.getStatus().getError() != null) {
throw new RuntimeException(queryJob.getStatus().getError().toString());
}
// Fetch and handle the query results
TableResult result = queryJob.getQueryResults();
result.iterateAll().forEach(row -> {
String routineCatalog = row.get("routine_catalog").getStringValue();
String routineSchema = row.get("routine_schema").getStringValue();
String routineName = row.get("routine_name").getStringValue();
String routineType = row.get("routine_type").getStringValue();
String routineDefinition = row.get("routine_definition").getStringValue();
System.out.printf("Routine: %s.%s.%s (%s)\nDefinition: %s\n", routineCatalog, routineSchema, routineName, routineType, routineDefinition);
});
System.out.println("El procedimiento almacenado fue invocado con éxito.");
} catch (InterruptedException e) {
System.out.println("Error: " + e.getMessage());
Thread.currentThread().interrupt();
}
}
}
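// The CALL above assumes the routine already exists. As a hedged sketch, it can be
// created from Java by running a DDL statement as an ordinary query job; the procedure
// body below is a hypothetical implementation of get_routine_details over
// INFORMATION_SCHEMA.ROUTINES, not code from the original gist.
package main;
import com.google.cloud.bigquery.*;
public class BigQueryProcedureCreator {
public static void main(String[] args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
String ddl =
"CREATE OR REPLACE PROCEDURE your_dataset.get_routine_details(routine_name_param STRING) "
+ "BEGIN "
+ "SELECT routine_catalog, routine_schema, routine_name, routine_type, routine_definition "
+ "FROM your_dataset.INFORMATION_SCHEMA.ROUTINES WHERE routine_name = routine_name_param; "
+ "END";
Job ddlJob = bigquery.create(JobInfo.of(QueryJobConfiguration.newBuilder(ddl).build())).waitFor();
if (ddlJob == null || ddlJob.getStatus().getError() != null) {
throw new RuntimeException("DDL job failed");
}
System.out.println("Stored procedure created.");
}
}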
package main;
// Create table
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.TableDefinition;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
public class CreateTable {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
Schema schema =
Schema.of(
Field.of("stringField", StandardSQLTypeName.STRING),
Field.of("booleanField", StandardSQLTypeName.BOOL));
createTable(datasetName, tableName, schema);
}
public static void createTable(String datasetName, String tableName, Schema schema) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
TableId tableId = TableId.of(datasetName, tableName);
TableDefinition tableDefinition = StandardTableDefinition.of(schema);
TableInfo tableInfo = TableInfo.newBuilder(tableId, tableDefinition).build();
bigquery.create(tableInfo);
System.out.println("Table created successfully");
} catch (BigQueryException e) {
System.out.println("Table was not created. \n" + e.toString());
}
}
}
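// CreateTable assumes the dataset already exists. A minimal companion sketch for
// creating it first, reusing the same placeholder name:
package main;
import com.google.cloud.bigquery.*;
public class CreateDataset {
public static void main(String[] args) {
try {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Dataset dataset = bigquery.create(DatasetInfo.newBuilder("MY_DATASET_NAME").build());
System.out.println("Dataset created: " + dataset.getDatasetId().getDataset());
} catch (BigQueryException e) {
System.out.println("Dataset was not created. \n" + e.toString());
}
}
}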
package main;
// Delete table
// Sample reference: https://cloud.google.com/bigquery/docs/samples/bigquery-delete-table
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.TableId;
public class DeleteTable {
public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
deleteTable(datasetName, tableName);
}
public static void deleteTable(String datasetName, String tableName) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
boolean success = bigquery.delete(TableId.of(datasetName, tableName));
if (success) {
System.out.println("Table deleted successfully");
} else {
System.out.println("Table was not found");
}
} catch (BigQueryException e) {
System.out.println("Table was not deleted. \n" + e.toString());
}
}
}
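// Companion sketch: deleting an entire dataset. The deleteContents() option also removes
// any tables still inside it; without it the call fails on a non-empty dataset.
package main;
import com.google.cloud.bigquery.*;
public class DeleteDataset {
public static void main(String[] args) {
try {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
boolean success = bigquery.delete(DatasetId.of("MY_DATASET_NAME"), BigQuery.DatasetDeleteOption.deleteContents());
System.out.println(success ? "Dataset deleted successfully" : "Dataset was not found");
} catch (BigQueryException e) {
System.out.println("Dataset was not deleted. \n" + e.toString());
}
}
}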
package main;
// ETL code with Java and BigQuery
import com.google.cloud.bigquery.*;
import com.google.cloud.storage.*;
import com.google.common.collect.ImmutableMap;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
public class ETLExample {
public static void main(String[] args) throws Exception {
// Set up the BigQuery and Google Cloud Storage services
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Storage storage = StorageOptions.getDefaultInstance().getService();
// Configuration variables
String bucketName = "tu_bucket";
String fileName = "datos.csv";
String datasetName = "tu_dataset";
String tableName = "tu_tabla";
// Step 1: Extract - read the CSV file from Google Cloud Storage
Blob blob = storage.get(BlobId.of(bucketName, fileName));
String csvData = new String(blob.getContent(), StandardCharsets.UTF_8);
// Step 2: Transform - process the data
List<InsertAllRequest.RowToInsert> rows = new ArrayList<>();
String[] lines = csvData.split("\n");
// Note: this assumes datos.csv has no header row; skip lines[0] if it does
for (String line : lines) {
if (!line.trim().isEmpty()) {
String[] fields = line.split(",");
if (fields.length == 4) {
String fecha = fields[0];
String clave = fields[1];
float porcentaje = Float.parseFloat(fields[2]);
float plazo = Float.parseFloat(fields[3]);
rows.add(InsertAllRequest.RowToInsert.of(
ImmutableMap.of(
"fecha", fecha,
"clave", clave,
"porcentaje", porcentaje,
"plazo", plazo
)
));
}
}
}
// Create the BigQuery table if it does not exist
TableId tableId = TableId.of(datasetName, tableName);
if (bigquery.getTable(tableId) == null) {
Schema schema = Schema.of(
Field.of("fecha", LegacySQLTypeName.DATE),
Field.of("clave", LegacySQLTypeName.STRING),
Field.of("porcentaje", LegacySQLTypeName.FLOAT),
Field.of("plazo", LegacySQLTypeName.FLOAT)
);
bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
}
// Step 3: Load - insert the data into BigQuery
InsertAllResponse response = bigquery.insertAll(InsertAllRequest.newBuilder(tableId).setRows(rows).build());
if (response.hasErrors()) {
response.getInsertErrors().forEach((index, errors) ->
System.out.printf("Error al insertar fila %d: %s\n", index, errors));
} else {
System.out.println("Datos cargados correctamente en BigQuery.");
}
}
}
package main;
import com.google.cloud.bigquery.*;
import com.google.common.collect.ImmutableMap;
import java.util.ArrayList;
import java.util.List;
public class InsertDataToBigQuery {
public static void main(String[] args) {
// Step 1: Set up the environment
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Step 2: Define the table structure and create the table if it does not exist
String datasetName = "tu_dataset";
String tableName = "datos";
TableId tableId = TableId.of(datasetName, tableName);
// getTable returns null when the table does not exist
if (bigquery.getTable(tableId) == null) {
Schema schema = Schema.of(
Field.of("fecha", LegacySQLTypeName.DATE),
Field.of("clave", LegacySQLTypeName.STRING),
Field.of("porcentaje", LegacySQLTypeName.FLOAT),
Field.of("plazo", LegacySQLTypeName.FLOAT)
);
bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
}
// Step 3: Iterate over the list and prepare the data
// (the fecha column is a DATE, so values must use the YYYY-MM-DD format)
List<String> listTemp = new ArrayList<>();
listTemp.add("2024-04-12,FYDY, 3242, 0.0000000");
listTemp.add("2024-04-12,FYDY, 3442, 0.0000000");
listTemp.add("2024-04-12,FYDY, 3242, 0.0000000");
listTemp.add("2024-04-12,FYDY, 3542, 0.0000000");
listTemp.add("2024-04-12,FYDY, 3222, 0.0000000");
for (String line : listTemp) {
String[] fields = line.split(",");
String fecha = fields[0];
String clave = fields[1];
float porcentaje = Float.parseFloat(fields[2]);
float plazo = Float.parseFloat(fields[3]);
// Step 4: Insert the data into the BigQuery table
InsertAllResponse response = bigquery.insertAll(
InsertAllRequest.newBuilder(tableId)
.addRow(ImmutableMap.of(
"fecha", fecha,
"clave", clave,
"porcentaje", porcentaje,
"plazo", plazo
))
.build()
);
if (response.hasErrors()) {
System.out.println("Error inserting data into BigQuery: " + response.getInsertErrors());
} else {
System.out.println("Data inserted into BigQuery successfully.");
}
}
}
}
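// The loop above issues one insertAll call per line. A hedged variant that batches all
// rows into a single streaming request (fewer API calls; failed rows are still reported
// individually via getInsertErrors). InsertDataBatched is an illustrative helper name.
package main;
import com.google.cloud.bigquery.*;
import com.google.common.collect.ImmutableMap;
public class InsertDataBatched {
public static void insertBatch(BigQuery bigquery, TableId tableId, Iterable<String> lines) {
InsertAllRequest.Builder request = InsertAllRequest.newBuilder(tableId);
for (String line : lines) {
String[] fields = line.split(",");
request.addRow(ImmutableMap.of(
"fecha", fields[0].trim(),
"clave", fields[1].trim(),
"porcentaje", Float.parseFloat(fields[2].trim()),
"plazo", Float.parseFloat(fields[3].trim())));
}
InsertAllResponse response = bigquery.insertAll(request.build());
if (response.hasErrors()) {
System.out.println("Insert errors: " + response.getInsertErrors());
}
}
}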
package com.example.bigquery;
// [START bigquery_simple_app_all]
// [START bigquery_simple_app_deps]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FieldValueList;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import java.util.UUID;
// [END bigquery_simple_app_deps]
public class SimpleApp {
public static void main(String... args) throws Exception {
// [START bigquery_simple_app_client]
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// [END bigquery_simple_app_client]
// [START bigquery_simple_app_query]
QueryJobConfiguration queryConfig =
QueryJobConfiguration.newBuilder(
"SELECT CONCAT('https://stackoverflow.com/questions/', "
+ "CAST(id as STRING)) as url, view_count "
+ "FROM `bigquery-public-data.stackoverflow.posts_questions` "
+ "WHERE tags like '%google-bigquery%' "
+ "ORDER BY view_count DESC "
+ "LIMIT 10")
// Use standard SQL syntax for queries.
// See: https://cloud.google.com/bigquery/sql-reference/
.setUseLegacySql(false)
.build();
// Create a job ID so that we can safely retry.
JobId jobId = JobId.of(UUID.randomUUID().toString());
Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
// Wait for the query to complete.
queryJob = queryJob.waitFor();
// Check for errors
if (queryJob == null) {
throw new RuntimeException("Job no longer exists");
} else if (queryJob.getStatus().getError() != null) {
// You can also look at queryJob.getStatus().getExecutionErrors() for all
// errors, not just the latest one.
throw new RuntimeException(queryJob.getStatus().getError().toString());
}
// [END bigquery_simple_app_query]
// [START bigquery_simple_app_print]
// Get the results.
TableResult result = queryJob.getQueryResults();
// Print all pages of the results.
for (FieldValueList row : result.iterateAll()) {
// String type
String url = row.get("url").getStringValue();
String viewCount = row.get("view_count").getStringValue();
System.out.printf("%s : %s views\n", url, viewCount);
}
// [END bigquery_simple_app_print]
}
}
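// For one-off queries the client also offers a shortcut that creates the job and waits
// for it internally; a minimal sketch equivalent to the query-and-print portion of SimpleApp:
package com.example.bigquery;
import com.google.cloud.bigquery.*;
public class SimpleAppShortcut {
public static void main(String... args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// query() runs the job and blocks until the results are ready
TableResult result = bigquery.query(QueryJobConfiguration.of(
"SELECT CONCAT('https://stackoverflow.com/questions/', CAST(id as STRING)) as url, view_count "
+ "FROM `bigquery-public-data.stackoverflow.posts_questions` "
+ "WHERE tags like '%google-bigquery%' ORDER BY view_count DESC LIMIT 10"));
for (FieldValueList row : result.iterateAll()) {
System.out.printf("%s : %s views%n", row.get("url").getStringValue(), row.get("view_count").getStringValue());
}
}
}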
package main;
// Extract and load data
import com.google.cloud.bigquery.*;
public class UploadCsvToBigQuery {
public static void main(String[] args) throws Exception {
// Set up the environment
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
// Bucket, file, dataset, and table names
String bucketName = "tu_bucket";
String fileName = "datos.csv";
String datasetName = "tu_dataset";
String tableName = "tu_tabla";
// Create the BigQuery table if it does not exist
TableId tableId = TableId.of(datasetName, tableName);
if (bigquery.getTable(tableId) == null) {
Schema schema = Schema.of(
Field.of("fecha", LegacySQLTypeName.DATE),
Field.of("clave", LegacySQLTypeName.STRING),
Field.of("porcentaje", LegacySQLTypeName.FLOAT),
Field.of("plazo", LegacySQLTypeName.FLOAT)
);
TableDefinition tableDefinition = StandardTableDefinition.of(schema);
bigquery.create(TableInfo.newBuilder(tableId, tableDefinition).build());
}
// URI of the source file in Google Cloud Storage
String uri = String.format("gs://%s/%s", bucketName, fileName);
// Configure the BigQuery load job
LoadJobConfiguration loadConfig = LoadJobConfiguration.newBuilder(
tableId, uri, FormatOptions.csv())
.setIgnoreUnknownValues(true) // Optional: ignore unknown values
.setSkipLeadingRows(1) // Optional: skip the first row if it is a header
.build();
// Run the load job
Job loadJob = bigquery.create(JobInfo.of(loadConfig));
loadJob = loadJob.waitFor();
// isDone() is true even when the job failed, so check the error status instead
if (loadJob != null && loadJob.getStatus().getError() == null) {
System.out.println("CSV file loaded into the BigQuery table successfully.");
} else {
System.out.println("Error loading the CSV file into the BigQuery table: "
+ (loadJob != null ? loadJob.getStatus().getError() : "job no longer exists"));
}
}
}
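// The load above reads from Cloud Storage. When the CSV only exists locally, the client
// can stream it through a write channel instead; a minimal sketch, assuming the same
// tu_dataset/tu_tabla table and a local file datos.csv:
package main;
import com.google.cloud.bigquery.*;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.file.Files;
import java.nio.file.Paths;
public class UploadLocalCsvToBigQuery {
public static void main(String[] args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
TableId tableId = TableId.of("tu_dataset", "tu_tabla");
WriteChannelConfiguration writeConfig = WriteChannelConfiguration.newBuilder(tableId)
.setFormatOptions(FormatOptions.csv())
.build();
TableDataWriteChannel writer = bigquery.writer(writeConfig);
// Copy the local file into the channel; closing the stream finalizes the upload
try (OutputStream stream = Channels.newOutputStream(writer)) {
Files.copy(Paths.get("datos.csv"), stream);
}
// The channel exposes the load job it started; wait for it and check for errors
Job loadJob = writer.getJob().waitFor();
if (loadJob == null || loadJob.getStatus().getError() != null) {
throw new RuntimeException("Local load failed");
}
System.out.println("Local CSV file loaded into BigQuery.");
}
}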
package main;
// An example of uploading a TXT file to a BigQuery table using Java, BigQuery, and Google Cloud Storage:
import com.google.cloud.bigquery.*;
import com.google.cloud.storage.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
public class UploadTxtToBigQuery {
public static void main(String[] args) throws IOException, InterruptedException {
// Step 1: Set up the environment
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Storage storage = StorageOptions.getDefaultInstance().getService();
// Step 2: Prepare the file and the table
String datasetName = "your_dataset";
String tableName = "your_table";
String bucketName = "your_bucket";
String fileName = "your_file.txt";
String filePath = "path/to/your/file.txt";
// Create the BigQuery table if it does not exist (getTable returns null for a missing table)
TableId tableId = TableId.of(datasetName, tableName);
if (bigquery.getTable(tableId) == null) {
Schema schema = Schema.of(Field.of("column_name", LegacySQLTypeName.STRING));
bigquery.create(TableInfo.of(tableId, StandardTableDefinition.of(schema)));
}
// Step 3: Upload the file to Google Cloud Storage
BlobId blobId = BlobId.of(bucketName, fileName);
BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build();
storage.create(blobInfo, Files.readAllBytes(Paths.get(filePath)));
// Step 4: Load the file's data into the BigQuery table
String uri = "gs://" + bucketName + "/" + fileName;
// The client has no load() shortcut; run the load as a job
Job job = bigquery.create(JobInfo.of(LoadJobConfiguration.newBuilder(tableId, uri, FormatOptions.csv()).build()));
job = job.waitFor();
if (job != null && job.getStatus().getError() == null) {
System.out.println("Archivo TXT cargado correctamente en la tabla de BigQuery.");
} else {
System.out.println("Error al cargar el archivo TXT en la tabla de BigQuery: " + job.getStatus().getError());
}
}
}
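// The reverse direction: exporting a BigQuery table back to Cloud Storage with an
// extract job. A minimal sketch via Table.extract, reusing the placeholder names above;
// the destination object name exported_file.csv is illustrative.
package main;
import com.google.cloud.bigquery.*;
public class ExtractTableToGcs {
public static void main(String[] args) throws Exception {
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
Table table = bigquery.getTable(TableId.of("your_dataset", "your_table"));
Job extractJob = table.extract("CSV", "gs://your_bucket/exported_file.csv").waitFor();
if (extractJob == null || extractJob.getStatus().getError() != null) {
throw new RuntimeException("Extract job failed");
}
System.out.println("Table exported to Cloud Storage.");
}
}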