Boot up with a Fedora Live USB stick.
- Run vgs to check whether the volume group has any free space:
$ sudo vgs
  VG     #PV #LV #SN Attr   VSize    VFree
  fedora   1   3   0 wz--n- <237.28g    0
Here VFree is 0, so the volume group has no unallocated space; an existing logical volume would have to be shrunk (or a physical volume added) before anything can be extended.
# %%
import httpx
import pandas as pd

# %% Read CSV and rename headers
websites = pd.read_csv("resources/popular_websites.csv", index_col=0)
print(websites)

# %% Define function to check connection
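The body of this cell is cut off above. A plausible completion, assuming the goal is a simple reachability check with httpx (the function name, timeout, and return convention are guesses, not necessarily the original author's code):

def check_connection(name, url):
    """Report whether an HTTP HEAD request to url succeeds."""
    try:
        # HEAD keeps the check cheap; the 5-second timeout is an assumption
        response = httpx.head(url, timeout=5)
        response.raise_for_status()
        print(f"{name} is online")
        return True
    except httpx.HTTPError:
        print(f"{name} is unreachable")
        return False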
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.*;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.IcebergGenerics;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.io.CloseableIterable;
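These imports set up Iceberg's generic read/write API against a Hadoop catalog. A minimal sketch of the usual flow, written in Scala for consistency with the Spark snippets later in this section; the warehouse location and the db.events table name are assumptions:

import org.apache.hadoop.conf.Configuration
import org.apache.iceberg.{PartitionSpec, Schema}
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.data.{IcebergGenerics, Record}
import org.apache.iceberg.hadoop.HadoopCatalog
import org.apache.iceberg.io.CloseableIterable
import org.apache.iceberg.types.Types

// A file-based catalog rooted at an assumed local warehouse path
val catalog = new HadoopCatalog(new Configuration(), "file:///tmp/iceberg-warehouse")

val schema = new Schema(
  Types.NestedField.required(1, "id", Types.LongType.get()),
  Types.NestedField.optional(2, "name", Types.StringType.get()))

// Load db.events if it already exists, otherwise create it unpartitioned
val id = TableIdentifier.of("db", "events")
val table =
  if (catalog.tableExists(id)) catalog.loadTable(id)
  else catalog.createTable(id, schema, PartitionSpec.unpartitioned())

// Scan the table with the generics API from the imports above
// (GenericRecord/GenericParquetWriter cover the matching write path, omitted here)
val rows: CloseableIterable[Record] = IcebergGenerics.read(table).build()
try rows.forEach(r => println(r))
finally rows.close()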
// UserDefinedAggregateFunction is the contract to define
// user-defined aggregate functions (UDAFs)
class MyCountUDAF extends UserDefinedAggregateFunction {
  // The method below can be indexed like this: inputSchema(0)
  // Spark invokes it via dependency inversion
  // inputSchema(0) returns a StructField object such as:
  //   StructField("id", LongType, true, {})
  // StructField comes from the org.apache.spark.sql.types package
  override def inputSchema: StructType = {
    new StructType().add("id", LongType, nullable = true)
  }
  // ... the remaining required overrides are sketched below
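For reference, here is one self-contained way to satisfy the rest of the contract: a minimal count UDAF, assuming the Spark 2.x expressions API (UserDefinedAggregateFunction is deprecated in Spark 3 in favor of Aggregator):

import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

class MyCountUDAF extends UserDefinedAggregateFunction {
  override def inputSchema: StructType =
    new StructType().add("id", LongType, nullable = true)

  // Schema of the internal buffer that accumulates the partial count
  override def bufferSchema: StructType =
    new StructType().add("count", LongType, nullable = false)

  // Type of the final result
  override def dataType: DataType = LongType

  // The same input always yields the same output
  override def deterministic: Boolean = true

  override def initialize(buffer: MutableAggregationBuffer): Unit =
    buffer.update(0, 0L)

  // Called once per input row within a partition
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    if (!input.isNullAt(0)) buffer.update(0, buffer.getLong(0) + 1)

  // Merges partial counts coming from different partitions
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1.update(0, buffer1.getLong(0) + buffer2.getLong(0))

  override def evaluate(buffer: Row): Any = buffer.getLong(0)
}

Once registered with spark.udf.register("myCount", new MyCountUDAF), it can be used in SQL expressions such as selectExpr("myCount(id)").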
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession.Builder
import org.apache.spark.SparkContext
import org.apache.log4j.{Level, Logger}

// The sparkSession is provided by the Spark shell itself
// The log level is likewise already configured by the Spark shell
def boolean_udf_wrapper(a: String, b: String, t: Any): Boolean = { true }
def string_udf_wrapper(a: String, b: String, t: Any): String = { "••••" }

import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.functions.sum
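One way to wire these wrappers into SQL expressions from the spark-shell. Spark cannot derive a schema for Any, so the third parameter is narrowed to String here, and the DataFrame and column names are invented for illustration:

spark.udf.register("is_match", (a: String, b: String, t: String) => boolean_udf_wrapper(a, b, t))
spark.udf.register("mask",     (a: String, b: String, t: String) => string_udf_wrapper(a, b, t))

// toDF works out of the box in the spark-shell, where the implicits are pre-imported
val people = Seq(("alice", "smith"), ("bob", "jones")).toDF("first", "last")
people
  .withColumn("masked", expr("mask(first, last, 'token')"))
  .filter(expr("is_match(first, last, 'token')"))
  .show()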
def happyEmployees(salary: Int): Boolean = salary > 2200
def smartTextCase(name: String): String = name.toUpperCase()
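These are plain Scala functions; a quick way to exercise them on a Dataset follows. The Employee case class and sample rows are made up for the example, and toDS assumes the spark-shell implicits:

case class Employee(name: String, salary: Int)
val employees = Seq(Employee("ana", 2500), Employee("bia", 2000)).toDS()

// Keep only the "happy" employees, then normalize their names
employees
  .filter(e => happyEmployees(e.salary))
  .map(e => smartTextCase(e.name))
  .show()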
# Using Kubernetes with Docker in Docker (DIND)
sudo mkdir -p /usr/local
cd /usr/local
sudo mkdir dind-cluster
cd dind-cluster/
sudo chmod o+w .
ls -lat .. | head
# wget https://cdn.rawgit.com/kubernetes-sigs/kubeadm-dind-cluster/master/fixed/dind-cluster-v1.10.sh
curl -O https://cdn.rawgit.com/kubernetes-sigs/kubeadm-dind-cluster/master/fixed/dind-cluster-v1.10.sh
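# Note: cdn.rawgit.com was sunset in 2019, so this URL no longer resolves; the
# script would now have to be fetched from the kubernetes-sigs/kubeadm-dind-cluster
# repository directly, and that project has since been retired in favor of kind.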