Boot up with a Fedora Live USB stick.
- Run `vgs` to check whether the volume group has any free space (the VFree column):
$ sudo vgs
VG #PV #LV #SN Attr VSize VFree
fedora 1 3 0 wz--n- <237.28g 0
# %% | |
import httpx | |
import pandas as pd | |
# %% Read CSV and rename headers | |
websites = pd.read_csv("resources/popular_websites.csv", index_col=0) | |
print(websites) | |
# %% Define function to check connection |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.iceberg.*; | |
import org.apache.iceberg.catalog.Catalog; | |
import org.apache.iceberg.catalog.TableIdentifier; | |
import org.apache.iceberg.data.GenericRecord; | |
import org.apache.iceberg.data.IcebergGenerics; | |
import org.apache.iceberg.data.Record; | |
import org.apache.iceberg.data.parquet.GenericParquetWriter; | |
import org.apache.iceberg.hadoop.HadoopCatalog; | |
import org.apache.iceberg.io.CloseableIterable; |
# Copyright 2020 Google LLC | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# https://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, |
/***************************** COPYRIGHT NOTICES *********************** | |
Some of this code is based on metaphone.c file, which can be found here: | |
http://www2.varzeapaulista.sp.gov.br/metaphone/ | |
The metaphone port is authored by Carlos Costa Jordao <carlosjordao@gmail.com> | |
and is covered under this copyright: | |
Copyright 2014, Carlos Costa Jordao <carlosjordao@gmail.com>. | |
All rights reserved. |
<!DOCTYPE html> | |
<html> | |
<head> | |
<!-- --> | |
<meta charset="UTF-8"> | |
<title>Teste de Módulo Metaphone</title> | |
<!-- CSS: Alloy Bootstrap --> | |
<link href="http://cdn.alloyui.com/2.0.0/aui-css/css/bootstrap.min.css" rel="stylesheet"> | |
<!-- O loader do AUI fica em /js/aui/aui e os diversos modulos em /js/aui --> | |
<script charset="utf8" src="http://cdn.alloyui.com/2.0.0/aui/aui-min.js"></script> |
// UserDefinedAggregateFunction is the contract to define | |
// user-defined aggregate functions (UDAFs) | |
class MyCountUDAF extends UserDefinedAggregateFunction { | |
// Este método abaixo define pode ser invocado apenas assim: inputSchema(0) | |
// Isto é feito via inversão de dependência pelo Spark | |
// o retorno é um objeto StructField assim: | |
// StructField("id", LongType, true, {}) | |
// o objeto StructField é do pacote org.apache.spark.sql.types | |
override def inputSchema: StructType = { | |
new StructType().add("id", LongType, nullable = true) |
import org.apache.spark.sql.SparkSession | |
import org.apache.spark.sql.SparkSession.Builder | |
import org.apache.spark.SparkContext | |
import org.apache.log4j.{Level, Logger} | |
// The SparkSession is provided by the Spark Shell itself
// The log level is also already configured by the Spark Shell
/** Placeholder wrapper that ignores all of its arguments and always returns `true`.
  *
  * NOTE(review): presumably a stub standing in for a real boolean UDF while
  * experimenting in the Spark shell -- confirm against the callers.
  *
  * @param a unused
  * @param b unused
  * @param t unused; accepts a value of any type
  * @return always `true`
  */
def boolean_udf_wrapper(a: String, b: String, t: Any): Boolean = true
/** Placeholder wrapper that ignores all of its arguments and always returns
  * the fixed four-bullet string `"••••"`.
  *
  * NOTE(review): presumably a stub standing in for a real string UDF while
  * experimenting in the Spark shell -- confirm against the callers.
  *
  * @param a unused
  * @param b unused
  * @param t unused; accepts a value of any type
  * @return always the literal `"••••"`
  */
def string_udf_wrapper(a: String, b: String, t: Any): String = "••••"
import org.apache.spark.sql.functions.expr | |
import org.apache.spark.sql.functions.sum |