Skip to content

Instantly share code, notes, and snippets.

View tmnd1991's full-sized avatar
🌴

Antonio Murgia tmnd1991

🌴
View GitHub Profile
#!/bin/bash
uv run --no-project - "$@" <<'EOF'
import base64
import mimetypes
import urllib.request
import urllib.error
import urllib.parse
from pathlib import Path
import re
import ibis
from ibis import BaseBackend, Table
from pyspark.errors import AnalysisException
from pyspark.sql import SparkSession
import pyarrow.compute as pc
def main():
# con, db = polars_backend(None)
con, db = spark_backend("not_default")
@tmnd1991
tmnd1991 / CustomLoggerFactory.java
Created January 14, 2025 14:53
How to add an appName to structured logging when multiple tasks are run by a single Spark context
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.Appender;
import org.apache.logging.log4j.core.LoggerContext;
import org.apache.logging.log4j.core.appender.ConsoleAppender;
import org.apache.logging.log4j.layout.template.json.JsonTemplateLayout;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
# gpt aided implementation to understand the best time slots to play beach volley between friends
from pulp import LpProblem, LpVariable, lpSum, LpMaximize
import pulp
import time
import random
# Dati
voti = {
"slot1": {"A", "B", "C", "D", "E", "F"},
"slot2": {"A", "B", "C", "F", "G"},
@tmnd1991
tmnd1991 / DoItWhileYouHaveTime.scala
Created January 25, 2024 11:57
Simple Scala utility function that keeps executing an action while there is still time before a deadline
import java.time.Clock
import java.util.Date
import scala.annotation.tailrec
import scala.concurrent.duration._
object DoItWhileYouHaveTime {
def doItWhileYouHaveTime(
clock: Clock,
deadline: Long,
expectation: FiniteDuration,
Component 6.13.0 6.0.1
Spark 3.4.1 2.4.4
Hadoop 3.3.3 3.2.1
Zookeeper 3.5.10 3.4.14
# Fetch the cluster configuration XML from host $rm and extract the three
# HA NameNode RPC addresses (nn1/nn2/nn3) via xpath.
rm=yarn
conf=$(curl -sB "http://$rm:9870/conf")
nn1=$(echo "$conf" | xmllint --xpath "//property[name='dfs.namenode.rpc-address.ha-nn-uri.nn1']/value/text()" -)
nn2=$(echo "$conf" | xmllint --xpath "//property[name='dfs.namenode.rpc-address.ha-nn-uri.nn2']/value/text()" -)
nn3=$(echo "$conf" | xmllint --xpath "//property[name='dfs.namenode.rpc-address.ha-nn-uri.nn3']/value/text()" -)
# BUG FIX: bash array elements are whitespace-separated; the original
# comma-separated form `($nn1, $nn2, $nn3)` left literal commas attached
# to the first two element values. Quote to keep each address one element.
nns=("$nn1" "$nn2" "$nn3")
nameNode=''
for nn in $nn1 $nn2 $nn3; do
if hdfs dfs -fs hdfs://$nn -ls / > /dev/null 2>&1; then
%sh
# deletes files and folders older than 7 days
# Notebook shell cell (%sh magic): prune old Spark application logs in HDFS.
staging="/var/log/spark/apps/"
# Report directory usage before the cleanup, for before/after comparison.
hdfs dfs -du -h -s $staging
# Pipeline: list entries (newest first); in awk, $6 is the modification date
# (YYYY-MM-DD, converted to "YYYY MM DD" by gensub for mktime) and $8 the path.
# Paths whose age exceeds 7 days are deleted recursively, bypassing the trash.
# NOTE(review): mktime and gensub are GNU awk extensions — requires gawk.
hdfs dfs -ls -t $staging | awk -v now=$(date +%s) '{if ((now-mktime(gensub("-", " ", "g", $6) " 00 00 00"))/86400 > 7) print $8}' | xargs hdfs dfs -rm -r -skipTrash
# Report usage again to confirm how much was reclaimed.
hdfs dfs -du -h -s $staging
<html>
<head>
<script type="text/javascript"
src="https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.26.0/moment-with-locales.min.js"></script>
<!-- <script type="text/javascript" src="script.js"></script> -->
<script type="text/javascript">
const Weeks = Object.freeze({ FirstWeek: 1, SecondWeek: 2, ThirdWeek: 3, FourthWeek: 4 })
const Days = Object.freeze({ Monday: 1, Tuesday: 2, Wednesday: 3, Thursday: 4, Friday: 5, Saturday: 6, Sunday: 6 })
function firstMonday(m, y) {
import java.io.ByteArrayOutputStream
import com.typesafe.config.Config
import it.agilelab.darwin.manager.AvroSchemaManagerFactory
import it.agilelab.darwin.manager.util.AvroSingleObjectEncodingUtils
import org.apache.avro.Schema
import org.apache.avro.file.SeekableByteArrayInput
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{BinaryDecoder, BinaryEncoder, DecoderFactory, EncoderFactory}