# Temporarily shrink a topic's retention so the broker purges old segments.
# NOTE: --zookeeper is deprecated; on Kafka 2.2+ prefer --bootstrap-server,
# and set per-topic configs with kafka-configs.sh.
bin/kafka-topics.sh --zookeeper localhost:2181 --list
bin/kafka-topics.sh --zookeeper localhost:2181 --describe --topic mytopic
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --config retention.ms=1000
# ... wait a minute for the log cleaner to delete the old segments ...
1. Removing quotes and nested quotes: ^ {4}(\w+ wrote:(?:\n+(?: {4,}.*))*) | |
This will match quotes like this one:
``` | |
user1 wrote: | |
user2 wrote: | |
I use Lens X on Camera Y. It's a super good combo. I have bought the lens station on Amazon. Upgraded to latest firmware and I have full ibis now. | |
I just got my 85mm and it does not work well on my Camera W. As others have noted, IBIS and BB+ shooting does not work. |
{ | |
"$schema": "https://raw.githubusercontent.com/jsonresume/resume-schema/v1.0.0/schema.json", | |
"basics": { | |
"name": "Viacheslav Rodionov", | |
"label": "BIG DATA architect", | |
"location": { | |
"city": "Munich", | |
"countryCode": "DE", | |
"region": "Bavaria" | |
}, |
# Store an SSH private key in Bitwarden as a secure note, then retrieve it.
SECRET_NAME=id_rsa
SECRET_PATH=~/.ssh/id_rsa

# Store the (base64-encoded) secret content as a secure-note item.
# Build the JSON with jq -n instead of hand-escaping a quoted string —
# safe against special characters in the name or the encoded payload.
jq -n --arg name "${SECRET_NAME}" --arg notes "$(base64 -w 0 "${SECRET_PATH}")" \
  '{organizationId: null, folderId: null, type: 2, name: $name, notes: $notes,
    favorite: false, fields: [], login: null, secureNote: {type: 0},
    card: null, identity: null}' | bw encode | bw create item
bw sync # optional

# Retrieve the secret (assumes a single search result).
bw list items --search "${SECRET_NAME}" | jq -r '.[0].notes' | base64 -d > "${SECRET_PATH}"
# In case you're using chezmoi, a template can retrieve that secret automatically:
#   cat "$(chezmoi source-path "${SECRET_PATH}")"
#!/usr/bin/env bash
# Print every Kafka topic with its size in GB (broker 0, log dir 0),
# sorted numerically by size. Requires the Kafka CLI tools and jq.

# Print "<topic> = <size-in-GB>" for the topic given as $1.
topic-size() {
  kafka-log-dirs --command-config /opt/kafka/ssl/client.txt \
    --bootstrap-server server:9093 --topic-list "${1}" --describe \
    | tail -n1 \
    | jq '.brokers[0].logDirs[0].partitions | map(.size/1000000000) | add' \
    | xargs echo "${1}" =
}

# List all topic names, one per line.
list-topics() {
  kafka-topics --command-config /opt/kafka/ssl/client.txt \
    --bootstrap-server server:9093 --list
}

export -f topic-size
TEMP_FILE=$(mktemp)
trap 'rm -f -- "$TEMP_FILE"' EXIT   # clean up even if a command fails

# Pass each topic name as a positional argument instead of splicing {}
# into the bash -c command string (avoids injection via topic names).
list-topics | xargs -I{} bash -c 'topic-size "$1"' _ {} > "$TEMP_FILE"

# Output lines look like "<topic> = <size>", so field 3 is the size.
sort -g -k3 "$TEMP_FILE"
# Kill every RUNNING YARN application on a multi-master cluster (for example
# AWS EMR 5.x) where hitting the standby ResourceManager yields errors like:
#   "This is standby RM. The redirect url is: ..."
# or (for the yarn CLI) repeated ConnectionRefused / "Failing over to rm2"
# retry messages.
#
# Workaround: query the local RM; a standby answers with a redirect whose
# Location header names the active RM host.
ACTIVE_HOST=$(curl -s -i "http://${HOSTNAME}:8088/ws/v1/cluster/metrics" \
  | grep "Location:" | grep http | cut -d' ' -f2 | cut -d'/' -f3 | cut -d':' -f1)
RM_HOSTNAME=${ACTIVE_HOST:-$HOSTNAME}   # no redirect => local RM is already active

# jq -r (not -c) so app ids are emitted without surrounding JSON quotes;
# -n1 kills one application per yarn invocation; -r skips yarn entirely
# when no applications are running.
curl -s -L "http://${RM_HOSTNAME}:8088/ws/v1/cluster/apps?state=RUNNING" \
  | jq -r '.apps.app[].id' \
  | xargs -r -n1 yarn application --kill
### based on great SO answers: https://stackoverflow.com/a/50593885/918211 and https://stackoverflow.com/a/46768243/918211 | |
## Debian/Ubuntu specific | |
# sudo apt install -y firefox-geckodriver | |
# python3 -m venv venv | |
# cd venv | |
# source bin/activate | |
# pip install selenium beautifulsoup4 |
# Temporarily shrink a topic's retention so the broker purges old segments.
# NOTE: --zookeeper is deprecated; on Kafka 2.2+ prefer --bootstrap-server,
# and set per-topic configs with kafka-configs.sh.
bin/kafka-topics.sh --zookeeper localhost:2181 --list
bin/kafka-topics.sh --zookeeper localhost:2181 --describe --topic mytopic
bin/kafka-topics.sh --zookeeper localhost:2181 --alter --topic mytopic --config retention.ms=1000
# ... wait a minute for the log cleaner to delete the old segments ...
# For advanced users only: install a newer GCC/G++ from the toolchain PPA
# and register it as the default via update-alternatives.
# sudo add-apt-repository ppa:ubuntu-toolchain-r/test
GCC_VERSION=11 # or whatever
sudo apt update
sudo apt install gcc-${GCC_VERSION} gcc-${GCC_VERSION}-locales gcc-${GCC_VERSION}-multilib \
  g++-${GCC_VERSION} g++-${GCC_VERSION}-multilib cpp-${GCC_VERSION}
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${GCC_VERSION} 10
# test with
g++-${GCC_VERSION} --version
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${GCC_VERSION} 10
# test with (this verification command was missing in the original)
gcc-${GCC_VERSION} --version
#!/usr/bin/env bash
# This script generates a JSON structure that represents
# topic offsets per partition for a given moment of time.
# This structure is very useful when used as a
# "startingOffsets" parameter in Spark Structured Streaming.
# NOTE(review): only the header is visible here; the body below this chunk
# presumably defines the aliases that expand_aliases enables — confirm.
shopt -s expand_aliases
#### YOU NEED TO SET UP THIS PART ####
// Infer a Spark schema from JSON documents stored as strings in the
// "value" column of a Parquet dataset (paste into spark-shell).
val sourcePath = "s3://some/dir"
val jsonStrings = spark.read
  .parquet(sourcePath)
  .select($"value") // suppose "value" is a string holding a JSON document
  .as[String]
val inferredSchema = spark.read.json(jsonStrings).schema // here is your schema
println(inferredSchema.json)
println(inferredSchema.toDDL)