Skip to content

Instantly share code, notes, and snippets.

@gyakkun
Last active November 1, 2022 05:58
Show Gist options
  • Save gyakkun/fae171d09342af9b6f78a4d33c741ac9 to your computer and use it in GitHub Desktop.
Save gyakkun/fae171d09342af9b6f78a4d33c741ac9 to your computer and use it in GitHub Desktop.
#!/bin/bash
# ---------------------------------------------------------------------------
# Configuration and shared state
# ---------------------------------------------------------------------------

# Local clone of the archive repository. Override via the BGM_GIT_REPO
# environment variable. The default is left unquoted on purpose so that the
# leading ~ is still tilde-expanded.
G_GIT_REPO_DIR=${BGM_GIT_REPO:-~/source/bgm-archive}

# Group topic ids that must never be archived (empty by default).
G_GROUP_TOPIC_AVOID_LIST=()

# Directory the script was started from; the main flow cd's back to it.
G_PWD=$(pwd)

# "Return value" slot: helper functions store their result here instead of
# printing it.
G_RET=""

# printf templates; %s is replaced with the domain picked by randBgmDomain.
# The matching *_URL variables hold the formatted result.
BGM_RAUKEN_TOPICLIST_URL_TEMPLATE="https://%s/rakuen/topiclist?type=group"
BGM_RAUKEN_TOPICLIST_URL=""
BGM_GROUP_TOPIC_URL_TEMPLATE="https://%s/m/topic/group"
BGM_GROUP_TOPIC_URL=""

# Domains randBgmDomain may pick from. Alternative, wider list:
#BGM_DOMAIN_LIST=(bangumi.tv bgm.tv chii.in)
BGM_DOMAIN_LIST=(bgm.tv)

# Proxy exported to child processes.
# NOTE(review): this is a hard-coded address and it overrides any proxy
# already present in the environment - confirm that is intended.
export http_proxy=192.168.3.15:1183 https_proxy=192.168.3.15:1183
#######################################
# Download a URL into a file with bounded timeouts and retries.
# Arguments:
#   $1 - URL to fetch
#   $2 - path of the file to write
# Returns:
#   curl's exit status. --fail is not passed, so an HTTP error page is still
#   written to the output file and reported as success.
#######################################
curlToFile() {
  local url=$1
  local destination=$2

  # Both operands are quoted so URLs/paths containing spaces or glob
  # characters reach curl as single arguments.
  curl --connect-timeout 10 \
    --max-time 10 \
    --retry 6 \
    --retry-delay 3 \
    --retry-max-time 40 \
    -s -L --output "$destination" "$url"
}
#######################################
# Normalise a downloaded HTML file in place (GNU sed):
#   1. remove everything from the first "<script" to the last "</script>"
#      on each line (the match is greedy, so text sitting between two script
#      elements on the same line is removed as well),
#   2. strip leading spaces and tabs,
#   3. delete lines that end up empty.
# Arguments:
#   $1 - path of the file to rewrite
# Returns:
#   sed's exit status.
#######################################
trimHtml() {
  local html_file=$1

  # All three edits are line-local, so one pass gives the same result as
  # three separate in-place rewrites. The path is quoted so file names with
  # spaces work.
  sed -i \
    -e 's|<script.*</script>||g' \
    -e 's|^[ \t]*||g' \
    -e '/^$/d' \
    -- "$html_file"
}
#######################################
# Pick a random entry of BGM_DOMAIN_LIST.
# Globals:
#   BGM_DOMAIN_LIST (read) - candidate domains
#   G_RET (written)        - the chosen domain, or "" when the list is empty
# Returns:
#   0 on success, 1 if BGM_DOMAIN_LIST is empty.
#######################################
randBgmDomain() {
  local count=${#BGM_DOMAIN_LIST[@]}

  # Guard against a modulo-by-zero arithmetic error on an empty list.
  if (( count == 0 )); then
    G_RET=""
    echo "randBgmDomain: BGM_DOMAIN_LIST is empty" >&2
    return 1
  fi

  # An array subscript is already an arithmetic context; no need for the
  # deprecated $[ ... ] form.
  G_RET=${BGM_DOMAIN_LIST[RANDOM % count]}
}
#######################################
# Store the current Unix time in milliseconds.
# Globals:
#   G_RET (written) - milliseconds since the epoch
# Relies on date's %N (nanoseconds) format, as the original did.
#######################################
currentTimeMills() {
  local epoch_nanos
  epoch_nanos=$(date +%s%N)
  # Integer division: nanoseconds -> milliseconds.
  G_RET=$((epoch_nanos / 1000000))
}
#######################################
# Store the current UTC time as an ISO-8601 timestamp with millisecond
# precision, e.g. 2022-11-01T05:58:00.123Z.
# Globals:
#   G_RET (written) - the formatted timestamp
#######################################
currentTimeISO() {
  local -r iso_format='%Y-%m-%dT%H:%M:%S.%3NZ'
  G_RET=$(date -u "+${iso_format}")
}
#######################################
# Tell whether a command name resolves to something runnable.
# Arguments:
#   $1 - command name to look up
# Outputs:
#   nothing (the lookup's stdout and stderr are discarded)
# Returns:
#   0 if `command -v` finds it, 1 otherwise.
#######################################
command_exists() {
  local tool=$1
  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
# ---------------------------------------------------------------------------
# Main flow: sync the archive repo, download the group topic list, archive
# every listed topic page, then commit and push the result.
# ---------------------------------------------------------------------------

# Make sure the repo (and its group/ subtree) exists, then bring it up to date.
mkdir -p "$G_GIT_REPO_DIR/group"
# Abort if the repo directory is unreachable: git init/pull here, and the
# rm / git add further down, must never run in some other directory.
cd "$G_GIT_REPO_DIR" || exit 1
pwd
git init
proxychains git pull
cd "$G_PWD" || exit 1
pwd

# Log the current time in both formats.
currentTimeMills
echo "$G_RET"
currentTimeISO
echo "$G_RET"

# Build the topic-list URL for a randomly picked domain and download it.
randBgmDomain
echo "random picked : $G_RET"
# The template is deliberately used as the printf format (it carries the %s).
# shellcheck disable=SC2059
printf -v BGM_RAUKEN_TOPICLIST_URL "$BGM_RAUKEN_TOPICLIST_URL_TEMPLATE" "$G_RET"
#echo after format $BGM_RAUKEN_TOPICLIST_URL $G_GIT_REPO_DIR/tmp.html
# Best effort, as before: a failed download is reported but does not stop
# the run.
curlToFile "$BGM_RAUKEN_TOPICLIST_URL" "$G_GIT_REPO_DIR/tmp.html" \
  || echo "warning: failed to download $BGM_RAUKEN_TOPICLIST_URL" >&2

# Extract the distinct topic ids once, sorted, into an array.
mapfile -t sorted_topics < <(
  grep -Po '(?<=href="/rakuen/topic/group/)[0-9]+' "$G_GIT_REPO_DIR/tmp.html" \
    | sort -u
)

# Rewrite topiclist.txt from scratch (it ends up empty when no id was found).
for topic_id in "${sorted_topics[@]}"; do
  echo "$topic_id"
done > "$G_GIT_REPO_DIR/group/topiclist.txt"

# Visit the topics in random order. The ids are passed to shuf as separate
# arguments; the former `shuf -e "${topic_list[@]}"` on a scalar handed shuf a
# single argument and shuffled nothing, which is why the page was grepped a
# second time.
mapfile -t topic_list < <(shuf -e "${sorted_topics[@]}")
echo "${topic_list[*]}"

for i in "${topic_list[@]}"; do
  echo "archiving group topic $i"

  # Space-padded substring match against the space-joined avoid list.
  if [[ " ${G_GROUP_TOPIC_AVOID_LIST[*]} " == *" $i "* ]]; then
    echo "$i is in AVOID LIST"
    continue
  fi

  # Shard the output as group/<id / 10000>/<(id % 10000) / 100>/<id>.html,
  # both directory names zero-padded to two digits. 10# forces base 10 so an
  # id with a leading zero is not parsed as octal.
  printf -v ten_thousand '%02d' "$(( 10#$i / 10000 ))"
  printf -v hundred '%02d' "$(( (10#$i % 10000) / 100 ))"
  output_dir=$G_GIT_REPO_DIR/group/$ten_thousand/$hundred
  output_loc=$output_dir/$i.html
  mkdir -p "$output_dir"

  randBgmDomain
  # shellcheck disable=SC2059
  printf -v BGM_GROUP_TOPIC_URL "$BGM_GROUP_TOPIC_URL_TEMPLATE" "$G_RET"
  echo "$BGM_GROUP_TOPIC_URL/$i to $output_loc"

  if curlToFile "$BGM_GROUP_TOPIC_URL/$i" "$output_loc"; then
    trimHtml "$output_loc"
    # tidy is optional; when installed it normalises the markup in place.
    if command_exists tidy; then
      echo tidying
      tidy --drop-empty-elements no \
        --tidy-mark no \
        --wrap 0 \
        --sort-attributes alpha \
        --quiet yes \
        --show-warnings no \
        -o "$output_loc" "$output_loc"
    fi
  else
    # Do not post-process a missing or partial download.
    echo "download failed for topic $i, skipping post-processing" >&2
  fi

  # Pause between requests.
  sleep 1
done

# Commit whatever changed (an empty commit is allowed on purpose) and push.
cd "$G_GIT_REPO_DIR" || exit 1
git_commit_msg="GROUP TOPIC: "
currentTimeISO
git_commit_msg+=" $G_RET "
currentTimeMills
git_commit_msg+="| $G_RET"
rm -rf -- tmp.html
# `--` keeps a file name starting with "-" from being read as an option.
git add -- *
git commit --allow-empty -m "$git_commit_msg"
proxychains git push
cd "$G_PWD" || exit 1
# NOTE(review): printed regardless of the commit/push exit status.
echo "success"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment