Last active
December 18, 2024 20:21
-
-
Save dewomser/0e9f374409c98f9d5fcee935008512f6 to your computer and use it in GitHub Desktop.
Google query output as CSV: a Bash one-liner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Run a Google search through lynx and store the result links as CSV.
#
# Usage:   google_query_csv.sh ["search terms"]
# Output:  output.csv in the current directory, ';'-separated, consisting of
#          the header "Zeilenummer;URL" and one numbered row per result link.
#
# Spaces in the search terms are replaced by '+'. Every other character is
# sent unchanged, so it has to be URL-safe or already percent-encoded (as in
# the default query "linux%20worms", used when no search terms are given).
#
# Exit status: 0 on success, otherwise the status of the failed step.

# No 'set -e': every step that can fail is checked explicitly below.
set -u
set -o pipefail

readonly DEFAULT_QUERY="linux%20worms"
readonly OUTPUT_FILE="output.csv"
# Google wraps every result link in a redirect that starts with this prefix.
readonly REDIRECT_PREFIX="https://www.google.com/url?q="

#######################################
# Turn a lynx dump into numbered CSV rows.
# Reads the dump from stdin. Each line containing a Google redirect link
# (REDIRECT_PREFIX + TARGET + "&sa=...") is reduced to TARGET; targets that
# contain "google" are skipped.
# Globals:   REDIRECT_PREFIX (read)
# Arguments: none
# Outputs:   rows "NNNNN;TARGET" on stdout, NNNNN counting up from 00001
#######################################
extract_result_rows() {
  local line url
  local count=0

  # '|| [[ -n ... ]]' keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" != *"$REDIRECT_PREFIX"* ]]; then
      continue
    fi
    # The prefix is quoted so that '?' and '.' are matched literally.
    url="${line##*"$REDIRECT_PREFIX"}"
    url="${url%%&sa=*}"
    if [[ "$url" == *google* ]]; then
      continue
    fi
    count=$((count + 1))
    printf '%05d;%s\n' "$count" "$url"
  done
}

#######################################
# Fetch the search result page and write the CSV file.
# Globals:   DEFAULT_QUERY, OUTPUT_FILE (read)
# Arguments: search terms; several arguments are joined with spaces
# Outputs:   OUTPUT_FILE; a status message on stdout or an error on stderr
# Returns:   0 on success, non-zero if the file cannot be written or the
#            lynx pipeline fails
#######################################
main() {
  local query="$DEFAULT_QUERY"
  local status=0

  if [[ -n "$*" ]]; then
    query="$*"
  fi

  # Create the CSV file and write the header row.
  printf '%s\n' "Zeilenummer;URL" > "$OUTPUT_FILE" || return 1

  # Run the search and append one row per result link.
  lynx -dump -cookies -accept_all_cookies --display_charset=utf-8 \
    "https://google.com/search?q=${query// /+}" \
    | extract_result_rows >> "$OUTPUT_FILE" || status=$?

  if (( status != 0 )); then
    printf 'Fehler: Google-Abfrage mit lynx fehlgeschlagen (Exit-Status %d)\n' \
      "$status" >&2
    return "$status"
  fi

  echo "CSV-Datei erstellt: $OUTPUT_FILE"
}

main "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plain-http variant: works right away, without any cookie setup.
# Keeps the numbered reference lines of the lynx dump that point to a Google
# redirect, strips the redirect wrapper, drops targets containing "google" and
# writes each remaining URL to googlequery.csv, prefixed with a zero-padded
# running number and ';'.
query="linux%20worms";lynx -dump -cookies="" --display_charset=utf-8 "http://google.com/search?q=${query// /+}"|grep '^[[:blank:]][[:blank:]].[[:digit:]][[:punct:]]'| grep 'http://www.google.com/url?q=http' | sed -e 's/http:\/\/www.google.com\/url?q=//g' |sed -e 's/&sa=.*$//g'|grep -v google | cut -c5- | sed -rn "=;p;"| sed -r "N;s/\n//" | sed -e "s/\<\([0-9]\{1,2\}\)\>/0000\1/; s/\<0*\([0-9]\{2\}\)\>/\1/" |sed -e "s/^/ /g"| sed -e "s/\./;/1" > googlequery.csv
# https variant using cookies.
# One-time setup so that this line works:
#   1. run "lynx -cookies -accept_all_cookies https://www.google.com"
#   2. confirm what is shown there so that all cookies are stored persistently
# Afterwards this line keeps working until the cookies are deleted.
# Same processing as the http variant, but the result goes to output.csv.
query="linux%20worms";lynx -dump -cookies -accept_all_cookies --display_charset=utf-8 "https://google.com/search?q=${query// /+}"|grep '^[[:blank:]][[:blank:]].[[:digit:]][[:punct:]]'| grep 'https://www.google.com/url?q=http' | sed -e 's/https:\/\/www.google.com\/url?q=//g' |sed -e 's/&sa=.*$//g'|grep -v google | cut -c5- | sed -rn "=;p;"| sed -r "N;s/\n//" | sed -e "s/\<\([0-9]\{1,2\}\)\>/0000\1/; s/\<0*\([0-9]\{2\}\)\>/\1/" |sed -e "s/^/ /g"| sed -e "s/\./;/1" > output.csv
HTTPS-Version mit Cookies.
Idee für den Text:
# Keep the dump lines that contain "[" followed by a digit (presumably lynx's
# "[N]" link markers), start a new line before every "[", then drop
# whitespace-only and empty lines. "\n" in the sed replacement needs GNU sed.
# NOTE(review): the "[" is escaped here; unescaped it opens a bracket
# expression that never closes, which makes sed abort.
query="Worms";lynx -dump -cookies -accept_all_cookies --display_charset=utf-8 "https://google.com/search?q=${query// /+}"|grep '\[[[:digit:]]' | sed 's/\[/\n[/g'| sed '/^[[:space:]]$/d'| sed 's/^[[:space:]].//g'| sed '/^$/d' > output.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Trennzeichen ist ;

Parameter werden mit dem Aufruf übergeben.
Beispiel: google_query_csv.sh "wie funktioniert das Internet"
Links werden jetzt in korrektem UTF-8 angezeigt.