Created
December 30, 2019 14:01
-
-
Save vpnwall-services/624e392053ca62fe10669b74f20ea51b to your computer and use it in GitHub Desktop.
[Instagram Web Retriever] Instagram web content retriever to json object #instagram #web #retriever #bash #script #curl #json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Instagram web retriever.
#
# For every customer ID listed in the customer list file (one ID per line;
# "customerlist.txt" in the current directory unless another path is given
# as the first argument), download the profile page, pull the JSON assigned
# to "sharedData" out of the HTML and write the posts found in it to
#
#   epresspack-instagram-feed/<customer>/<customer>.json
#
# as a JSON array sorted by timestamp, newest first. Every element carries
# the keys instaLink, instaPic, instaId and timestamp.
#
# Requires: curl, jq and a grep that supports -P (PCRE).
#
# Exit status: 0 when every customer was processed, 1 when the list file is
# unreadable or at least one customer failed.

set -uo pipefail

readonly URL_INSTA="https://www.instagram.com/"
readonly FEED_DIR="epresspack-instagram-feed"

# Print a diagnostic message on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Read the HTML of a profile page on stdin and print the JSON text that is
# assigned to "sharedData" on stdout. Returns non-zero when nothing matches.
# Assumes the assignment and its closing </script> sit on a single line, as
# the original pattern of this script did.
extract_shared_data() {
  # Lazy match up to the first "</script>", excluding the optional ";" that
  # terminates the JavaScript statement, so that the output is plain JSON.
  grep -m 1 -o -P '(?<=sharedData = ).*?(?=;?</script>)'
}

# Read the sharedData JSON on stdin and print the feed array on stdout.
# Arguments: $1 - base URL without trailing slash used to build post links.
build_feed() {
  local base=$1

  # Nodes are collected from every object under "user" that has an "edges"
  # list, then de-duplicated by id: the same post may be listed in more than
  # one collection. timestamp and instaId are emitted as strings.
  # To pick a specific thumbnail size instead of thumbnail_src, select from
  # .thumbnail_resources[] by config_width / config_height.
  jq --arg base "$base" '
    [ .entry_data.ProfilePage[].graphql.user[]
      | objects
      | .edges[]?
      | .node?
      | objects
      | select(.id != null) ]
    | unique_by(.id)
    | map({
        instaLink: ($base + "/p/" + (.shortcode | tostring)),
        instaPic: .thumbnail_src,
        instaId: (.id | tostring),
        timestamp: (.taken_at_timestamp | tostring)
      })
    | sort_by(.timestamp)
    | reverse
  '
}

# Fetch one customer's profile page and (re)generate its feed file.
# Arguments: $1 - customer ID, appended to URL_INSTA and used as directory
#                 and file name.
# Returns:   0 on success; non-zero on failure, in which case a feed file
#            that already exists is left untouched.
process_customer() {
  local customer=$1
  local out_dir="${FEED_DIR}/${customer}"
  local out_file="${out_dir}/${customer}.json"
  local tmp_file="${out_dir}/${customer}-temp.json"
  local html shared

  if ! mkdir -p -- "$out_dir"; then
    warn "${customer}: cannot create ${out_dir}"
    return 1
  fi

  if ! html=$(curl -fsSL "${URL_INSTA}${customer}"); then
    warn "${customer}: download failed"
    return 1
  fi

  if ! shared=$(extract_shared_data <<<"$html"); then
    warn "${customer}: no sharedData found in the profile page"
    return 1
  fi

  # Write next to the target and rename only once jq has succeeded, so a
  # failed run never replaces a good feed with a broken one.
  if ! build_feed "${URL_INSTA%/}" <<<"$shared" >"$tmp_file"; then
    rm -f -- "$tmp_file"
    warn "${customer}: could not build the feed from sharedData"
    return 1
  fi

  mv -f -- "$tmp_file" "$out_file"
}

# Process every customer ID of the list file.
# Arguments: $1 - optional path of the list file (default: customerlist.txt).
main() {
  local list_file=${1:-customerlist.txt}
  local customer
  local status=0

  if [[ ! -r "$list_file" ]]; then
    warn "customer list not readable: ${list_file}"
    return 1
  fi

  # Default IFS makes read trim surrounding blanks; the "|| [[ -n ... ]]"
  # part keeps a last line that has no trailing newline.
  while read -r customer || [[ -n "$customer" ]]; do
    customer=${customer%$'\r'}
    [[ -n "$customer" ]] || continue
    printf '%s\n' "$customer"
    process_customer "$customer" || status=1
  done <"$list_file"

  return "$status"
}

# Last command of the script: its return value becomes the exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment