flovv/scrapeGoogleImages.js

## scrapeGoogleImages.js
var url ='https://www.google.de/search?q=Yahoo+logo&source=lnms&tbm=isch&sa=X';
// NOTE: keep the `var url` assignment on the very first line of this script.
// The companion R function scrapeJSSite() overwrites line 1 of the PhantomJS
// script (which it reads as "imageScrape.js") with a fresh `var url ='...';`
// before every run.

var fs = require('fs');
var page = new WebPage();

// Viewport dimensions in pixels; sc() restores these once scrolling is done.
var vWidth = 1080;
var vHeight = 1920;
page.viewportSize = { width: vWidth, height: vHeight };

// Scroll state shared with sc():
//   s     - current scroll offset, advanced on every scroll step
//   sBase - last measured document height
var s = 0;
var sBase = page.evaluate(function () {
    return document.body.scrollHeight;
});
page.scrollPosition = { top: sBase, left: 0 };

/**
 * Scroll the page down by one step and re-arm itself until the bottom is reached.
 *
 * On every call the document height is measured again, the scroll position is
 * moved to the current offset `s`, the viewport height is set to that same
 * offset, and the next step is scheduled 110 ms later. When `s` has passed the
 * document height, the viewport is reset to vWidth x vHeight and scrolling stops.
 *
 * Reads and updates the script-level `s` and `sBase`; uses `page`, `vWidth`
 * and `vHeight`. Takes no arguments and returns nothing.
 */
function sc() {
    // Measure again on every tick; the height may differ from the previous one.
    sBase = page.evaluate(function () { return document.body.scrollHeight; });

    // One twentieth of the document height per tick, capped at 400 px.
    var step = Math.min(sBase / 20, 400);

    // Stop once past the bottom. Also stop when the step is not a positive
    // number (height 0 or not measurable): `s` could never advance, so the
    // timer would otherwise re-arm forever without making progress.
    if (s > sBase || !(step > 0)) {
        page.viewportSize = {width: vWidth, height: vHeight};
        return;
    }

    page.scrollPosition = {top: s, left: 0};
    page.viewportSize = {width: vWidth, height: s};
    s += step;
    setTimeout(sc, 110);
}

/**
 * After a fixed 2500 ms delay, dump the current page markup to "1.html"
 * and terminate PhantomJS.
 *
 * PhantomJS keeps running until phantom.exit() is called, so the exit call
 * must be reached even when writing the file fails; otherwise the process
 * (and whoever is waiting on it) would hang. A failed write is reported on
 * the console and signalled through exit code 1; success exits with 0.
 */
function just_wait() {
    setTimeout(function () {
        var exitCode = 0;
        try {
            fs.write('1.html', page.content, 'w');
        } catch (err) {
            console.log('Could not write 1.html: ' + err);
            exitCode = 1;
        }
        phantom.exit(exitCode);
    }, 2500);
}

// Load the search page; once the load callback fires, start scrolling and
// arm the delayed dump-and-exit.
page.open(url, function (status) {
    // Do not ignore the load result silently. The run still continues on a
    // non-success status so that 1.html is always (re)written, as before.
    if (status !== 'success') {
        console.log('Warning: loading ' + url + ' finished with status "' + status + '"');
    }
    sc();
    just_wait();
});


## scrapeGoogleImages.r


library(plyr)
library(reshape2)
require(rvest)


# Render the Google Images result page for `searchTerm` with PhantomJS and
# collect the `src` attribute of every <img> element in the rendered markup.
#
# searchTerm: query text pasted verbatim into the URL, so it must already be
#             URL-encoded by the caller (e.g. "Adidas+logo").
# Returns:    data.frame with one row per <img>; column `images` holds the
#             src value, column `search` repeats `searchTerm`.
#
# Side effects: rewrites line 1 of "imageScrape.js" in the working directory,
# deletes any existing "1.html", runs `phantomjs`, and reads the new "1.html".
# Stops with an error when PhantomJS fails or produces no "1.html".
scrapeJSSite <- function(searchTerm){
  url <- paste0("https://www.google.de/search?q=",searchTerm, "&source=lnms&tbm=isch&sa=X")

  # The first line of the script is replaced with the URL assignment.
  lines <- readLines("imageScrape.js")
  lines[1] <- paste0("var url ='", url ,"';")
  writeLines(lines, "imageScrape.js")

  # Drop output left over from an earlier run so that a failed render cannot
  # be mistaken for fresh results.
  if (file.exists("1.html")) {
    file.remove("1.html")
  }

  ## Download website
  status <- system("phantomjs imageScrape.js")
  if (status != 0 || !file.exists("1.html")) {
    stop("phantomjs did not produce 1.html (exit status ", status, ")")
  }

  pg <- read_html("1.html")
  files <- pg %>% html_nodes("img") %>% html_attr("src")
  # rep() keeps both columns the same length even when no <img> was found;
  # a bare scalar makes data.frame() fail on a zero-row result.
  df <- data.frame(images=files, search=rep(searchTerm, length(files)))
  return(df)
}


# Download every URL in `files` into the directory `outPath`.
#
# files:   character vector of image URLs.
# brand:   prefix used for the generated file names.
# outPath: target directory; created (including parents) when missing.
#
# Each download is stored in binary mode as "<outPath>/<brand>_<i>.jpg",
# where <i> is the position of the URL within `files`. Returns NULL invisibly.
downloadImages <- function(files, brand, outPath="images"){
  # download.file() cannot write into a directory that does not exist.
  dir.create(outPath, recursive = TRUE, showWarnings = FALSE)

  # seq_along() gives an empty sequence for empty input, whereas
  # 1:length(files) would count 1, 0 and try to download files[1].
  for(i in seq_along(files)){
    download.file(files[i], destfile = paste0(outPath, "/", brand, "_", i, ".jpg"), mode = 'wb')
  }

  invisible(NULL)
}

### exchange the search terms here!
# `brand` doubles as the file-name prefix for the downloaded images; the
# original call passed an undefined variable `i` in its place.
brand <- "Adidas"
gg <- scrapeJSSite(searchTerm = paste0(brand, "+logo"))
downloadImages(as.character(gg$images), brand)
	var url ='https://www.google.de/search?q=Yahoo+logo&source=lnms&tbm=isch&sa=X';
	// NOTE: keep the `var url` assignment on the very first line of this script.
	// The companion R function scrapeJSSite() overwrites line 1 of the PhantomJS
	// script (which it reads as "imageScrape.js") with a fresh `var url ='...';`
	// before every run.

	var fs = require('fs');
	var page = new WebPage();

	// Viewport dimensions in pixels; sc() restores these once scrolling is done.
	var vWidth = 1080;
	var vHeight = 1920;
	page.viewportSize = { width: vWidth, height: vHeight };

	// Scroll state shared with sc():
	//   s     - current scroll offset, advanced on every scroll step
	//   sBase - last measured document height
	var s = 0;
	var sBase = page.evaluate(function () {
	    return document.body.scrollHeight;
	});
	page.scrollPosition = { top: sBase, left: 0 };

	/**
	 * Scroll the page down by one step and re-arm itself until the bottom is reached.
	 *
	 * On every call the document height is measured again, the scroll position is
	 * moved to the current offset `s`, the viewport height is set to that same
	 * offset, and the next step is scheduled 110 ms later. When `s` has passed the
	 * document height, the viewport is reset to vWidth x vHeight and scrolling stops.
	 *
	 * Reads and updates the script-level `s` and `sBase`; uses `page`, `vWidth`
	 * and `vHeight`. Takes no arguments and returns nothing.
	 */
	function sc() {
	    // Measure again on every tick; the height may differ from the previous one.
	    sBase = page.evaluate(function () { return document.body.scrollHeight; });

	    // One twentieth of the document height per tick, capped at 400 px.
	    var step = Math.min(sBase / 20, 400);

	    // Stop once past the bottom. Also stop when the step is not a positive
	    // number (height 0 or not measurable): `s` could never advance, so the
	    // timer would otherwise re-arm forever without making progress.
	    if (s > sBase || !(step > 0)) {
	        page.viewportSize = {width: vWidth, height: vHeight};
	        return;
	    }

	    page.scrollPosition = {top: s, left: 0};
	    page.viewportSize = {width: vWidth, height: s};
	    s += step;
	    setTimeout(sc, 110);
	}

	/**
	 * After a fixed 2500 ms delay, dump the current page markup to "1.html"
	 * and terminate PhantomJS.
	 *
	 * PhantomJS keeps running until phantom.exit() is called, so the exit call
	 * must be reached even when writing the file fails; otherwise the process
	 * (and whoever is waiting on it) would hang. A failed write is reported on
	 * the console and signalled through exit code 1; success exits with 0.
	 */
	function just_wait() {
	    setTimeout(function () {
	        var exitCode = 0;
	        try {
	            fs.write('1.html', page.content, 'w');
	        } catch (err) {
	            console.log('Could not write 1.html: ' + err);
	            exitCode = 1;
	        }
	        phantom.exit(exitCode);
	    }, 2500);
	}

	// Load the search page; once the load callback fires, start scrolling and
	// arm the delayed dump-and-exit.
	page.open(url, function (status) {
	    // Do not ignore the load result silently. The run still continues on a
	    // non-success status so that 1.html is always (re)written, as before.
	    if (status !== 'success') {
	        console.log('Warning: loading ' + url + ' finished with status "' + status + '"');
	    }
	    sc();
	    just_wait();
	});


	library(plyr)
	library(reshape2)
	require(rvest)


	# Render the Google Images result page for `searchTerm` with PhantomJS and
	# collect the `src` attribute of every <img> element in the rendered markup.
	#
	# searchTerm: query text pasted verbatim into the URL, so it must already be
	#             URL-encoded by the caller (e.g. "Adidas+logo").
	# Returns:    data.frame with one row per <img>; column `images` holds the
	#             src value, column `search` repeats `searchTerm`.
	#
	# Side effects: rewrites line 1 of "imageScrape.js" in the working directory,
	# deletes any existing "1.html", runs `phantomjs`, and reads the new "1.html".
	# Stops with an error when PhantomJS fails or produces no "1.html".
	scrapeJSSite <- function(searchTerm){
	  url <- paste0("https://www.google.de/search?q=",searchTerm, "&source=lnms&tbm=isch&sa=X")

	  # The first line of the script is replaced with the URL assignment.
	  lines <- readLines("imageScrape.js")
	  lines[1] <- paste0("var url ='", url ,"';")
	  writeLines(lines, "imageScrape.js")

	  # Drop output left over from an earlier run so that a failed render cannot
	  # be mistaken for fresh results.
	  if (file.exists("1.html")) {
	    file.remove("1.html")
	  }

	  ## Download website
	  status <- system("phantomjs imageScrape.js")
	  if (status != 0 || !file.exists("1.html")) {
	    stop("phantomjs did not produce 1.html (exit status ", status, ")")
	  }

	  pg <- read_html("1.html")
	  files <- pg %>% html_nodes("img") %>% html_attr("src")
	  # rep() keeps both columns the same length even when no <img> was found;
	  # a bare scalar makes data.frame() fail on a zero-row result.
	  df <- data.frame(images=files, search=rep(searchTerm, length(files)))
	  return(df)
	}


	# Download every URL in `files` into the directory `outPath`.
	#
	# files:   character vector of image URLs.
	# brand:   prefix used for the generated file names.
	# outPath: target directory; created (including parents) when missing.
	#
	# Each download is stored in binary mode as "<outPath>/<brand>_<i>.jpg",
	# where <i> is the position of the URL within `files`. Returns NULL invisibly.
	downloadImages <- function(files, brand, outPath="images"){
	  # download.file() cannot write into a directory that does not exist.
	  dir.create(outPath, recursive = TRUE, showWarnings = FALSE)

	  # seq_along() gives an empty sequence for empty input, whereas
	  # 1:length(files) would count 1, 0 and try to download files[1].
	  for(i in seq_along(files)){
	    download.file(files[i], destfile = paste0(outPath, "/", brand, "_", i, ".jpg"), mode = 'wb')
	  }

	  invisible(NULL)
	}

	### exchange the search terms here!
	# `brand` doubles as the file-name prefix for the downloaded images; the
	# original call passed an undefined variable `i` in its place.
	brand <- "Adidas"
	gg <- scrapeJSSite(searchTerm = paste0(brand, "+logo"))
	downloadImages(as.character(gg$images), brand)