YouTube caption scraper: finds trending US channels whose videos have English captions
import httplib2
import sys
import urllib2
# 1. To set this up, you need Python 2.7 and pip.
# 2. Run "pip install --upgrade google-api-python-client" to get the dependencies.
# 3. Go to console.cloud.google.com and create a new project.
# 4. Go to "APIs & Services" and generate an OAuth client ID. Download the JSON
#    it gives you and place it in the same folder as this script, named
#    "client_secrets.json" (a sketch of its expected layout follows these steps).
# 5. Go to https://www.googleapis.com/discovery/v1/apis/youtube/v3/rest and save the page as a JSON file.
# 6. Rename that file "youtube-v3-api-captions.json" and put it in the same folder as this script.
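# For reference, the client_secrets.json that the Cloud Console hands out for
# an installed application should look roughly like the sketch below; the
# values here are placeholders, not real credentials.
#
# {
#   "installed": {
#     "client_id": "YOUR_CLIENT_ID.apps.googleusercontent.com",
#     "client_secret": "YOUR_CLIENT_SECRET",
#     "auth_uri": "https://accounts.google.com/o/oauth2/auth",
#     "token_uri": "https://accounts.google.com/o/oauth2/token",
#     "redirect_uris": ["urn:ietf:wg:oauth:2.0:oob", "http://localhost"]
#   }
# }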
from apiclient.discovery import build_from_document
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import argparser, run_flow

CLIENT_SECRETS_FILE = "client_secrets.json"
YOUTUBE_READ_WRITE_SSL_SCOPE = "https://www.googleapis.com/auth/youtube.force-ssl"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
def get_authenticated_service(args):
    # Run the OAuth flow (or reuse cached credentials) and build an API client
    # from the locally saved discovery document.
    flow = flow_from_clientsecrets(
        CLIENT_SECRETS_FILE, scope=YOUTUBE_READ_WRITE_SSL_SCOPE)
    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)
    with open("youtube-v3-api-captions.json", "r") as f:
        doc = f.read()
    return build_from_document(
        doc, http=credentials.authorize(httplib2.Http()))
def remove_empty_kwargs(**kwargs):
    # Drop falsy values so optional API parameters aren't sent as empty strings.
    return {key: value for key, value in kwargs.iteritems() if value}
def video_categories_list(client, **kwargs):
    # Return (id, title) pairs for every video category in the region.
    kwargs = remove_empty_kwargs(**kwargs)
    response = client.videoCategories().list(**kwargs).execute()
    return [(i["id"], i["snippet"]["title"]) for i in response["items"]]
def videos_list_most_popular(client, **kwargs):
    # Return (videoId, channelId) pairs from the most-popular chart.
    kwargs = remove_empty_kwargs(**kwargs)
    response = client.videos().list(**kwargs).execute()
    return [(i["id"], i["snippet"]["channelId"]) for i in response["items"]]
def search(client, **kwargs):
    # Return the video ids from a search, skipping channel and playlist hits.
    kwargs = remove_empty_kwargs(**kwargs)
    response = client.search().list(**kwargs).execute()
    return [i["id"]["videoId"] for i in response["items"]
            if i["id"]["kind"] == "youtube#video"]
def check_captions(vidId):
    # The timedtext endpoint returns an XML transcript when English captions
    # exist and an empty body when they don't, so a non-empty response means
    # the video is captioned.
    site = "http://video.google.com/timedtext?lang=en&v="
    response = urllib2.urlopen(site + vidId)
    return len(response.read()) > 0
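# A quick sanity check of the endpoint above; the video id is a hypothetical
# placeholder, so swap in a real one before uncommenting.
#
#     print check_captions("SOME_VIDEO_ID")  # True iff English captions exist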
def get_trending_vids(client, thoroughness=10):
    # For each US video category (plus the uncategorized chart), pull the top
    # videos and record the channels of the ones that have English captions.
    categories = video_categories_list(client, part='snippet', regionCode='US')
    results = []
    for cat in categories + [('', "no category")]:
        try:
            top_results = videos_list_most_popular(
                client,
                part='snippet',
                chart='mostPopular',
                regionCode='US',
                videoCategoryId=cat[0],
                maxResults=thoroughness)
        except HttpError:
            print "Couldn't get videos in category:", cat[1]
            continue
        hits = 0
        for vid in top_results:
            if check_captions(vid[0]):
                results.append(vid[1])
                hits += 1
        print hits, '/', len(top_results), cat[1]
    with open("potential.txt", 'w') as f:
        for i in results:
            f.write(i + '\n')
def filter_channels(client, potential, thoroughness=10, threshold=.5):
    # Keep only the channels whose recent uploads have captions at least
    # `threshold` of the time.
    good_channels = []
    for channel in potential:
        vids = search(
            client,
            part='snippet',
            maxResults=thoroughness,
            channelId=channel.strip(),
            order="date")
        if not vids:
            continue
        hits = [check_captions(i) for i in vids].count(True)
        print hits, '/', len(vids)
        if float(hits) / len(vids) >= threshold:
            good_channels.append(channel)
    print len(good_channels), "of those had a good amount of captions"
    with open("results.txt", 'a') as f:
        for i in good_channels:
            f.write(i)
youtube = get_authenticated_service(argparser.parse_args())
get_trending_vids(youtube, 10)
with open("potential.txt") as f:
    potential = set(f.readlines())
print "found", len(potential), "potential channels..."
filter_channels(youtube, potential, 10, .5)
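# Usage sketch: running the script kicks off the oauth2client flow, which
# opens a browser window the first time and caches credentials next to the
# script; the filename below is just whatever you saved this gist as.
#
#     python scraper.py
#
# Channels whose trending videos had captions land in potential.txt, and the
# filtered survivors are appended to results.txt.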