Created
June 2, 2014 14:34
-
-
Save alangampel/7a46df8cb92dfe821b8e to your computer and use it in GitHub Desktop.
Third program: tokenizes text from a URL and then searches Google for each sliding window of X consecutive words.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib
import urllib.request
import urllib.parse
import json

# Base endpoint of the (legacy) Google AJAX Web Search API; the urlencoded
# query string is appended after the trailing '&'.
BASE_URL = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&"


def fetch_text(url_str):
    """Download the document at *url_str* and return it decoded as UTF-8 text."""
    response = urllib.request.urlopen(url_str).read()
    return response.decode("utf-8")


def build_phrase(tokens, start, count):
    """Return a search phrase of *count* consecutive tokens starting at *start*.

    Each token is followed by a single space (including the last one), matching
    the original concatenation format. Returns '' if the slice is empty.
    """
    return ''.join(token + ' ' for token in tokens[start:start + count])


def print_results(results):
    """Print each search result as 'JSON result  N : title; url'."""
    for indx, result in enumerate(results, start=1):
        print("JSON result ", indx, ':')
        print(result['title'] + '; ' + result['url'])
        print('')


def main():
    """Tokenize a remote text file and Google-search it in sliding word windows."""
    print('\n')
    print('----------------------------------')
    print('Searching for internet matches')
    print('----------------------------------')
    # open file with greek content
    print('\n')
    url_str = input('File URL? ')
    text_buf = fetch_text(url_str)
    # tokenize (split on any whitespace)
    tokens = text_buf.split()
    print('\n')
    print('There are ' + str(len(tokens)) + ' tokens in the file')
    print('All tokens from file:\n')
    print(tokens)
    print('\n')
    # number of consecutive words per search phrase
    token_count = int(input('How many words in search phrase? '))
    print('\n')
    # BUG FIX: the original loop used '<' against len(tokens) - token_count,
    # which skipped the final valid phrase; range(... + 1) includes it.
    for token_idx in range(len(tokens) - token_count + 1):
        # delineate new query
        print('-----------------------------------------------------------')
        print("Search number " + str(token_idx + 1))
        # concatenate the search string contents, i.e. the token_count
        # words from the file
        content_str = build_phrase(tokens, token_idx, token_count)
        print("Content string is:")
        print(content_str)
        print('')
        # build the URL; BUG FIX: the original rebound the base url to each
        # result's url inside the results loop, corrupting every query after
        # the first successful one — a module-level constant cannot be clobbered.
        query = urllib.parse.urlencode({'q': content_str})
        print("URL query is: ")
        print('')
        print(BASE_URL + query)
        print('')
        # get the response from google
        response = urllib.request.urlopen(BASE_URL + query).read()
        json_data = json.loads(response.decode("utf-8"))
        print("JSON data: ")
        print(json_data)
        # responseData is JSON null when the API returned nothing usable
        if json_data['responseData'] is None:
            print("No data for this query")
            continue
        # format the response and print it
        print_results(json_data['responseData']['results'])
        print('\n')


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment