argonism/IIR_1_1.3.py

## IIR_1_1.3.py
# p: posting list
# p must be like [docID_1, docID_2, docID_3, ...]
def intersect(p1, p2):
  answer = []
  p1_idx = 0
  p2_idx = 0
  while len(p1) > p1_idx and len(p2) > p2_idx:
    if p1[p1_idx] == p2[p2_idx]:
      answer.append(p1[p1_idx])
      p1_idx += 1
      p2_idx += 1
    else:
      if p1[p1_idx] < p2[p2_idx]:
        p1_idx += 1
      else:
        p2_idx += 1
  return answer

reverse_index = {
  'python': {3: [1, 2, 3]},
  'ruby': {6: [1, 3, 4, 6, 7, 11]},
  'perl': {5: [1, 4, 5, 6, 12]}
}

def intersect2(terms):
  # 文書頻度で昇順ソート
  def sort_by_increasing_frequency(terms):
    sorted_terms = sorted(terms, key=lambda term: list(reverse_index[term].keys())[0])
    return sorted_terms
  # 用語のポスティングリストを返す
  def postings(term):
    return list(reverse_index[term].values())[0]
  # 最初の要素
  def first(terms):
    return terms[0]
  # 最初以外の要素
  def rest(terms):
    return terms[1:]

  terms = sort_by_increasing_frequency(terms)
  result = postings(first(terms))
  terms = rest(terms)
  while not terms == [] and not result == []:
    result = intersect(result, postings(first(terms)))
    terms = rest(terms)
  return result


if __name__ == "__main__":
  test_p1 = [1, 2, 3, 6, 7, 20, 44, 32, 59]
  test_p2 = [1, 2, 5, 7, 8, 10, 20, 45, 59]

  answer = intersect(test_p1, test_p2)
  print(answer)
  #
  # expected to be [1, 2, 7, 20, 59]
  #

  terms = ['python', 'ruby', 'perl']
  match_postings = intersect2(terms)
  print(match_postings)
  #
  # expected to be [1]
  #
	# p: posting list
	# p must be like [docID_1, docID_2, docID_3, ...]
	def intersect(p1, p2):
	answer = []
	p1_idx = 0
	p2_idx = 0
	while len(p1) > p1_idx and len(p2) > p2_idx:
	if p1[p1_idx] == p2[p2_idx]:
	answer.append(p1[p1_idx])
	p1_idx += 1
	p2_idx += 1
	else:
	if p1[p1_idx] < p2[p2_idx]:
	p1_idx += 1
	else:
	p2_idx += 1
	return answer

	reverse_index = {
	'python': {3: [1, 2, 3]},
	'ruby': {6: [1, 3, 4, 6, 7, 11]},
	'perl': {5: [1, 4, 5, 6, 12]}
	}

	def intersect2(terms):
	# 文書頻度で昇順ソート
	def sort_by_increasing_frequency(terms):
	sorted_terms = sorted(terms, key=lambda term: list(reverse_index[term].keys())[0])
	return sorted_terms
	# 用語のポスティングリストを返す
	def postings(term):
	return list(reverse_index[term].values())[0]
	# 最初の要素
	def first(terms):
	return terms[0]
	# 最初以外の要素
	def rest(terms):
	return terms[1:]

	terms = sort_by_increasing_frequency(terms)
	result = postings(first(terms))
	terms = rest(terms)
	while not terms == [] and not result == []:
	result = intersect(result, postings(first(terms)))
	terms = rest(terms)
	return result


	if __name__ == "__main__":
	test_p1 = [1, 2, 3, 6, 7, 20, 44, 32, 59]
	test_p2 = [1, 2, 5, 7, 8, 10, 20, 45, 59]

	answer = intersect(test_p1, test_p2)
	print(answer)
	#
	# expected to be [1, 2, 7, 20, 59]
	#

	terms = ['python', 'ruby', 'perl']
	match_postings = intersect2(terms)
	print(match_postings)
	#
	# expected to be [1]
	#