HiroshiMatsumoto/021.py

## 021.py
#! /usr/bin/env python
# -*- python -*-
# -*- encoding: utf-8 -*-

import sys

def split_on_periods(text):
    """Split *text* into sentences, treating every period as a boundary.

    Each fragment between periods has its leading and trailing whitespace
    removed and gets its period put back.  Fragments that are empty after
    stripping (for example whatever follows the final period) are dropped,
    so no line consisting of a lone "." is produced.

    Returns a list of sentences, one string per sentence.
    """
    fragments = (piece.strip() for piece in text.split("."))
    return [piece + "." for piece in fragments if piece]


if __name__ == "__main__":
    # Read all of standard input, not only its first line.
    for sentence in split_on_periods(sys.stdin.read()):
        print(sentence)

## 022.py
#! /usr/bin/env python
# -*- python -*-
# -*- encoding: utf-8 -*-
# Task:標準入力から英語のテキストを読み込み，ピリオド→スペース→大文字を文の区切りと見なし，１行１文の形式で標準出力に書き出せ．

import sys
import re

# A sentence boundary is whitespace that sits between a period and an
# uppercase letter.  The lookbehind/lookahead keep the period and the capital
# letter in the sentences they belong to; only the whitespace is consumed.
SENTENCE_BOUNDARY = re.compile(r"(?<=\.)\s+(?=[A-Z])")


def split_sentences(text):
    """Split English *text* into sentences.

    A boundary is a period followed by whitespace followed by an uppercase
    letter, so "3.5" or "e.g. this" do not end a sentence.

    Returns a list of sentences; empty input yields an empty list.
    """
    return [s for s in SENTENCE_BOUNDARY.split(text.strip()) if s]


if __name__ == "__main__":
    # One sentence per output line.
    for sentence in split_sentences(sys.stdin.read()):
        print(sentence)

## 024.py
#! /usr/bin/env python
# -*- python -*-
# -*- encoding: utf-8 -*-
import re

#f=open("test2.txt","r")
# Token pattern, compiled once for the whole run.  After optional whitespace
# and an optional comma it captures either a single bracket character
# ( ) < > { } or a run that starts with any character other than '"', '.' or
# newline and continues with word characters, apostrophes or hyphens; one
# trailing whitespace, period or comma is consumed but not captured.
TOKEN_PATTERN = re.compile("\s*,?([\(\)\<\>\{\}]|[^\".\n][\w'-]*)[\s\.,]?")

# Read the corpus line by line and emit one token per line, both to stdout
# and to 024out.txt.  The `with` block closes both files even on error.
with open("j98_1002.txt", "r") as f, open("024out.txt", "w") as w:
    for line in f:
        line = line.strip("\n")
        for token in TOKEN_PATTERN.findall(line):
            print(token)
            w.write(token + "\n")
        # An empty line marks the end of each input line.
        print("")
        w.write("\n")

## 025.py
#!/usr/bin/env python
import re

# For every word in 024out.txt emit "word<TAB>lowercased word", both to
# stdout and to 025out.txt.  The `with` block closes both files even on error.
with open("024out.txt", "r") as fopen, open("025out.txt", "w") as fwrite:
    for word in fopen:
        word = word.strip("\n")
        # Skip separator lines.  The previous pattern "^\s+\n$" needed at
        # least one whitespace character before the newline, so a completely
        # empty line slipped through and produced a bare "\t" record.
        if not word.strip():
            continue
        pair = word + "\t" + word.lower()
        print(pair)
        fwrite.write(pair + "\n")


## 026.py
#!/usr/bin/env python
import re
#inFile = open("025out.txt","r")
# Collect the distinct lowercased words: the field after the last tab of
# every "original<TAB>lowercased" line.
setWord = set()
with open("test2.txt", "r") as inFile:
    for Word in inFile:
        original, tab, loweredWord = Word.rstrip("\n").rpartition("\t")
        if not tab:
            # No tab on this line, so it is not a word pair; skip it instead
            # of crashing on a failed match.
            continue
        setWord.add(loweredWord)

# What is left of each word once the suffix is removed.
listSuffix_ly = set(w[:-2] for w in setWord if w.endswith("ly"))
listSuffix_ness = set(w[:-4] for w in setWord if w.endswith("ness"))

# Report every stem that occurs with both "-ly" and "-ness".  The stems are
# sorted so the output order is reproducible, and they are also written to
# 026out.txt, which was previously opened but left empty.
with open("026out.txt", "w") as outFile:
    for base in sorted(listSuffix_ly & listSuffix_ness):
        print(base)
        outFile.write(base + "\n")


## 027.py
#!/usr/bin/env python
#-*- coding: utf-8 -*-

from f010 import RankList

# Print the entries of RankList("025out.txt") in ascending order of their
# value, as "key:value", and store the same lines in 027out.txt (the file was
# previously opened but never written or closed).
# NOTE(review): RankList comes from f010, which is not visible here;
# presumably it maps each word to its frequency -- confirm against f010.
with open("027out.txt", "w") as outFile:
    RankedList = RankList("025out.txt")
    #ref: http://blog.livedoor.jp/yawamen/archives/51492355.html
    for key, value in sorted(RankedList.items(), key=lambda x: x[1]):
        entry = "%s:%d" % (key, value)
        print(entry)
        outFile.write(entry + "\n")

## 028.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import re

def Ngram(Content, N):
    """Count the character n-grams occurring in the lines of *Content*.

    Args:
        Content: iterable of strings, e.g. an open text file.  A trailing
            newline on a line is treated as a terminator and never becomes
            part of an n-gram.
        N: number of characters per n-gram.

    Returns:
        A dict mapping each n-gram (line[i:i+N]) to the number of times it
        occurs; the first occurrence counts as 1.
    """
    FreqNgramList = {}
    for line in Content:
        if line.endswith("\n"):
            line = line[:-1]
        # Every window of N characters, including the one ending on the last
        # character of a line that has no trailing newline.
        for i in range(len(line) - N + 1):
            word = line[i:i + N]
            FreqNgramList[word] = FreqNgramList.get(word, 0) + 1
    return FreqNgramList

# Count character bigrams in 025out.txt and print them as "bigram:count",
# least frequent first.
N = 2
# The `with` block closes the input file even if Ngram raises.
with open("025out.txt", "r") as Content:
    FreqBigramList = Ngram(Content, N)
for key, value in sorted(FreqBigramList.items(), key=lambda x: x[1]):
    print(r"%s:%d" % (key, value))

## 029.py
#! /usr/bin/env python
#-*- coding:utf-8 -*-

from stemming.porter2 import stem
# Show what the porter2 stemmer returns for a sample word.
stemmed = stem("factionally")
print(stemmed)

## 030.py
#! /usr/bin/env python
#-*- coding:utf-8 -*-

from stemming.porter2 import stem
import re
# A usable record is exactly two \w-only fields separated by one tab.
PAIR_PATTERN = re.compile("^(\w*)\t(\w*)$")

# Append the stem of the lowercased word to every usable record of
# 025out.txt and write "original<TAB>lowercased<TAB>stem" to 030out.txt.
# The `with` block closes both files even if stemming raises.
with open("025out.txt", "r") as inFile, open("030out.txt", "w") as outFile:
    for Line in inFile:
        Words = PAIR_PATTERN.match(Line.strip("\n"))
        # Records that do not fit the pattern (e.g. words containing
        # punctuation) are skipped.
        if Words:
            original = Words.group(1)
            lowered = Words.group(2)
            outFile.write(original + "\t" + lowered + "\t" + stem(lowered) + "\n")
	#! /usr/bin/env python
	# -*- python -*-
	# -*- encoding: utf-8 -*-

	import sys

	def split_on_periods(text):
		"""Split *text* into sentences, treating every period as a boundary.

		Each fragment between periods has its leading and trailing whitespace
		removed and gets its period put back.  Fragments that are empty after
		stripping (for example whatever follows the final period) are dropped,
		so no line consisting of a lone "." is produced.

		Returns a list of sentences, one string per sentence.
		"""
		fragments = (piece.strip() for piece in text.split("."))
		return [piece + "." for piece in fragments if piece]


	if __name__ == "__main__":
		# Read all of standard input, not only its first line.
		for sentence in split_on_periods(sys.stdin.read()):
			print(sentence)
	#!/usr/bin/env python
	import re

	# For every word in 024out.txt emit "word<TAB>lowercased word", both to
	# stdout and to 025out.txt.  The loop body is nested again (the paste had
	# flattened it) and the `with` block closes both files even on error.
	with open("024out.txt", "r") as fopen, open("025out.txt", "w") as fwrite:
		for word in fopen:
			word = word.strip("\n")
			# Skip separator lines.  The previous pattern "^\s+\n$" needed at
			# least one whitespace character before the newline, so a
			# completely empty line slipped through as a bare "\t" record.
			if not word.strip():
				continue
			pair = word + "\t" + word.lower()
			print(pair)
			fwrite.write(pair + "\n")
	#!/usr/bin/env python
	import re
	#inFile = open("025out.txt","r")
	# Collect the distinct lowercased words: the field after the last tab of
	# every "original<TAB>lowercased" line.  Splitting on the tab replaces
	# the pattern "^(.)\t(.)$", which could only match one-character fields
	# and made every other line crash on a failed match.
	setWord = set()
	with open("test2.txt", "r") as inFile:
		for Word in inFile:
			original, tab, loweredWord = Word.rstrip("\n").rpartition("\t")
			if not tab:
				# No tab on this line, so it is not a word pair; skip it.
				continue
			setWord.add(loweredWord)

	# What is left of each word once the suffix is removed.
	listSuffix_ly = set(w[:-2] for w in setWord if w.endswith("ly"))
	listSuffix_ness = set(w[:-4] for w in setWord if w.endswith("ness"))

	# Report every stem that occurs with both "-ly" and "-ness".  The stems
	# are sorted so the output order is reproducible, and they are also
	# written to 026out.txt, which was previously opened but left empty.
	with open("026out.txt", "w") as outFile:
		for base in sorted(listSuffix_ly & listSuffix_ness):
			print(base)
			outFile.write(base + "\n")
	#!/usr/bin/env python
	#-*- coding: utf-8 -*-

	from f010 import RankList

	# Print the entries of RankList("025out.txt") in ascending order of their
	# value, as "key:value", and store the same lines in 027out.txt (the file
	# was previously opened but never written or closed).
	# NOTE(review): RankList comes from f010, which is not visible here;
	# presumably it maps each word to its frequency -- confirm against f010.
	with open("027out.txt", "w") as outFile:
		RankedList = RankList("025out.txt")
		#ref: http://blog.livedoor.jp/yawamen/archives/51492355.html
		for key, value in sorted(RankedList.items(), key=lambda x: x[1]):
			entry = "%s:%d" % (key, value)
			print(entry)
			outFile.write(entry + "\n")
	#! /usr/bin/env python
	# -*- coding: utf-8 -*-
	import re

	def Ngram(Content, N):
		"""Count the character n-grams occurring in the lines of *Content*.

		Args:
			Content: iterable of strings, e.g. an open text file.  A trailing
				newline on a line is treated as a terminator and never
				becomes part of an n-gram.
			N: number of characters per n-gram.

		Returns:
			A dict mapping each n-gram (line[i:i+N]) to the number of times
			it occurs; the first occurrence counts as 1.
		"""
		FreqNgramList = {}
		for line in Content:
			if line.endswith("\n"):
				line = line[:-1]
			# Every window of N characters, including the one ending on the
			# last character of a line that has no trailing newline.
			for i in range(len(line) - N + 1):
				word = line[i:i + N]
				FreqNgramList[word] = FreqNgramList.get(word, 0) + 1
		return FreqNgramList

	# Count character bigrams in 025out.txt and print them as "bigram:count",
	# least frequent first.
	N = 2
	# The `with` block closes the input file even if Ngram raises.
	with open("025out.txt", "r") as Content:
		FreqBigramList = Ngram(Content, N)
	for key, value in sorted(FreqBigramList.items(), key=lambda x: x[1]):
		print(r"%s:%d" % (key, value))
	#! /usr/bin/env python
	#-*- coding:utf-8 -*-

	from stemming.porter2 import stem
	# Show what the porter2 stemmer returns for a sample word.
	stemmed = stem("factionally")
	print(stemmed)