Skip to content

Instantly share code, notes, and snippets.

@esehara
Created April 18, 2011 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save esehara/925422 to your computer and use it in GitHub Desktop.
Save esehara/925422 to your computer and use it in GitHub Desktop.
Test pdf => text
#-*- coding:utf-8 -*-
import re
class textshredder:
def fileopen(self,fileopen):
self.textdata = open(fileopen,"r").read()
self.textsplit("hoge@gmail.com")
def textsplit(self,email):
self.textpage = []
self.textpage = re.split("This PDF is prepared for hoge@gmail.com, personal use only.\n",self.textdata)
enter_search = re.compile(r'([一-龠]|[ぁ-ん]|[ァ-ヴー])\n([一-龠]|[ぁ-ん]|[ァ-ヴー])')
self.textdata = enter_search.sub(r'',self.textdata)
print self.textdata
if __name__ == "__main__":
    # Script entry point: process the sample file "javascript.txt" from the
    # current working directory.  fileopen() prints the processed text as a
    # side effect.
    shredder = textshredder()
    text = shredder.fileopen("javascript.txt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment