Last active
August 29, 2015 14:10
-
-
Save FGtatsuro/5d206af15c90900ba0be to your computer and use it in GitHub Desktop.
Python3で文字列を処理する際の心掛け ref: http://qiita.com/FGtatsuro/items/f45c349e06d6df95839b
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(py3.4)~ » ipython | |
(省略) | |
>>> b'a' # バイト列 | |
Out[1]: b'a' | |
# 非ASCII文字を含む場合、リテラル表記が使えない | |
# 文字列を特定のエンコード方式でエンコードする必要がある | |
>>> b'あ' | |
File "<ipython-input-2-c12eb8e58bcd>", line 1 | |
b'あ' | |
^ | |
SyntaxError: bytes can only contain ASCII literal characters. | |
>>> 'あ'.encode('utf-8') # 文字列->バイト列(エンコード) | |
Out[3]: b'\xe3\x81\x82' | |
>>> 'あ' # 文字列 | |
Out[4]: 'あ' | |
>>> b'\xe3\x81\x82'.decode('utf-8') # バイト列->文字列(デコード) | |
Out[5]: 'あ' | |
# Python2(再掲) | |
(py2.7)~ » ipython | |
(省略) | |
>>> 'あ' # バイト文字列 | |
Out[1]: '\xe3\x81\x82' | |
>>> u'あ' # ユニコード文字列 | |
Out[2]: u'\u3042' | |
>>> 'あ'.decode('utf-8') (or unicode('あ', 'utf-8')) # バイト文字列->ユニコード文字列(=デコード) | |
Out[3]: u'\u3042' | |
>>> u'あ'.encode('utf-8') # ユニコード文字列->バイト文字列(=エンコード) | |
Out[4]: '\xe3\x81\x82' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> type(b'a') | |
Out[6]: bytes # ≒ Python2のstr型 | |
>>> type('a') | |
Out[7]: str # ≒ Python2のunicode型 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> s = 'str' # 文字列 | |
>>> b = b'byte' # バイト列 | |
>>> s + b # 文字列 + バイト列はエラー | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-20-5fe2240a1b50> in <module>() | |
----> 1 s + b | |
TypeError: Can't convert 'bytes' object to str implicitly | |
>>> s.find('t') # 文字列はfindメソッドをサポートしている | |
Out[11]: 1 | |
>>> b.find('y') # バイト列のfindメソッドに文字列は渡せない (引数もバイト列にすれば動作する: b.find(b'y'))。 | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-24-e1b070a5aaba> in <module>() | |
----> 1 b.find('y') | |
TypeError: Type str doesn't support the buffer API |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(py3.4)~ » cat test.py | |
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
print('あ' + 'いう') | |
# ターミナルで実行 (標準入出力をターミナルに接続している) | |
(py3.4)~ » python test.py | |
あいう | |
# ファイルにリダイレクト (標準出力をターミナル以外に接続している) | |
(py3.4)~ » python test.py > test.txt | |
(py3.4)~ » cat test.txt | |
あいう |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(py3.4)~ » cat test.py | |
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import sys | |
#print('あ' + 'いう') # printはsys.stdoutに書き込む | |
sys.stdout.write('あ' + 'いう' + '\n') # sys.stdoutに文字列を書き込む | |
sys.stdout.buffer.write(('あ' + 'いう' + '\n').encode('utf-8')) # sys.stdout.bufferにバイト列を書き込む | |
# ターミナルで実行 | |
(py3.4)~ » python test.py | |
あいう | |
あいう | |
# ファイルにリダイレクト | |
(py3.4)~ » python test.py > test.txt | |
(py3.4)~ » cat test.txt | |
あいう | |
あいう |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> import sys | |
# テキストストリーム (ref. https://docs.python.org/3/library/io.html#io.TextIOWrapper) | |
>>> type(sys.stdout) | |
Out[2]: _io.TextIOWrapper | |
# バイトストリーム (ref. https://docs.python.org/3/library/io.html#io.BufferedWriter) | |
>>> type(sys.stdout.buffer) | |
Out[3]: _io.BufferedWriter | |
# テキストストリームにバイト列は書き込めない | |
>>> sys.stdout.write('a'.encode('utf-8')) | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-4-581ae8b6af82> in <module>() | |
----> 1 sys.stdout.write('a'.encode('utf-8')) | |
TypeError: must be str, not bytes | |
# バイトストリームに文字列は書き込めない | |
>>> sys.stdout.buffer.write('a') | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-5-42da1d141b96> in <module>() | |
----> 1 sys.stdout.buffer.write('a') | |
TypeError: 'str' does not support the buffer interface | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment