Created
May 31, 2023 06:34
-
-
Save TomoG29/27e7f6826e807ec6a07527fb0e950fd6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Sample HTML fragment that every example below searches.
text = '<h2 class="test">sample text for python</h2>'

# ======================================================
# ====================== 1 =============================
# ======================================================
# Search for the literal string "<h2" and print the whole match.
result1 = re.search(r"<h2", text)
print(result1.group(0))
# Output: <h2

# ======================================================
# ====================== 2 =============================
# ======================================================
# Search for everything enclosed in double quotes.
# group(0) is the whole match (quotes included); group(1) is the captured
# content only. ".+" is greedy, so with several quoted values on one line
# the match would run from the first quote to the last one.
result2 = re.search(r'"(.+)"', text)
print(result2.group(0))
print(result2.group(1))
# Output: "test"
#         test

# ======================================================
# ====================== 3 =============================
# ======================================================
# Match "<h2", then an optional capturing group of zero or more characters
# other than ">", then the closing ">".
# group(1) holds the attribute part, including its leading space.
result3 = re.search(r'<h2([^>]*)?>', text)
print(result3.group(0))
print(result3.group(1))
# Output: <h2 class="test">
#          class="test"

# ======================================================
# ====================== 4 =============================
# ======================================================
# Same pattern as example 3, but the optional group is non-capturing
# ("(?:...)"), so the match object has no group 1.
result4 = re.search(r'<h2(?:[^>]*)?>', text)
print(result4.group(0))
# Asking for a group the pattern does not define raises IndexError.
# Catch it so the demonstration is visible without aborting the script
# before examples 5 and 6 run.
try:
    print(result4.group(1))
except IndexError as err:
    print(f"IndexError: {err}")
# Output: <h2 class="test">
#         IndexError: no such group

# ======================================================
# ====================== 5 =============================
# ======================================================
# Match <h2 ...>...</h2>.
# The attribute part is non-capturing; the element content is captured,
# so it becomes group(1).
result5 = re.search(r'<h2(?:[^>]*)?>(.+)</h2>', text)
print(result5.group(1))
# Output: sample text for python

# ======================================================
# ====================== 6 =============================
# ======================================================
# Match <hN attribute="value">content</hN>.
# Captures: (1) heading level digit, (2) attribute name,
# (3) attribute value, (4) element content.
# The attribute is optional as a whole; the digit in the closing tag is
# matched by a non-capturing group and is not required to equal group 1.
result6 = re.search(r'<h(\d)(?:\s+(\w+)=\"(\w+)\")?>(.+)</h(?:\d)>', text)
print(result6.group(1))
print(result6.group(2))
print(result6.group(3))
print(result6.group(4))
# Output: 2
#         class
#         test
#         sample text for python
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment