Created
August 25, 2021 19:02
-
-
Save hodunov/00d98e23c8eeb6d466e5fa0d969a4220 to your computer and use it in GitHub Desktop.
Regular Expression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walkthrough of the `re` module: each section applies one function or
# pattern technique to a small sample string. The print calls are left
# commented out so individual examples can be enabled one at a time.
import re

# --- match / search / findall ---------------------------------------------
text = "платеж номер платеж"
# re.match only tries the pattern at the start of the string; it succeeds
# here because the text begins with the word being looked for.
result = re.match(r"платеж", text)
# print(result.group(0))
# re.search scans the whole string, so it is the right tool when the word
# may appear anywhere.
result = re.search(r"номер", text)
# print(result.group(0))
# re.findall returns every non-overlapping occurrence as a list.
result = re.findall(r"платеж", text)
# print(result)

# --- sub: replace one word with another -----------------------------------
text = "За прошлый месяц Полина заработала 500$"
result = re.sub("заработала", "потратила", text)
# print(result)

# --- surname followed by two initials -------------------------------------
text = "Должник Иванов М.Г. должен 3000 Г.А. 200$ когда занял 20.01.2021"
# pattern = r'\w+\s'
# name = re.findall(pattern, text)
# # print(name)
# A Cyrillic word, one whitespace character, then two single Cyrillic
# letters each followed by a dot. "3000 Г.А." is skipped because the word
# before the initials must consist of Cyrillic letters.
name = re.findall(r"[А-ЯЁа-яё]+\s[А-ЯЁа-яё]{1}\.[А-ЯЁа-яё]{1}\.", text)
# print(name)

# --- capture group with a bounded gap -------------------------------------
next_text = """
Иванов Г.А. 200$ занял ровно 10 месяцев назад и 2 дня
Петров Г.А. 100$ занял ровно 5 месяцев назад
"""
# Group 1 is whatever sits between "ровно" and "назад"; after that at most
# 10 arbitrary characters may precede a word starting with "д". "." does not
# match a newline, so a match never spans two lines of next_text.
pattern = r"ровно(.+)назад.{,10}дн*\w+"
date = re.search(pattern, next_text)
# print(date[0])

# --- e-mail split into local part and domain (disabled example) -----------
# txt_email = "не звони мне, лучше напиши на почту admin@example.com"
#
# pattern = r'([a-z0-9._-]+)@([a-z0-9._-]+\.[a-z]{2,})'
# email = re.findall(pattern, txt_email)
# print(email)

# --- amounts written as digits followed by "$" (disabled example) ---------
# txt_with_money = 'Петров Г.А. 100$ занял ровно 500 месяцев назад $500 400$'
# pattern = r'[0-9]+[$]{1}'
# print(re.findall(pattern, txt_with_money))

# --- dates with an ordinal suffix -----------------------------------------
authors = """
1st January 1919 - J. D. Salinger's birthday, author of Catcher in the Rye
2nd February 1882 - James Joyce's birthday, author of Ulysses
3rd January 1892 - J. R. R. Tolkien's birthday, author of The Hobbit & The Lord of the Rings
4th January 1965 - T. S. Eliot's death, author of The Wast Land
"""
# Day number + st/nd/rd/th, a month word, then a four-digit year.
pattern = r"[0-9]+(?:st|[nr]d|th)\s[A-Za-z]+\s[0-9]{4}"
# print(re.findall(pattern, authors))

# --- permissive e-mail pattern --------------------------------------------
text = "admin@example.com - this is my email!"
# Anything without "@" or whitespace, an "@", then a dotted domain.
pattern = r"[^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+"
# print(re.findall(pattern, text))

# --- phone number validation ----------------------------------------------
list_of_phone_numbers = ["380953333322", "38-095-333-33-22", "38-050-333-xx-22"]
# The whole number must have the NN-NNN-NNN-NN-NN shape. fullmatch anchors
# both ends, so a number that merely starts with a valid prefix (such as
# "38-050-333-xx-22") is rejected instead of being printed.
phone_pattern = re.compile(r"[0-9]{2}-[0-9]{3}-[0-9]{3}-[0-9]{2}-[0-9]{2}")
valid_numbers = [
    candidate
    for candidate in list_of_phone_numbers
    if phone_pattern.fullmatch(candidate)
]
for number in valid_numbers:
    print(number)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment