Last active
November 8, 2022 11:09
-
-
Save akrisanov/f6318c05fd8b966e667845727a713c1d to your computer and use it in GitHub Desktop.
Yandex.Praktikum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Load the stock table; the code below relies on its 'item' and 'count' columns.
stock = pd.read_csv('/datasets/stock_upd.csv')

# Lower-case copy of the item names so that spellings differing only in letter
# case compare equal.
stock['item_lowercase'] = stock['item'].str.lower()

# Per-brand totals are taken BEFORE duplicates are removed, so every spelling
# variant contributes to the sum.
apple = stock[stock['item_lowercase'].str.contains('apple')]['count'].sum()
samsung = stock[stock['item_lowercase'].str.contains('samsung')]['count'].sum()

# drop_duplicates() keeps only first occurrences; assigning the shorter Series
# back aligns on the index and leaves NaN in the repeated rows, which dropna()
# then removes. Note that dropna() also removes rows with NaN in any other
# column.
stock['item_lowercase'] = stock['item_lowercase'].drop_duplicates()
stock = stock.dropna().reset_index(drop=True)

# NOTE(review): rows 3 and 1 are hard-coded positions of the Apple and Samsung
# items after the index reset (see the sample output below) — confirm they
# still hold if the input file changes.
stock.at[3, 'count'] = apple
stock.at[1, 'count'] = samsung

print(stock)
# Result
#           id                              item  count                    item_lowercase
# 0  100480924     Смартфон Xiaomi Redmi 6A 16GB     44     смартфон xiaomi redmi 6a 16gb
# 1  100480938  Смартфон Samsung Galaxy A30 32GB     60  смартфон samsung galaxy a30 32gb
# 2  100480959          Смартфон HUAWEI P30 lite     38          смартфон huawei p30 lite
# 3  100480975     Смартфон Apple iPhone Xr 64gb     29     смартфон apple iphone xr 64gb
# 4  100480988            Смартфон Honor 8X 64GB     64            смартфон honor 8x 64gb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.stem import SnowballStemmer

russian_stemmer = SnowballStemmer('russian')

# Search queries to filter. Several are deliberate decoys that merely contain
# or start with the letters of the target word.
# NOTE(review): these literals were reconstructed from mis-decoded text. The
# decoys "барон фон", "эплан" and "эплеренон" are best guesses — verify them
# against the original data.
queries = [
    "эпл айфоны",
    "купить эпл телефон",
    "лучшие смартфоны",
    "барон фон",
    "смартфон эпл айфон",
    "смартфоны 2019",
    "эплан",
    "фоновая музыка",
    "эпл айфоны икс",
    "эпл айфон 64гб",
    "фон для фото",
    "купить эпл",
    "эпл айфон купить",
    "эплеренон купить",
    "смартфон где купить",
    "эплан показания",
    "смартфон huawei",
    "эпл",
]

# Print every query in which at least one word stems to 'эпл'. any() stops at
# the first matching word, so a query is printed once even if it contains the
# word several times.
for query in queries:
    if any(russian_stemmer.stem(word) == 'эпл' for word in query.split()):
        print(query)
# Result
# эпл айфоны
# купить эпл телефон
# смартфон эпл айфон
# эпл айфоны икс
# эпл айфон 64гб
# купить эпл
# эпл айфон купить
# эпл
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter

import pandas as pd
from pymystem3 import Mystem

feedback = pd.read_csv('/datasets/feedback.csv')
m = Mystem()

# Lemmatize the review stored in row 1 of the 'text' column. As the sample
# output below shows, the returned list also contains whitespace and
# punctuation tokens, and these are counted as well.
lemmas = m.lemmatize(feedback.at[1, 'text'])
print(Counter(lemmas))
# Result
# Counter({' ': 16, 'на': 2, '- ': 1, 'огромный': 1, 'качественный': 1, 'экран': 1, 'запас': 1, 'производительность': 1, 'хватить': 1, 'несколько': 1, 'год': 1, 'замечательный': 1, 'камера': 1, 'очень': 1, 'емкий': 1, 'батарея': 1, '. ': 1, 'хватать': 1, 'целый': 1, 'день': 1, '.': 1, '\n': 1})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd


def alert_group(messages: int) -> str:
    """Classify a message count into an alert level.

    Args:
        messages: Number of support messages.

    Returns:
        'средний' (medium) for counts up to and including 300,
        'высокий' (high) for counts above 300 and below 500,
        'критичный' (critical) for counts of 500 and more.
    """
    if messages <= 300:
        return 'средний'
    # Anything reaching this point is already above 300.
    if messages < 500:
        return 'высокий'
    return 'критичный'
support_log = pd.read_csv('/datasets/support_log.csv')

# After groupby(...).count() each remaining column holds the number of
# non-null rows per type_id; the 'user_id' column is used as the message count.
support_log_grouped = support_log.groupby('type_id').count()
support_log_grouped['alert_group'] = support_log_grouped['user_id'].apply(alert_group)

# Number of request types that fall into each alert level.
print(support_log_grouped['alert_group'].value_counts())
# Result
# высокий      4
# критичный    2
# средний      2
# Name: alert_group, dtype: int64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd


def alert_group_importance(row) -> str:
    """Combine a row's alert level and importance flag into a status label.

    Args:
        row: Mapping-like object (for example a DataFrame row or a dict) with
            the keys 'alert_group' and 'importance'.

    Returns:
        'обратить внимание' (pay attention) when the alert level is 'средний'
        and importance is 1,
        'высокий риск' (high risk) when the alert level is 'высокий' and
        importance is 1,
        'в порядке очереди' (handle in queue order) in every other case,
        including the 'критичный' level.
    """
    if row['alert_group'] == 'средний' and row['importance'] == 1:
        return 'обратить внимание'
    if row['alert_group'] == 'высокий' and row['importance'] == 1:
        return 'высокий риск'
    return 'в порядке очереди'
support_log_grouped = pd.read_csv('/datasets/support_log_grouped.csv')

# Quick manual check of the classifier on a single dict-shaped row:
# print(alert_group_importance({'alert_group': 'высокий', 'importance': 1}))

# axis=1 passes each row to the function, which reads the row's
# 'alert_group' and 'importance' values.
support_log_grouped['importance_status'] = support_log_grouped.apply(alert_group_importance, axis=1)
print(support_log_grouped)
# Result
#    type_id  user_id  timestamp alert_group  importance  importance_status
# 0        1      311        311     высокий           1       высокий риск
# 1        2      302        302     высокий           0  в порядке очереди
# 2        3      606        606   критичный           0  в порядке очереди
# 3        4      312        312     высокий           1       высокий риск
# 4        5      586        586   критичный           1  в порядке очереди
# 5        6      303        303     высокий           1       высокий риск
# 6        7      283        283     средний           1  обратить внимание
# 7        8      297        297     средний           0  в порядке очереди
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment