João Pedro Chagas (jpchagas)
[tool.poetry]
name = "fetcher"
version = "0.1.0"
description = ""
authors = [""]
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
"""
Script do run quality checkers automatically
"""
import subprocess
def run_quality_checks():
"""
Run a series of quality checks including Black, isort, flake8, and pytest with coverage.
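A trivial entry point (not part of the original fragment) makes the script directly runnable:

if __name__ == "__main__":
    run_quality_checks()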
repos:
  - repo: https://github.com/psf/black
    rev: 24.8.0
    hooks:
      - id: black
        language_version: python3
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
[flake8]
# max-line-length 88 and ignoring E203/W503 keep flake8 compatible with Black.
max-line-length = 88
ignore = E203, E266, E501, W503
exclude = .git,__pycache__,docs,build,dist,data,models,app.log,.ipynb_checkpoints,experiment.ipynb
import os
import xml.etree.ElementTree as ET

# BASE_PATH (the annotations directory) is assumed to be defined elsewhere.
files = os.listdir(BASE_PATH)
xml_files = [x for x in files if x.endswith(".xml")]
for file in xml_files:
    try:
        filename = file.replace(".xml", ".png")
        tree = ET.parse(os.path.join(BASE_PATH, file))
        root = tree.getroot()
        # In a Pascal VOC annotation (see the sample below), the third child
        # is <path> and the fifth is <size>.
        xml_path = root[2].text
        ssize = root[4]
    except ET.ParseError:
        continue  # skip malformed annotation files
<annotation>
    <folder>FerreiraIbelli</folder>
    <filename>scene00631.png</filename>
    <path>/home/jpchagas/Projects/haole/data/videos/aus_margaret/FerreiraIbelli/scene00631.png</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>1280</width>
        <height>720</height>
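Positional indexing (root[2], root[4]) is brittle if the annotation layout varies; an equivalent lookup by tag name, sketched against the sample annotation above (the file name here is hypothetical):

import xml.etree.ElementTree as ET

tree = ET.parse("scene00631.xml")  # hypothetical annotation file
root = tree.getroot()
image_path = root.findtext("path")       # same field as root[2].text
size = root.find("size")
width = int(size.findtext("width"))      # 1280 in the sample above
height = int(size.findtext("height"))    # 720 in the sample above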
# cv, gnb, mnb, and bnb are the vectorizer and models trained on the IMDB data
# in the accompanying snippet. Use transform(), not fit_transform(): refitting
# would rebuild the vocabulary on the Instagram comments and misalign the features.
X_insta = cv.transform(instagram_df['comment']).toarray()
y_insta_pred_gnb = gnb.predict(X_insta)
y_insta_pred_mnb = mnb.predict(X_insta)
y_insta_pred_bnb = bnb.predict(X_insta)
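The gists carry no labels for the Instagram comments, but the three predictions can at least be compared with each other; a small sketch (an addition, with the names carried over from above):

import numpy as np

# Fraction of comments on which all three Naive Bayes variants agree.
agreement = np.mean(
    (y_insta_pred_gnb == y_insta_pred_mnb) & (y_insta_pred_mnb == y_insta_pred_bnb)
)
print(f"all three models agree on {agreement:.1%} of comments")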
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn import metrics

cv = CountVectorizer(max_features=150)
X = cv.fit_transform(imdb_df['text_pt']).toarray()  # .toarray(): GaussianNB needs dense input
y = imdb_df.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The fragment breaks off after gnb; the other two imported classifiers are
# instantiated and all three fitted, matching their use on the Instagram data.
gnb = GaussianNB()
mnb = MultinomialNB()
bnb = BernoulliNB()
for model in (gnb, mnb, bnb):
    model.fit(X_train, y_train)
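The metrics import goes unused in the snippet above; a minimal evaluation sketch on the held-out split (an addition, not part of the original gist) could be:

for name, model in (("GaussianNB", gnb), ("MultinomialNB", mnb), ("BernoulliNB", bnb)):
    y_pred = model.predict(X_test)
    print(name, metrics.accuracy_score(y_test, y_pred))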
import nltk

# RSLP is NLTK's Portuguese stemmer; it needs the 'rslp' resource downloaded once.
nltk.download('rslp')
stemmer = nltk.stem.RSLPStemmer()
# text_without_sw is the stopword-filtered token list built in the tokenization snippet.
text_stemmed = [stemmer.stem(word) for word in text_without_sw]
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')      # tokenizer models for word_tokenize
nltk.download('stopwords')  # stopword corpus
stop_words = nltk.corpus.stopwords.words('portuguese')
# word_tokenize expects a single string, not a Series; joining the column is an
# assumption about intent (the original passed df['text_pt'] directly).
text_word_tokens = word_tokenize(" ".join(df['text_pt']))
text_without_sw = [word for word in text_word_tokens if word not in stop_words]
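Taken together, the tokenization, stopword-removal, and stemming steps can be applied per review instead of to the joined column; preprocess below is a name introduced here, not from the gists:

def preprocess(text: str) -> list[str]:
    """Tokenize one Portuguese review, drop stopwords, and stem what remains."""
    tokens = word_tokenize(text)
    return [stemmer.stem(w) for w in tokens if w not in stop_words]

df['tokens'] = df['text_pt'].apply(preprocess)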