João Pedro Chagas (jpchagas)
[tool.poetry]
name = "fetcher"
version = "0.1.0"
description = ""
authors = [""]
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
"""
Script do run quality checkers automatically
"""
import subprocess
def run_quality_checks():
"""
Run a series of quality checks including Black, isort, flake8, and pytest with coverage.
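A trivial entry point (not part of the original fragment) makes the script directly runnable:

if __name__ == "__main__":
    run_quality_checks()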
repos:
  - repo: https://github.com/psf/black
    rev: 24.8.0
    hooks:
      - id: black
        language_version: python3
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
[flake8]
# max-line-length 88 and ignoring E203/W503 keep flake8 compatible with Black.
max-line-length = 88
ignore = E203, E266, E501, W503
exclude = .git,__pycache__,docs,build,dist,data,models,app.log,.ipynb_checkpoints,experiment.ipynb
import os
import xml.etree.ElementTree as ET

# BASE_PATH (the annotations directory) is assumed to be defined elsewhere.
files = os.listdir(BASE_PATH)
xml_files = [x for x in files if x.endswith(".xml")]
for file in xml_files:
    try:
        filename = file.replace(".xml", ".png")
        tree = ET.parse(os.path.join(BASE_PATH, file))
        root = tree.getroot()
        # In a Pascal VOC annotation (see the sample below), the third child
        # is <path> and the fifth is <size>.
        xml_path = root[2].text
        ssize = root[4]
    except ET.ParseError:
        continue  # skip malformed annotation files
<annotation>
    <folder>FerreiraIbelli</folder>
    <filename>scene00631.png</filename>
    <path>/home/jpchagas/Projects/haole/data/videos/aus_margaret/FerreiraIbelli/scene00631.png</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>1280</width>
        <height>720</height>
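Positional indexing (root[2], root[4]) is brittle if the annotation layout varies; an equivalent lookup by tag name, sketched against the sample annotation above (the file name here is hypothetical):

import xml.etree.ElementTree as ET

tree = ET.parse("scene00631.xml")  # hypothetical annotation file
root = tree.getroot()
image_path = root.findtext("path")       # same field as root[2].text
size = root.find("size")
width = int(size.findtext("width"))      # 1280 in the sample above
height = int(size.findtext("height"))    # 720 in the sample above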
# cv, gnb, mnb, and bnb are the vectorizer and models trained on the IMDB data
# in the accompanying snippet. Use transform(), not fit_transform(): refitting
# would rebuild the vocabulary on the Instagram comments and misalign the features.
X_insta = cv.transform(instagram_df['comment']).toarray()
y_insta_pred_gnb = gnb.predict(X_insta)
y_insta_pred_mnb = mnb.predict(X_insta)
y_insta_pred_bnb = bnb.predict(X_insta)
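The gists carry no labels for the Instagram comments, but the three predictions can at least be compared with each other; a small sketch (an addition, with the names carried over from above):

import numpy as np

# Fraction of comments on which all three Naive Bayes variants agree.
agreement = np.mean(
    (y_insta_pred_gnb == y_insta_pred_mnb) & (y_insta_pred_mnb == y_insta_pred_bnb)
)
print(f"all three models agree on {agreement:.1%} of comments")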
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn import metrics

cv = CountVectorizer(max_features=150)
X = cv.fit_transform(imdb_df['text_pt']).toarray()  # .toarray(): GaussianNB needs dense input
y = imdb_df.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The fragment breaks off after gnb; the other two imported classifiers are
# instantiated and all three fitted, matching their use on the Instagram data.
gnb = GaussianNB()
mnb = MultinomialNB()
bnb = BernoulliNB()
for model in (gnb, mnb, bnb):
    model.fit(X_train, y_train)
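The metrics import goes unused in the snippet above; a minimal evaluation sketch on the held-out split (an addition, not part of the original gist) could be:

for name, model in (("GaussianNB", gnb), ("MultinomialNB", mnb), ("BernoulliNB", bnb)):
    y_pred = model.predict(X_test)
    print(name, metrics.accuracy_score(y_test, y_pred))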
import nltk

# RSLP is NLTK's Portuguese stemmer; it needs the 'rslp' resource downloaded once.
nltk.download('rslp')
stemmer = nltk.stem.RSLPStemmer()
# text_without_sw is the stopword-filtered token list built in the tokenization snippet.
text_stemmed = [stemmer.stem(word) for word in text_without_sw]
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')      # tokenizer models for word_tokenize
nltk.download('stopwords')  # stopword corpus
stop_words = nltk.corpus.stopwords.words('portuguese')
# word_tokenize expects a single string, not a Series; joining the column is an
# assumption about intent (the original passed df['text_pt'] directly).
text_word_tokens = word_tokenize(" ".join(df['text_pt']))
text_without_sw = [word for word in text_word_tokens if word not in stop_words]
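Taken together, the tokenization, stopword-removal, and stemming steps can be applied per review instead of to the joined column; preprocess below is a name introduced here, not from the gists:

def preprocess(text: str) -> list[str]:
    """Tokenize one Portuguese review, drop stopwords, and stem what remains."""
    tokens = word_tokenize(text)
    return [stemmer.stem(w) for w in tokens if w not in stop_words]

df['tokens'] = df['text_pt'].apply(preprocess)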