Skip to content

Instantly share code, notes, and snippets.

View SaraM92's full-sized avatar
🎯
Focusing

Sara Metwalli SaraM92

🎯
Focusing
View GitHub Profile
#Import needed library
import spacy
from sklearn import svm  # NOTE(review): svm is imported but never used in this snippet
#Loading NLP dictionary from Spacy
# Loads the medium English pipeline; requires a prior
# `python -m spacy download en_core_web_md` or spacy.load raises OSError.
nlp = spacy.load("en_core_web_md")
#Build a class for categories
class Category:
    """Namespace of text-category labels used for classification.

    The class body was unindented in the original paste, which is a
    syntax error; the constants are restored to proper class scope.
    """
    BOOKS = "BOOKS"
    CANDY = "CANDY"
#Import the regular-expression module
import re
#Sample text to scan
text = "The sidebar includes a Cheatsheet, full Reference, and Help. You can also save & Share with the Community, and view patterns you create or favorite in My Patterns."
#Pre-compiled pattern: one capital letter followed by one or more word characters
pattern = re.compile(r'[A-Z]\w+')
#Collect every capitalized word in the text
results = pattern.findall(text)
#Show what was found
print(results)
#Import the regular-expression module
import re
#The text we want to search
text = "Yesterday at the office party, I met the manager of the east coast branch, her phone number is 202–555–0180. I also exchange my number 202–555–0195 with a recruiter."
#Phone numbers look like ddd–ddd–dddd (note: the separators are en dashes, not hyphens)
pattern = re.compile(r'\d{3}–\d{3}–\d{4}')
#Collect every phone number appearing in the text
results = pattern.findall(text)
#Display the matches
print(results)
#Import needed library
import re
#The original text
text = "Python 3.0 was released on 03–12–2008. It was a major revision of the language that is not completely backward-compatible. Many of its major features were backported to Python 2.6. and 2.7 versions that were released on 03.10.2008 and 03/07/2010 respectively."
#Compile pattern: day (01-31), separator (. – or /), month (01-12), separator, year.
#Fix: the original year group (19|20\d\d) matched the literal two characters
#"19" instead of 19xx years; ((?:19|20)\d\d) captures any 19xx/20xx year.
pattern = re.compile(r'(0[1-9]|[12][0-9]|3[01])(\.|–|/)(0[1-9]|1[012])(\.|–|/)((?:19|20)\d\d)')
#Find all matches — with capture groups present, findall returns 5-tuples of groups
results = re.findall(pattern, text)
#Re-join each tuple of groups back into the full date string
#(iterate the matches directly instead of indexing with range(len(...)))
dates = [''.join(match) for match in results]
#Import needed libraries
import pandas as pd
#Read data and save it in a dataframe
# NOTE(review): relative path — assumes an Rdatasets checkout next to this
# script; read_csv raises FileNotFoundError otherwise. Confirm before running.
Df = pd.read_csv("Rdatasets/csv/datasets/Titanic.csv")
Df.describe() #display description of data
# NOTE(review): outside a notebook/REPL the describe() result above is
# silently discarded — print or assign it if the summary is actually needed.
#Inspect the first 5 rows of the dataframe
first = Df.head(5)
#Inspect the last 5 rows of the dataframe
last = Df.tail(5)
#Take a sample of 5
# NOTE(review): the snippet appears truncated here — the sampling line
# (presumably Df.sample(5)) is missing from this paste.
#Import the chi-square test of independence
from scipy.stats import chi2_contingency
#chi2_contingency tests whether the row and column variables of a
#contingency table are independent. It returns, in order: the test
#statistic, the p value, the degrees of freedom, and the expected frequencies.
table = [[10, 20, 30],
         [6, 9, 17]]
stat, p, dof, expected = chi2_contingency(table)
print('The p value is %.3f' % (p))
#Validity caveat: the chi-square approximation is reliable only when every
#expected cell frequency is at least 5.
#Import the independent-samples t-test
from scipy.stats import ttest_ind
#ttest_ind compares the means of two independent samples and returns the
#t statistic together with the two-sided p value; it assumes the samples
#were drawn independently.
data1 = [1.94, 2.29, -0.04, 0.84, -0.28, 1.34, 0.35, 0.12, -0.63, 0.11]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = ttest_ind(data1, data2)
print('The p value is %.3f' % (p))
if p > 0.05:
#Import the one-way ANOVA test
from scipy.stats import f_oneway
#f_oneway tests the null hypothesis that all the given samples share the
#same population mean; it returns the F statistic and the p value.
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = f_oneway(data1, data2, data3)
print('The p value is %.3f' % (p))
if p > 0.05:
#Import needed libraries
import numpy as np
import matplotlib.pyplot as plt
#Observed data points (start of a simple linear-regression example)
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
#Number of observations
n = x.size
#Sample means of the x and y vectors
m_x, m_y = x.mean(), y.mean()
#Import needed libraries
import pygal
import pandas as pd
#Parse the dataframe
# NOTE: the CSV is fetched over the network on every run.
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
#Get the mean number of cases per states
mean_per_state = data.groupby('state')['cases'].mean()
#Draw the bar chart
barChart = pygal.Bar(height=400)
#Add one bar per state. A plain loop replaces the original list
#comprehension, which built and discarded a throwaway list purely for
#its side effects.
for state, mean_cases in mean_per_state.items():
    barChart.add(state, mean_cases)