'''
Data Augmentation Using Keras Library.
'''
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

## Generator that applies random transformations to training images
datagen = ImageDataGenerator(
    rotation_range=40,        ## random rotations of up to 40 degrees
    width_shift_range=0.2,    ## horizontal shifts of up to 20% of the width
    height_shift_range=0.2,   ## vertical shifts of up to 20% of the height
)
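## The imports of load_img, img_to_array and array_to_img suggest the generator is
## meant to be fed a single image; a minimal sketch of that usage, assuming a local
## file 'cat.jpg' and an existing 'preview/' output folder (both hypothetical).
img = load_img('cat.jpg')                  ## PIL image
x = img_to_array(img)                      ## numpy array of shape (height, width, 3)
x = x.reshape((1,) + x.shape)              ## add a batch dimension -> (1, height, width, 3)

i = 0
for batch in datagen.flow(x, batch_size=1, save_to_dir='preview',
                          save_prefix='aug', save_format='jpeg'):
    i += 1
    if i >= 20:                            ## stop after 20 augmented images
        break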
'''
Finding Similar Stackoverflow Questions.
'''
from autoscraper import AutoScraper
url = 'https://stackoverflow.com/questions/67483624/how-to-install-tensorflow-object-detection-api-offline'
wanted_list = ["How to know if an object has an attribute in Python?"]
scraper = AutoScraper()
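## The snippet stops before the scraper is trained; a minimal sketch of the usual
## AutoScraper flow, assuming the goal is to learn the rule from the example question
## in wanted_list and then pull all similar question titles from the page.
result = scraper.build(url, wanted_list)        ## learn scraping rules from the example
print(scraper.get_result_similar(url))          ## question titles similar to the example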
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
wordLemm = WordNetLemmatizer()
## Map common emoticons to words so their sentiment survives punctuation removal
EMOJIS = {':)': 'smile', ':-)': 'smile', ';d': 'wink', ':-E': 'vampire', ':(': 'sad',
          ':-(': 'sad', ':-<': 'sad', ':P': 'raspberry', ':O': 'surprised',
          ':-@': 'shocked', ':@': 'shocked', ':-$': 'confused', ':\\': 'annoyed',
          ':#': 'mute', ':X': 'mute', ':^)': 'smile', ':-&': 'confused', '$_$': 'greedy'}

def clean_text(text):
    for emoji, meaning in EMOJIS.items():                  ## replace emoticons with their meaning
        text = text.replace(emoji, ' ' + meaning + ' ')
    text = re.sub(r'[^a-zA-Z\s]', ' ', text).lower()       ## keep letters only, then lowercase
    words = [wordLemm.lemmatize(w) for w in text.split() if w not in stopwords.words('english')]
    return ' '.join(words)
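## Quick check of the cleaning function on a sample tweet (the exact output depends
## on the rules reconstructed above).
print(clean_text("Best pizza EVER!!! :) totally worth it"))   ## e.g. 'best pizza ever smile totally worth'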
import re
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

## Loading the dataset from the github repository raw url
df = pd.read_csv('https://raw.githubusercontent.com/Abhayparashar31/datasets/master/twitter.csv')

## Cleaning the text with the help of an external python file containing the cleaning function
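## A sketch of how that external cleaning function might be applied; the module name
## 'cleaning', the function 'clean_text' and the column name 'tweet' are assumptions,
## not confirmed by the snippet.
from cleaning import clean_text
df['cleaned_tweet'] = df['tweet'].apply(clean_text)    ## clean every tweet in the dataframe
corpus = df['cleaned_tweet'].tolist()                  ## cleaned corpus for vectorization below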
from sklearn.feature_extraction.text import TfidfVectorizer
tf_idf = TfidfVectorizer()
vectors = tf_idf.fit_transform(corpus)   ## corpus: the list of cleaned documents
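## Optional: view the TF-IDF weights as a document-term matrix, mirroring the
## CountVectorizer example below (requires scikit-learn >= 1.0 for get_feature_names_out).
import pandas as pd
print(pd.DataFrame(vectors.toarray(), columns=tf_idf.get_feature_names_out()))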
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(cleaned_corpus)   ## passing the cleaned corpus
## document-term matrix: one row per document, one column per vocabulary word
doc_term_matrix = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())   ## use get_feature_names() on scikit-learn < 1.0
print(doc_term_matrix)
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
english_stopwords = set(stopwords.words('english'))

corpus = ['Food is Bad',
          'Bad Service Bad Food',
          'Food is Good',
          'Good Service With Good Food.']
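## A minimal sketch of stripping the stopwords from the small corpus above; only
## 'is' and 'with' appear in NLTK's english stopword list, so the sentiment-bearing
## words are kept.
cleaned_corpus = []
for doc in corpus:
    words = [w for w in doc.lower().split() if w not in english_stopwords]
    cleaned_corpus.append(' '.join(words))
print(cleaned_corpus)   ## e.g. ['food bad', 'bad service bad food', 'food good', 'good service good food.']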
import sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
import requests
from bs4 import BeautifulSoup
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
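## The imports above stop short of the actual summarization; a minimal sketch of the
## remaining steps, assuming the article body is taken from the page's <p> tags and a
## 3-sentence summary is wanted (both assumptions). Sumy's Tokenizer may require the
## nltk 'punkt' data to be downloaded.
res = requests.get(url)
soup = BeautifulSoup(res.text, 'html.parser')
text = ' '.join(p.text for p in soup.find_all('p'))         ## raw article text

parser = PlaintextParser.from_string(text, Tokenizer('english'))
summarizer = LexRankSummarizer()
for sentence in summarizer(parser.document, 3):              ## 3 highest-ranked sentences
    print(sentence)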
import gensim
import re
from gensim.summarization.summarizer import summarize
import requests
from bs4 import BeautifulSoup
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
res = requests.get(url)
soup = BeautifulSoup(res.text,'html.parser')
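## A minimal sketch of the remaining steps, assuming the article body is taken from
## the page's <p> tags; note that gensim.summarization only exists in gensim 3.x
## (it was removed in gensim 4.0), and the ratio value here is an arbitrary choice.
text = ' '.join(p.text for p in soup.find_all('p'))
text = re.sub(r'\[[0-9]*\]', ' ', text)      ## drop wikipedia citation markers like [1]
summary = summarize(text, ratio=0.05)        ## keep roughly 5% of the sentences
print(summary)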
import heapq

## sentences_score: dictionary mapping each sentence to its importance score (built earlier in the script)
def get_key(val):
    for key, value in sentences_score.items():
        if val == value:
            return key

key = get_key(max(sentences_score.values()))   ## the single highest-scoring sentence
n = 3
summary = heapq.nlargest(n, sentences_score, key=sentences_score.get)   ## top n sentences by score
print(" ".join(summary))