Skip to content

Instantly share code, notes, and snippets.

View gamingflexer's full-sized avatar
❤️
Chaos all around

Om Surve gamingflexer

❤️
Chaos all around
View GitHub Profile
@gamingflexer
gamingflexer / main.py
Created July 19, 2023 09:02
Anthropic's tokenizer for Claude
# Minimal example: tokenize a sample string with a tokenizer loaded from a local JSON file.
from transformers import PreTrainedTokenizerFast
# Build the fast tokenizer from a serialized tokenizer file. The absolute path below is
# machine-specific; point it at your own copy of claude-v1-tokenization.json.
fast_tokenizer = PreTrainedTokenizerFast(tokenizer_file="/home/ubuntu/LLM/module/claude-v1-tokenization.json")
# Sample input used for the demonstration.
text = "Hello, this is a test input."
# Run the tokenizer over the sample text and keep the result.
tokens = fast_tokenizer.tokenize(text)
# Bare expression: displays the value in a notebook/REPL cell, but does nothing
# visible when this file is executed as a plain script.
tokens
@gamingflexer
gamingflexer / nllb_200.py
Created August 28, 2022 14:55
NLLB FB 200 Language Translator
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Load the "facebook/nllb-200-distilled-600M" seq2seq model and its matching tokenizer
# once at module level so every call to translation() reuses the same objects.
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
# For the valid source and target language codes, see the FLORES-200 language list:
# https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200
def translation(text, src_lang, tgt_lang, max_length=400):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Builds a ``'translation'`` pipeline around the module-level ``model`` and
    ``tokenizer`` and runs it on ``text``.

    Args:
        text: Input handed unchanged to the translation pipeline.
        src_lang: Source language code (FLORES-200 style, e.g. as listed in
            the FLORES-200 language table).
        tgt_lang: Target language code, same convention as ``src_lang``.
        max_length: Length limit forwarded to the pipeline. Defaults to 400,
            the value that used to be hard-coded, so existing three-argument
            calls behave exactly as before.

    Returns:
        The pipeline's result for ``text``, returned unmodified.
    """
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=max_length,
    )
    return translator(text)
@gamingflexer
gamingflexer / classes.py
Created August 28, 2022 14:53
Detectron 2 Classes
className = {0: u'__background__',
1: u'person',
2: u'bicycle',
3: u'car',
4: u'motorcycle',
5: u'airplane',
6: u'bus',
7: u'train',
8: u'truck',
9: u'boat',
@gamingflexer
gamingflexer / google_fact_check.py
Created August 28, 2022 14:49
Google Fact Check | Web Scraping
import requests
import urllib
import pandas as pd
from requests_html import HTML
from requests_html import HTMLSession
def get_source(url):
"""Return the source code for the provided URL.
Args:
@gamingflexer
gamingflexer / preprocess.py
Created August 28, 2022 14:47
Bunch of Cleaning Functions | ML & Backend Dev
import os,re,string,json,emoji,csv
import numpy as np
import pandas as pd
def clean_text(text):
'''Clean emoji, Make text lowercase, remove text in square brackets,remove links,remove punctuation
and remove words containing numbers.'''
text = emoji.demojize(text)
text = re.sub(r'\:(.*?)\:', '', text)
text = str(text).lower() # Making Text Lowercase
@gamingflexer
gamingflexer / download_twitter_video_from_link.py
Created August 28, 2022 14:43
Download Twitter Video From Link
@gamingflexer
gamingflexer / top_headlines.py
Created August 28, 2022 14:41
Scrape Top Headlines for a Particular Hashtag
import requests,json
API_KEY = ""
def headline_script():
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/search/NewsSearchAPI"
querystring = {"q":"TOPIC NEEDED","pageNumber":"1","pageSize":"10","autoCorrect":"true","fromPublishedDate":"null","toPublishedDate":"null"}
headers = {
"X-RapidAPI-Key": API_KEY,
@gamingflexer
gamingflexer / instagram_video_from_link.py
Created August 28, 2022 14:39
Download Instagram Video from link
@gamingflexer
gamingflexer / twitter_scrapper.py
Created August 28, 2022 14:38
Twitter Scraper | Hashtags & User
# pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint
import os
import twint
tempath = "add a temp path folder"
def top_tweets(username):
for user in username:
c = twint.Config()
@gamingflexer
gamingflexer / detectron2_video_od.py
Last active August 29, 2022 06:21
Detectron 2 | Video object Detection | Copy and Run
# install detectron2
# git clone https://github.com/facebookresearch/detectron2.git
# cd detectron2
# pip install -e .
# cd ..
import uuid
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg