Maintain a persistent handoff file for continuity.
Update it after each task with:
- current status
- next steps
- test results
- environment details
| #!/usr/bin/env python3 | |
| # task_complete.py | |
| """Task completion sound using Windows from WSL2 - no pip install needed""" | |
| import subprocess | |
| def task_complete(): | |
| try: | |
| # Call Windows PowerShell to play a beep | |
| # Format: powershell.exe -Command "[console]::beep(frequency, duration)" | |
| subprocess.run( |
## Completion notification
At the end of every task:
- Run: ~/.codex/notify.py
Purpose:
- Notify the user when a task is complete
- Avoid manual polling
- Enable async workflows
| name | caveman-compress |
|---|---|
| description | Compress prose in a file using caveman-style brevity while preserving full technical meaning and structure. Creates a backup and rewrites the original file. |
| user-invocable | true |
| argument-hint | [file path] |
Reduce token usage in large markdown/docs while keeping meaning intact.
# Build a TF-IDF vocabulary-weight table from the training text.
# tokenizer/preprocessor are identity lambdas, so 'clean_text' is assumed to
# already hold pre-tokenized token lists — TODO confirm against the cleaning
# pipeline upstream.
tfidf = TfidfVectorizer(
    tokenizer=lambda x: x,
    preprocessor=lambda x: x,
    ngram_range=(1, 1),
    min_df=10,  # drop tokens appearing in fewer than 10 documents
)
tfidf_vectorizer_vectors_train = tfidf.fit_transform(df_train.loc[:, 'clean_text'].values)
# NOTE: toarray() densifies the full document-term matrix; fine for small
# corpora, memory-heavy for large ones.
features = tfidf_vectorizer_vectors_train.toarray()
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is the supported replacement (available since 1.0). The original column
# slice features[:, :len(feature_names)] was a no-op — the matrix already has
# exactly one column per feature — so sum over documents directly.
feature_names = tfidf.get_feature_names_out()
vocab = dict(zip(feature_names, features.sum(axis=0)))
| embeddings_dict = {} | |
| with open("glove.6B.100d.txt", 'r', encoding='utf-8') as f: | |
| for line in f: | |
| values = line.split() | |
| token = values[0] |
| def clean_data(data,col,re_emoji,re_inc_boostr,re_dec_boostr,re_pos,re_neg,re_bad): | |
| dataframe = data.copy() | |
| link_regex = re.compile(r'(?:ftp|https?|www|file)\.?:?[//|\\\\]?[\w\d:#@%/;$()~_?\+-=\\\&]+\.[\w\d:#@%/;$~_?\+-=\\\&]+') | |
| dataframe = dataframe.assign(**dict(zip([col,'num_link'], zip(*dataframe['text'].apply(lambda x: re.subn(link_regex,'LINK',x) ) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_usermention'], zip(*dataframe[col].apply(lambda x: re.subn(r'@[\w]*','USERMENTION',x)) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_hashtag'], zip(*dataframe[col].apply(lambda x: re.subn(r'#[\w]*','HASHTAG',x) ) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_emoji'], zip(*dataframe[col].apply(lambda x: re.subn(re_emoji, lambda m: EMOJIS.get(m.group(), 'EMOJI') , x) ))))) | |
| dataframe[col] = dataframe[col].apply(lambda x : re.sub(r'(.)\1{2,}', r'\1',x)) # make looong as long | |
| dataframe[col] = dataframe[col].apply(lambda x : expandContractions(x) ) # expand cont |
| DATA_LIST = [] ## list to which all data is appended to | |
| SMALL_DICT = {} # Dictionary to which countries are added if there is some error | |
| for index,country_name_id in enumerate(COUNTRIES_DICT.items()): # Looping in the all the countries | |
| try: # try and except block to catch error | |
| country_name,country_id = country_name_id | |
| print(index,country_name) | |
| ser = Service('./chromedriver.exe') | |
| chrome_options = webdriver.ChromeOptions() | |
| chrome_options.add_argument("--incognito") | |
| driver = webdriver.Chrome(service=ser,options=chrome_options) |
# --- One-off scrape setup for a single country (United Kingdom) ---
# DATA_LIST / country_name / driver are module-level state reused by later
# fragments, so their names are deliberately left unchanged.
DATA_LIST = []
country_name = 'United Kingdom'

# Launch Chrome in incognito mode via the local chromedriver binary.
ser = Service('./chromedriver.exe')
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(service=ser, options=chrome_options)

# Open the country's listing page (URL suffix looked up per country), then
# dismiss the disclaimer overlay. The click is dispatched through JavaScript
# so the overlay itself cannot intercept it.
driver.get(BASE_URL + COUNTRIES_DICT.get(country_name))
element = driver.find_element(By.XPATH, '//*[@id="disclaimer-content"]/button')
driver.execute_script("arguments[0].click();", element)
| def get_data(page_source,DATA,category,country,page_num): | |
| content = BeautifulSoup(page_source,'html5lib') | |
| soups = content.findAll('div',{'class':'thumb-block'}) | |
| for so in soups: | |
| if 'thumb-ad' not in so.get('class'): | |
| this_data = {} | |
| this_data['TITLE'] = so.find('p',{'class':'title'}).find('a').get('title') | |
| meta_data = so.find('p',{'class':'metadata'}) | |
| quality = meta_data.find('span',{'class':'video-hd-mark'}) | |
| if quality: |