Maintain a persistent handoff file for continuity.
Update it after each task with:
- current status
- next steps
- test results
- environment details
| #!/usr/bin/env python3 | |
| # task_complete.py | |
| """Task completion sound using Windows from WSL2 - no pip install needed""" | |
| import subprocess | |
| def task_complete(): | |
| try: | |
| # Call Windows PowerShell to play a beep | |
| # Format: powershell.exe -Command "[console]::beep(frequency, duration)" | |
| subprocess.run( |
## Completion notification
At the end of every task:
- Run: ~/.codex/notify.py
Purpose:
- Notify the user when a task is complete
- Avoid manual polling
- Enable async workflows
| name | caveman-compress |
|---|---|
| description | Compress prose in a file using caveman-style brevity while preserving full technical meaning and structure. Creates a backup and rewrites the original file. |
| user-invocable | true |
| argument-hint | [file path] |
Reduce token usage in large markdown/docs while keeping meaning intact.
# Build a TF-IDF vocabulary-weight table from the training text.
# tokenizer/preprocessor are identity lambdas, so 'clean_text' is assumed to
# already hold pre-tokenized token lists — TODO confirm against the cleaning
# pipeline upstream.
tfidf = TfidfVectorizer(
    tokenizer=lambda x: x,
    preprocessor=lambda x: x,
    ngram_range=(1, 1),
    min_df=10,  # drop tokens appearing in fewer than 10 documents
)
tfidf_vectorizer_vectors_train = tfidf.fit_transform(df_train.loc[:, 'clean_text'].values)
# NOTE: toarray() densifies the full document-term matrix; fine for small
# corpora, memory-heavy for large ones.
features = tfidf_vectorizer_vectors_train.toarray()
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is the supported replacement (available since 1.0). The original column
# slice features[:, :len(feature_names)] was a no-op — the matrix already has
# exactly one column per feature — so sum over documents directly.
feature_names = tfidf.get_feature_names_out()
vocab = dict(zip(feature_names, features.sum(axis=0)))
| embeddings_dict = {} | |
| with open("glove.6B.100d.txt", 'r', encoding='utf-8') as f: | |
| for line in f: | |
| values = line.split() | |
| token = values[0] |
| def clean_data(data,col,re_emoji,re_inc_boostr,re_dec_boostr,re_pos,re_neg,re_bad): | |
| dataframe = data.copy() | |
| link_regex = re.compile(r'(?:ftp|https?|www|file)\.?:?[//|\\\\]?[\w\d:#@%/;$()~_?\+-=\\\&]+\.[\w\d:#@%/;$~_?\+-=\\\&]+') | |
| dataframe = dataframe.assign(**dict(zip([col,'num_link'], zip(*dataframe['text'].apply(lambda x: re.subn(link_regex,'LINK',x) ) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_usermention'], zip(*dataframe[col].apply(lambda x: re.subn(r'@[\w]*','USERMENTION',x)) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_hashtag'], zip(*dataframe[col].apply(lambda x: re.subn(r'#[\w]*','HASHTAG',x) ) )))) | |
| dataframe = dataframe.assign(**dict(zip([col,'num_emoji'], zip(*dataframe[col].apply(lambda x: re.subn(re_emoji, lambda m: EMOJIS.get(m.group(), 'EMOJI') , x) ))))) | |
| dataframe[col] = dataframe[col].apply(lambda x : re.sub(r'(.)\1{2,}', r'\1',x)) # make looong as long | |
| dataframe[col] = dataframe[col].apply(lambda x : expandContractions(x) ) # expand cont |
| DATA_LIST = [] ## list to which all data is appended to | |
| SMALL_DICT = {} # Dictionary to which countries are added if there is some error | |
| for index,country_name_id in enumerate(COUNTRIES_DICT.items()): # Looping in the all the countries | |
| try: # try and except block to catch error | |
| country_name,country_id = country_name_id | |
| print(index,country_name) | |
| ser = Service('./chromedriver.exe') | |
| chrome_options = webdriver.ChromeOptions() | |
| chrome_options.add_argument("--incognito") | |
| driver = webdriver.Chrome(service=ser,options=chrome_options) |
# --- One-off scrape setup for a single country (United Kingdom) ---
# DATA_LIST / country_name / driver are module-level state reused by later
# fragments, so their names are deliberately left unchanged.
DATA_LIST = []
country_name = 'United Kingdom'

# Launch Chrome in incognito mode via the local chromedriver binary.
ser = Service('./chromedriver.exe')
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(service=ser, options=chrome_options)

# Open the country's listing page (URL suffix looked up per country), then
# dismiss the disclaimer overlay. The click is dispatched through JavaScript
# so the overlay itself cannot intercept it.
driver.get(BASE_URL + COUNTRIES_DICT.get(country_name))
element = driver.find_element(By.XPATH, '//*[@id="disclaimer-content"]/button')
driver.execute_script("arguments[0].click();", element)
| def get_data(page_source,DATA,category,country,page_num): | |
| content = BeautifulSoup(page_source,'html5lib') | |
| soups = content.findAll('div',{'class':'thumb-block'}) | |
| for so in soups: | |
| if 'thumb-ad' not in so.get('class'): | |
| this_data = {} | |
| this_data['TITLE'] = so.find('p',{'class':'title'}).find('a').get('title') | |
| meta_data = so.find('p',{'class':'metadata'}) | |
| quality = meta_data.find('span',{'class':'video-hd-mark'}) | |
| if quality: |