- Install npm
- Run:
    mkdir nepsealpha-scrap; cd nepsealpha-scrap; npm init -y
- Install dependencies:
npm install csv-writer puppeteer| from __future__ import annotations | |
| from typing import Any, Dict, Iterable, Type, TypeVar, Protocol | |
| from boto3.dynamodb.conditions import Attr, Key | |
| from pydantic import BaseModel, ValidationError | |
| T = TypeVar("T", bound=BaseModel) | |
| class TableLike(Protocol): |
| import time | |
| import pyautogui | |
| from random import randint, uniform, choice | |
| pyautogui.FAILSAFE = False | |
| NO_OF_WINDOWS = 3 # minimum 2 | |
| key_press = 0 | |
| mouse_move = 0 | |
| def pointer_move(): |
| accessible-pygments==0.0.5 | |
| alabaster==0.7.16 | |
| appnope==0.1.4 | |
| asttokens==2.4.1 | |
| attrs==23.2.0 | |
| Babel==2.15.0 | |
| beautifulsoup4==4.12.3 | |
| black==23.12.1 | |
| bleach==6.1.0 | |
| certifi==2024.7.4 |
| def download_file_from_url(url, folderpath): | |
| r = requests.get(url, allow_redirects=True, headers=headers, stream=True) | |
| if r.status_code == 200: | |
| print("\nDownloading.. %s"% url.split('/')[-1]) | |
| with open('%s/%s'%(folderpath, url.split('/')[-1]), 'wb') as f: | |
| total_length = r.headers.get('content-length') | |
| if total_length is None: # no content length header | |
| f.write(r.content) | |
| else: | |
| dl = 0 |
| ''' | |
| Interview hack: Memorize preorder/inorder/postorder tree ITERATORS (no recursion) and their reverses. | |
| It simplifies a disproportionate number of questions to simple for loops (see below). | |
| I consider the implementations below the simplest way to memorize the iterative tree traversal algorithms, | |
| because they are so similar to each other, and to their respective recursive versions. | |
| Notes: | |
| - We only visit a node after we have expanded its children (i.e. added them to the stack) in the desired order. | |
| - `x is curr` does the expanded flagging for us, because we always expand the current node. |
import json
import csv
# NOTE: the original used OrderedDict via object_pairs_hook to preserve key
# order; on Python 3.7+ plain dicts already preserve insertion order, so
# json.load alone is sufficient. Import kept in case other code relies on it.
from collections import OrderedDict


def json_to_csv(json_path="data.json", csv_path="converted.csv"):
    """Convert a JSON array of flat objects into a CSV file.

    The header row is taken from the keys of the first object; every
    object's values are written in their stored key order (assumes all
    objects share the same keys — TODO confirm against the data source).

    Args:
        json_path: path of the input JSON file (an array of objects).
        csv_path: path of the CSV file to create/overwrite.
    """
    with open(json_path, "r", encoding="utf-8") as src:
        rows = json.load(src)
    # newline="" is required by the csv module to avoid blank lines on Windows.
    with open(csv_path, "w", newline="", encoding="utf-8") as dst:
        writer = csv.writer(dst)
        if rows:  # an empty array yields an empty CSV instead of an IndexError
            writer.writerow(rows[0].keys())
            for row in rows:
                writer.writerow(row.values())


if __name__ == "__main__":
    json_to_csv()
| Homebrew build logs for xapian on macOS 10.13.4 | |
| Build date: 2018-05-15 17:17:40 |
| # This is an example config for the mass publish script | |
| # The outermost item is the section_id the articles belong in. | |
| # The next level is the directory the articles are in. Note that you can have | |
| # multiple directories for one section_id, but not multiple entries of the same | |
| # section_id. The final level is the names of the articles in the directory. | |
| 115001566429: | |
| html: | |
| - index.html: | |
| - installation.html: |
| import re | |
| class LogProcess: | |
| DATE_REGEX = r"\d{4}\-\d{1,2}\-\d{1,2}" | |
| LOG_LEVEL_REGEX = r"(?<=\[)\w+(?=\])" | |
| def __init__(self, filename): | |
| self.logfile = filename | |
| self.data = {} | |
| self.logvalues = {} |