Skip to content

Instantly share code, notes, and snippets.

View bveeramani's full-sized avatar

Balaji Veeramani bveeramani

  • Anyscale
  • Berkeley, California
  • X @B4110G
View GitHub Profile
"""Checks which pull requests contain merge conflicts with Blackened code."""
import os
import shlex
import subprocess
import tempfile
import github
import tqdm
@bveeramani
bveeramani / format-changed.sh
Last active January 20, 2022 16:50
Run this script to format your changed Python files with Black.
#!/usr/bin/env bash
# Move to the repository's top-level directory.
# The path is resolved first so a failure of `git rev-parse` (e.g. when run
# outside a git work tree) aborts the script instead of silently changing to
# some other directory; it is then quoted so paths containing spaces are not
# word-split.
toplevel_dir="$(git rev-parse --show-toplevel)" || exit 1
cd "$toplevel_dir" || exit 1
# Black version this script requires.
BLACK_VERSION_REQUIRED="21.12b0"
# Check if Black exists
if ! [ -x "$(command -v black)" ]; then
echo "Black is not installed."
@bveeramani
bveeramani / comment.py
Last active March 11, 2022 07:36
Script to comment on all open Ray pull requests.
import time
from github import Github
MESSAGE = """
## :bangbang: ACTION REQUIRED :bangbang:
We've switched our code formatter from YAPF to Black (see #21311).
To prevent issues with merging your code, here's what you'll need to do:
@bveeramani
bveeramani / download_imagenet.sh
Last active March 6, 2022 19:49
A shell script that downloads ImageNet
# Download raw dataset
pip install --upgrade kaggle
mkdir -p ~/.kaggle
echo '{"username":"...","key":"..."}' > ~/.kaggle/kaggle.json
# Keep the API credentials readable only by the current user.
chmod 600 ~/.kaggle/kaggle.json
kaggle competitions download -c imagenet-object-localization-challenge -f imagenet_object_localization_patched2019.tar.gz
# Extract images
mkdir -p ~/data
# Move the archive into ~/data (the directory created above and entered
# below). A bare relative "data" only resolves there when the script is run
# from the home directory.
mv imagenet_object_localization_patched2019.tar.gz ~/data
cd ~/data
from typing import Dict
import pandas as pd
import pyarrow
import ray
import tensorflow as tf
from ray.data.block import Block
from ray.data.datasource.file_based_datasource import FileBasedDatasource
import torch.nn as nn
from ray.air import session
from ray.air.config import ScalingConfig
from ray.train.torch import TorchCheckpoint, TorchTrainer
class Identity(nn.Module):
    """Module whose forward pass returns its input unchanged."""

    def forward(self, x):
        """Return ``x`` as-is, without copying or modifying it."""
        return x
@bveeramani
bveeramani / output.txt
Created September 11, 2022 06:26
read_tf_records bug
============================= test session starts ==============================
platform darwin -- Python 3.9.12, pytest-7.1.2, pluggy-1.0.0
rootdir: /Users/bveeramani/GitHub/ray/python
plugins: anyio-3.6.1, lazy-fixture-0.6.3
collected 1 item
python/ray/data/tests/test_dataset_formats.py F [100%]
=================================== FAILURES ===================================
_____________________________ test_read_tf_records _____________________________
============================= test session starts ==============================
platform darwin -- Python 3.9.12, pytest-7.1.2, pluggy-1.0.0
rootdir: /Users/bveeramani/GitHub/ray/python
plugins: anyio-3.6.1, lazy-fixture-0.6.3
collected 3 items
python/ray/data/tests/test_dataset_formats.py FFF [100%]
=================================== FAILURES ===================================
____________ TestReadImages.test_data_size_estimate[64-RGB-30000-4] ____________
(_split_single_block pid=8869) Traceback (most recent call last):
(_split_single_block pid=8869) File "python/ray/_raylet.pyx", line 865, in ray._raylet.execute_task
(_split_single_block pid=8869) with ray._private.worker._changeproctitle(title, next_title):
(_split_single_block pid=8869) File "python/ray/_raylet.pyx", line 869, in ray._raylet.execute_task
(_split_single_block pid=8869) outputs = function_executor(*args, **kwargs)
(_split_single_block pid=8869) File "/Users/balaji/Documents/GitHub/ray/python/ray/data/_internal/split.py", line 121, in _split_single_block
(_split_single_block pid=8869) split_block = block_accessor.slice(prev_index, index, copy=True)
(_split_single_block pid=8869) File "/Users/balaji/Documents/GitHub/ray/python/ray/data/_internal/arrow_block.py", line 175, in slice
(_split_single_block pid=8869) view = _copy_table(view)
(_split_single_block pid=8869) File "/Users/balaji/Documents/GitHub/ray/python/ray/data/_internal/arrow_block.py", line 618, in _copy_t
import numpy as np
import ray
from ray.data.preprocessors import BatchMapper
# Read the images under the local COCO val2017 directory, requesting RGB mode
# and asking for each image's path to be included (include_paths=True).
ds = ray.data.read_images("/Users/balaji/Datasets/COCO/val2017", mode="RGB", include_paths=True)
# NOTE(review): presumably forces the dataset to be computed now rather than
# lazily — confirm against the Ray Data documentation for this Ray version.
ds.fully_executed()
def fn(batch: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
    """Return ``batch`` unchanged (identity function over a batch)."""
    return batch