Zaid Khan codezakh

## blip2_confidence_snippet.py
outputs = blip2.generate(
    pixel_values=pixel_values,
    input_ids=input_ids,
    attention_mask=attention_mask,
    do_sample=False,
    num_beams=5,
    max_new_tokens=10,
    min_length=1,
    length_penalty=-1,
    return_dict_in_generate=True,

## homogenous_batching.py
class CustomBatchSampler(Sampler):
    def __init__(self, batch_size, dataset):
        self.sampler = SequentialSampler(dataset)
        self.batch_size = batch_size
        self.drop_last = False
        self.dataset = dataset
    def __iter__(self):
        batch = []
        for idx in self.sampler:
            batch.append(idx)

## random_prediction.py
import pandas as pd
import numpy as np

# Load the validation pairs, keyed by the CSV's "index" column.
val_pairs = pd.read_csv('val_pairs.csv').set_index('index')

# Randomly predict {0, 1} for each pair (the upper bound 2 is exclusive).
random_predictions = np.random.randint(0, 2, size=len(val_pairs))

# Add the predictions as a column named "label".
# Assign the array directly so values are placed positionally. Wrapping it in
# pd.Series(...) would give it a fresh 0..n-1 index, and pandas would align
# that against val_pairs' index labels on assignment, leaving NaN in every
# row whose label falls outside that range.
val_pairs['label'] = random_predictions

## microsoftOCRsample.json
{
  "language": "en",
  "textAngle": 0.0,
  "orientation": "Up",
  "regions": [
    {
      "boundingBox": "22,25,1389,1863",
      "lines": [
        {
          "boundingBox": "22,25,1389,49",
	outputs = blip2.generate(
	pixel_values=pixel_values,
	input_ids=input_ids,
	attention_mask=attention_mask,
	do_sample=False,
	num_beams=5,
	max_new_tokens=10,
	min_length=1,
	length_penalty=-1,
	return_dict_in_generate=True,
	class CustomBatchSampler(Sampler):
	def __init__(self, batch_size, dataset):
	self.sampler = SequentialSampler(dataset)
	self.batch_size = batch_size
	self.drop_last = False
	self.dataset = dataset
	def __iter__(self):
	batch = []
	for idx in self.sampler:
	batch.append(idx)
	import pandas as pd
	import numpy as np

	# Load the validation pairs, keyed by the CSV's "index" column.
	val_pairs = pd.read_csv('val_pairs.csv').set_index('index')

	# Randomly predict {0, 1} for each pair (the upper bound 2 is exclusive).
	random_predictions = np.random.randint(0, 2, size=len(val_pairs))

	# Add the predictions as a column named "label".
	# Assign the array directly so values are placed positionally. Wrapping it
	# in pd.Series(...) would give it a fresh 0..n-1 index, and pandas would
	# align that against val_pairs' index labels on assignment, leaving NaN in
	# every row whose label falls outside that range.
	val_pairs['label'] = random_predictions
	{
	"language": "en",
	"textAngle": 0.0,
	"orientation": "Up",
	"regions": [
	{
	"boundingBox": "22,25,1389,1863",
	"lines": [
	{
	"boundingBox": "22,25,1389,49",