arjunguha/check_multiple_r.ipynb

## check_multiple_r.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datasets\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found cached dataset multi_pl-e (/home/arjun/.cache/huggingface/datasets/nuprl___multi_pl-e/humaneval-r/2.1.0/523fc75edb4d4e1207d2e94788ccff537d389f71947b21c296ae53c48bb044e5)\n"
     ]
    }
   ],
   "source": [
    "ds = datasets.load_dataset(\"nuprl/MultiPL-E\", \"humaneval-r\", split=\"test\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# There are eight planets in our solar system: the closerst to the Sun \n",
      "# is Mercury, the next one is Venus, then Earth, Mars, Jupiter, Saturn, \n",
      "# Uranus, Neptune.\n",
      "# Write a function that takes two planet names as strings planet1 and planet2. \n",
      "# The function should return a list containing all planets whose orbits are \n",
      "# located between the orbit of planet1 and the orbit of planet2, sorted by \n",
      "# the proximity to the sun. \n",
      "# The function should return an empty list if planet1 or planet2\n",
      "# are not correct planet names. \n",
      "# Examples\n",
      "# >>> bf('Jupiter', 'Neptune')\n",
      "# c('Saturn', 'Uranus')\n",
      "# >>> bf('Earth', 'Mercury')\n",
      "# 'Venus'\n",
      "# >>> bf('Mercury', 'Uranus')\n",
      "# c('Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn')\n",
      "bf <- function(planet1, planet2) {\n",
      "*****\n",
      "# Given a vector of non-negative integers, return a cor of the given vector after sorting,\n",
      "# you will sort the given vector in ascending order if the sum( first index value, last index value) is odd,\n",
      "# or sort it in descending order if the sum( first index value, last index value) is even.\n",
      "# Note:\n",
      "# * don't change the given vector.\n",
      "# Examples:\n",
      "# >>> sort_array(c())\n",
      "# c()\n",
      "# >>> sort_array(c(5))\n",
      "# c(5)\n",
      "# >>> sort_array(c(2, 4, 3, 0, 1, 5))\n",
      "# c(0, 1, 2, 3, 4, 5)\n",
      "# >>> sort_array(c(2, 4, 3, 0, 1, 5, 6))\n",
      "# c(6, 5, 4, 3, 2, 1, 0)\n",
      "sort_array <- function(array) {\n",
      "*****\n",
      "# Out of list of strings, return the longest one. Return the first one in case of multiple\n",
      "# strings of the same length. Return NULL in case the input list is empty.\n",
      "# >>> longest(c())\n",
      "# NULL\n",
      "# >>> longest(c('a', 'b', 'c'))\n",
      "# 'a'\n",
      "# >>> longest(c('a', 'bb', 'ccc'))\n",
      "# 'ccc'\n",
      "longest <- function(strings) {\n"
     ]
    }
   ],
   "source": [
    "print(\"\\n*****\\n\".join(ds[random.sample(list(range(0, len(ds))), 3)][\"prompt\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading readme: 100%|██████████| 668/668 [00:00<00:00, 1.54MB/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading and preparing dataset None/None to /home/arjun/.cache/huggingface/datasets/nuprl-staging___parquet/nuprl-staging--multiplt-r-cd02647c22d67544/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading data: 100%|██████████| 21.9M/21.9M [00:05<00:00, 4.20MB/s]\n",
      "Downloading data files: 100%|██████████| 1/1 [00:06<00:00,  6.71s/it]\n",
      "Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 835.19it/s]\n",
      "                                                                                        "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset parquet downloaded and prepared to /home/arjun/.cache/huggingface/datasets/nuprl-staging___parquet/nuprl-staging--multiplt-r-cd02647c22d67544/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r"
     ]
    }
   ],
   "source": [
    "ds1 = datasets.load_dataset(\"nuprl-staging/multiplt-r\", split=\"train\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# Accelerate the car while clipping to a velocity range\n",
      "# Args:\n",
      "#     v (int): starting velocity\n",
      "#     a (int): acceleration\n",
      "#     v_range (list): min and max velocity\n",
      "# Returns:\n",
      "#     (int): velocity, clipped to min/max v_range\n",
      "accel_within_limits <- function(v, a, v_range) {\n",
      "    v <- v + a\n",
      "    v <- max(v, v_range[1])\n",
      "    v <- min(v, v_range[2])\n",
      "    return(v)\n",
      "}\n",
      "****\n",
      "# >>> should_I_care(\"I don't care\")\n",
      "# TRUE\n",
      "should_I_care <- function(text) {\n",
      "    text <- tolower(text)\n",
      "    text == \"i don't care\"\n",
      "}\n",
      "****\n",
      "#  Converts modules to a map with resources to keep them outside of module jars \n",
      "convert_modules_to_external_resources <- function(buck_modules, modules_with_resources) {\n",
      "  result <- list()\n",
      "  for (module in modules_with_resources) {\n",
      "    result[paste(\"buck-modules-resources/\", module, sep=\"\")] <- paste(buck_modules[module], \"_resources\", sep=\"\")\n",
      "  }\n",
      "  return(result)\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "print(\"\\n****\\n\".join(ds1[random.sample(range(0, len(ds1)), 3)][\"content\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found cached dataset json (/home/arjun/.cache/huggingface/datasets/json/default-cf14b75870e39d9a/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)\n"
     ]
    }
   ],
   "source": [
    "ds2 = datasets.load_dataset(\"json\", data_files=\"https://raw.githubusercontent.com/nuprl/MultiPL-E/main/prompts/humaneval-r-reworded.jsonl\", split=\"train\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# Write a function vowels_count which takes a string representing\n",
      "a word as input and returns the number of vowels in the string.\n",
      "Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a\n",
      "vowel, but only when it is at the end of the given word.\n",
      "Example:\n",
      ">>> vowels_count('abcde')\n",
      "2\n",
      ">>> vowels_count('ACEDY')\n",
      "3\n",
      "vowels_count <- function(s) {\n",
      "\n",
      "****\n",
      "\n",
      "# Return the largest prime factor of n. Assume n > 1 and is not a prime.\n",
      ">>> largest_prime_factor(13195)\n",
      "29\n",
      ">>> largest_prime_factor(2048)\n",
      "2\n",
      "largest_prime_factor <- function(n) {\n",
      "\n",
      "****\n",
      "\n",
      "# Given a vector arr of integers and a positive integer k, return a sorted list \n",
      "of length k with the maximum k numbers in arr.\n",
      "Example 1:\n",
      ">>> maximum(c(-3, -4, 5), 3)\n",
      "c(-4, -3, 5)\n",
      "Example 2:\n",
      ">>> maximum(c(4, -4, 4), 2)\n",
      "c(4, 4)\n",
      "Example 3:\n",
      ">>> maximum(c(-3, 2, 1, 2, -1, -2, 1), 1)\n",
      "c(2)\n",
      "Note:\n",
      "# 1. The length of the vector will be in the range of [1, 1000].\n",
      "# 2. The elements in the vector will be in the range of [-1000, 1000].\n",
      "# 3. 0 <= k <= len(arr)\n",
      "maximum <- function(arr, k) {\n"
     ]
    }
   ],
   "source": [
    "print(\"\\n\\n****\\n\\n\".join(ds2[random.sample(range(0, len(ds2)), 3)][\"prompt\"]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"import datasets\n",
	"import random"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Found cached dataset multi_pl-e (/home/arjun/.cache/huggingface/datasets/nuprl___multi_pl-e/humaneval-r/2.1.0/523fc75edb4d4e1207d2e94788ccff537d389f71947b21c296ae53c48bb044e5)\n"
	]
	}
	],
	"source": [
	"ds = datasets.load_dataset(\"nuprl/MultiPL-E\", \"humaneval-r\", split=\"test\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"# There are eight planets in our solar system: the closerst to the Sun \n",
	"# is Mercury, the next one is Venus, then Earth, Mars, Jupiter, Saturn, \n",
	"# Uranus, Neptune.\n",
	"# Write a function that takes two planet names as strings planet1 and planet2. \n",
	"# The function should return a list containing all planets whose orbits are \n",
	"# located between the orbit of planet1 and the orbit of planet2, sorted by \n",
	"# the proximity to the sun. \n",
	"# The function should return an empty list if planet1 or planet2\n",
	"# are not correct planet names. \n",
	"# Examples\n",
	"# >>> bf('Jupiter', 'Neptune')\n",
	"# c('Saturn', 'Uranus')\n",
	"# >>> bf('Earth', 'Mercury')\n",
	"# 'Venus'\n",
	"# >>> bf('Mercury', 'Uranus')\n",
	"# c('Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn')\n",
	"bf <- function(planet1, planet2) {\n",
	"*****\n",
	"# Given a vector of non-negative integers, return a cor of the given vector after sorting,\n",
	"# you will sort the given vector in ascending order if the sum( first index value, last index value) is odd,\n",
	"# or sort it in descending order if the sum( first index value, last index value) is even.\n",
	"# Note:\n",
	"# * don't change the given vector.\n",
	"# Examples:\n",
	"# >>> sort_array(c())\n",
	"# c()\n",
	"# >>> sort_array(c(5))\n",
	"# c(5)\n",
	"# >>> sort_array(c(2, 4, 3, 0, 1, 5))\n",
	"# c(0, 1, 2, 3, 4, 5)\n",
	"# >>> sort_array(c(2, 4, 3, 0, 1, 5, 6))\n",
	"# c(6, 5, 4, 3, 2, 1, 0)\n",
	"sort_array <- function(array) {\n",
	"*****\n",
	"# Out of list of strings, return the longest one. Return the first one in case of multiple\n",
	"# strings of the same length. Return NULL in case the input list is empty.\n",
	"# >>> longest(c())\n",
	"# NULL\n",
	"# >>> longest(c('a', 'b', 'c'))\n",
	"# 'a'\n",
	"# >>> longest(c('a', 'bb', 'ccc'))\n",
	"# 'ccc'\n",
	"longest <- function(strings) {\n"
	]
	}
	],
	"source": [
	"print(\"\\n*****\\n\".join(ds[random.sample(list(range(0, len(ds))), 3)][\"prompt\"]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Downloading readme: 100%|██████████| 668/668 [00:00<00:00, 1.54MB/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Downloading and preparing dataset None/None to /home/arjun/.cache/huggingface/datasets/nuprl-staging___parquet/nuprl-staging--multiplt-r-cd02647c22d67544/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7...\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Downloading data: 100%|██████████| 21.9M/21.9M [00:05<00:00, 4.20MB/s]\n",
	"Downloading data files: 100%|██████████| 1/1 [00:06<00:00,  6.71s/it]\n",
	"Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 835.19it/s]\n",
	" "
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Dataset parquet downloaded and prepared to /home/arjun/.cache/huggingface/datasets/nuprl-staging___parquet/nuprl-staging--multiplt-r-cd02647c22d67544/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7. Subsequent calls will reuse this data.\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\r"
	]
	}
	],
	"source": [
	"ds1 = datasets.load_dataset(\"nuprl-staging/multiplt-r\", split=\"train\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"# Accelerate the car while clipping to a velocity range\n",
	"# Args:\n",
	"#     v (int): starting velocity\n",
	"#     a (int): acceleration\n",
	"#     v_range (list): min and max velocity\n",
	"# Returns:\n",
	"#     (int): velocity, clipped to min/max v_range\n",
	"accel_within_limits <- function(v, a, v_range) {\n",
	"    v <- v + a\n",
	"    v <- max(v, v_range[1])\n",
	"    v <- min(v, v_range[2])\n",
	"    return(v)\n",
	"}\n",
	"****\n",
	"# >>> should_I_care(\"I don't care\")\n",
	"# TRUE\n",
	"should_I_care <- function(text) {\n",
	"    text <- tolower(text)\n",
	"    text == \"i don't care\"\n",
	"}\n",
	"****\n",
	"#  Converts modules to a map with resources to keep them outside of module jars \n",
	"convert_modules_to_external_resources <- function(buck_modules, modules_with_resources) {\n",
	"  result <- list()\n",
	"  for (module in modules_with_resources) {\n",
	"    result[paste(\"buck-modules-resources/\", module, sep=\"\")] <- paste(buck_modules[module], \"_resources\", sep=\"\")\n",
	"  }\n",
	"  return(result)\n",
	"}\n"
	]
	}
	],
	"source": [
	"print(\"\\n****\\n\".join(ds1[random.sample(range(0, len(ds1)), 3)][\"content\"]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Found cached dataset json (/home/arjun/.cache/huggingface/datasets/json/default-cf14b75870e39d9a/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)\n"
	]
	}
	],
	"source": [
	"ds2 = datasets.load_dataset(\"json\", data_files=\"https://raw.githubusercontent.com/nuprl/MultiPL-E/main/prompts/humaneval-r-reworded.jsonl\", split=\"train\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 33,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"# Write a function vowels_count which takes a string representing\n",
	"a word as input and returns the number of vowels in the string.\n",
	"Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a\n",
	"vowel, but only when it is at the end of the given word.\n",
	"Example:\n",
	">>> vowels_count('abcde')\n",
	"2\n",
	">>> vowels_count('ACEDY')\n",
	"3\n",
	"vowels_count <- function(s) {\n",
	"\n",
	"****\n",
	"\n",
	"# Return the largest prime factor of n. Assume n > 1 and is not a prime.\n",
	">>> largest_prime_factor(13195)\n",
	"29\n",
	">>> largest_prime_factor(2048)\n",
	"2\n",
	"largest_prime_factor <- function(n) {\n",
	"\n",
	"****\n",
	"\n",
	"# Given a vector arr of integers and a positive integer k, return a sorted list \n",
	"of length k with the maximum k numbers in arr.\n",
	"Example 1:\n",
	">>> maximum(c(-3, -4, 5), 3)\n",
	"c(-4, -3, 5)\n",
	"Example 2:\n",
	">>> maximum(c(4, -4, 4), 2)\n",
	"c(4, 4)\n",
	"Example 3:\n",
	">>> maximum(c(-3, 2, 1, 2, -1, -2, 1), 1)\n",
	"c(2)\n",
	"Note:\n",
	"# 1. The length of the vector will be in the range of [1, 1000].\n",
	"# 2. The elements in the vector will be in the range of [-1000, 1000].\n",
	"# 3. 0 <= k <= len(arr)\n",
	"maximum <- function(arr, k) {\n"
	]
	}
	],
	"source": [
	"print(\"\\n\\n****\\n\\n\".join(ds2[random.sample(range(0, len(ds2)), 3)][\"prompt\"]))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "venv",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}