Sabur B saburbutt

## Reading_multiple_csv_files.txt
import pandas as pd
from pathlib import Path
from glob import glob

# Gather the paths of all 'yob*.csv' files under Files/ in sorted order.
allfiles = glob('Files/yob*.csv')
allfiles.sort()
print(allfiles)
# Current user's home directory, kept as a plain string.
home = str(Path.home())

# The * in the path is a wildcard: it matches the part of the file name that differs from file to file.
# Read the data from every matched CSV file and concatenate it.

## Model Parameters for extractive QA with SQuAD
 SpanBert Parameter Details:  attention probs dropout_prob: 0.1, directionality: bidi, hidden_act: gelu,
 hidden dropout prob: 0.1, hidden size: 1024, initializer range: 0.02, intermediate size: 4096,
 layer norm eps: 1e-12, max position embeddings: 512, num attention heads: 16, num hidden layers: 24,
 pad token id: 0, pooler fc size: 768, pooler num attention heads: 12, pooler num fc layers: 3,
 pooler size per head: 128, pooler type: "first_token_transform", type vocab size: 2, vocab size: 28996

 Albert Parameter Details: attention probs dropout prob: 0, bos token id: 2, classifier dropout prob: 0.1,
 down scale factor: 1, embedding size: 128, eos token id: 3, gap size: 0, hidden act: "gelu",
 hidden dropout prob: 0, hidden size: 4096, initializer range: 0.02, inner group num: 1,
 intermediate size: 16384, layer norm eps: 1e-12, max position embeddings: 512, net_structure_type: 0,

## median_of_two_arrays
"""1. You are interested in analyzing some hard-to-obtain data from two separate databases.
Each database contains n numerical values—so there are 2n values total—and you may assume that no two values are the same.
You’d like to determine the median of this set of 2n values, which we will define here to be the nth smallest value.
However, the only way you can access these values is through queries to the databases.
In a single query, you can specify a value k to one of the two databases, and the chosen database will return the kth smallest value that it contains.
Since queries are expensive, you would like to compute the median using as few queries as possible.
Give an algorithm that finds the median value using at most O(log n) queries."""
import statistics
# First create two databases of numeric values, named database1 and database2
from tkinter import *

## Lyricsgenerationupdated.txt
from typing import Dict, Any
from html.parser import HTMLParser
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import nltk
import pandas as pd
from requests import get
from bs4 import BeautifulSoup
import random, re #re helps with the text formating
from matplotlib import pyplot as plt

## MarkovModelCodeinR.txt
library(markovchain)
# States of the weather chain; they label both the rows and the columns.
weatherStates <- c("sunny", "cloudy", "rain")
byRow <- TRUE
# Transition probabilities filled row by row; each row sums to 1.
weatherMatrix <- matrix(
  data = c(0.50, 0.25, 0.25,
           0.5, 0.1, 0.4,
           0.1, 0.7, 0.2),
  nrow = 3,
  byrow = byRow,
  dimnames = list(weatherStates, weatherStates)
)
# Wrap the states and the transition matrix in a markovchain object.
mcWeather <- new(
  "markovchain",
  states = weatherStates,
  byrow = byRow,
  transitionMatrix = weatherMatrix,
  name = "Weather"
)
	import pandas as pd
	from pathlib import Path
	from glob import glob

	# Gather the paths of all 'yob*.csv' files under Files/ in sorted order.
	allfiles = glob('Files/yob*.csv')
	allfiles.sort()
	print(allfiles)
	# Current user's home directory, kept as a plain string.
	home = str(Path.home())

	# The * in the path is a wildcard: it matches the part of the file name that differs from file to file.
	# Read the data from every matched CSV file and concatenate it.
	SpanBert Parameter Details: attention probs dropout_prob: 0.1, directionality: bidi, hidden_act: gelu,
	hidden dropout prob: 0.1, hidden size: 1024, initializer range: 0.02, intermediate size: 4096,
	layer norm eps: 1e-12, max position embeddings: 512, num attention heads: 16, num hidden layers: 24,
	pad token id: 0, pooler fc size: 768, pooler num attention heads: 12, pooler num fc layers: 3,
	pooler size per head: 128, pooler type: "first_token_transform", type vocab size: 2, vocab size: 28996

	Albert Parameter Details: attention probs dropout prob: 0, bos token id: 2, classifier dropout prob: 0.1,
	down scale factor: 1, embedding size: 128, eos token id: 3, gap size: 0, hidden act: "gelu",
	hidden dropout prob: 0, hidden size: 4096, initializer range: 0.02, inner group num: 1,
	intermediate size: 16384, layer norm eps: 1e-12, max position embeddings: 512, net_structure_type: 0,
	"""1. You are interested in analyzing some hard-to-obtain data from two separate databases.
	Each database contains n numerical values—so there are 2n values total—and you may assume that no two values are the same.
	You’d like to determine the median of this set of 2n values, which we will define here to be the nth smallest value.
	However, the only way you can access these values is through queries to the databases.
	In a single query, you can specify a value k to one of the two databases, and the chosen database will return the kth smallest value that it contains.
	Since queries are expensive, you would like to compute the median using as few queries as possible.
	Give an algorithm that finds the median value using at most O(log n) queries."""
	import statistics
	# First create two databases of numeric values, named database1 and database2
	from tkinter import *
	from typing import Dict, Any
	from html.parser import HTMLParser
	from nltk.tokenize import sent_tokenize, word_tokenize
	from nltk.corpus import stopwords
	import nltk
	import pandas as pd
	from requests import get
	from bs4 import BeautifulSoup
	import random, re #re helps with the text formating
	from matplotlib import pyplot as plt
	library(markovchain)
	# States of the weather chain; they label both the rows and the columns.
	weatherStates <- c("sunny", "cloudy", "rain")
	byRow <- TRUE
	# Transition probabilities filled row by row; each row sums to 1.
	weatherMatrix <- matrix(
	  data = c(0.50, 0.25, 0.25,
	           0.5, 0.1, 0.4,
	           0.1, 0.7, 0.2),
	  nrow = 3,
	  byrow = byRow,
	  dimnames = list(weatherStates, weatherStates)
	)
	# Wrap the states and the transition matrix in a markovchain object.
	mcWeather <- new(
	  "markovchain",
	  states = weatherStates,
	  byrow = byRow,
	  transitionMatrix = weatherMatrix,
	  name = "Weather"
	)