Michael Tu (tuzhucheng)

@tuzhucheng
tuzhucheng / mpcnn_watermill_queries.sql
Last active March 31, 2018 07:46
Watermill Queries for MP-CNN
-- Get experiment group name and counts for a dataset
select name, count(*)
from experiments e
join experiment_groups g on e.group_id = g.gid
where json_extract(args, '$.dataset') = 'wikiqa'
group by group_id;

-- Above query, but also grouped by Hyperband epochs
select name, json_extract(args, '$.epochs') as epochs, count(*)
from experiments e
join experiment_groups g on e.group_id = g.gid
where json_extract(args, '$.dataset') = 'wikiqa'
group by group_id, epochs;

-- Best performance regardless of search method
select name, json_extract(dev_metric, '$.map') as map, json_extract(args, '$.epochs') as epochs
from experiments e
join experiment_groups g on e.group_id = g.gid
where json_extract(args, '$.dataset') = 'wikiqa'
order by map desc
limit 20;
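These queries can be exercised end-to-end with Python's `sqlite3` module. A minimal sketch follows; the schema and sample rows are assumptions (the real watermill schema is not shown in the gist), but they demonstrate the `json_extract` pattern the queries rely on:

```python
import json
import sqlite3

conn = sqlite3.connect(":memory:")
# Assumed schema: args and dev_metric are JSON stored as TEXT.
conn.executescript("""
CREATE TABLE experiment_groups (gid INTEGER PRIMARY KEY, name TEXT);
CREATE TABLE experiments (id INTEGER PRIMARY KEY, group_id INTEGER,
                          args TEXT, dev_metric TEXT);
""")
conn.execute("INSERT INTO experiment_groups VALUES (1, 'random-search')")
conn.executemany(
    "INSERT INTO experiments (group_id, args, dev_metric) VALUES (?, ?, ?)",
    [(1, json.dumps({"dataset": "wikiqa", "epochs": 5}), json.dumps({"map": 0.65})),
     (1, json.dumps({"dataset": "wikiqa", "epochs": 10}), json.dumps({"map": 0.70}))])

# First query from the gist: counts per experiment group for one dataset.
rows = conn.execute("""
SELECT name, COUNT(*)
FROM experiments e
JOIN experiment_groups g ON e.group_id = g.gid
WHERE json_extract(args, '$.dataset') = 'wikiqa'
GROUP BY group_id
""").fetchall()
print(rows)  # [('random-search', 2)]
```

This requires an SQLite build with the JSON1 functions, which recent Python distributions include.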
{
  "repositories": [
    {
      "url": "https://github.com/apache/kafka.git",
      "branch": "trunk"
    },
    {
      "url": "https://github.com/apache/storm.git",
      "branch": "master"
    },
@tuzhucheng
tuzhucheng / latex_code_snippet.tex
Created December 14, 2017 21:09
LaTeX code snippet from USENIX latex template
{\tt \small
\begin{verbatim}
int wrap_fact(ClientData clientData,
              Tcl_Interp *interp,
              int argc, char *argv[]) {
    int result;
    int arg0;

    if (argc != 2) {
        interp->result = "wrong # args";
        return TCL_ERROR;

Generating Flame Graphs for Apache Spark

Flame graphs are a nifty debugging tool for determining where CPU time is being spent. Using the Java Flight Recorder, you can generate them for Java processes without adding significant runtime overhead.

When are flame graphs useful?

Shivaram Venkataraman and I have found these flame recordings to be useful for diagnosing coarse-grained performance problems. We started using them at the suggestion of Josh Rosen, who quickly made one for the Spark scheduler when we were talking to him about why the scheduler caps out at a throughput of a few thousand tasks per second. Josh generated a graph similar to the one below, which illustrates that a significant amount of time is spent in serialization (if you click in the top right hand corner and search for "serialize", you can see that 78.6% of the sampled CPU time was spent in serialization). We used this insight to spee
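The recording workflow described above can be sketched as a sequence of commands. This is a command sketch, not from the original article: the Spark conf flag, the `jcmd` invocation, and the converter/tool names are assumptions based on the standard JFR + FlameGraph toolchain, and exact flags vary by JDK version:

```
# Enable JFR on Spark executors (JDK 8 commercial-features flags; assumption):
#   --conf spark.executor.extraJavaOptions="-XX:+UnlockCommercialFeatures -XX:+FlightRecorder"

# Start a 60-second recording against a running JVM (<pid> is a placeholder):
jcmd <pid> JFR.start duration=60s filename=recording.jfr

# Fold the recording into stack samples and render the SVG
# (converter and script names are assumptions):
jfr-flame-graph -f recording.jfr -o stacks.txt
flamegraph.pl stacks.txt > flamegraph.svg
```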

import torch
import torch.onnx
import torch.nn as nn
from torch.autograd import Variable

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.embedding = nn.Embedding(100, 300)

    def forward(self, x):
        # An embedding lookup appears as a Gather node in the exported graph
        return self.embedding(x)

inp = Variable(torch.LongTensor([1, 2, 3]))
torch.onnx.export(MyModel(), inp, 'embedding.onnx', verbose=True)
graph(%1 : Long(3)
      %2 : Float(100, 300)) {
  %3 : Float(3, 300) = Gather(%2, %1), uses = [%0.i0];
  return (%3);
}
@tuzhucheng
tuzhucheng / ONNX Graph of SM Model
Last active October 25, 2017 03:03
Verbose output of ONNX export of SM Model for ranking short text pairs
In PyTorch, this model is called with 3 arguments:
Arg 1: 1 x 50 x 20 FloatTensor
Arg 2: 1 x 50 x 19 FloatTensor
Arg 3: 1 x 4 FloatTensor
The first dimension is the batch size and the second is the number of input channels to Conv1d. The last dimension of args 1 and 2 varies with the sentence length.
graph(%1 : Float(1, 50, 20)
      %2 : Float(1, 50, 19)
      %3 : Float(1, 4)
@tuzhucheng
tuzhucheng / eduroam-linux-UoW.sh
Last active April 7, 2022 12:42
Setup eduroam on RPi
#!/usr/bin/env bash
# script obtained from https://cat.eduroam.org/
if [ -z "$BASH" ] ; then
    bash "$0"
    exit
fi
@tuzhucheng
tuzhucheng / cs241-binasm-test-framework.py
Last active December 24, 2015 18:49
This gist compares the MIPS machine code produced by your own Asm.java with the output of binasm and checks that they are identical. Used for testing purposes.
import os
import fnmatch
import subprocess
import sys
import getopt
print("CS241 A3 A4 Automated Python Test Framework")
print("Compiling Asm...")
subprocess.call(["javac", "Asm.java"])
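The gist is truncated after the compile step; presumably it goes on to run both assemblers on each test case and diff their outputs. A minimal sketch of that comparison, where the command names and the stdin/stdout conventions are assumptions about the CS241 toolchain:

```python
import filecmp
import subprocess

def outputs_match(path_a, path_b):
    # shallow=False forces a byte-for-byte content comparison,
    # not just a stat() check.
    return filecmp.cmp(path_a, path_b, shallow=False)

def check_test_case(asm_file):
    # Hypothetical invocation: both assemblers read MIPS assembly on
    # stdin and write machine code on stdout (assumption -- the real
    # gist is cut off before this point).
    with open(asm_file) as src, open("mine.out", "wb") as out:
        subprocess.call(["java", "Asm"], stdin=src, stdout=out)
    with open(asm_file) as src, open("ref.out", "wb") as out:
        subprocess.call(["binasm"], stdin=src, stdout=out)
    return outputs_match("mine.out", "ref.out")
```

`outputs_match` is the part worth keeping even if the invocation details differ; a byte-level diff is exactly what "see if they are the same" requires for binary machine-code output.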