ieb/testUsingTensorFlow.html

## testcompute.html
<html>
<head>

</head>
<body>
This is not doing a real search. Look at the JavaScript. It generates a dataset representing a preprocessed set of tensors
representing pages or sentences. This happens during page load, as if it had been downloaded from a server. Then, when the button below
is pressed, a tensor representing the search sentence is randomly generated and its cosine similarity (normalised Euclidean dot product) with every dataset entry is calculated.
In the JavaScript console you should get an idea of how many matches > 0.2 were found, and how long it took, as it will typically
take longer than Chrome likes an event handler to take.  On my ancient laptop, I see about 200ms for 10K 1024 length vectors.
This shows the approach is potentially viable from a compute pov..... however the dataset for the pages is 40MB uncompressed, which is not viable,
and with 1024 from an untrained small language model, the results will probably work for general language, but not domain specific vocab.

	<button id="searchbutton" >Run Search</button>

	<script type="text/javascript">
// shamelessly copied from https://stackoverflow.com/questions/51362252/javascript-cosine-similarity-function
/**
 * Computes the dot product of two numeric vectors.
 *
 * Iterates over the indices of `x`, so `y` is expected to be at least as long
 * as `x`; a shorter `y` yields NaN because the missing element multiplies as
 * `undefined`. Products are accumulated left to right starting from 0, so an
 * empty `x` returns 0.
 *
 * @param {number[]} x - First vector; its length drives the iteration.
 * @param {number[]} y - Second vector.
 * @returns {number} Sum of x[i] * y[i] over every index of x.
 */
function dotp(x, y) {
  // A plain accumulator loop avoids allocating a temporary array of products
  // on every call; cosineSimilarity calls this three times per comparison.
  let sum = 0;
  for (let i = 0; i < x.length; i++) {
    sum += x[i] * y[i];
  }
  return sum;
}

/**
 * Cosine similarity of two vectors: their dot product divided by the product
 * of their Euclidean lengths.
 *
 * @param {number[]} A - First vector.
 * @param {number[]} B - Second vector.
 * @returns {number} dotp(A, B) / (|A| * |B|); NaN when a vector is all zeros.
 */
function cosineSimilarity(A,B){
  const numerator = dotp(A, B);
  const lengthA = Math.sqrt(dotp(A, A));
  const lengthB = Math.sqrt(dotp(B, B));
  return numerator / (lengthA * lengthB);
}
//end of copy.

/**
 * Builds an array of `length` pseudo-random numbers. Each entry is
 * (Math.random() - 0.5) * range, i.e. centred on zero and spanning `range`.
 *
 * @param {number} range - Total width of the interval the values are drawn from.
 * @param {number} length - Number of entries to generate.
 * @returns {number[]} The generated vector.
 */
function createTensor(range, length) {
	const values = [];
	let index = 0;
	while (index < length) {
		values[index] = (Math.random() - 0.5) * range;
		index += 1;
	}
	return values;
}

// Dimension of every generated vector.
const tensorLength = 784;
// Simulated pre-computed index: 10000 random vectors built at page load, as if
// they had been downloaded from a server. search() reads this script-level variable.
var dataset = [];
for (let i = 0; i < 10000; i++) {
	// Use the named constant rather than repeating the literal, so the
	// dimension is defined in exactly one place.
	dataset.push(createTensor(1.0E8, tensorLength));
}

console.log("Generated dataset ",dataset, dataset.length);

/**
 * Simulates one search: generates a random query vector, compares it with
 * every vector in `dataset` by cosine similarity, and logs how many scored
 * above 0.2.
 */
function search() {
	// Reuse tensorLength so the query always has the same dimension as the
	// constant declared for the dataset, instead of repeating the literal.
	const searchTensor = createTensor(1.0E8, tensorLength);
	let nmatches = 0;
	for (const candidate of dataset) {
		if (cosineSimilarity(searchTensor, candidate) > 0.2) {
			nmatches++;
		}
	}
	console.log("Matches",nmatches);
}

// Each click on the "Run Search" button runs one simulated search.
document
	.querySelector("#searchbutton")
	.addEventListener("click", () => {
		search();
	});

	</script>
</body>
</html>

## testUsingTensorFlow.html
<html>
<head>

</head>
<body>
	<div>
  This tests creating a sentence similarity index using TensorFlow and then searching it.
  It takes about 10s to generate the index using content from www.hlx.live containing 77 pages, and 5s to query that data with
  1 sentence "Tell me about everything", indicating that a page in that site knows about everything.


		Go to <a href="https://www.hlx.live/docpages-index.json">https://www.hlx.live/docpages-index.json</a>,
			copy and paste the contents here, and click Load Sentences to create the index.
	 </div>

	<textarea name="sentences"> </textarea>
	<button id="load" >Load Sentences</button>

	<div> Then query</div>
	<input type="text" name="query" value="Tell me about everything you know" />
	<button id="searchbutton" >Run Search</button>

 <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
	<script type="text/javascript">

/**
 * Computes the dot product of two numeric vectors.
 *
 * Iterates over the indices of `x`, so `y` is expected to be at least as long
 * as `x`; a shorter `y` yields NaN because the missing element multiplies as
 * `undefined`. Products are accumulated left to right starting from 0, so an
 * empty `x` returns 0.
 *
 * @param {number[]} x - First vector; its length drives the iteration.
 * @param {number[]} y - Second vector.
 * @returns {number} Sum of x[i] * y[i] over every index of x.
 */
function dotp(x, y) {
  // Accumulate in place instead of building a temporary array of products
  // with map(); cosineSimilarity calls this three times per comparison.
  let total = 0;
  for (let i = 0; i < x.length; i++) {
    total += x[i] * y[i];
  }
  return total;
}

/**
 * Cosine similarity of two vectors: dotp(A, B) divided by the product of the
 * vectors' Euclidean lengths.
 *
 * @param {number[]} A - First vector.
 * @param {number[]} B - Second vector.
 * @returns {number} The similarity; NaN when a vector is all zeros.
 */
function cosineSimilarity(A,B){
  const dotProduct = dotp(A, B);
  const magnitudeA = Math.sqrt(dotp(A, A));
  const magnitudeB = Math.sqrt(dotp(B, B));
  return dotProduct / (magnitudeA * magnitudeB);
}

/**
 * Builds an array of `length` pseudo-random numbers, each computed as
 * (Math.random() - 0.5) * range.
 *
 * @param {number} range - Total width of the interval the values are drawn from.
 * @param {number} length - Number of entries to generate.
 * @returns {number[]} The generated vector.
 */
function createTensor(range, length) {
	const components = [];
	for (let slot = 0; slot < length; slot += 1) {
		const centred = Math.random() - 0.5;
		components.push(centred * range);
	}
	return components;
}

// Embedding vectors for the loaded pages: empty until loadSentences() replaces it, then read by search().
var dataset = [];


// Cached promise for the sentence-encoder model, so the model is loaded once
// and shared by every call instead of being re-loaded for each query.
let embeddingModelPromise = null;

/**
 * Embeds a list of sentences with the model returned by `use.load()` and
 * passes the result to `callback`.
 *
 * Failures (model load, embedding, or an exception thrown by `callback`) are
 * logged with console.error; `callback` is not invoked when loading or
 * embedding fails.
 *
 * @param {string[]} list_sentences - Sentences to embed.
 * @param {function} callback - Called with the value `model.embed()` resolves to.
 */
function get_embeddings(list_sentences, callback) {
    if (embeddingModelPromise === null) {
      embeddingModelPromise = use.load().catch(error => {
        // Forget a failed load so a later call can try again.
        embeddingModelPromise = null;
        throw error;
      });
    }
    embeddingModelPromise
      .then(model => model.embed(list_sentences))
      .then(embeddings => {
        callback(embeddings);
      })
      .catch(error => {
        console.error("get_embeddings failed", error);
      });
}

/**
 * Embeds `query`, compares it with every vector in `dataset` by cosine
 * similarity, and logs the number of vectors scoring above 0.2 together with
 * the elapsed time.
 *
 * @param {string} query - The sentence to search for.
 */
function search(query) {
	console.log("Start Query");
	const start = Date.now();
	get_embeddings([query], (embeddings) => {
		// One sentence was embedded, so the first row is the query vector.
		const searchTensor = embeddings.arraySync()[0];
		// The values have been copied into a plain array; release the tensor
		// so repeated searches do not accumulate tensor memory.
		embeddings.dispose();
		console.log(searchTensor);
		let nmatches = 0;
		for (const pageVector of dataset) {
			if (cosineSimilarity(searchTensor, pageVector) > 0.2) {
				nmatches++;
			}
		}
		console.log("Matches",nmatches);
		console.log("Took ",Date.now()-start,"ms");
	});
}
/**
 * Builds the search index: parses the pasted JSON, embeds the `content` of
 * every entry in its `data` array, and stores the resulting vectors in the
 * script-level `dataset`.
 *
 * The returned promise settles only after `dataset` has been replaced, so
 * callers can meaningfully `await` it.
 *
 * @param {string} sentences - JSON text of the form { "data": [ { "content": ... }, ... ] }.
 * @returns {Promise<void>}
 * @throws {SyntaxError} If `sentences` is not valid JSON.
 * @throws {TypeError} If the parsed JSON has no `data` array.
 */
async function loadSentences(sentences) {
	console.log("Start Create Index");
	const start = new Date();
	const s = JSON.parse(sentences);
	if (!s || !Array.isArray(s.data)) {
		throw new TypeError('Expected JSON with a "data" array of pages');
	}
	console.log(s.data);
	const l = s.data.map((page) => page.content);

	// get_embeddings is callback-based; wrap it so this function does not
	// resolve before the index exists. NOTE(review): if get_embeddings never
	// invokes the callback (e.g. the model fails to load), this stays pending.
	await new Promise((resolve, reject) => {
		get_embeddings(l,(embeddings) => {
			try {
				dataset = embeddings.arraySync();
				// The vectors are now plain arrays; release the tensor.
				embeddings.dispose();
				console.log("Dataset Now", dataset);
				const end = new Date();
				console.log("Took ",end.getTime()-start.getTime(),"ms");
				resolve();
			} catch (error) {
				reject(error);
			}
		});
	});

}
// Run a similarity query with the current contents of the query box.
document.querySelector("#searchbutton").addEventListener("click", () => {
	const queryText = document.querySelector('[name="query"]').value;
	search(queryText);
});
// Build the index from the pasted JSON when "Load Sentences" is clicked.
document.querySelector("#load").addEventListener("click", async () => {
	const pastedJson = document.querySelector('[name="sentences"]').value;
	try {
		await loadSentences(pastedJson);
	} catch (error) {
		// JSON.parse throws on an empty or malformed paste; report it here
		// rather than leaving an unhandled promise rejection.
		console.error("Could not load sentences", error);
	}
} );

	</script>
</body>
</html>
	<html>
	<head>

	</head>
	<body>
	This is not doing a real search. Look at the JavaScript. It generates a dataset representing a preprocessed set of tensors
	representing pages or sentences. This happens during page load, as if it had been downloaded from a server. Then, when the button below
	is pressed, a tensor representing the search sentence is randomly generated and its cosine similarity (normalised Euclidean dot product) with every dataset entry is calculated.
	In the JavaScript console you should get an idea of how many matches > 0.2 were found, and how long it took, as it will typically
	take longer than Chrome likes an event handler to take. On my ancient laptop, I see about 200ms for 10K 1024 length vectors.
	This shows the approach is potentially viable from a compute pov..... however the dataset for the pages is 40MB uncompressed, which is not viable,
	and with 1024 from an untrained small language model, the results will probably work for general language, but not domain specific vocab.

	<button id="searchbutton" >Run Search</button>

	<script type="text/javascript">
	// shamelessly copied from https://stackoverflow.com/questions/51362252/javascript-cosine-similarity-function
	/**
	 * Computes the dot product of two numeric vectors.
	 *
	 * Iterates over the indices of `x`, so `y` is expected to be at least as
	 * long as `x`; a shorter `y` yields NaN because the missing element
	 * multiplies as `undefined`. An empty `x` returns 0.
	 *
	 * @param {number[]} x - First vector; its length drives the iteration.
	 * @param {number[]} y - Second vector.
	 * @returns {number} Sum of x[i] * y[i] over every index of x.
	 */
	function dotp(x, y) {
		// Accumulate directly rather than building a temporary array of
		// products; cosineSimilarity calls this three times per comparison.
		let acc = 0;
		for (let i = 0; i < x.length; i++) {
			acc += x[i] * y[i];
		}
		return acc;
	}

	/**
	 * Cosine similarity of two vectors: dotp(A, B) divided by the product of
	 * the vectors' Euclidean lengths.
	 *
	 * @param {number[]} A - First vector.
	 * @param {number[]} B - Second vector.
	 * @returns {number} The similarity; NaN when a vector is all zeros.
	 */
	function cosineSimilarity(A,B){
		const inner = dotp(A, B);
		const normA = Math.sqrt(dotp(A, A));
		const normB = Math.sqrt(dotp(B, B));
		return inner / (normA * normB);
	}
	//end of copy.

	/**
	 * Builds an array of `length` pseudo-random numbers, each computed as
	 * (Math.random() - 0.5) * range.
	 *
	 * @param {number} range - Total width of the interval the values are drawn from.
	 * @param {number} length - Number of entries to generate.
	 * @returns {number[]} The generated vector.
	 */
	function createTensor(range, length) {
		const entries = [];
		let position = 0;
		while (position < length) {
			entries[position] = (Math.random() - 0.5) * range;
			position += 1;
		}
		return entries;
	}

	// Dimension of every generated vector.
	const tensorLength = 784;
	// Simulated pre-computed index: 10000 random vectors built at page load.
	// search() reads this script-level variable.
	var dataset = [];
	for (let i = 0; i < 10000; i++) {
		// Use the named constant rather than repeating the literal, so the
		// dimension is defined in exactly one place.
		dataset.push(createTensor(1.0E8, tensorLength));
	}

	console.log("Generated dataset ",dataset, dataset.length);

	/**
	 * Simulates one search: generates a random query vector, compares it with
	 * every vector in `dataset` by cosine similarity, and logs how many scored
	 * above 0.2.
	 */
	function search() {
		// Reuse tensorLength so the query always has the same dimension as the
		// constant declared for the dataset, instead of repeating the literal.
		const searchTensor = createTensor(1.0E8, tensorLength);
		let nmatches = 0;
		for (const row of dataset) {
			if (cosineSimilarity(searchTensor, row) > 0.2) {
				nmatches++;
			}
		}
		console.log("Matches",nmatches);
	}

	// Each click on the "Run Search" button runs one simulated search.
	document
		.querySelector("#searchbutton")
		.addEventListener("click", () => {
			search();
		});

	</script>
	</body>
	</html>