human (Aug 27, 2024, 01:51 PM)
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gemini API Image Bounding Box Visualization</title>
<script type="module">
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
import { marked } from "https://esm.run/marked";
/**
 * Return the Gemini API key for this browser.
 *
 * The key is read from localStorage under "GEMINI_API_KEY". When nothing
 * usable is stored, the user is asked for it via prompt(), and a non-empty
 * answer is saved for later calls.
 *
 * @returns {string|null} The stored or newly entered key; otherwise whatever
 *   prompt() returned (null or "") when the user supplied no key.
 */
function getApiKey() {
  const stored = localStorage.getItem("GEMINI_API_KEY");
  if (stored) {
    return stored;
  }
  // Nothing saved yet: ask once and remember a non-empty answer.
  const entered = prompt("Please enter your Gemini API key:");
  if (entered) {
    localStorage.setItem("GEMINI_API_KEY", entered);
  }
  return entered;
}
/**
 * Build a Gemini model client using the user's API key.
 *
 * @param {object} params - Options forwarded unchanged to
 *   GoogleGenerativeAI#getGenerativeModel (the caller passes `{ model }`).
 * @returns {Promise<object>} Resolves to the value returned by getGenerativeModel.
 * @throws {Error} (as a rejection) when no API key is available, e.g. the
 *   user dismissed the key prompt.
 */
async function getGenerativeModel(params) {
  const apiKey = getApiKey();
  if (!apiKey) {
    // Fail here with a readable message instead of sending a keyless request.
    throw new Error("A Gemini API key is required.");
  }
  return new GoogleGenerativeAI(apiKey).getGenerativeModel(params);
}
/**
 * Read a file and wrap it as an inline-data part for generateContent().
 *
 * @param {Blob} file - File to encode; its `type` becomes the part's mimeType.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>} Resolves
 *   with the portion of the file's data URL that follows the first comma.
 *   Rejects when the file cannot be read, so callers never wait forever.
 */
async function fileToGenerativePart(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Resolve only on a successful read; `loadend` also fires after failures,
    // when reader.result is null.
    reader.onload = () => resolve({
      inlineData: {
        data: reader.result.split(",")[1],
        mimeType: file.type
      }
    });
    const fail = () => reject(reader.error || new Error("Could not read the selected file."));
    reader.onerror = fail;
    reader.onabort = fail;
    reader.readAsDataURL(file);
  });
}
/**
 * Downscale an image file to at most 1000px wide (keeping its aspect ratio)
 * and re-encode it as a JPEG at quality 0.7.
 *
 * @param {Blob} file - Image file chosen by the user.
 * @returns {Promise<File>} Resolves with a File named "compressed_image.jpg"
 *   of type "image/jpeg". Rejects when the file cannot be read, cannot be
 *   decoded as an image, or cannot be encoded.
 */
function resizeAndCompressImage(file) {
  const MAX_WIDTH = 1000;
  const JPEG_QUALITY = 0.7;

  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = () => reject(reader.error || new Error("Could not read the selected file."));
    reader.onload = () => {
      const img = new Image();
      // Without this handler a non-image or corrupt file would leave the promise pending forever.
      img.onerror = () => reject(new Error("The selected file could not be decoded as an image."));
      img.onload = () => {
        let width = img.width;
        let height = img.height;
        if (width > MAX_WIDTH) {
          height = Math.round((height * MAX_WIDTH) / width);
          width = MAX_WIDTH;
        }
        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);
        canvas.toBlob((blob) => {
          // toBlob passes null when the image cannot be created.
          if (!blob) {
            reject(new Error("Could not encode the image as JPEG."));
            return;
          }
          resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', JPEG_QUALITY);
      };
      img.src = reader.result;
    };
    reader.readAsDataURL(file);
  });
}
/**
 * Click handler for the "Process" button.
 *
 * Sends the prompt and the (compressed) selected image to gemini-1.5-pro,
 * renders the Markdown reply into #result, and, when the reply contains
 * bounding boxes, draws them over the image on the canvas.
 * Failures are reported as text inside #result.
 *
 * @returns {Promise<void>}
 */
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');
  // Snapshot the inputs now; the user can edit them while the request is in flight.
  const imageFile = fileInput.files[0];
  const promptText = promptInput.value;
  if (!imageFile || !promptText) {
    alert('Please select an image and enter a prompt.');
    return;
  }
  // Plain text needs no HTML parsing.
  resultDiv.textContent = 'Processing...';
  try {
    const model = await getGenerativeModel({ model: "gemini-1.5-pro" });
    const compressedImage = await resizeAndCompressImage(imageFile);
    const imagePart = await fileToGenerativePart(compressedImage);
    const result = await model.generateContent([promptText, imagePart]);
    const response = await result.response;
    const text = response.text();
    // NOTE(review): marked.parse() output is inserted as HTML without
    // sanitizing; model output should be treated as untrusted. Consider
    // running it through an HTML sanitizer before assigning innerHTML.
    resultDiv.innerHTML = marked.parse(text);
    // Extract coordinates from the response
    const coordinates = extractCoordinates(text);
    if (coordinates.length > 0) {
      displayImageWithBoundingBoxes(compressedImage, coordinates);
    }
  } catch (error) {
    // textContent: an error message must never be interpreted as markup.
    resultDiv.textContent = `Error: ${error.message}`;
  }
}
/**
 * Find every four-integer array such as "[12, 34, 560, 780]" in a text.
 *
 * The digits are converted with Number() rather than JSON.parse(), because
 * the pattern also matches integers with leading zeros ("[010, ...]"),
 * which are not valid JSON and would make JSON.parse throw.
 *
 * @param {string} text - Text to scan (the model's reply).
 * @returns {number[][]} One four-number array per match, in order of
 *   appearance; empty when nothing matches.
 */
function extractCoordinates(text) {
  const boxPattern = /\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]/g;
  const boxes = [];
  let match;
  while ((match = boxPattern.exec(text)) !== null) {
    boxes.push([Number(match[1]), Number(match[2]), Number(match[3]), Number(match[4])]);
  }
  return boxes;
}
/**
 * Draw an image file on the #canvas element, overlaid with a reference grid,
 * the given bounding boxes, and labelled axes running from 0 to 1000.
 *
 * @param {Blob} file - Image to draw; it is read as a data URL first.
 * @param {number[][]} coordinates - Boxes as [ymin, xmin, ymax, xmax], each
 *   value on a 0-1000 scale of the image's height / width.
 */
function displayImageWithBoundingBoxes(file, coordinates) {
  const LEFT = 80; // x position of the image inside the canvas (leaves room for y labels)
  const TOP = 20;  // y position of the image inside the canvas
  const TICKS = [0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000];
  const BOX_COLORS = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];

  const fileReader = new FileReader();
  fileReader.onload = (loadEvent) => {
    const picture = new Image();
    picture.onload = () => {
      const target = document.getElementById('canvas');
      target.width = picture.width + 100;
      target.height = picture.height + 100;
      const pen = target.getContext('2d');

      // Stroke a single straight segment with the current pen settings.
      const segment = (x1, y1, x2, y2) => {
        pen.beginPath();
        pen.moveTo(x1, y1);
        pen.lineTo(x2, y2);
        pen.stroke();
      };

      pen.drawImage(picture, LEFT, TOP);

      // Grid: translucent red lines every 100 units in both directions.
      pen.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
      pen.lineWidth = 1;
      TICKS.forEach((tick) => {
        const gridX = LEFT + tick / 1000 * picture.width;
        segment(gridX, TOP, gridX, picture.height + TOP);
      });
      TICKS.forEach((tick) => {
        const gridY = TOP + (1000 - tick) / 1000 * picture.height;
        segment(LEFT, gridY, picture.width + LEFT, gridY);
      });

      // Bounding boxes, cycling through the palette.
      coordinates.forEach((box, boxIndex) => {
        const [top, left, bottom, right] = box.map((value) => value / 1000);
        const boxWidth = (right - left) * picture.width;
        const boxHeight = (bottom - top) * picture.height;
        pen.strokeStyle = BOX_COLORS[boxIndex % BOX_COLORS.length];
        pen.lineWidth = 5;
        pen.strokeRect(left * picture.width + LEFT, top * picture.height + TOP, boxWidth, boxHeight);
      });

      // Axes, ticks and labels in black.
      pen.strokeStyle = '#000000';
      pen.lineWidth = 1;
      pen.font = '26px Arial';

      // Y axis runs down the left edge of the image; labels sit to its left.
      pen.textAlign = 'right';
      segment(LEFT, TOP, LEFT, picture.height + TOP);
      TICKS.forEach((tick) => {
        const tickY = TOP + tick / 1000 * picture.height;
        pen.fillText(tick.toString(), 75, tickY + 5);
        segment(75, tickY, LEFT, tickY);
      });

      // X axis runs along the bottom edge of the image; labels sit below it.
      segment(LEFT, picture.height + TOP, picture.width + LEFT, picture.height + TOP);
      pen.textAlign = 'center';
      TICKS.forEach((tick) => {
        const tickX = LEFT + tick / 1000 * picture.width;
        pen.fillText(tick.toString(), tickX, picture.height + 40);
        segment(tickX, picture.height + 20, tickX, picture.height + 25);
      });
    };
    picture.src = loadEvent.target.result;
  };
  fileReader.readAsDataURL(file);
}
// Run the prompt whenever the "Process" button is clicked.
const submitButton = document.getElementById('submitBtn');
submitButton.addEventListener('click', processImageAndPrompt);
</script>
<style>
/* Page layout: one centred column, at most 800px wide. */
body {
  font-family: Arial, sans-serif;
  max-width: 800px;
  margin: 0 auto;
  padding: 20px;
}
/* border-box keeps the full-width textarea, including its padding and
   border, inside the column instead of overflowing it. */
textarea {
  box-sizing: border-box;
  width: 100%;
  height: 100px;
}
/* Framed panels for the model's reply and the annotated image. */
#result, #imageContainer {
  margin-top: 20px;
  border: 1px solid #ccc;
  padding: 10px;
}
/* Scale the canvas down to fit the panel while keeping its aspect ratio. */
#canvas {
  max-width: 100%;
  height: auto;
}
</style>
</head>
<body>
  <h1>Gemini API Image Bounding Box Visualization</h1>
  <!-- Every control has a visible label; element ids are unchanged for the script. -->
  <label for="imageInput">Image</label>
  <input type="file" id="imageInput" accept="image/*">
  <label for="promptInput">Prompt</label>
  <textarea id="promptInput">Return bounding boxes as JSON arrays [ymin, xmin, ymax, xmax]
</textarea>
  <button type="button" id="submitBtn">Process</button>
  <!-- Live region: status, reply and errors are announced to assistive technology. -->
  <div id="result" aria-live="polite"></div>
  <div id="imageContainer">
    <canvas id="canvas"></canvas>
  </div>
</body>
</html>
Add the ability to select from a list of models - keep the current one as the default and add options for gemini-1.5-flash-8b-exp-0827 and gemini-1.5-pro-exp-0827 and gemini-1.5-flash-exp-0827
Make sure image upload is optional, if no image set then it runs the prompt without the image
Add a clear image link
When image is selected it should be shown in the area at the bottom where
assistant (Aug 27, 2024, 01:51 PM)
Certainly, I'll modify the HTML to include these new features. Here's the updated version:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gemini API Image Bounding Box Visualization</title>
<script type="module">
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
import { marked } from "https://esm.run/marked";
/**
 * Fetch the Gemini API key, prompting for it the first time.
 *
 * A key saved in localStorage ("GEMINI_API_KEY") wins. Otherwise the user is
 * prompted, and any non-empty answer is saved for next time.
 *
 * @returns {string|null} The saved or freshly entered key; null or "" when
 *   the user gave none.
 */
function getApiKey() {
  const STORAGE_SLOT = "GEMINI_API_KEY";
  const saved = localStorage.getItem(STORAGE_SLOT);
  const key = saved ? saved : prompt("Please enter your Gemini API key:");
  // Persist only a key that was just typed in, never an empty answer.
  if (!saved && key) {
    localStorage.setItem(STORAGE_SLOT, key);
  }
  return key;
}
/**
 * Create a Gemini model client for the user's API key.
 *
 * @param {object} params - Passed through unchanged to
 *   GoogleGenerativeAI#getGenerativeModel (the caller supplies `{ model }`).
 * @returns {Promise<object>} Resolves to the value returned by getGenerativeModel.
 * @throws {Error} (as a rejection) if no API key could be obtained, for
 *   example because the key prompt was cancelled.
 */
async function getGenerativeModel(params) {
  const apiKey = getApiKey();
  if (!apiKey) {
    // Stop early with a clear message rather than issuing a keyless request.
    throw new Error("A Gemini API key is required.");
  }
  const genAI = new GoogleGenerativeAI(apiKey);
  return genAI.getGenerativeModel(params);
}
/**
 * Encode a file as an inline-data part suitable for generateContent().
 *
 * @param {Blob} file - File to encode; `file.type` is used as the mimeType.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>} Resolves
 *   with the text after the first comma of the file's data URL. Rejects if
 *   reading fails or is aborted, instead of leaving the promise pending.
 */
async function fileToGenerativePart(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    const onFailure = () => reject(reader.error || new Error("Could not read the selected file."));
    reader.onerror = onFailure;
    reader.onabort = onFailure;
    // `load` fires only for a successful read, so reader.result is a data URL here.
    reader.onload = () => {
      const base64Payload = reader.result.split(",")[1];
      resolve({ inlineData: { data: base64Payload, mimeType: file.type } });
    };
    reader.readAsDataURL(file);
  });
}
/**
 * Shrink an image file to a maximum width of 1000px (height scaled to match)
 * and re-encode it as JPEG with quality 0.7.
 *
 * @param {Blob} file - The image file to process.
 * @returns {Promise<File>} Resolves with a "compressed_image.jpg" File of
 *   type "image/jpeg". Rejects if the file cannot be read, is not a
 *   decodable image, or cannot be encoded.
 */
function resizeAndCompressImage(file) {
  const WIDTH_LIMIT = 1000;
  const QUALITY = 0.7;

  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = () => reject(reader.error || new Error("Could not read the selected file."));
    reader.onload = function(event) {
      const img = new Image();
      // A file that is not a decodable image fires `error`, never `load`.
      img.onerror = () => reject(new Error("The selected file could not be decoded as an image."));
      img.onload = function() {
        const scale = img.width > WIDTH_LIMIT ? WIDTH_LIMIT / img.width : 1;
        const width = scale === 1 ? img.width : WIDTH_LIMIT;
        const height = scale === 1 ? img.height : Math.round((img.height * WIDTH_LIMIT) / img.width);

        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);
        canvas.toBlob((blob) => {
          // A null blob means the canvas could not produce an image.
          if (blob === null) {
            reject(new Error("Could not encode the image as JPEG."));
            return;
          }
          resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', QUALITY);
      };
      img.src = event.target.result;
    };
    reader.readAsDataURL(file);
  });
}
/**
 * Click handler for the "Process" button.
 *
 * Sends the prompt, plus the compressed image when one is selected, to the
 * model chosen in #modelSelect and renders the Markdown reply into #result.
 * When an image was sent and the reply contains bounding boxes, they are
 * drawn over that same compressed image on the canvas.
 * Failures are reported as text inside #result.
 *
 * @returns {Promise<void>}
 */
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');
  const modelSelect = document.getElementById('modelSelect');
  // Snapshot the inputs now: the user may edit the prompt or press
  // "Clear Image" while the request is still in flight.
  const promptText = promptInput.value;
  const imageFile = fileInput.files[0];
  if (!promptText) {
    alert('Please enter a prompt.');
    return;
  }
  // Plain text needs no HTML parsing.
  resultDiv.textContent = 'Processing...';
  try {
    const model = await getGenerativeModel({ model: modelSelect.value });
    const content = [promptText];
    let compressedImage = null;
    if (imageFile) {
      compressedImage = await resizeAndCompressImage(imageFile);
      content.push(await fileToGenerativePart(compressedImage));
    }
    const result = await model.generateContent(content);
    const response = await result.response;
    const text = response.text();
    // NOTE(review): marked.parse() output is inserted as HTML without
    // sanitizing; model output should be treated as untrusted. Consider
    // running it through an HTML sanitizer before assigning innerHTML.
    resultDiv.innerHTML = marked.parse(text);
    if (compressedImage) {
      // Extract coordinates from the response
      const coordinates = extractCoordinates(text);
      if (coordinates.length > 0) {
        // Annotate the image that was actually sent; its width is capped,
        // so the fixed-size labels and strokes stay proportionate.
        displayImageWithBoundingBoxes(compressedImage, coordinates);
      }
    }
  } catch (error) {
    // textContent: an error message must never be interpreted as markup.
    resultDiv.textContent = `Error: ${error.message}`;
  }
}
/**
 * Collect every bracketed list of four integers (e.g. "[12, 34, 560, 780]")
 * found in a text.
 *
 * Each match is split into its digit runs and converted with Number().
 * JSON.parse() is avoided because the pattern also accepts integers with
 * leading zeros, which are invalid JSON and would throw.
 *
 * @param {string} text - Text to search (the model's reply).
 * @returns {number[][]} Four-number arrays in order of appearance; empty
 *   when there is no match.
 */
function extractCoordinates(text) {
  const regex = /\[\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\]/g;
  const matches = text.match(regex) || [];
  return matches.map((box) => box.match(/\d+/g).map(Number));
}
/**
 * Paint an image file onto the #canvas element together with a reference
 * grid, the supplied bounding boxes, and axes labelled 0 to 1000.
 *
 * @param {Blob} file - Image to show; loaded through a data URL.
 * @param {number[][]} coordinates - Boxes given as [ymin, xmin, ymax, xmax],
 *   each value on a 0-1000 scale of the image's height / width.
 */
function displayImageWithBoundingBoxes(file, coordinates) {
  const ORIGIN_X = 80; // left edge of the image within the canvas
  const ORIGIN_Y = 20; // top edge of the image within the canvas

  // Stroke one straight line using the context's current stroke settings.
  function strokeLine(ctx, fromX, fromY, toX, toY) {
    ctx.beginPath();
    ctx.moveTo(fromX, fromY);
    ctx.lineTo(toX, toY);
    ctx.stroke();
  }

  // Translucent red grid, one line per 100 units in each direction.
  function drawGrid(ctx, imgW, imgH) {
    ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
    ctx.lineWidth = 1;
    for (let unit = 0; unit <= 1000; unit += 100) {
      const x = ORIGIN_X + unit / 1000 * imgW;
      strokeLine(ctx, x, ORIGIN_Y, x, imgH + ORIGIN_Y);
    }
    for (let unit = 0; unit <= 1000; unit += 100) {
      const y = ORIGIN_Y + (1000 - unit) / 1000 * imgH;
      strokeLine(ctx, ORIGIN_X, y, imgW + ORIGIN_X, y);
    }
  }

  // One thick rectangle per box, cycling through six colours.
  function drawBoxes(ctx, imgW, imgH) {
    const palette = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
    for (let n = 0; n < coordinates.length; n++) {
      const scaled = coordinates[n].map((value) => value / 1000);
      const yMin = scaled[0];
      const xMin = scaled[1];
      const yMax = scaled[2];
      const xMax = scaled[3];
      ctx.strokeStyle = palette[n % palette.length];
      ctx.lineWidth = 5;
      ctx.strokeRect(
        xMin * imgW + ORIGIN_X,
        yMin * imgH + ORIGIN_Y,
        (xMax - xMin) * imgW,
        (yMax - yMin) * imgH
      );
    }
  }

  // Black axes on the left and bottom edges, with ticks and numeric labels.
  function drawAxes(ctx, imgW, imgH) {
    ctx.strokeStyle = '#000000';
    ctx.lineWidth = 1;
    ctx.font = '26px Arial';
    ctx.textAlign = 'right';
    strokeLine(ctx, ORIGIN_X, ORIGIN_Y, ORIGIN_X, imgH + ORIGIN_Y);
    for (let unit = 0; unit <= 1000; unit += 100) {
      const y = ORIGIN_Y + unit / 1000 * imgH;
      ctx.fillText(unit.toString(), 75, y + 5);
      strokeLine(ctx, 75, y, ORIGIN_X, y);
    }
    strokeLine(ctx, ORIGIN_X, imgH + ORIGIN_Y, imgW + ORIGIN_X, imgH + ORIGIN_Y);
    ctx.textAlign = 'center';
    for (let unit = 0; unit <= 1000; unit += 100) {
      const x = ORIGIN_X + unit / 1000 * imgW;
      ctx.fillText(unit.toString(), x, imgH + 40);
      strokeLine(ctx, x, imgH + 20, x, imgH + 25);
    }
  }

  const reader = new FileReader();
  reader.onload = (event) => {
    const image = new Image();
    image.onload = () => {
      const canvas = document.getElementById('canvas');
      // Extra 100px in each dimension leaves room for the axis labels.
      canvas.width = image.width + 100;
      canvas.height = image.height + 100;
      const ctx = canvas.getContext('2d');
      ctx.drawImage(image, ORIGIN_X, ORIGIN_Y);
      drawGrid(ctx, image.width, image.height);
      drawBoxes(ctx, image.width, image.height);
      drawAxes(ctx, image.width, image.height);
    };
    image.src = event.target.result;
  };
  reader.readAsDataURL(file);
}
/**
 * Reset the image selection: empty the file input, wipe the canvas, and
 * hide the preview image.
 */
function clearImage() {
  const fileInput = document.getElementById('imageInput');
  const canvas = document.getElementById('canvas');
  const preview = document.getElementById('imagePreview');

  fileInput.value = '';
  // The canvas is looked up explicitly; the bare identifier `canvas` only
  // resolved because browsers expose elements with an id as globals.
  canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);
  // Remove the attribute rather than assigning '' so the <img> has no source at all.
  preview.removeAttribute('src');
  preview.style.display = 'none';
}
/**
 * `change` handler for the file input: show the chosen image in
 * #imagePreview, or hide the preview when the selection is now empty.
 *
 * @param {Event} event - Change event whose target is the file input.
 */
function previewImage(event) {
  const preview = document.getElementById('imagePreview');
  const file = event.target.files[0];
  if (!file) {
    // No file is selected any more (for example the picker was cancelled):
    // do not keep showing an image that will not be sent.
    preview.removeAttribute('src');
    preview.style.display = 'none';
    return;
  }
  const reader = new FileReader();
  reader.onload = () => {
    preview.src = reader.result;
    preview.style.display = 'block';
  };
  reader.readAsDataURL(file);
}
// Wire each control to its handler: [element id, event name, handler].
const listenerBindings = [
  ['submitBtn', 'click', processImageAndPrompt],
  ['clearImageBtn', 'click', clearImage],
  ['imageInput', 'change', previewImage],
];
for (const [elementId, eventName, handler] of listenerBindings) {
  document.getElementById(elementId).addEventListener(eventName, handler);
}
</script>
<style>
/* Page layout: one centred column, at most 800px wide. */
body {
  font-family: Arial, sans-serif;
  max-width: 800px;
  margin: 0 auto;
  padding: 20px;
}
/* border-box keeps the full-width textarea, including its padding and
   border, inside the column instead of overflowing it. */
textarea {
  box-sizing: border-box;
  width: 100%;
  height: 100px;
}
/* Framed panels for the model's reply and the image area. */
#result, #imageContainer {
  margin-top: 20px;
  border: 1px solid #ccc;
  padding: 10px;
}
/* Scale the canvas down to fit the panel while keeping its aspect ratio. */
#canvas {
  max-width: 100%;
  height: auto;
}
/* Hidden until the script shows it for a selected image. */
#imagePreview {
  max-width: 100%;
  display: none;
  margin-top: 10px;
}
</style>
</head>
<body>
  <h1>Gemini API Image Bounding Box Visualization</h1>
  <!-- Every control has a visible label; element ids are unchanged for the script. -->
  <label for="modelSelect">Model</label>
  <select id="modelSelect">
    <option value="gemini-1.5-pro">gemini-1.5-pro (default)</option>
    <option value="gemini-1.5-flash-8b-exp-0827">gemini-1.5-flash-8b-exp-0827</option>
    <option value="gemini-1.5-pro-exp-0827">gemini-1.5-pro-exp-0827</option>
    <option value="gemini-1.5-flash-exp-0827">gemini-1.5-flash-exp-0827</option>
  </select>
  <label for="imageInput">Image (optional)</label>
  <input type="file" id="imageInput" accept="image/*">
  <button type="button" id="clearImageBtn">Clear Image</button>
  <label for="promptInput">Prompt</label>
  <textarea id="promptInput">Return bounding boxes as JSON arrays [ymin, xmin, ymax, xmax]
</textarea>
  <button type="button" id="submitBtn">Process</button>
  <!-- Live region: status, reply and errors are announced to assistive technology. -->
  <div id="result" aria-live="polite"></div>
  <div id="imageContainer">
    <img id="imagePreview" alt="Image preview">
    <canvas id="canvas"></canvas>
  </div>
</body>
</html>
I've made the following changes to the HTML:
- Added a select element with options for different Gemini models:
  - gemini-1.5-pro (default)
  - gemini-1.5-flash-8b-exp-0827
  - gemini-1.5-pro-exp-0827
  - gemini-1.5-flash-exp-0827
- Modified the `processImageAndPrompt` function to make the image upload optional:
  - It now checks if an image is selected before processing it.
  - If no image is selected, it runs the prompt without the image.
- Added a "Clear Image" button:
  - This button clears the selected image, the canvas, and the image preview.
- Added an image preview feature:
  - When an image is selected, it's displayed in the area below the input fields.
  - The `previewImage` function handles this functionality.
- Updated the HTML structure to include the new elements:
  - Added the model select dropdown.
  - Added the "Clear Image" button.
  - Added an img element for the image preview.
These changes allow users to select different Gemini models, use the tool with or without an image, clear the selected image easily, and see a preview of the selected image before processing.