Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
stt_api_output.json
[
{
"transcript": "Software developers. We're not known for our Rock and style. Are we or are we today? I'll show you how I use ml to make new trendier taking inspiration from influencers. I'm going to build an app that analyzes my closet and pictures of fashionable influencers and then makes me outfit recommendations. If I'm missing an important piece will also recommend me products to buy let me show you how",
"words": [
{
"word": "Software",
"start_time": 0.2,
"end_time": 0.6,
"speaker_tag": 0
},
{
"word": "developers.",
"start_time": 0.6,
"end_time": 1.5,
"speaker_tag": 0
},
{ "word": "We're", "start_time": 1.6, "end_time": 1.8, "speaker_tag": 0 },
{ "word": "not", "start_time": 1.8, "end_time": 2.0, "speaker_tag": 0 },
{ "word": "known", "start_time": 2.0, "end_time": 2.3, "speaker_tag": 0 },
{ "word": "for", "start_time": 2.3, "end_time": 2.4, "speaker_tag": 0 },
{ "word": "our", "start_time": 2.4, "end_time": 2.6, "speaker_tag": 0 },
{ "word": "Rock", "start_time": 2.6, "end_time": 2.9, "speaker_tag": 0 },
{ "word": "and", "start_time": 2.9, "end_time": 3.1, "speaker_tag": 0 },
{
"word": "style.",
"start_time": 3.1,
"end_time": 3.7,
"speaker_tag": 0
},
{ "word": "Are", "start_time": 3.7, "end_time": 4.0, "speaker_tag": 0 },
{ "word": "we", "start_time": 4.0, "end_time": 4.4, "speaker_tag": 0 },
{ "word": "or", "start_time": 5.0, "end_time": 5.4, "speaker_tag": 0 },
{ "word": "are", "start_time": 5.4, "end_time": 5.7, "speaker_tag": 0 },
{ "word": "we", "start_time": 5.7, "end_time": 6.2, "speaker_tag": 0 },
{
"word": "today?",
"start_time": 7.0,
"end_time": 7.2,
"speaker_tag": 0
},
{ "word": "I'll", "start_time": 7.2, "end_time": 7.3, "speaker_tag": 0 },
{ "word": "show", "start_time": 7.3, "end_time": 7.5, "speaker_tag": 0 },
{ "word": "you", "start_time": 7.5, "end_time": 7.6, "speaker_tag": 0 },
{ "word": "how", "start_time": 7.6, "end_time": 7.9, "speaker_tag": 0 },
{ "word": "I", "start_time": 7.9, "end_time": 8.0, "speaker_tag": 0 },
{ "word": "use", "start_time": 8.0, "end_time": 8.2, "speaker_tag": 0 },
{ "word": "ml", "start_time": 8.2, "end_time": 8.5, "speaker_tag": 0 },
{ "word": "to", "start_time": 8.5, "end_time": 8.6, "speaker_tag": 0 },
{ "word": "make", "start_time": 8.6, "end_time": 8.8, "speaker_tag": 0 },
{ "word": "new", "start_time": 8.8, "end_time": 8.9, "speaker_tag": 0 },
{
"word": "trendier",
"start_time": 8.9,
"end_time": 9.5,
"speaker_tag": 0
},
{
"word": "taking",
"start_time": 9.5,
"end_time": 9.9,
"speaker_tag": 0
},
{
"word": "inspiration",
"start_time": 9.9,
"end_time": 10.6,
"speaker_tag": 0
},
{
"word": "from",
"start_time": 10.6,
"end_time": 10.7,
"speaker_tag": 0
},
{
"word": "influencers.",
"start_time": 10.7,
"end_time": 11.6,
"speaker_tag": 0
},
{ "word": "I'm", "start_time": 11.9, "end_time": 12.0, "speaker_tag": 0 },
{
"word": "going",
"start_time": 12.0,
"end_time": 12.1,
"speaker_tag": 0
},
{ "word": "to", "start_time": 12.1, "end_time": 12.2, "speaker_tag": 0 },
{
"word": "build",
"start_time": 12.2,
"end_time": 12.4,
"speaker_tag": 0
},
{ "word": "an", "start_time": 12.4, "end_time": 12.5, "speaker_tag": 0 },
{ "word": "app", "start_time": 12.5, "end_time": 12.7, "speaker_tag": 0 },
{
"word": "that",
"start_time": 12.7,
"end_time": 12.9,
"speaker_tag": 0
},
{
"word": "analyzes",
"start_time": 12.9,
"end_time": 13.4,
"speaker_tag": 0
},
{ "word": "my", "start_time": 13.4, "end_time": 13.6, "speaker_tag": 0 },
{
"word": "closet",
"start_time": 13.6,
"end_time": 14.2,
"speaker_tag": 0
},
{ "word": "and", "start_time": 14.2, "end_time": 14.3, "speaker_tag": 0 },
{
"word": "pictures",
"start_time": 14.3,
"end_time": 14.8,
"speaker_tag": 0
},
{ "word": "of", "start_time": 14.8, "end_time": 14.9, "speaker_tag": 0 },
{
"word": "fashionable",
"start_time": 14.9,
"end_time": 15.5,
"speaker_tag": 0
},
{
"word": "influencers",
"start_time": 15.6,
"end_time": 16.3,
"speaker_tag": 0
},
{ "word": "and", "start_time": 16.3, "end_time": 16.4, "speaker_tag": 0 },
{
"word": "then",
"start_time": 16.4,
"end_time": 16.5,
"speaker_tag": 0
},
{
"word": "makes",
"start_time": 16.5,
"end_time": 16.8,
"speaker_tag": 0
},
{ "word": "me", "start_time": 16.8, "end_time": 16.9, "speaker_tag": 0 },
{
"word": "outfit",
"start_time": 16.9,
"end_time": 17.2,
"speaker_tag": 0
},
{
"word": "recommendations.",
"start_time": 17.2,
"end_time": 18.2,
"speaker_tag": 0
},
{ "word": "If", "start_time": 18.9, "end_time": 19.1, "speaker_tag": 0 },
{ "word": "I'm", "start_time": 19.1, "end_time": 19.1, "speaker_tag": 0 },
{
"word": "missing",
"start_time": 19.1,
"end_time": 19.4,
"speaker_tag": 0
},
{ "word": "an", "start_time": 19.4, "end_time": 19.5, "speaker_tag": 0 },
{
"word": "important",
"start_time": 19.5,
"end_time": 19.9,
"speaker_tag": 0
},
{
"word": "piece",
"start_time": 19.9,
"end_time": 20.3,
"speaker_tag": 0
},
{
"word": "will",
"start_time": 20.3,
"end_time": 20.5,
"speaker_tag": 0
},
{
"word": "also",
"start_time": 20.5,
"end_time": 20.7,
"speaker_tag": 0
},
{
"word": "recommend",
"start_time": 20.7,
"end_time": 21.0,
"speaker_tag": 0
},
{ "word": "me", "start_time": 21.0, "end_time": 21.1, "speaker_tag": 0 },
{
"word": "products",
"start_time": 21.1,
"end_time": 21.5,
"speaker_tag": 0
},
{ "word": "to", "start_time": 21.5, "end_time": 21.5, "speaker_tag": 0 },
{ "word": "buy", "start_time": 21.5, "end_time": 22.0, "speaker_tag": 0 },
{ "word": "let", "start_time": 22.1, "end_time": 22.2, "speaker_tag": 0 },
{ "word": "me", "start_time": 22.2, "end_time": 22.3, "speaker_tag": 0 },
{
"word": "show",
"start_time": 22.3,
"end_time": 22.4,
"speaker_tag": 0
},
{ "word": "you", "start_time": 22.4, "end_time": 22.6, "speaker_tag": 0 },
{ "word": "how", "start_time": 22.6, "end_time": 23.0, "speaker_tag": 0 }
]
},
{
"transcript": " so the way this is going to work is that I'm going to grab a bunch of fashionable inspiration pictures from social media and I'll take a bunch of pictures of my wardrobe and I'll try to match the pieces that I already owned those fashion pictures using machine learning which will help me put together outfits all wrap this whole thing in a Firebase apps that I can scroll through outfit recommendations on my phone. Alright, let's build this thing. So first I needed to catalog my entire wardrobe. So I thought I could just grab a few pictures of me wearing clothing and everyday life and then use computer vision to pick out the items.",
"words": [
{ "word": "so", "start_time": 28.8, "end_time": 29.0, "speaker_tag": 0 },
{ "word": "the", "start_time": 29.0, "end_time": 29.1, "speaker_tag": 0 },
{ "word": "way", "start_time": 29.1, "end_time": 29.2, "speaker_tag": 0 },
{
"word": "this",
"start_time": 29.2,
"end_time": 29.4,
"speaker_tag": 0
},
{ "word": "is", "start_time": 29.4, "end_time": 29.5, "speaker_tag": 0 },
{
"word": "going",
"start_time": 29.5,
"end_time": 29.6,
"speaker_tag": 0
},
{ "word": "to", "start_time": 29.6, "end_time": 29.6, "speaker_tag": 0 },
{
"word": "work",
"start_time": 29.6,
"end_time": 29.9,
"speaker_tag": 0
},
{ "word": "is", "start_time": 29.9, "end_time": 30.0, "speaker_tag": 0 },
{
"word": "that",
"start_time": 30.0,
"end_time": 30.1,
"speaker_tag": 0
},
{ "word": "I'm", "start_time": 30.1, "end_time": 30.1, "speaker_tag": 0 },
{
"word": "going",
"start_time": 30.1,
"end_time": 30.3,
"speaker_tag": 0
},
{ "word": "to", "start_time": 30.3, "end_time": 30.3, "speaker_tag": 0 },
{
"word": "grab",
"start_time": 30.3,
"end_time": 30.5,
"speaker_tag": 0
},
{ "word": "a", "start_time": 30.5, "end_time": 30.6, "speaker_tag": 0 },
{
"word": "bunch",
"start_time": 30.6,
"end_time": 30.9,
"speaker_tag": 0
},
{ "word": "of", "start_time": 30.9, "end_time": 31.0, "speaker_tag": 0 },
{
"word": "fashionable",
"start_time": 31.0,
"end_time": 31.6,
"speaker_tag": 0
},
{
"word": "inspiration",
"start_time": 31.6,
"end_time": 32.3,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 32.3,
"end_time": 32.7,
"speaker_tag": 0
},
{
"word": "from",
"start_time": 32.7,
"end_time": 32.9,
"speaker_tag": 0
},
{
"word": "social",
"start_time": 32.9,
"end_time": 33.2,
"speaker_tag": 0
},
{
"word": "media",
"start_time": 33.2,
"end_time": 33.8,
"speaker_tag": 0
},
{ "word": "and", "start_time": 33.8, "end_time": 34.0, "speaker_tag": 0 },
{
"word": "I'll",
"start_time": 34.0,
"end_time": 34.1,
"speaker_tag": 0
},
{
"word": "take",
"start_time": 34.1,
"end_time": 34.3,
"speaker_tag": 0
},
{ "word": "a", "start_time": 34.3, "end_time": 34.3, "speaker_tag": 0 },
{
"word": "bunch",
"start_time": 34.3,
"end_time": 34.5,
"speaker_tag": 0
},
{ "word": "of", "start_time": 34.5, "end_time": 34.6, "speaker_tag": 0 },
{
"word": "pictures",
"start_time": 34.6,
"end_time": 34.9,
"speaker_tag": 0
},
{ "word": "of", "start_time": 34.9, "end_time": 35.0, "speaker_tag": 0 },
{ "word": "my", "start_time": 35.0, "end_time": 35.1, "speaker_tag": 0 },
{
"word": "wardrobe",
"start_time": 35.1,
"end_time": 35.9,
"speaker_tag": 0
},
{ "word": "and", "start_time": 35.9, "end_time": 36.0, "speaker_tag": 0 },
{
"word": "I'll",
"start_time": 36.0,
"end_time": 36.1,
"speaker_tag": 0
},
{ "word": "try", "start_time": 36.1, "end_time": 36.3, "speaker_tag": 0 },
{ "word": "to", "start_time": 36.3, "end_time": 36.4, "speaker_tag": 0 },
{
"word": "match",
"start_time": 36.4,
"end_time": 36.6,
"speaker_tag": 0
},
{ "word": "the", "start_time": 36.6, "end_time": 36.7, "speaker_tag": 0 },
{
"word": "pieces",
"start_time": 36.7,
"end_time": 37.0,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 37.0,
"end_time": 37.1,
"speaker_tag": 0
},
{ "word": "I", "start_time": 37.1, "end_time": 37.2, "speaker_tag": 0 },
{
"word": "already",
"start_time": 37.2,
"end_time": 37.6,
"speaker_tag": 0
},
{
"word": "owned",
"start_time": 37.6,
"end_time": 37.8,
"speaker_tag": 0
},
{
"word": "those",
"start_time": 37.8,
"end_time": 38.1,
"speaker_tag": 0
},
{
"word": "fashion",
"start_time": 38.1,
"end_time": 38.6,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 38.6,
"end_time": 39.1,
"speaker_tag": 0
},
{
"word": "using",
"start_time": 39.1,
"end_time": 39.3,
"speaker_tag": 0
},
{
"word": "machine",
"start_time": 39.3,
"end_time": 39.7,
"speaker_tag": 0
},
{
"word": "learning",
"start_time": 39.7,
"end_time": 40.1,
"speaker_tag": 0
},
{
"word": "which",
"start_time": 40.3,
"end_time": 40.4,
"speaker_tag": 0
},
{
"word": "will",
"start_time": 40.4,
"end_time": 40.5,
"speaker_tag": 0
},
{
"word": "help",
"start_time": 40.5,
"end_time": 40.7,
"speaker_tag": 0
},
{ "word": "me", "start_time": 40.7, "end_time": 40.8, "speaker_tag": 0 },
{ "word": "put", "start_time": 40.8, "end_time": 41.0, "speaker_tag": 0 },
{
"word": "together",
"start_time": 41.0,
"end_time": 41.3,
"speaker_tag": 0
},
{
"word": "outfits",
"start_time": 41.3,
"end_time": 41.9,
"speaker_tag": 0
},
{ "word": "all", "start_time": 42.4, "end_time": 42.5, "speaker_tag": 0 },
{
"word": "wrap",
"start_time": 42.5,
"end_time": 42.7,
"speaker_tag": 0
},
{
"word": "this",
"start_time": 42.7,
"end_time": 42.9,
"speaker_tag": 0
},
{
"word": "whole",
"start_time": 42.9,
"end_time": 43.0,
"speaker_tag": 0
},
{
"word": "thing",
"start_time": 43.0,
"end_time": 43.3,
"speaker_tag": 0
},
{ "word": "in", "start_time": 43.3, "end_time": 43.4, "speaker_tag": 0 },
{ "word": "a", "start_time": 43.4, "end_time": 43.4, "speaker_tag": 0 },
{
"word": "Firebase",
"start_time": 43.4,
"end_time": 43.9,
"speaker_tag": 0
},
{
"word": "apps",
"start_time": 43.9,
"end_time": 44.2,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 44.2,
"end_time": 44.3,
"speaker_tag": 0
},
{ "word": "I", "start_time": 44.3, "end_time": 44.4, "speaker_tag": 0 },
{ "word": "can", "start_time": 44.4, "end_time": 44.5, "speaker_tag": 0 },
{
"word": "scroll",
"start_time": 44.5,
"end_time": 44.8,
"speaker_tag": 0
},
{
"word": "through",
"start_time": 44.8,
"end_time": 45.1,
"speaker_tag": 0
},
{
"word": "outfit",
"start_time": 45.1,
"end_time": 45.3,
"speaker_tag": 0
},
{
"word": "recommendations",
"start_time": 45.3,
"end_time": 46.0,
"speaker_tag": 0
},
{ "word": "on", "start_time": 46.0, "end_time": 46.2, "speaker_tag": 0 },
{ "word": "my", "start_time": 46.2, "end_time": 46.3, "speaker_tag": 0 },
{
"word": "phone.",
"start_time": 46.3,
"end_time": 46.8,
"speaker_tag": 0
},
{
"word": "Alright,",
"start_time": 47.0,
"end_time": 47.4,
"speaker_tag": 0
},
{
"word": "let's",
"start_time": 47.4,
"end_time": 47.5,
"speaker_tag": 0
},
{
"word": "build",
"start_time": 47.5,
"end_time": 47.7,
"speaker_tag": 0
},
{
"word": "this",
"start_time": 47.7,
"end_time": 47.8,
"speaker_tag": 0
},
{
"word": "thing.",
"start_time": 47.8,
"end_time": 48.2,
"speaker_tag": 0
},
{ "word": "So", "start_time": 48.5, "end_time": 48.6, "speaker_tag": 0 },
{
"word": "first",
"start_time": 48.6,
"end_time": 49.0,
"speaker_tag": 0
},
{ "word": "I", "start_time": 49.0, "end_time": 49.1, "speaker_tag": 0 },
{
"word": "needed",
"start_time": 49.1,
"end_time": 49.4,
"speaker_tag": 0
},
{ "word": "to", "start_time": 49.4, "end_time": 49.4, "speaker_tag": 0 },
{
"word": "catalog",
"start_time": 49.4,
"end_time": 49.9,
"speaker_tag": 0
},
{ "word": "my", "start_time": 49.9, "end_time": 50.0, "speaker_tag": 0 },
{
"word": "entire",
"start_time": 50.0,
"end_time": 50.5,
"speaker_tag": 0
},
{
"word": "wardrobe.",
"start_time": 50.5,
"end_time": 51.2,
"speaker_tag": 0
},
{ "word": "So", "start_time": 51.3, "end_time": 51.5, "speaker_tag": 0 },
{ "word": "I", "start_time": 51.5, "end_time": 51.5, "speaker_tag": 0 },
{
"word": "thought",
"start_time": 51.5,
"end_time": 51.8,
"speaker_tag": 0
},
{ "word": "I", "start_time": 51.8, "end_time": 51.8, "speaker_tag": 0 },
{
"word": "could",
"start_time": 51.8,
"end_time": 51.9,
"speaker_tag": 0
},
{
"word": "just",
"start_time": 51.9,
"end_time": 52.1,
"speaker_tag": 0
},
{
"word": "grab",
"start_time": 52.1,
"end_time": 52.4,
"speaker_tag": 0
},
{ "word": "a", "start_time": 52.4, "end_time": 52.4, "speaker_tag": 0 },
{ "word": "few", "start_time": 52.4, "end_time": 52.6, "speaker_tag": 0 },
{
"word": "pictures",
"start_time": 52.6,
"end_time": 53.0,
"speaker_tag": 0
},
{ "word": "of", "start_time": 53.0, "end_time": 53.1, "speaker_tag": 0 },
{ "word": "me", "start_time": 53.1, "end_time": 53.2, "speaker_tag": 0 },
{
"word": "wearing",
"start_time": 53.2,
"end_time": 53.5,
"speaker_tag": 0
},
{
"word": "clothing",
"start_time": 53.5,
"end_time": 53.9,
"speaker_tag": 0
},
{ "word": "and", "start_time": 53.9, "end_time": 54.0, "speaker_tag": 0 },
{
"word": "everyday",
"start_time": 54.0,
"end_time": 54.4,
"speaker_tag": 0
},
{
"word": "life",
"start_time": 54.4,
"end_time": 54.8,
"speaker_tag": 0
},
{ "word": "and", "start_time": 54.8, "end_time": 54.9, "speaker_tag": 0 },
{
"word": "then",
"start_time": 54.9,
"end_time": 55.1,
"speaker_tag": 0
},
{ "word": "use", "start_time": 55.1, "end_time": 55.3, "speaker_tag": 0 },
{
"word": "computer",
"start_time": 55.3,
"end_time": 55.6,
"speaker_tag": 0
},
{
"word": "vision",
"start_time": 55.6,
"end_time": 56.0,
"speaker_tag": 0
},
{ "word": "to", "start_time": 56.0, "end_time": 56.1, "speaker_tag": 0 },
{
"word": "pick",
"start_time": 56.1,
"end_time": 56.2,
"speaker_tag": 0
},
{ "word": "out", "start_time": 56.2, "end_time": 56.3, "speaker_tag": 0 },
{ "word": "the", "start_time": 56.3, "end_time": 56.4, "speaker_tag": 0 },
{
"word": "items.",
"start_time": 56.4,
"end_time": 56.8,
"speaker_tag": 0
}
]
},
{
"transcript": " Do this. I used a feature of the Google Cloud Vision API called object detection which identifies objects and photos and then draws a little bounding box around them as you can see it kind of worked at identifying my top shorts shoes stuff like that. But since I'm going to be matching my clothing pictures to inspiration pictures, I realize that the quality of my pictures had to be really high. They couldn't beat up weird lighting angles or anything like that. So I accepted the sad fate that I was just going to have to take a picture of every single item in my",
"words": [
{ "word": "Do", "start_time": 57.6, "end_time": 57.8, "speaker_tag": 0 },
{
"word": "this.",
"start_time": 57.8,
"end_time": 58.1,
"speaker_tag": 0
},
{ "word": "I", "start_time": 58.1, "end_time": 58.2, "speaker_tag": 0 },
{
"word": "used",
"start_time": 58.2,
"end_time": 58.5,
"speaker_tag": 0
},
{ "word": "a", "start_time": 58.5, "end_time": 58.5, "speaker_tag": 0 },
{
"word": "feature",
"start_time": 58.5,
"end_time": 59.0,
"speaker_tag": 0
},
{ "word": "of", "start_time": 59.0, "end_time": 59.1, "speaker_tag": 0 },
{ "word": "the", "start_time": 59.1, "end_time": 59.2, "speaker_tag": 0 },
{
"word": "Google",
"start_time": 59.2,
"end_time": 59.5,
"speaker_tag": 0
},
{
"word": "Cloud",
"start_time": 59.5,
"end_time": 59.8,
"speaker_tag": 0
},
{
"word": "Vision",
"start_time": 59.8,
"end_time": 60.0,
"speaker_tag": 0
},
{ "word": "API", "start_time": 60.0, "end_time": 60.5, "speaker_tag": 0 },
{
"word": "called",
"start_time": 60.5,
"end_time": 60.8,
"speaker_tag": 0
},
{
"word": "object",
"start_time": 60.8,
"end_time": 61.1,
"speaker_tag": 0
},
{
"word": "detection",
"start_time": 61.1,
"end_time": 61.8,
"speaker_tag": 0
},
{
"word": "which",
"start_time": 62.0,
"end_time": 62.1,
"speaker_tag": 0
},
{
"word": "identifies",
"start_time": 62.1,
"end_time": 62.8,
"speaker_tag": 0
},
{
"word": "objects",
"start_time": 62.8,
"end_time": 63.2,
"speaker_tag": 0
},
{ "word": "and", "start_time": 63.2, "end_time": 63.3, "speaker_tag": 0 },
{
"word": "photos",
"start_time": 63.3,
"end_time": 63.7,
"speaker_tag": 0
},
{ "word": "and", "start_time": 63.7, "end_time": 63.8, "speaker_tag": 0 },
{
"word": "then",
"start_time": 63.8,
"end_time": 63.9,
"speaker_tag": 0
},
{
"word": "draws",
"start_time": 63.9,
"end_time": 64.1,
"speaker_tag": 0
},
{ "word": "a", "start_time": 64.1, "end_time": 64.1, "speaker_tag": 0 },
{
"word": "little",
"start_time": 64.1,
"end_time": 64.4,
"speaker_tag": 0
},
{
"word": "bounding",
"start_time": 64.4,
"end_time": 64.7,
"speaker_tag": 0
},
{ "word": "box", "start_time": 64.7, "end_time": 65.0, "speaker_tag": 0 },
{
"word": "around",
"start_time": 65.0,
"end_time": 65.3,
"speaker_tag": 0
},
{
"word": "them",
"start_time": 65.3,
"end_time": 65.6,
"speaker_tag": 0
},
{ "word": "as", "start_time": 66.4, "end_time": 66.5, "speaker_tag": 0 },
{ "word": "you", "start_time": 66.5, "end_time": 66.6, "speaker_tag": 0 },
{ "word": "can", "start_time": 66.6, "end_time": 66.8, "speaker_tag": 0 },
{ "word": "see", "start_time": 66.8, "end_time": 67.3, "speaker_tag": 0 },
{ "word": "it", "start_time": 67.4, "end_time": 67.5, "speaker_tag": 0 },
{
"word": "kind",
"start_time": 67.5,
"end_time": 67.7,
"speaker_tag": 0
},
{ "word": "of", "start_time": 67.7, "end_time": 67.8, "speaker_tag": 0 },
{
"word": "worked",
"start_time": 67.8,
"end_time": 68.4,
"speaker_tag": 0
},
{ "word": "at", "start_time": 68.5, "end_time": 68.6, "speaker_tag": 0 },
{
"word": "identifying",
"start_time": 68.6,
"end_time": 69.2,
"speaker_tag": 0
},
{ "word": "my", "start_time": 69.2, "end_time": 69.3, "speaker_tag": 0 },
{ "word": "top", "start_time": 69.3, "end_time": 70.0, "speaker_tag": 0 },
{
"word": "shorts",
"start_time": 70.0,
"end_time": 70.7,
"speaker_tag": 0
},
{
"word": "shoes",
"start_time": 70.7,
"end_time": 71.4,
"speaker_tag": 0
},
{
"word": "stuff",
"start_time": 71.4,
"end_time": 71.6,
"speaker_tag": 0
},
{
"word": "like",
"start_time": 71.6,
"end_time": 71.7,
"speaker_tag": 0
},
{
"word": "that.",
"start_time": 71.7,
"end_time": 72.3,
"speaker_tag": 0
},
{ "word": "But", "start_time": 72.5, "end_time": 72.7, "speaker_tag": 0 },
{
"word": "since",
"start_time": 72.7,
"end_time": 72.9,
"speaker_tag": 0
},
{ "word": "I'm", "start_time": 72.9, "end_time": 72.9, "speaker_tag": 0 },
{
"word": "going",
"start_time": 72.9,
"end_time": 73.1,
"speaker_tag": 0
},
{ "word": "to", "start_time": 73.1, "end_time": 73.1, "speaker_tag": 0 },
{ "word": "be", "start_time": 73.1, "end_time": 73.2, "speaker_tag": 0 },
{
"word": "matching",
"start_time": 73.2,
"end_time": 73.6,
"speaker_tag": 0
},
{ "word": "my", "start_time": 73.6, "end_time": 73.8, "speaker_tag": 0 },
{
"word": "clothing",
"start_time": 73.8,
"end_time": 74.1,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 74.1,
"end_time": 74.5,
"speaker_tag": 0
},
{ "word": "to", "start_time": 74.5, "end_time": 74.6, "speaker_tag": 0 },
{
"word": "inspiration",
"start_time": 74.6,
"end_time": 75.2,
"speaker_tag": 0
},
{
"word": "pictures,",
"start_time": 75.2,
"end_time": 75.9,
"speaker_tag": 0
},
{ "word": "I", "start_time": 75.9, "end_time": 76.0, "speaker_tag": 0 },
{
"word": "realize",
"start_time": 76.0,
"end_time": 76.3,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 76.3,
"end_time": 76.4,
"speaker_tag": 0
},
{ "word": "the", "start_time": 76.4, "end_time": 76.5, "speaker_tag": 0 },
{
"word": "quality",
"start_time": 76.5,
"end_time": 76.9,
"speaker_tag": 0
},
{ "word": "of", "start_time": 76.9, "end_time": 77.0, "speaker_tag": 0 },
{ "word": "my", "start_time": 77.0, "end_time": 77.1, "speaker_tag": 0 },
{
"word": "pictures",
"start_time": 77.1,
"end_time": 77.4,
"speaker_tag": 0
},
{ "word": "had", "start_time": 77.4, "end_time": 77.5, "speaker_tag": 0 },
{ "word": "to", "start_time": 77.5, "end_time": 77.6, "speaker_tag": 0 },
{ "word": "be", "start_time": 77.6, "end_time": 77.7, "speaker_tag": 0 },
{
"word": "really",
"start_time": 77.7,
"end_time": 77.9,
"speaker_tag": 0
},
{
"word": "high.",
"start_time": 77.9,
"end_time": 78.6,
"speaker_tag": 0
},
{
"word": "They",
"start_time": 78.6,
"end_time": 78.7,
"speaker_tag": 0
},
{
"word": "couldn't",
"start_time": 78.7,
"end_time": 79.0,
"speaker_tag": 0
},
{
"word": "beat",
"start_time": 79.0,
"end_time": 79.2,
"speaker_tag": 0
},
{ "word": "up", "start_time": 79.2, "end_time": 79.3, "speaker_tag": 0 },
{
"word": "weird",
"start_time": 79.3,
"end_time": 79.6,
"speaker_tag": 0
},
{
"word": "lighting",
"start_time": 79.6,
"end_time": 79.9,
"speaker_tag": 0
},
{
"word": "angles",
"start_time": 79.9,
"end_time": 80.9,
"speaker_tag": 0
},
{ "word": "or", "start_time": 80.9, "end_time": 81.0, "speaker_tag": 0 },
{
"word": "anything",
"start_time": 81.0,
"end_time": 81.2,
"speaker_tag": 0
},
{
"word": "like",
"start_time": 81.2,
"end_time": 81.4,
"speaker_tag": 0
},
{
"word": "that.",
"start_time": 81.4,
"end_time": 82.1,
"speaker_tag": 0
},
{ "word": "So", "start_time": 82.1, "end_time": 82.3, "speaker_tag": 0 },
{ "word": "I", "start_time": 82.3, "end_time": 82.3, "speaker_tag": 0 },
{
"word": "accepted",
"start_time": 82.3,
"end_time": 83.0,
"speaker_tag": 0
},
{ "word": "the", "start_time": 83.0, "end_time": 83.1, "speaker_tag": 0 },
{ "word": "sad", "start_time": 83.1, "end_time": 83.4, "speaker_tag": 0 },
{
"word": "fate",
"start_time": 83.4,
"end_time": 83.7,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 83.7,
"end_time": 83.8,
"speaker_tag": 0
},
{ "word": "I", "start_time": 83.8, "end_time": 83.9, "speaker_tag": 0 },
{ "word": "was", "start_time": 83.9, "end_time": 84.0, "speaker_tag": 0 },
{
"word": "just",
"start_time": 84.0,
"end_time": 84.2,
"speaker_tag": 0
},
{
"word": "going",
"start_time": 84.2,
"end_time": 84.3,
"speaker_tag": 0
},
{ "word": "to", "start_time": 84.3, "end_time": 84.4, "speaker_tag": 0 },
{
"word": "have",
"start_time": 84.4,
"end_time": 84.5,
"speaker_tag": 0
},
{ "word": "to", "start_time": 84.5, "end_time": 84.5, "speaker_tag": 0 },
{
"word": "take",
"start_time": 84.5,
"end_time": 84.7,
"speaker_tag": 0
},
{ "word": "a", "start_time": 84.7, "end_time": 84.7, "speaker_tag": 0 },
{
"word": "picture",
"start_time": 84.7,
"end_time": 85.1,
"speaker_tag": 0
},
{ "word": "of", "start_time": 85.1, "end_time": 85.3, "speaker_tag": 0 },
{
"word": "every",
"start_time": 85.3,
"end_time": 85.6,
"speaker_tag": 0
},
{
"word": "single",
"start_time": 85.6,
"end_time": 86.0,
"speaker_tag": 0
},
{
"word": "item",
"start_time": 86.0,
"end_time": 86.2,
"speaker_tag": 0
},
{ "word": "in", "start_time": 86.2, "end_time": 86.3, "speaker_tag": 0 },
{ "word": "my", "start_time": 86.3, "end_time": 86.8, "speaker_tag": 0 }
]
},
{
"transcript": " Visit which I did painstakingly over a couple of days dressing and undressing. My mannequin taking pictures of all my different shoes next. I need to find some people who actually knew something about looking trendy and bonus points if they also worked in Tech. So I reached out to Laura Medallia Laura is a developer who posts lots of fashionable outfit pics of herself on Instagram where she's at coder girl and from head to toe which is important because we need to capture those shoes with her permission. I downloaded a bunch of her pictures and uploaded them to cloud storage again. The goal here is to take",
"words": [
{
"word": "Visit",
"start_time": 86.9,
"end_time": 87.4,
"speaker_tag": 0
},
{
"word": "which",
"start_time": 87.4,
"end_time": 87.6,
"speaker_tag": 0
},
{ "word": "I", "start_time": 87.6, "end_time": 87.6, "speaker_tag": 0 },
{ "word": "did", "start_time": 87.6, "end_time": 88.0, "speaker_tag": 0 },
{
"word": "painstakingly",
"start_time": 88.0,
"end_time": 88.9,
"speaker_tag": 0
},
{
"word": "over",
"start_time": 88.9,
"end_time": 89.4,
"speaker_tag": 0
},
{ "word": "a", "start_time": 89.4, "end_time": 89.5, "speaker_tag": 0 },
{
"word": "couple",
"start_time": 89.5,
"end_time": 89.8,
"speaker_tag": 0
},
{ "word": "of", "start_time": 89.8, "end_time": 89.9, "speaker_tag": 0 },
{
"word": "days",
"start_time": 89.9,
"end_time": 90.5,
"speaker_tag": 0
},
{
"word": "dressing",
"start_time": 90.8,
"end_time": 91.1,
"speaker_tag": 0
},
{ "word": "and", "start_time": 91.1, "end_time": 91.2, "speaker_tag": 0 },
{
"word": "undressing.",
"start_time": 91.2,
"end_time": 91.5,
"speaker_tag": 0
},
{ "word": "My", "start_time": 91.5, "end_time": 91.6, "speaker_tag": 0 },
{
"word": "mannequin",
"start_time": 91.6,
"end_time": 92.5,
"speaker_tag": 0
},
{
"word": "taking",
"start_time": 92.5,
"end_time": 92.9,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 92.9,
"end_time": 93.2,
"speaker_tag": 0
},
{ "word": "of", "start_time": 93.2, "end_time": 93.3, "speaker_tag": 0 },
{ "word": "all", "start_time": 93.3, "end_time": 93.4, "speaker_tag": 0 },
{ "word": "my", "start_time": 93.4, "end_time": 93.6, "speaker_tag": 0 },
{
"word": "different",
"start_time": 93.6,
"end_time": 93.9,
"speaker_tag": 0
},
{
"word": "shoes",
"start_time": 93.9,
"end_time": 94.4,
"speaker_tag": 0
},
{
"word": "next.",
"start_time": 94.8,
"end_time": 95.1,
"speaker_tag": 0
},
{ "word": "I", "start_time": 95.1, "end_time": 95.2, "speaker_tag": 0 },
{
"word": "need",
"start_time": 95.2,
"end_time": 95.4,
"speaker_tag": 0
},
{ "word": "to", "start_time": 95.4, "end_time": 95.5, "speaker_tag": 0 },
{
"word": "find",
"start_time": 95.5,
"end_time": 95.7,
"speaker_tag": 0
},
{
"word": "some",
"start_time": 95.7,
"end_time": 95.8,
"speaker_tag": 0
},
{
"word": "people",
"start_time": 95.8,
"end_time": 96.1,
"speaker_tag": 0
},
{ "word": "who", "start_time": 96.1, "end_time": 96.3, "speaker_tag": 0 },
{
"word": "actually",
"start_time": 96.3,
"end_time": 96.7,
"speaker_tag": 0
},
{
"word": "knew",
"start_time": 96.7,
"end_time": 96.8,
"speaker_tag": 0
},
{
"word": "something",
"start_time": 96.8,
"end_time": 97.1,
"speaker_tag": 0
},
{
"word": "about",
"start_time": 97.1,
"end_time": 97.3,
"speaker_tag": 0
},
{
"word": "looking",
"start_time": 97.3,
"end_time": 97.5,
"speaker_tag": 0
},
{
"word": "trendy",
"start_time": 97.5,
"end_time": 98.0,
"speaker_tag": 0
},
{ "word": "and", "start_time": 98.1, "end_time": 98.3, "speaker_tag": 0 },
{
"word": "bonus",
"start_time": 98.3,
"end_time": 98.6,
"speaker_tag": 0
},
{
"word": "points",
"start_time": 98.6,
"end_time": 98.8,
"speaker_tag": 0
},
{ "word": "if", "start_time": 98.8, "end_time": 98.9, "speaker_tag": 0 },
{
"word": "they",
"start_time": 98.9,
"end_time": 99.1,
"speaker_tag": 0
},
{
"word": "also",
"start_time": 99.1,
"end_time": 99.4,
"speaker_tag": 0
},
{
"word": "worked",
"start_time": 99.4,
"end_time": 99.6,
"speaker_tag": 0
},
{ "word": "in", "start_time": 99.6, "end_time": 99.6, "speaker_tag": 0 },
{
"word": "Tech.",
"start_time": 99.6,
"end_time": 100.1,
"speaker_tag": 0
},
{
"word": "So",
"start_time": 100.2,
"end_time": 100.4,
"speaker_tag": 0
},
{ "word": "I", "start_time": 100.4, "end_time": 100.5, "speaker_tag": 0 },
{
"word": "reached",
"start_time": 100.5,
"end_time": 100.9,
"speaker_tag": 0
},
{
"word": "out",
"start_time": 100.9,
"end_time": 101.0,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 101.0,
"end_time": 101.2,
"speaker_tag": 0
},
{
"word": "Laura",
"start_time": 101.2,
"end_time": 101.5,
"speaker_tag": 0
},
{
"word": "Medallia",
"start_time": 101.5,
"end_time": 102.1,
"speaker_tag": 0
},
{
"word": "Laura",
"start_time": 102.5,
"end_time": 102.8,
"speaker_tag": 0
},
{
"word": "is",
"start_time": 102.8,
"end_time": 102.9,
"speaker_tag": 0
},
{ "word": "a", "start_time": 102.9, "end_time": 102.9, "speaker_tag": 0 },
{
"word": "developer",
"start_time": 102.9,
"end_time": 103.4,
"speaker_tag": 0
},
{
"word": "who",
"start_time": 103.4,
"end_time": 103.6,
"speaker_tag": 0
},
{
"word": "posts",
"start_time": 103.6,
"end_time": 103.9,
"speaker_tag": 0
},
{
"word": "lots",
"start_time": 103.9,
"end_time": 104.2,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 104.2,
"end_time": 104.3,
"speaker_tag": 0
},
{
"word": "fashionable",
"start_time": 104.3,
"end_time": 104.9,
"speaker_tag": 0
},
{
"word": "outfit",
"start_time": 104.9,
"end_time": 105.2,
"speaker_tag": 0
},
{
"word": "pics",
"start_time": 105.2,
"end_time": 105.4,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 105.4,
"end_time": 105.5,
"speaker_tag": 0
},
{
"word": "herself",
"start_time": 105.5,
"end_time": 105.8,
"speaker_tag": 0
},
{
"word": "on",
"start_time": 105.8,
"end_time": 106.0,
"speaker_tag": 0
},
{
"word": "Instagram",
"start_time": 106.0,
"end_time": 106.5,
"speaker_tag": 0
},
{
"word": "where",
"start_time": 106.5,
"end_time": 106.6,
"speaker_tag": 0
},
{
"word": "she's",
"start_time": 106.6,
"end_time": 106.9,
"speaker_tag": 0
},
{
"word": "at",
"start_time": 106.9,
"end_time": 107.2,
"speaker_tag": 0
},
{
"word": "coder",
"start_time": 107.2,
"end_time": 107.4,
"speaker_tag": 0
},
{
"word": "girl",
"start_time": 107.4,
"end_time": 107.8,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 108.1,
"end_time": 108.4,
"speaker_tag": 0
},
{
"word": "from",
"start_time": 108.4,
"end_time": 108.5,
"speaker_tag": 0
},
{
"word": "head",
"start_time": 108.5,
"end_time": 108.7,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 108.7,
"end_time": 108.8,
"speaker_tag": 0
},
{
"word": "toe",
"start_time": 108.8,
"end_time": 109.2,
"speaker_tag": 0
},
{
"word": "which",
"start_time": 109.2,
"end_time": 109.4,
"speaker_tag": 0
},
{
"word": "is",
"start_time": 109.4,
"end_time": 109.5,
"speaker_tag": 0
},
{
"word": "important",
"start_time": 109.5,
"end_time": 109.9,
"speaker_tag": 0
},
{
"word": "because",
"start_time": 109.9,
"end_time": 110.1,
"speaker_tag": 0
},
{
"word": "we",
"start_time": 110.1,
"end_time": 110.2,
"speaker_tag": 0
},
{
"word": "need",
"start_time": 110.2,
"end_time": 110.3,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 110.3,
"end_time": 110.3,
"speaker_tag": 0
},
{
"word": "capture",
"start_time": 110.3,
"end_time": 110.6,
"speaker_tag": 0
},
{
"word": "those",
"start_time": 110.6,
"end_time": 110.8,
"speaker_tag": 0
},
{
"word": "shoes",
"start_time": 110.8,
"end_time": 111.4,
"speaker_tag": 0
},
{
"word": "with",
"start_time": 111.6,
"end_time": 111.7,
"speaker_tag": 0
},
{
"word": "her",
"start_time": 111.7,
"end_time": 111.9,
"speaker_tag": 0
},
{
"word": "permission.",
"start_time": 111.9,
"end_time": 112.3,
"speaker_tag": 0
},
{ "word": "I", "start_time": 112.3, "end_time": 112.4, "speaker_tag": 0 },
{
"word": "downloaded",
"start_time": 112.4,
"end_time": 112.8,
"speaker_tag": 0
},
{ "word": "a", "start_time": 112.8, "end_time": 112.8, "speaker_tag": 0 },
{
"word": "bunch",
"start_time": 112.8,
"end_time": 113.1,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 113.1,
"end_time": 113.1,
"speaker_tag": 0
},
{
"word": "her",
"start_time": 113.1,
"end_time": 113.2,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 113.2,
"end_time": 113.7,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 113.7,
"end_time": 113.8,
"speaker_tag": 0
},
{
"word": "uploaded",
"start_time": 113.8,
"end_time": 114.2,
"speaker_tag": 0
},
{
"word": "them",
"start_time": 114.2,
"end_time": 114.4,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 114.4,
"end_time": 114.5,
"speaker_tag": 0
},
{
"word": "cloud",
"start_time": 114.5,
"end_time": 114.8,
"speaker_tag": 0
},
{
"word": "storage",
"start_time": 114.8,
"end_time": 115.4,
"speaker_tag": 0
},
{
"word": "again.",
"start_time": 115.6,
"end_time": 115.9,
"speaker_tag": 0
},
{
"word": "The",
"start_time": 115.9,
"end_time": 116.0,
"speaker_tag": 0
},
{
"word": "goal",
"start_time": 116.0,
"end_time": 116.3,
"speaker_tag": 0
},
{
"word": "here",
"start_time": 116.3,
"end_time": 116.5,
"speaker_tag": 0
},
{
"word": "is",
"start_time": 116.5,
"end_time": 116.6,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 116.6,
"end_time": 116.7,
"speaker_tag": 0
},
{
"word": "take",
"start_time": 116.7,
"end_time": 116.8,
"speaker_tag": 0
}
]
},
{
"transcript": " Bunch of Laura's pictures and then pictures from my own closet and see which of her outfits I can kind of recreate now challenge one was that not all lawyers pictures are actually of clothing. So first I had to eliminate those and select only the ones where she's in an outfit for that. I used a feature of the vision API that does image classification this feature returns a bunch of tags describing what in general is going on in an image and one of the tags that returns its fashion. So I sorted through all of those pictures and only kept ones that had a person in them and there were also labeled fashion and with that I had my inspiration image",
"words": [
{
"word": "Bunch",
"start_time": 117.0,
"end_time": 117.1,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 117.1,
"end_time": 117.2,
"speaker_tag": 0
},
{
"word": "Laura's",
"start_time": 117.2,
"end_time": 117.6,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 117.6,
"end_time": 118.1,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 118.1,
"end_time": 118.3,
"speaker_tag": 0
},
{
"word": "then",
"start_time": 118.3,
"end_time": 118.4,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 118.4,
"end_time": 118.6,
"speaker_tag": 0
},
{
"word": "from",
"start_time": 118.6,
"end_time": 118.8,
"speaker_tag": 0
},
{
"word": "my",
"start_time": 118.8,
"end_time": 118.9,
"speaker_tag": 0
},
{
"word": "own",
"start_time": 118.9,
"end_time": 119.2,
"speaker_tag": 0
},
{
"word": "closet",
"start_time": 119.2,
"end_time": 119.7,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 119.7,
"end_time": 119.8,
"speaker_tag": 0
},
{
"word": "see",
"start_time": 119.8,
"end_time": 120.0,
"speaker_tag": 0
},
{
"word": "which",
"start_time": 120.0,
"end_time": 120.1,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 120.1,
"end_time": 120.3,
"speaker_tag": 0
},
{
"word": "her",
"start_time": 120.3,
"end_time": 120.4,
"speaker_tag": 0
},
{
"word": "outfits",
"start_time": 120.4,
"end_time": 120.8,
"speaker_tag": 0
},
{ "word": "I", "start_time": 120.8, "end_time": 121.0, "speaker_tag": 0 },
{
"word": "can",
"start_time": 121.0,
"end_time": 121.2,
"speaker_tag": 0
},
{
"word": "kind",
"start_time": 121.2,
"end_time": 121.4,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 121.4,
"end_time": 121.5,
"speaker_tag": 0
},
{
"word": "recreate",
"start_time": 121.5,
"end_time": 122.1,
"speaker_tag": 0
},
{
"word": "now",
"start_time": 122.1,
"end_time": 122.4,
"speaker_tag": 0
},
{
"word": "challenge",
"start_time": 122.4,
"end_time": 122.8,
"speaker_tag": 0
},
{
"word": "one",
"start_time": 122.8,
"end_time": 123.0,
"speaker_tag": 0
},
{
"word": "was",
"start_time": 123.0,
"end_time": 123.1,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 123.1,
"end_time": 123.3,
"speaker_tag": 0
},
{
"word": "not",
"start_time": 123.3,
"end_time": 123.5,
"speaker_tag": 0
},
{
"word": "all",
"start_time": 123.5,
"end_time": 123.7,
"speaker_tag": 0
},
{
"word": "lawyers",
"start_time": 123.7,
"end_time": 124.1,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 124.1,
"end_time": 124.5,
"speaker_tag": 0
},
{
"word": "are",
"start_time": 124.5,
"end_time": 124.6,
"speaker_tag": 0
},
{
"word": "actually",
"start_time": 124.6,
"end_time": 124.9,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 124.9,
"end_time": 125.0,
"speaker_tag": 0
},
{
"word": "clothing.",
"start_time": 125.0,
"end_time": 125.5,
"speaker_tag": 0
},
{
"word": "So",
"start_time": 125.6,
"end_time": 125.8,
"speaker_tag": 0
},
{
"word": "first",
"start_time": 125.8,
"end_time": 126.0,
"speaker_tag": 0
},
{ "word": "I", "start_time": 126.0, "end_time": 126.0, "speaker_tag": 0 },
{
"word": "had",
"start_time": 126.0,
"end_time": 126.1,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 126.1,
"end_time": 126.2,
"speaker_tag": 0
},
{
"word": "eliminate",
"start_time": 126.2,
"end_time": 126.7,
"speaker_tag": 0
},
{
"word": "those",
"start_time": 126.7,
"end_time": 127.0,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 127.0,
"end_time": 127.2,
"speaker_tag": 0
},
{
"word": "select",
"start_time": 127.2,
"end_time": 127.4,
"speaker_tag": 0
},
{
"word": "only",
"start_time": 127.4,
"end_time": 127.6,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 127.6,
"end_time": 127.7,
"speaker_tag": 0
},
{
"word": "ones",
"start_time": 127.7,
"end_time": 127.9,
"speaker_tag": 0
},
{
"word": "where",
"start_time": 127.9,
"end_time": 128.1,
"speaker_tag": 0
},
{
"word": "she's",
"start_time": 128.1,
"end_time": 128.3,
"speaker_tag": 0
},
{
"word": "in",
"start_time": 128.3,
"end_time": 128.4,
"speaker_tag": 0
},
{
"word": "an",
"start_time": 128.4,
"end_time": 128.5,
"speaker_tag": 0
},
{
"word": "outfit",
"start_time": 128.5,
"end_time": 129.1,
"speaker_tag": 0
},
{
"word": "for",
"start_time": 129.3,
"end_time": 129.5,
"speaker_tag": 0
},
{
"word": "that.",
"start_time": 129.5,
"end_time": 129.8,
"speaker_tag": 0
},
{ "word": "I", "start_time": 129.8, "end_time": 129.9, "speaker_tag": 0 },
{
"word": "used",
"start_time": 129.9,
"end_time": 130.1,
"speaker_tag": 0
},
{ "word": "a", "start_time": 130.1, "end_time": 130.2, "speaker_tag": 0 },
{
"word": "feature",
"start_time": 130.2,
"end_time": 130.6,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 130.6,
"end_time": 130.6,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 130.6,
"end_time": 130.7,
"speaker_tag": 0
},
{
"word": "vision",
"start_time": 130.7,
"end_time": 131.0,
"speaker_tag": 0
},
{
"word": "API",
"start_time": 131.0,
"end_time": 131.4,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 131.4,
"end_time": 131.5,
"speaker_tag": 0
},
{
"word": "does",
"start_time": 131.5,
"end_time": 131.7,
"speaker_tag": 0
},
{
"word": "image",
"start_time": 131.7,
"end_time": 132.0,
"speaker_tag": 0
},
{
"word": "classification",
"start_time": 132.0,
"end_time": 133.1,
"speaker_tag": 0
},
{
"word": "this",
"start_time": 133.1,
"end_time": 133.3,
"speaker_tag": 0
},
{
"word": "feature",
"start_time": 133.3,
"end_time": 133.6,
"speaker_tag": 0
},
{
"word": "returns",
"start_time": 133.6,
"end_time": 134.0,
"speaker_tag": 0
},
{ "word": "a", "start_time": 134.0, "end_time": 134.1, "speaker_tag": 0 },
{
"word": "bunch",
"start_time": 134.1,
"end_time": 134.4,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 134.4,
"end_time": 134.5,
"speaker_tag": 0
},
{
"word": "tags",
"start_time": 134.5,
"end_time": 134.8,
"speaker_tag": 0
},
{
"word": "describing",
"start_time": 134.8,
"end_time": 135.3,
"speaker_tag": 0
},
{
"word": "what",
"start_time": 135.3,
"end_time": 135.5,
"speaker_tag": 0
},
{
"word": "in",
"start_time": 135.5,
"end_time": 135.6,
"speaker_tag": 0
},
{
"word": "general",
"start_time": 135.6,
"end_time": 136.1,
"speaker_tag": 0
},
{
"word": "is",
"start_time": 136.1,
"end_time": 136.3,
"speaker_tag": 0
},
{
"word": "going",
"start_time": 136.3,
"end_time": 136.5,
"speaker_tag": 0
},
{
"word": "on",
"start_time": 136.5,
"end_time": 136.7,
"speaker_tag": 0
},
{
"word": "in",
"start_time": 136.7,
"end_time": 136.8,
"speaker_tag": 0
},
{
"word": "an",
"start_time": 136.8,
"end_time": 136.9,
"speaker_tag": 0
},
{
"word": "image",
"start_time": 136.9,
"end_time": 137.4,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 137.4,
"end_time": 137.5,
"speaker_tag": 0
},
{
"word": "one",
"start_time": 137.5,
"end_time": 137.7,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 137.7,
"end_time": 137.7,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 137.7,
"end_time": 137.8,
"speaker_tag": 0
},
{
"word": "tags",
"start_time": 137.8,
"end_time": 138.1,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 138.1,
"end_time": 138.2,
"speaker_tag": 0
},
{
"word": "returns",
"start_time": 138.2,
"end_time": 138.7,
"speaker_tag": 0
},
{
"word": "its",
"start_time": 138.7,
"end_time": 138.9,
"speaker_tag": 0
},
{
"word": "fashion.",
"start_time": 138.9,
"end_time": 139.4,
"speaker_tag": 0
},
{
"word": "So",
"start_time": 139.5,
"end_time": 139.6,
"speaker_tag": 0
},
{ "word": "I", "start_time": 139.6, "end_time": 139.7, "speaker_tag": 0 },
{
"word": "sorted",
"start_time": 139.7,
"end_time": 140.0,
"speaker_tag": 0
},
{
"word": "through",
"start_time": 140.0,
"end_time": 140.2,
"speaker_tag": 0
},
{
"word": "all",
"start_time": 140.2,
"end_time": 140.3,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 140.3,
"end_time": 140.4,
"speaker_tag": 0
},
{
"word": "those",
"start_time": 140.4,
"end_time": 140.7,
"speaker_tag": 0
},
{
"word": "pictures",
"start_time": 140.7,
"end_time": 141.1,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 141.1,
"end_time": 141.2,
"speaker_tag": 0
},
{
"word": "only",
"start_time": 141.2,
"end_time": 141.4,
"speaker_tag": 0
},
{
"word": "kept",
"start_time": 141.4,
"end_time": 141.7,
"speaker_tag": 0
},
{
"word": "ones",
"start_time": 141.7,
"end_time": 142.0,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 142.0,
"end_time": 142.2,
"speaker_tag": 0
},
{
"word": "had",
"start_time": 142.2,
"end_time": 142.3,
"speaker_tag": 0
},
{ "word": "a", "start_time": 142.3, "end_time": 142.3, "speaker_tag": 0 },
{
"word": "person",
"start_time": 142.3,
"end_time": 142.6,
"speaker_tag": 0
},
{
"word": "in",
"start_time": 142.6,
"end_time": 142.7,
"speaker_tag": 0
},
{
"word": "them",
"start_time": 142.7,
"end_time": 142.9,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 142.9,
"end_time": 143.0,
"speaker_tag": 0
},
{
"word": "there",
"start_time": 143.0,
"end_time": 143.1,
"speaker_tag": 0
},
{
"word": "were",
"start_time": 143.1,
"end_time": 143.3,
"speaker_tag": 0
},
{
"word": "also",
"start_time": 143.3,
"end_time": 143.6,
"speaker_tag": 0
},
{
"word": "labeled",
"start_time": 143.6,
"end_time": 144.0,
"speaker_tag": 0
},
{
"word": "fashion",
"start_time": 144.0,
"end_time": 144.6,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 144.8,
"end_time": 145.0,
"speaker_tag": 0
},
{
"word": "with",
"start_time": 145.0,
"end_time": 145.0,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 145.0,
"end_time": 145.3,
"speaker_tag": 0
},
{ "word": "I", "start_time": 145.3, "end_time": 145.4, "speaker_tag": 0 },
{
"word": "had",
"start_time": 145.4,
"end_time": 145.7,
"speaker_tag": 0
},
{
"word": "my",
"start_time": 145.7,
"end_time": 146.0,
"speaker_tag": 0
},
{
"word": "inspiration",
"start_time": 146.0,
"end_time": 146.6,
"speaker_tag": 0
},
{
"word": "image",
"start_time": 146.6,
"end_time": 146.8,
"speaker_tag": 0
}
]
},
{
"transcript": " Each data set now. The next step is to implement the feature that looks in my closet and the inspiration picture and matches them for that. I'm going to use yet another feature of the Google Vision API called Product search. If you ever scrolling through a store and you're looking at a product and you see similar items, that's the functionality that the Google Vision product search feature gives you so the next step is to create a product set for this you'll want to use the pi Vision product search Library. The first step is to create a product search client that connects to the Google Cloud back-end and",
"words": [
{
"word": "Each",
"start_time": 146.9,
"end_time": 146.9,
"speaker_tag": 0
},
{
"word": "data",
"start_time": 146.9,
"end_time": 147.2,
"speaker_tag": 0
},
{
"word": "set",
"start_time": 147.2,
"end_time": 147.6,
"speaker_tag": 0
},
{
"word": "now.",
"start_time": 147.9,
"end_time": 148.1,
"speaker_tag": 0
},
{
"word": "The",
"start_time": 148.1,
"end_time": 148.3,
"speaker_tag": 0
},
{
"word": "next",
"start_time": 148.3,
"end_time": 148.5,
"speaker_tag": 0
},
{
"word": "step",
"start_time": 148.5,
"end_time": 148.7,
"speaker_tag": 0
},
{
"word": "is",
"start_time": 148.7,
"end_time": 148.8,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 148.8,
"end_time": 148.9,
"speaker_tag": 0
},
{
"word": "implement",
"start_time": 148.9,
"end_time": 149.2,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 149.2,
"end_time": 149.3,
"speaker_tag": 0
},
{
"word": "feature",
"start_time": 149.3,
"end_time": 149.9,
"speaker_tag": 0
},
{
"word": "that",
"start_time": 149.9,
"end_time": 150.2,
"speaker_tag": 0
},
{
"word": "looks",
"start_time": 150.2,
"end_time": 150.4,
"speaker_tag": 0
},
{
"word": "in",
"start_time": 150.4,
"end_time": 150.5,
"speaker_tag": 0
},
{
"word": "my",
"start_time": 150.5,
"end_time": 150.7,
"speaker_tag": 0
},
{
"word": "closet",
"start_time": 150.7,
"end_time": 151.1,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 151.1,
"end_time": 151.3,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 151.3,
"end_time": 151.4,
"speaker_tag": 0
},
{
"word": "inspiration",
"start_time": 151.4,
"end_time": 151.9,
"speaker_tag": 0
},
{
"word": "picture",
"start_time": 151.9,
"end_time": 152.3,
"speaker_tag": 0
},
{
"word": "and",
"start_time": 152.3,
"end_time": 152.6,
"speaker_tag": 0
},
{
"word": "matches",
"start_time": 152.6,
"end_time": 152.9,
"speaker_tag": 0
},
{
"word": "them",
"start_time": 152.9,
"end_time": 153.3,
"speaker_tag": 0
},
{
"word": "for",
"start_time": 153.9,
"end_time": 154.0,
"speaker_tag": 0
},
{
"word": "that.",
"start_time": 154.0,
"end_time": 154.3,
"speaker_tag": 0
},
{
"word": "I'm",
"start_time": 154.3,
"end_time": 154.4,
"speaker_tag": 0
},
{
"word": "going",
"start_time": 154.4,
"end_time": 154.5,
"speaker_tag": 0
},
{
"word": "to",
"start_time": 154.5,
"end_time": 154.6,
"speaker_tag": 0
},
{
"word": "use",
"start_time": 154.6,
"end_time": 154.7,
"speaker_tag": 0
},
{
"word": "yet",
"start_time": 154.7,
"end_time": 154.9,
"speaker_tag": 0
},
{
"word": "another",
"start_time": 154.9,
"end_time": 155.3,
"speaker_tag": 0
},
{
"word": "feature",
"start_time": 155.3,
"end_time": 155.6,
"speaker_tag": 0
},
{
"word": "of",
"start_time": 155.6,
"end_time": 155.7,
"speaker_tag": 0
},
{
"word": "the",
"start_time": 155.7,
"end_time": 155.8,
"speaker_tag": 0
},
{
"word": "Google",
"start_time": 155.8,
"end_time": 156.1,
"speaker_tag": 0
},
{
"word": "Vision",
"start_time": 156.1,
"end_time": 156.4,
"speaker_tag": 0
},
{
"word": "API",
"start_time": 156.4,
"end_time": 156.8,
"speaker_tag": 0
},
{
"word": "called",
"start_time": 156.8,
"end_time": 157.3,
"speaker_tag": 0
},
{
"word": "Product",
"start_time": 157.3,
"end_time": 157.7,
"speaker_tag": 0
},
{
"word": "search.",
"start_time": 157.7,
"end_time": 158.1,
"speaker_tag": 0
},
{
"word": "If",
"start_time": 158.1,
"end_time": 158.2,
"speaker_tag": 0
},
{
"word": "you",
"start_time": 158.2,
"end_time": 158.3,
"speaker_tag": 0