# Standard-library pieces: SQLite storage, rotating log files, and scoped cleanup.
import sqlite3
import logging
from logging.handlers import RotatingFileHandler
from contextlib import closing
from pathlib import Path

# MCP SDK: protocol types, the low-level Server, the stdio transport, and init options.
import mcp.types as types
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
import mcp.server.stdio
from pydantic import AnyUrl
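These imports follow the pattern of a minimal MCP server exposing a SQLite database over stdio. A sketch of how they typically fit together is below; the "query" tool, DB_PATH, and the handler names are illustrative assumptions, not code from this gist.

import asyncio
import json

# Hypothetical database path; the gist itself does not show one.
DB_PATH = Path("example.db")

server = Server("sqlite")

@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
    # Advertise one illustrative read-only query tool.
    return [
        types.Tool(
            name="query",
            description="Run a read-only SQL query against the SQLite database",
            inputSchema={
                "type": "object",
                "properties": {"sql": {"type": "string"}},
                "required": ["sql"],
            },
        )
    ]

@server.call_tool()
async def handle_call_tool(name: str, arguments: dict | None) -> list[types.TextContent]:
    if name != "query":
        raise ValueError(f"Unknown tool: {name}")
    # closing() guarantees the connection is released even if execution raises.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        rows = conn.execute((arguments or {})["sql"]).fetchall()
    return [types.TextContent(type="text", text=json.dumps(rows))]

async def main():
    # stdio_server() wires the MCP server to stdin/stdout for a local client.
    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="sqlite",
                server_version="0.1.0",
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )

if __name__ == "__main__":
    asyncio.run(main())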
{
  "_id": "671412006a8ffac0814c1ace",
  "arxivId": "1706.03762",
  "title": "Attention Is All You Need",
  "authors": [
    "Ashish Vaswani (Google Brain, avaswani@google.com)",
    "Noam Shazeer (Google Brain, noam@google.com)",
    "Niki Parmar (Google Research, nikip@google.com)",
    "Jakob Uszkoreit (Google Research, usz@google.com)",
    "Llion Jones (Google Research, llion@google.com)",
    "Aidan N. Gomez (University of Toronto, aidan@cs.toronto.edu)",
    "Łukasz Kaiser (Google Brain, lukaszkaiser@google.com)",
    "Illia Polosukhin (illia.polosukhin@gmail.com)"
  ],
  "abstract": "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely."
}
{
  "type": "doc",
  "content": [
    {
      "type": "bulletList",
      "content": [
        {
          "type": "listItem",
          "content": [
            {
              "text": "Figure 1: The Transformer - model architecture.",
              "sid": "S3.F1"
            },
            {
              "text": "Figure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several attention layers running in parallel.",
              "sid": "S3.F2"
            }
          ]
        }
      ]
    }
  ]
}
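One way to recover the caption strings from a nested ProseMirror-style tree like this is a small recursive walk. extract_texts is a hypothetical helper, not part of any of the snippets above.

def extract_texts(node):
    # Recursively collect every "text" field from a ProseMirror-style JSON tree.
    texts = []
    if isinstance(node, dict):
        if "text" in node:
            texts.append(node["text"])
        for child in node.get("content", []):
            texts.extend(extract_texts(child))
    elif isinstance(node, list):
        for child in node:
            texts.extend(extract_texts(child))
    return texts

# Applied to the "doc" object above, this yields the two figure captions.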
jimmyhzhang / machine.js
Created September 26, 2019 18:57
Generated by XState Viz: https://xstate.js.org/viz
// Available variables:
// - Machine
// - interpret
// - assign
// - send
// - sendParent
// - spawn
// - raise
// - actions