import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

import DefaultLayout from "/home/runner/work/myedibleenso.github.io/myedibleenso.github.io/src/components/BasicLayout.js";
import { HTMLTable } from '@blueprintjs/core';
// Frontmatter exported alongside the page component; this document declares none.
export const _frontmatter = {};

// Props spread onto the layout on every render, ahead of the caller's own props.
const layoutProps = { _frontmatter };

// Layout component that wraps the rendered document body.
const MDXLayout = DefaultLayout;
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">



    <h1 {...{
      "id": "overview",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#overview",
        "aria-label": "overview permalink",
        "className": "md-header before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "height": "20",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "20"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Overview`}</h1>
    <p><undefined parentName="p">{`In this unit, we'll look at transformer architectures and attention mechanisms.  `}<span {...{
          "role": "img",
          "aria-label": "eyes"
        }}>{`👀`}</span>{` `}</undefined></p>
    <h1 {...{
      "id": "outcomes",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#outcomes",
        "aria-label": "outcomes permalink",
        "className": "md-header before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "height": "20",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "20"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Outcomes`}</h1>
    <p>{`By the end of this unit, you'll be able to ...`}</p>
    <ul>
      <li parentName="ul"><undefined parentName="li">{`distinguish between transformers `}<span {...{
            "id": "citation-0",
            "data-hover": ""
          }}><span parentName="span" {...{
              "className": "citation-number"
            }}>{`[?]`}</span></span>{` and BERT `}<span {...{
            "id": "citation-0",
            "data-hover": ""
          }}><span parentName="span" {...{
              "className": "citation-number"
            }}>{`[?]`}</span></span></undefined></li>
      <li parentName="ul">{`describe the structure of BERT`}</li>
      <li parentName="ul">{`describe the pretraining tasks used for BERT`}</li>
    </ul>
    <h1 {...{
      "id": "resources",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#resources",
        "aria-label": "resources permalink",
        "className": "md-header before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "height": "20",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "20"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Resources`}</h1>
    <HTMLTable condensed striped mdxType="HTMLTable">
  <tbody>
    <tr>
        <td>
          <p>Readings</p>
        </td>
        <td>
          <p className="table-break">Self-attention mechanisms</p> 
          <p>Tranformers (first academic paper)</p>
          <p className="table-break">The Illustrated Transformer</p>
          <p>BERT (first academic paper)</p>
          <p>Bidirectional Transformer Encoders</p>
        </td>
        <td>
          <p className="partial-row">
            <li><a href="https://public.parsertongue.com/readings/slp3/9-dl.pdf#page=17">Section 9.7 of Jurafsky and Martin's <i>Speech and Language Processing</i></a></li>
          </p>
          <p className="partial-row">
            <li><a href="https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf">Attention is All you Need (Vaswani et al., 2017)</a></li>
          </p>
          <p className="partial-row">
            <li><a href="http://jalammar.github.io/illustrated-transformer/">Animated walkthrough of the transformer from Jay Alammar</a></li>
          </p>
          <p className="partial-row">
            <li><a href="https://aclanthology.org/N19-1423/">BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding (Devlin et al., 2018)</a></li>
          </p>
          <p className="partial-row">
            <li><a href="https://public.parsertongue.com/readings/slp3/11.pdf">Chapter 11 of Jurafsky and Martin's <i>Speech and Language Processing</i></a></li>
          </p>
        </td>
    </tr>
    <tr>
        <td>
          <p>PyTorch</p>
        </td>
        <td>
          <p>The Annotated Transformer</p>
        </td>
        <td>
          <p>
            <li><a href="http://nlp.seas.harvard.edu/annotated-transformer/">Austin Huang, Suraj Subramanian, Jonathan Sum, Khalid Almubarak, and Stella Athena's walkthrough and PyTorch implementation of the transformer architecture (based on an earlier version by Alexander Rush, Vincent Nguyen, and Guillaume Klein)</a></li>
          </p>
        </td>
    </tr>
  </tbody>
    </HTMLTable>
    {
      /* <tr>
           <td>
             <p><code>einops</code></p>
           </td>
           <td>
           </td>
           <td>
             <p>
               <a href="https://github.com/arogozhnikov/einops">Library supporting Einstein notation to define and describe resizing operations</a>
             </p>
           </td>
         </tr>
      */
    }
    <h1 {...{
      "id": "learn",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#learn",
        "aria-label": "learn permalink",
        "className": "md-header before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "height": "20",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "20"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Learn`}</h1>
    <ul>
      <li parentName="ul"><a parentName="li" {...{
          "href": "/tutorials/transformers-for-nlp"
        }}>{`Transformers for NLP`}</a></li>
    </ul>
    <h1 {...{
      "id": "practice",
      "style": {
        "position": "relative"
      }
    }}><a parentName="h1" {...{
        "href": "#practice",
        "aria-label": "practice permalink",
        "className": "md-header before"
      }}><svg parentName="a" {...{
          "aria-hidden": "true",
          "height": "20",
          "version": "1.1",
          "viewBox": "0 0 16 16",
          "width": "20"
        }}><path parentName="svg" {...{
            "fillRule": "evenodd",
            "d": "M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
          }}></path></svg></a>{`Practice`}</h1>
    <p>{`Try to put these ideas into practice for your shared task and class competition submissions.`}</p>
    {
      /* - [Review what you've learned](https://arizona.openclass.ai) */
    }
    {
      /* # References */
    }
    <p>{`@@bibliography@@
@inproceedings{Vaswani2017AttentionIA,
title={Attention is All you Need},
author={Ashish Vaswani and Noam M. Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
booktitle={NIPS},
year={2017}
}
@inproceedings{devlin-etal-2019-bert,
title = "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding",
author = {Devlin, Jacob  and Chang, Ming-Wei  and  Lee, Kenton  and Toutanova, Kristina},
booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2019",
publisher = "Association for Computational Linguistics",
doi = "10.18653/v1/N19-1423",
pages = "4171-4186",
}
@misc{1810.04805,
author = {Jacob Devlin and Ming-Wei Chang and Kenton Lee and Kristina Toutanova},
title = {BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
year = {2018},
Eprint = {arXiv:1810.04805},
}
@Book{goldberg2017neural,
author = {Goldberg, Yoav},
title = {Neural Network Methods for Deep Learning},
year = {2017},
publisher = {Morgan & Claypool Publishers},
address = {San Rafael, California},
isbn = {1627052984},
url = {ebookcentral.proquest.com/lib/uaz/reader.action?docID=4843762},
}
@misc{ruder2017deeplearningnlp,
author = {Ruder, Sebastian},
title = {Deep Learning for NLP Best Practices},
year = {2017},
howpublished = {\\url{`}<a parentName="p" {...{
        "href": "http://www.ruder.io/deep-learning-nlp-best-practices/%7D%7D",
        "target": "_self",
        "rel": "nofollow"
      }}>{`www.ruder.io/deep-learning-nlp-best-practices/}}`}</a>{`,
url = {`}<a parentName="p" {...{
        "href": "http://www.ruder.io/deep-learning-nlp-best-practices/%7D",
        "target": "_self",
        "rel": "nofollow"
      }}>{`www.ruder.io/deep-learning-nlp-best-practices/}`}</a>{`,
}
@@bibliography@@`}</p>
    {
      /* 
      Group: ling-582
      */
    }

    </MDXLayout>;
}
;
MDXContent.isMDXComponent = true;
      