

import Resume from "src/components/Resume"
import profileImage from "src/components/ghp-photo"

function MyResume() {
  return (
    <Resume
      name="Gus Hahn-Powell"
      firstName="Gus"
      tel="+1 (520) xxx-xxxx"
      email="gus@parsertongue.org"
      address="Tucson, AZ"
      linkedInAccount="gushahnpowell"
      githubAccount="myedibleenso"
      orcidId="0000-0002-7957-3586"
      profileImage={profileImage}
      summary={
        <div className="standalone">
          <p>
            I am a computational linguist with over 10 years of experience in Natural Language Processing (NLP) and Machine Learning (ML), 20+ peer-reviewed publications, and more than 10 years as a professional software developer responsible for designing, implementing, and deploying custom ML solutions.  
          </p>
          <p>
            {/* <h6>what I offer</h6> */}
            My expertise centers around information extraction and assembling knowledge graphs from unstructured text. I believe in choosing the right tool for the job and pride myself on identifying simple and effective solutions.  
            {/* While I favor approaches that are both explainable and directly editable by humans,  */}
          </p>
          <p>
            {/* <h6>evidence</h6> */}
            I have a track record of multi-disciplinary collaborative research as a tenure-track professor in academia funded by agencies such as DARPA, NSF, and the CDC. I also bring years of applied research experience in industry as both a co-founder of a bootstrapped startup and an individual contributor at a large corporation developing new IP and production quality code in languages like Python and Scala.
          </p>


          {/* I am a computational linguist and NLP researcher.

My research centers around machine reading for scientific discovery. I build and design systems to help researchers surmount the problem of information overload by scouring the vast body of scientific literature, analyzing findings, and synthesizing discoveries to generate novel hypotheses.

My work has been funded by agencies such as DARPA, NSF, the Bill and Melinda Gates Foundation, and the CDC. */}
        </div>
      }
      profession="Computational Linguist"
      skills={[
        { tag: "Python" },
        { tag: "Scala" },
        { tag: "Natural Language Processing (NLP)" },
        {tag: "Machine Learning (ML)"},
        {tag: "Deep Learning (DL)"},
        {tag: "Neuro-symbolic AI"},
        { tag: "Apache Spark" },
        { tag: "AWS (ECS, Fargate, EMR, Lambda)" },
        { tag: "Terraform" },
        { tag: "Docker" },
        // { tag: "GitHub Actions" },
        { tag: "OpenAPI" }
        // { name: "Machine Learning", level: 5 }
      ]}
      experiences={[
        {
          company: "University of Arizona",
          period: "2019-present",
          title: "Assistant Professor",
          description: (
            <div>
              <p>
                Assistant Professor (tenure-track) of Computational Linguistics at the <a href="https://linguistics.arizona.edu/peo-faculty">University of Arizona</a> and founding director of both the <a href="https://uazhlt.github.io/hlt-online/about">online MS in Human Language Technology (HLT)</a> and the <a href="https://uazhlt.github.io/nlp-cert/about">Graduate Certificate Program in Natural Language Processing (NLP)</a>.  
              </p>
              <p>
                {/* <span className="subheader">Teaching</span> */}
                <a href="/courses">I design and teach graduate-level courses in statistical natural language processing (NLP) that cover both "classical" machine learning (ML) and deep learning (DL) methods for NLP.</a>
              </p>
              <p>
                {/* <span className="subheader">Research</span> */}
                My research is funded by agencies such as DARPA, NSF, and the CDC.  I also hold appointments in the <a href="https://cogsci.arizona.edu/faculty">Cognitive Science GIDP</a> and the <a href="https://css.arizona.edu/people">Computational Social Science Graduate Certificate Program</a>. 
              </p>
            </div>
          ),
          accomplishments: [
            "Investigator that helped secure over $9M in grant-based funding from multiple federal agencies.",
            "Managed and trained remote and in-person teams of junior NLP researchers and software developers.",
            "Developed, documented, and deployed CI pipelines and NLP software for multiple federal agencies on AWS and AWS GovCloud.",
            "Within a year of being hired during the University of Arizona's largest budget cuts, I designed a graduate curriculum, oversaw the development of its courses, and launched a fully online MS program in Human Language Technology that has attracted a global body of students."
          ]
        },
        {
          company: "Amazon",
          period: "2022-2023",
          title: "Applied Scientist",
          // As an Applied Scientist with the Product Graph team within Personalization, I work on automatically discovering novel product dimensions and enriching a large-scale product knowledge graph to help customers make informed purchase decisions.
          description: (
            <div>
              <p>
                As an Applied Scientist with the <a href="https://www.amazon.science/blog/building-product-graphs-automatically">Product Graph team</a> within <a href="https://www.amazon.jobs/en/teams/personalization-and-recommendations">Personalization</a>, I work on automatically discovering novel product dimensions and enriching the Amazon Catalog to help customers make informed purchase decisions.
              </p>
              {/* <p>
                <ul>
                  <li>Answer any question about products and related knowledge in the world.</li> 
                  <li>Enable Amazon customers to make informed purchase decisions through rich product knowledge and understanding.</li>
                </ul>
              </p> */}
            </div>
          ),
          accomplishments: [
            "Designed and implemented a neuro-symbolic system to discover novel attributes and compatibility information from product profiles in support of a multi-team knowledge graph initiative.",
            "In less than one week, I designed and implemented a scalable system for weakly labeling sections of e-Commerce web pages for document layout analysis and targeted information extraction.",
            "Implemented and deployed a series of scheduled Amazon-scale ETL tasks using Apache Spark and PySpark to improve automation and prevent data drift."
          ]

        },
        {
          company: "Lum AI",
          period: "2017-present",
          title: "Co-founder and Applied Scientist",
          description: (
            <div>
              <p>
                I am co-founder of <a href="https://www.lum.ai/">Lum AI</a>, a small bootstrapped NLP startup focusing on large-scale machine reading and rapid text annotation/data labeling.  
              </p>
              <p>
                In addition to product development, I manage our AWS deployments (mostly ECS + terraform + GitHub Actions).
              </p>
            </div>
          ),
          accomplishments: [
            `Designed and implemented a resilient actor-based distributed version of the <a href="https://aclanthology.org/2020.lrec-1.267">Odinson information extraction system</a> that requires less than 30% of the resources of an Elasticsearch cluster.`,
            "Designed, implemented, and deployed horizontally-scalable containerized services for information extraction on AWS with rolling deployments triggered via changes to the default branch of a Git repository.",
            "Managed a small, distributed team of software developers and applied scientists over multiple projects."
          ]
        }
      ]}
      education={[
        {
          school: "University of Arizona",
          major: "Computational Linguistics",
          minor: "Information",
          department: "Linguistics",
          degree: "PhD",
          period: "2014-2018",
          thesis: "Machine Reading for Scientific Discovery",
          thesisLink: "https://repository.arizona.edu/handle/10150/630562",
          thesisDescription: "The aim of this work is to accelerate scientific discovery by advancing machine reading approaches designed to extract claims and assertions made in the literature, assemble these statements into cohesive models, and generate novel hypotheses that synthesize findings from isolated research communities. [...]"
        },
        {
          school: "University of Arizona",
          major: "Human Language Technology",
          degree: "MS",
          period: "2012-2014",
          thesis: "BioNER: A hybrid approach to identifying mentions of protein complexes"
        },
        {
          school: "University of Alabama",
          major: "Applied Linguistics",
          degree: "MA",
          period: "2008-2010",
          thesis: "The 'Worthy of Attention' Collostruction: Frequency, synonymy, and learnability",
          thesisDescription: "Modeling L2 synonym learning using ART-2 neural networks."
        },
        {
          school: "University of Alabama",
          major: "Japanese",
          degree: "BA",
          period: "2004-2008",
          thesis: "Language in Zen poetry",
        }
      ]}
      publications={[
      ]}
      publicationsDescription={(
        <div>
          For a complete list of my publications, please see my <a href="https://public.parsertongue.org/cv.pdf">CV</a> or <a href="https://orcid.org/0000-0002-7957-3586">ORCID profile</a>.
        </div>
      )}
      educationalProducts={[
      ]}
      teachingDescription={(
        <div>
          For a complete list of courses and workshops I've developed, please see <a href="https://parsertongue.org/courses/"><code>https://parsertongue.org/courses/</code></a>.
        </div>
      )}
      projects={[
        {
          name: "Daedalus",
          period: "2024-present",
          description: (
            <div className ="project-description">
              <p>
                Vision-based Piping & Instrumentation Diagram (P&ID) understanding system for the construction industry.
              </p>
            </div>
          ),
          keywords: [
            {tag: "computer vision"},
            {tag: "Lum AI"},
            {tag: "Docker"},
            {tag: "AWS Lambda"},
          ]
        },
        {
          name: "Odinsynth-LLM",
          period: "2023-present",
          description: (
            <div className ="project-description">
              <p>
                Interactive system that generates explainable, editable, and generalizable information extraction rules from a single example using LLMs, preference optimization, and reinforcement learning with <i>machine</i> feedback (RLMF).
              </p>
            </div>
          ),
          keywords: [
            {tag: "LLMs"},
            {tag: "neuro-symbolic AI"}
          ]
        },
        {
          name: "Odinsynth",
          period: "2022-2024",
          description: (
            <div className ="project-description">
              <p>
                A neural program synthesis approach to generating explainable and editable information extraction rules from a handful of examples.
              </p>
            </div>
          ),
          keywords: [
            {tag: "PyTorch"}, 
            {tag: "transformers"},
            // {tag: "LLMs"},
            {tag: "Python"},
            {tag: "Scala"},
            {tag: "Docker"},
            {tag: "AWS ECS"},
            {tag: "program synthesis"},
            {tag: "neuro-symbolic AI"}
          ],
          codeLink: "https://github.com/clu-ling/odinsynth",
          demoLink: "https://youtu.be/Osf4KhWlvb8",
          videoLink: "https://youtu.be/Osf4KhWlvb8",
          paperLink: "https://aclanthology.org/2022.naacl-demo.8"
        },
        {
          name: "clu-azahead",
          period: "2022-2023",
          description: (
            <div className ="project-description">
              <p>
                A toolkit to improve information access and aggregation for public health.
              </p>
            </div>
          ),
          keywords: [
            {tag: "neural search"}, 
            {tag: "question answering"}, 
            {tag: "PyTorch"}, 
            {tag: "SBERT", url: "https://arxiv.org/abs/1908.10084"}, 
            {tag: "transformers"},
            {tag: "ASR"}, 
            {tag: "whisper", url: "https://arxiv.org/abs/2212.04356"},
            {tag: "Python"},
            {tag: "FastAPI"},
            {tag: "Streamlit"},
            {tag: "Docker"},
          ],
          codeLink: "https://github.com/clu-ling/clu-azahead",
          // demoLink: "https://github.com/myedibleenso",
          // videoLink: "https://github.com/myedibleenso",
          // paperLink: "https://aclanthology.org/2020.lrec-1.267"
        },
        {
          name: "Annotaurus Tex(t)",
          period: "2021-2022",
          description: (
            <div className ="project-description">
              <p>
                A web-based platform (hosted solution) for rapid text annotation and data labeling for NLP.  
                
                {/* Launching soon with a free tier! */}
              </p>
            </div>
          ),
          keywords: [
            {tag: "annotation"}, 
            {tag: "Lum AI"},
            {tag: "information extraction"}, 
            {tag: "active learning"},
            {tag: "AWS ECS"},
            {tag: "JavaScript"},
            {tag: "PostgreSQL"}, 
            {tag: "Odinson"}, 
            {tag: "data labeling"}
          ],
          //codeLink: "https://github.com/lum-ai/odinson",
          // demoLink: "https://github.com/myedibleenso",
          // videoLink: "https://github.com/myedibleenso",
          //paperLink: "https://aclanthology.org/2020.lrec-1.267"
        },
        {
          name: "Odinson",
          period: "2019-present",
          description: (
            <div className ="project-description">
              <p>
                A fast and highly scalable language and runtime system for information extraction that supports patterns composed of graph traversals and token-level constraints. The successor to Odin. Odinson is <strong>four orders of magnitude faster</strong> that the previous state of the art.
              </p>
            </div>
          ),
          contributions: [
            `IDE design, development, and deployment 	&#40;closed source&#41;`,
            `a <a href="https://github.com/lum-ai/odinson-rest">REST API and companion Python library</a>`,
            `language features and testing`,
            `development of a distributed version for web-scale information extraction using <a href="https://akka.io/">Akka</a> &#40;development of this component was funded by DARPA's Causal Exploration program&#41;.`
          ],
          keywords: [
            {tag: "Scala"}, 
            {tag: "information extraction"}, 
            {tag: "Apache Lucene"}, 
            {tag: "Akka"}, 
            {tag: "actor-based concurrency"}
          ],
          codeLink: "https://github.com/lum-ai/odinson",
          // demoLink: "https://github.com/myedibleenso",
          // videoLink: "https://github.com/myedibleenso",
          paperLink: "https://aclanthology.org/2020.lrec-1.267"
        },
        {
          name: "Influence Search",
          period: "2017-2019",
          description: (
            <div className ="project-description">
              <p>
                A platform for literature-based discovery that incorporates multi-domain extractions of causal interactions into a single searchable knowledge graph. Originally developed to <a href="https://www.kiglobalhealth.org/data-contributors/">support the Bill and Melinda Gates Foundation's efforts to improve child and maternal health</a>.  Create conceptual models (interest maps) by searching for direct and indirect influence relations, merging concepts, injecting your own expertise, and collaboratively editing models.
              </p>
            </div>
          ),
          contributions: [
            `system architecture and deployment (AWS)`,
            `open domain machine reader and assembly system`,
            `incorporation and alignment of citation graph (<a href="https://www.microsoft.com/en-us/research/project/microsoft-academic-graph/">MAG</a>) information and clinical trials &#40;this component was funded by the Bill and Melinda Gates Foundation as part of their KI Platform Prototype&#41;`
          ],
          keywords: [
            {tag: "Scala"}, 
            {tag: "JavaScript"}, 
            {tag: "Neo4j"}, 
            {tag: "Apache Spark"}, 
            {tag: "knowledge graphs"},
            {tag: "operational transformation"}
          ],
          //codeLink: "https://github.com/lum-ai/influence-search",
          demoLink: "https://influence.demos.lum.ai",
          videoLink: "https://youtu.be/k5dcMegrRJc",
          paperLink: "https://aclanthology.org/P17-4018/"
        },
        {
          name: "Reach",
          period: "2014-2018",
          description: (
            <div>
              <p>
                Information extraction system for BioNLP that includes components for event extraction, NER, domain-specific coreference resolution, causal event ordering, and grounding.  Reach was the most precise and highest throughput machine reading system in DARPA's Big Mechanism program, and has been <a href="https://doi.org/10.1093/database/bay098">used by biologists to discover novel and plausible biological hypotheses for multiple cancers</a>. 
              </p>
            </div>
          ),
          contributions: [
            `<a href="https://doi.org/10.1093/database/bay098">broad coverage and extensible information extraction of biomolecular statements described in scholarly documents</a>. These statements often describe complex nested relations &#40;e.g., a positive regulation involving a particular post-translational modification&#41;`,
            `<a href="https://aclanthology.org/W16-2920/">assembly and causal ordering of model fragments of cell signaling pathways</a>`,
            `<a href="https://aclanthology.org/L16-1027">coreference resolution tailored to the biomedical domain</a> &#40;"how can we automatically determine the antecedent of an expression like the protein?"&#41;`
          ],
          keywords: [
            {tag: "BioNLP"}, 
            {tag: "Scala"}, 
            {tag: "Python"}, 
            {tag: "information extraction"}, 
            {tag: "deduplication"},
            {tag: "causal ordering"}
          ],
          codeLink: "https://github.com/clulab/reach",
          // demoLink: "https://github.com/myedibleenso",
          // videoLink: "https://github.com/myedibleenso",
          paperLink: "https://doi.org/10.1093/database/bay098"
        }
      ]}
      grants={[
        {
          name: "ADHS-CDC COVID Disparities Initiative",
          description: (
            <div className ="project-description">
              <p>
                 Address COVID-19 health disparities among underserved and high-risk populations in Arizona, including racial and ethnic minorities as well as rural communities. 
              </p>
            </div>
          ),
          contributions: [
            `personalized question-answering and semantic search systems for different audiences &#40;community health workers, patients, etc.&#41; that operate over curated document collections`,
            `automatic speech recognition &#40;ASR&#41;`,
            `monitoring trusted information sources to detect policy changes`,
            `machine translation, summarization, and customized message generation`,
            `modernizing cyberinfrastructure for health communication &#40;ex. telemedicine systems&#41;`
          ],
          agency: "CDC & Arizona Department of Health Services",
          url: "https://crh.arizona.edu/programs/covid-disparities-initiative",
          role: "Co-I",
          award: "$8M",
          period: "2022-present",
          codeLink: "https://github.com/clu-ling/clu-azahead"
        },
        {
          name: "Democratizing machine reading for non-experts",
          description: (
            <div className ="project-description">
              <p>
                <em><strong>Democratizing machine reading for non-experts: Easy and interpretable methods to extract structured information from text</strong></em>
              </p>
              <p>
                This work aims to democratize machine reading technology to make it accessible to subject matter experts (ex. molecular biologists) who may be entirely unfamiliar with natural language processing and machine learning. In an effort to hybridize symbolic and statistical approaches, my collaborators and I are leveraging neural methods for program synthesis and reinforcement learning to generate editable and executable and human-editable rules for rapid information extraction.
              </p>
            </div>
          ),
          contributions: [
            `Determining research direction`,
            `System design and implementation`,
            `Deployment of GPU-accelerated sotware demos on AWS`
          ],
          agency: "NSF",
          url: "https://mr4all.parsertongue.org/about",
          role: "Co-PI",
          awardId: "2006583",
          award: "$499K",
          period: "2020-present",
          codeLink: "https://github.com/clulab/odinsynth2",
          videoLink: "https://youtu.be/Osf4KhWlvb8",
          demoLink: "https://clu-ling.github.io/odinsynth/about"
        },
        {
          name: "Supply chain Quantification Using Imperfect Data (SQUID)",
          agency: "DARPA",
          description: (
            <div className ="project-description">
              <p>
                Improve the efficiency of the military supply chain by contructing operational process models from fragmented data.
              </p>
            </div>
          ),
          contributions: [
            `information and event extraction related to logistics processes &#40;supply chain events&#41; and event ordering`,
            `query parsing and intent understanding to power a chatbot interface for logisticians`,
            `document layout analysis for PDFs`
          ],
          url: "https://darpa.mil/program/logx",
          role: "PI (Phase I subcontract through Raytheon BBN)",
          award: "$282K",
          period: "2020",
          codeLink: "https://github.com/clu-ling/logx-reader"
        }

        // Identifying cancer driving mechanisms via machine reading While a graduate student, I served on a multi-institutional team in DARPA’s Big Mechanism program with the aim of reconstructing and extending mechanistic models of cancer biology from the scientific literature. I was one of the primary authors of the Reach machine reading system4 which cancer biologists have used to discover novel and plausible biological hypotheses for multiple cancers. In collaboration with my peers, I helped to develop a system with the following key capabilities:
        // • broad coverage and extensible information extraction of biomolecular statements described in scholarly documents. These statements often describe complex nested relations (e.g., a positive regulation involving a particular post-translational modification)
        // • assembly and causal ordering of model fragments of cell signaling pathways
        // • coreference resolution tailored to the biomedical domain (“how can we automatically determine the
        // antecedent of an expression like the protein?”)

        // Maternal and children’s health While a graduate student, I co-founded an NLP startup that licensed technology my peers and I developed at the University of Arizona. At the request of the Bill and Melinda Gates Foundation, my company created their knowledge integration platform prototype as part of their KI Initiative5. The system, Influence Search6, assembles causal statements mined from a large body of literature covering multiple scientific domains into a queryable knowledge graph that can be used to collaboratively construct models of systems and perform rapid literature reviews.
        
      ]}
      awards={[
        {
          name: "Best System Demonstration",
          organization: "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)",
          description: (
            <div>
              <p>
                Multilingual extension of Influence Search that extends the open-domain machine reader to Portuguese.
              </p>
            </div>
          ),
          demoLink: "https://influence.demos.lum.ai",
          paperLink: "https://doi.org/10.18653/v1/N19-4003",
          date: "2019",
        },
      ]}
    />
  )
}

// Expose the MyResume component as this module's default export.
export default MyResume;
