tanbro/emoji-data

View on GitHub
docs/notebooks/example.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `EmojiSequence` class is the most useful one. To use it:"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usage"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import `EmojiSequence` from `emoji_data`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from emoji_data import EmojiSequence"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Print Emojis\n",
    "\n",
    "Print the first 100 emoji sequences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<EmojiSequence code_points='0023 FE0E' string='#︎' version='E1.1' description='number sign (text style)'>\n",
      "<EmojiSequence code_points='0023 FE0F' string='#️' version='E1.1' description='number sign (emoji style)'>\n",
      "<EmojiSequence code_points='002A FE0E' string='*︎' version='E1.1' description='asterisk (text style)'>\n",
      "<EmojiSequence code_points='002A FE0F' string='*️' version='E1.1' description='asterisk (emoji style)'>\n",
      "<EmojiSequence code_points='0030 FE0E' string='0︎' version='E1.1' description='digit zero (text style)'>\n",
      "<EmojiSequence code_points='0030 FE0F' string='0️' version='E1.1' description='digit zero (emoji style)'>\n",
      "<EmojiSequence code_points='0031 FE0E' string='1︎' version='E1.1' description='digit one (text style)'>\n",
      "<EmojiSequence code_points='0031 FE0F' string='1️' version='E1.1' description='digit one (emoji style)'>\n",
      "<EmojiSequence code_points='0032 FE0E' string='2︎' version='E1.1' description='digit two (text style)'>\n",
      "<EmojiSequence code_points='0032 FE0F' string='2️' version='E1.1' description='digit two (emoji style)'>\n",
      "<EmojiSequence code_points='0033 FE0E' string='3︎' version='E1.1' description='digit three (text style)'>\n",
      "<EmojiSequence code_points='0033 FE0F' string='3️' version='E1.1' description='digit three (emoji style)'>\n",
      "<EmojiSequence code_points='0034 FE0E' string='4︎' version='E1.1' description='digit four (text style)'>\n",
      "<EmojiSequence code_points='0034 FE0F' string='4️' version='E1.1' description='digit four (emoji style)'>\n",
      "<EmojiSequence code_points='0035 FE0E' string='5︎' version='E1.1' description='digit five (text style)'>\n",
      "<EmojiSequence code_points='0035 FE0F' string='5️' version='E1.1' description='digit five (emoji style)'>\n",
      "<EmojiSequence code_points='0036 FE0E' string='6︎' version='E1.1' description='digit six (text style)'>\n",
      "<EmojiSequence code_points='0036 FE0F' string='6️' version='E1.1' description='digit six (emoji style)'>\n",
      "<EmojiSequence code_points='0037 FE0E' string='7︎' version='E1.1' description='digit seven (text style)'>\n",
      "<EmojiSequence code_points='0037 FE0F' string='7️' version='E1.1' description='digit seven (emoji style)'>\n",
      "<EmojiSequence code_points='0038 FE0E' string='8︎' version='E1.1' description='digit eight (text style)'>\n",
      "<EmojiSequence code_points='0038 FE0F' string='8️' version='E1.1' description='digit eight (emoji style)'>\n",
      "<EmojiSequence code_points='0039 FE0E' string='9︎' version='E1.1' description='digit nine (text style)'>\n",
      "<EmojiSequence code_points='0039 FE0F' string='9️' version='E1.1' description='digit nine (emoji style)'>\n",
      "<EmojiSequence code_points='00A9 FE0E' string='©︎' version='E1.1' description='copyright sign (text style)'>\n",
      "<EmojiSequence code_points='00A9 FE0F' string='©️' version='E0.6' description='copyright'>\n",
      "<EmojiSequence code_points='00AE FE0E' string='®︎' version='E1.1' description='registered sign (text style)'>\n",
      "<EmojiSequence code_points='00AE FE0F' string='®️' version='E0.6' description='registered'>\n",
      "<EmojiSequence code_points='203C FE0E' string='‼︎' version='E1.1' description='double exclamation mark (text style)'>\n",
      "<EmojiSequence code_points='203C FE0F' string='‼️' version='E0.6' description='double exclamation mark'>\n",
      "<EmojiSequence code_points='2049 FE0E' string='⁉︎' version='E3.0' description='exclamation question mark (text style)'>\n",
      "<EmojiSequence code_points='2049 FE0F' string='⁉️' version='E0.6' description='exclamation question mark'>\n",
      "<EmojiSequence code_points='2122 FE0E' string='™︎' version='E1.1' description='trade mark sign (text style)'>\n",
      "<EmojiSequence code_points='2122 FE0F' string='™️' version='E0.6' description='trade mark'>\n",
      "<EmojiSequence code_points='2139 FE0E' string='ℹ︎' version='E3.0' description='information source (text style)'>\n",
      "<EmojiSequence code_points='2139 FE0F' string='ℹ️' version='E0.6' description='information'>\n",
      "<EmojiSequence code_points='2194 FE0E' string='↔︎' version='E1.1' description='left right arrow (text style)'>\n",
      "<EmojiSequence code_points='2194 FE0F' string='↔️' version='E0.6' description='left-right arrow'>\n",
      "<EmojiSequence code_points='2195 FE0E' string='↕︎' version='E1.1' description='up down arrow (text style)'>\n",
      "<EmojiSequence code_points='2195 FE0F' string='↕️' version='E0.6' description='up-down arrow'>\n",
      "<EmojiSequence code_points='2196 FE0E' string='↖︎' version='E1.1' description='north west arrow (text style)'>\n",
      "<EmojiSequence code_points='2196 FE0F' string='↖️' version='E0.6' description='up-left arrow'>\n",
      "<EmojiSequence code_points='2197 FE0E' string='↗︎' version='E1.1' description='north east arrow (text style)'>\n",
      "<EmojiSequence code_points='2197 FE0F' string='↗️' version='E0.6' description='up-right arrow'>\n",
      "<EmojiSequence code_points='2198 FE0E' string='↘︎' version='E1.1' description='south east arrow (text style)'>\n",
      "<EmojiSequence code_points='2198 FE0F' string='↘️' version='E0.6' description='down-right arrow'>\n",
      "<EmojiSequence code_points='2199 FE0E' string='↙︎' version='E1.1' description='south west arrow (text style)'>\n",
      "<EmojiSequence code_points='2199 FE0F' string='↙️' version='E0.6' description='down-left arrow'>\n",
      "<EmojiSequence code_points='21A9 FE0E' string='↩︎' version='E1.1' description='leftwards arrow with hook (text style)'>\n",
      "<EmojiSequence code_points='21A9 FE0F' string='↩️' version='E0.6' description='right arrow curving left'>\n",
      "<EmojiSequence code_points='21AA FE0E' string='↪︎' version='E1.1' description='rightwards arrow with hook (text style)'>\n",
      "<EmojiSequence code_points='21AA FE0F' string='↪️' version='E0.6' description='left arrow curving right'>\n",
      "<EmojiSequence code_points='231A FE0E' string='⌚︎' version='E1.1' description='watch (text style)'>\n",
      "<EmojiSequence code_points='231A FE0F' string='⌚️' version='E1.1' description='watch (emoji style)'>\n",
      "<EmojiSequence code_points='231B FE0E' string='⌛︎' version='E1.1' description='hourglass (text style)'>\n",
      "<EmojiSequence code_points='231B FE0F' string='⌛️' version='E1.1' description='hourglass (emoji style)'>\n",
      "<EmojiSequence code_points='2328 FE0E' string='⌨︎' version='E1.1' description='keyboard (text style)'>\n",
      "<EmojiSequence code_points='2328 FE0F' string='⌨️' version='E1.0' description='keyboard'>\n",
      "<EmojiSequence code_points='23CF FE0E' string='⏏︎' version='E4.0' description='eject symbol (text style)'>\n",
      "<EmojiSequence code_points='23CF FE0F' string='⏏️' version='E1.0' description='eject button'>\n",
      "<EmojiSequence code_points='23E9 FE0E' string='⏩︎' version='E6.0' description='black right-pointing double triangle (text style)'>\n",
      "<EmojiSequence code_points='23E9 FE0F' string='⏩️' version='E6.0' description='black right-pointing double triangle (emoji style)'>\n",
      "<EmojiSequence code_points='23EA FE0E' string='⏪︎' version='E6.0' description='black left-pointing double triangle (text style)'>\n",
      "<EmojiSequence code_points='23EA FE0F' string='⏪️' version='E6.0' description='black left-pointing double triangle (emoji style)'>\n",
      "<EmojiSequence code_points='23EB FE0E' string='⏫︎' version='E6.0' description='black up-pointing double triangle (text style)'>\n",
      "<EmojiSequence code_points='23EB FE0F' string='⏫️' version='E6.0' description='black up-pointing double triangle (emoji style)'>\n",
      "<EmojiSequence code_points='23EC FE0E' string='⏬︎' version='E6.0' description='black down-pointing double triangle (text style)'>\n",
      "<EmojiSequence code_points='23EC FE0F' string='⏬️' version='E6.0' description='black down-pointing double triangle (emoji style)'>\n",
      "<EmojiSequence code_points='23ED FE0E' string='⏭︎' version='E6.0' description='black right-pointing double triangle with vertical bar (text style)'>\n",
      "<EmojiSequence code_points='23ED FE0F' string='⏭️' version='E0.7' description='next track button'>\n",
      "<EmojiSequence code_points='23EE FE0E' string='⏮︎' version='E6.0' description='black left-pointing double triangle with vertical bar (text style)'>\n",
      "<EmojiSequence code_points='23EE FE0F' string='⏮️' version='E0.7' description='last track button'>\n",
      "<EmojiSequence code_points='23EF FE0E' string='⏯︎' version='E6.0' description='black right-pointing triangle with double vertical bar (text style)'>\n",
      "<EmojiSequence code_points='23EF FE0F' string='⏯️' version='E1.0' description='play or pause button'>\n",
      "<EmojiSequence code_points='23F0 FE0E' string='⏰︎' version='E6.0' description='alarm clock (text style)'>\n",
      "<EmojiSequence code_points='23F0 FE0F' string='⏰️' version='E6.0' description='alarm clock (emoji style)'>\n",
      "<EmojiSequence code_points='23F1 FE0E' string='⏱︎' version='E6.0' description='stopwatch (text style)'>\n",
      "<EmojiSequence code_points='23F1 FE0F' string='⏱️' version='E1.0' description='stopwatch'>\n",
      "<EmojiSequence code_points='23F2 FE0E' string='⏲︎' version='E6.0' description='timer clock (text style)'>\n",
      "<EmojiSequence code_points='23F2 FE0F' string='⏲️' version='E1.0' description='timer clock'>\n",
      "<EmojiSequence code_points='23F3 FE0E' string='⏳︎' version='E6.0' description='hourglass with flowing sand (text style)'>\n",
      "<EmojiSequence code_points='23F3 FE0F' string='⏳️' version='E6.0' description='hourglass with flowing sand (emoji style)'>\n",
      "<EmojiSequence code_points='23F8 FE0E' string='⏸︎' version='E7.0' description='double vertical bar (text style)'>\n",
      "<EmojiSequence code_points='23F8 FE0F' string='⏸️' version='E0.7' description='pause button'>\n",
      "<EmojiSequence code_points='23F9 FE0E' string='⏹︎' version='E7.0' description='black square for stop (text style)'>\n",
      "<EmojiSequence code_points='23F9 FE0F' string='⏹️' version='E0.7' description='stop button'>\n",
      "<EmojiSequence code_points='23FA FE0E' string='⏺︎' version='E7.0' description='black circle for record (text style)'>\n",
      "<EmojiSequence code_points='23FA FE0F' string='⏺️' version='E0.7' description='record button'>\n",
      "<EmojiSequence code_points='24C2 FE0E' string='Ⓜ︎' version='E1.1' description='circled latin capital letter m (text style)'>\n",
      "<EmojiSequence code_points='24C2 FE0F' string='Ⓜ️' version='E0.6' description='circled M'>\n",
      "<EmojiSequence code_points='25AA FE0E' string='▪︎' version='E1.1' description='black small square (text style)'>\n",
      "<EmojiSequence code_points='25AA FE0F' string='▪️' version='E0.6' description='black small square'>\n",
      "<EmojiSequence code_points='25AB FE0E' string='▫︎' version='E1.1' description='white small square (text style)'>\n",
      "<EmojiSequence code_points='25AB FE0F' string='▫️' version='E0.6' description='white small square'>\n",
      "<EmojiSequence code_points='25B6 FE0E' string='▶︎' version='E1.1' description='black right-pointing triangle (text style)'>\n",
      "<EmojiSequence code_points='25B6 FE0F' string='▶️' version='E0.6' description='play button'>\n",
      "<EmojiSequence code_points='25C0 FE0E' string='◀︎' version='E1.1' description='black left-pointing triangle (text style)'>\n",
      "<EmojiSequence code_points='25C0 FE0F' string='◀️' version='E0.6' description='reverse button'>\n",
      "<EmojiSequence code_points='25FB FE0E' string='◻︎' version='E3.2' description='white medium square (text style)'>\n",
      "<EmojiSequence code_points='25FB FE0F' string='◻️' version='E0.6' description='white medium square'>\n"
     ]
    }
   ],
   "source": [
    "for es, _ in zip(EmojiSequence.values(), range(100)):\n",
    "    print(repr(es))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check if a hex list represents an EmojiSequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1F6A3 is EmojiSequence 🚣\n",
      "1F468 1F3FC 200D F68F is NOT EmojiSequence!\n",
      "1F468 1F3FB 200D 2708 FE0F is EmojiSequence 👨🏻‍✈️\n",
      "023A is NOT EmojiSequence!\n",
      "1F469 200D 1F52C is EmojiSequence 👩‍🔬\n",
      "1F468 200D 1F468 200D 1F467 200D 1F467 is EmojiSequence 👨‍👨‍👧‍👧\n",
      "1F441 FE0F 200D 1F5E8 FE0E is NOT EmojiSequence!\n"
     ]
    }
   ],
   "source": [
    "emojis_data = [\n",
    "    \"1F6A3\",\n",
    "    \"1F468 1F3FC 200D F68F\",\n",
    "    \"1F468 1F3FB 200D 2708 FE0F\",\n",
    "    \"023A\",\n",
    "    \"1F469 200D 1F52C\",\n",
    "    \"1F468 200D 1F468 200D 1F467 200D 1F467\",\n",
    "    \"1F441 FE0F 200D 1F5E8 FE0E\",\n",
    "]\n",
    "\n",
    "for hex_data in emojis_data:\n",
    "    try:\n",
    "        es = EmojiSequence.from_hex(hex_data)\n",
    "    except KeyError:\n",
    "        print(\"{} is NOT EmojiSequence!\".format(hex_data))\n",
    "    else:\n",
    "        print(\"{} is EmojiSequence {}\".format(hex_data, es.string))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check if a string is an EmojiSequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "False\n",
      "True\n",
      "False\n"
     ]
    }
   ],
   "source": [
    "print(\"👨\" in EmojiSequence)\n",
    "print(\"©\" in EmojiSequence)  # 00A9, unqualified\n",
    "print(\"5️⃣\" in EmojiSequence)\n",
    "print(\"9⃣\" in EmojiSequence)  # 0039 20E3, unqualified"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Search for EmojiSequences inside text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[6:11] - 👨🏻‍⚕️ <EmojiSequence code_points='1F468 1F3FB 200D 2695 FE0F' string='👨🏻\\u200d⚕️' version='E4.0' description='man health worker: light skin tone'>\n",
      "[20:22] - 👨🏻 <EmojiSequence code_points='1F468 1F3FB' string='👨🏻' version='E1.0' description='man: light skin tone'>\n",
      "---\n",
      "[15:20] - 👨‍👨‍👧 <EmojiSequence code_points='1F468 200D 1F468 200D 1F467' string='👨\\u200d👨\\u200d👧' version='E2.0' description='family: man, man, girl'>\n",
      "[20:27] - 👨‍👨‍👧‍👧 <EmojiSequence code_points='1F468 200D 1F468 200D 1F467 200D 1F467' string='👨\\u200d👨\\u200d👧\\u200d👧' version='E2.0' description='family: man, man, girl, girl'>\n",
      "[49:50] - 🌞 <EmojiSequence code_points='1F31E' string='🌞' version='E1.0' description='full moon face..sun with face'>\n",
      "[50:52] - ⛈️ <EmojiSequence code_points='26C8 FE0F' string='⛈️' version='E0.7' description='cloud with lightning and rain'>\n",
      "[63:64] - 😀 <EmojiSequence code_points='1F600' string='😀' version='E1.0' description='grinning face'>\n",
      "---\n",
      "[59:61] - ©️ <EmojiSequence code_points='00A9 FE0F' string='©️' version='E0.6' description='copyright'>\n",
      "---\n",
      "---\n",
      "---\n"
     ]
    }
   ],
   "source": [
    "strings = [\n",
    "    \"First:👨🏻‍⚕️. Second:👨🏻.\",\n",
    "    \"The two emojis 👨‍👨‍👧👨‍👨‍👧‍👧 are long. Today is a 🌞⛈️ day, I am 😀.\",\n",
    "    \"© 00A9 is unqualified, the full-qualified one is 00A9 FE0F ©️\",\n",
    "    \"9⃣ 0039 20E3 is also unqualified, it will not be matched!\",\n",
    "    \"and no more emoji.\",\n",
    "]\n",
    "\n",
    "for s in strings:\n",
    "    for es, begin, end in EmojiSequence.find(s):\n",
    "        print(f\"[{begin}:{end}] - {es} {es!r}\")\n",
    "    print(\"---\")"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "e144dc637bf9ec72e07dec98e4c470d8eef797ad693ee1854340bc4ce2529e51"
  },
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit ('venv': venv)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}