element-web-Github/src/emoji.js
David Baker f31cc7dfee Strip all variation selectors on emoji
...when inserting into or looking up in the unicode to emoji map.

This broke with emojibase 4.2.0 which changed the type of a whole
load of emojis to 'text' when previously they were 'emoji'. This
caused them to get the 'text' variant of the unicode string which
has the text variation selector (15) appended instead of the emoji
variation selector (16). We were only stripping the emoji selector,
so upgrading to 4.2.0 caused riot to fail to find the heart in the
unicode map, which therefore prevented the app from starting.
2020-01-08 10:51:52 +00:00

100 lines
3.2 KiB
JavaScript

/*
Copyright 2019 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import EMOJIBASE from 'emojibase-data/en/compact.json';
// Lookup table keyed by an emoji's unicode string after it has been passed
// through stripVariation. Deliberately not exported: reads go through
// getEmojiFromUnicode so that the lookup key is normalised the same way.
const UNICODE_TO_EMOJI = new Map();

// Lookup tables keyed by shortcode and by emoticon respectively.
export const SHORTCODE_TO_EMOJI = new Map();
export const EMOTICON_TO_EMOJI = new Map();

/**
 * Looks up the Emoji object registered for a unicode string.
 *
 * @param {string} unicode unicode string; it is passed through stripVariation before the lookup
 * @returns {Object|undefined} the stored Emoji object, or undefined if nothing is registered
 */
export const getEmojiFromUnicode = (unicode) => {
    const lookupKey = stripVariation(unicode);
    return UNICODE_TO_EMOJI.get(lookupKey);
};
// Category key for each emojibase group: the array is indexed by the
// numeric `group` field of an emoji entry.
const EMOJIBASE_GROUP_ID_TO_CATEGORY = [
    "people",   // smileys
    "people",   // actually people
    "control",  // modifiers and such, not displayed in picker
    "nature",
    "foods",
    "places",
    "activity",
    "objects",
    "symbols",
    "flags",
];
// Emoji objects grouped by category key. Each list starts out empty and is
// filled while the emojibase data is indexed. There is intentionally no
// "control" entry here.
export const DATA_BY_CATEGORY = {
    people: [],
    nature: [],
    foods: [],
    places: [],
    activity: [],
    objects: [],
    symbols: [],
    flags: [],
};
// Store various mappings from unicode/emoticon/shortcode to the Emoji objects
EMOJIBASE.forEach(emoji => {
    // Group ids with no matching key in DATA_BY_CATEGORY (e.g. "control")
    // are left out of the per-category lists but are still indexed below.
    const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group];
    if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
        DATA_BY_CATEGORY[categoryId].push(emoji);
    }

    // Treat a missing shortcodes list as empty so that building the filter
    // string below cannot throw on such an entry.
    const shortcodes = emoji.shortcodes || [];

    // This is used as the string to match the query against when filtering emojis:
    // the annotation, every shortcode and the emoticon (if any), one per line, lowercased.
    emoji.filterString = `${emoji.annotation}\n${shortcodes.join('\n')}\n${emoji.emoticon || ''}`.toLowerCase();

    // Add mapping from unicode to Emoji object
    // The 'unicode' field that we use in emojibase has either
    // VS15 or VS16 appended to any characters that can take
    // variation selectors. Which one it appends depends
    // on whether emojibase considers their type to be 'text' or
    // 'emoji'. We therefore strip any variation chars from strings
    // both when building the map and when looking up.
    UNICODE_TO_EMOJI.set(stripVariation(emoji.unicode), emoji);

    if (emoji.emoticon) {
        // Add mapping from emoticon to Emoji object
        EMOTICON_TO_EMOJI.set(emoji.emoticon, emoji);
    }

    // Add mapping from each shortcode to Emoji object
    shortcodes.forEach(shortcode => {
        SHORTCODE_TO_EMOJI.set(shortcode, emoji);
    });
});
/**
 * Strips variation selectors from a string
 * NB. Skin tone modifiers are not variation selectors:
 * this function does not touch them. (Should it?)
 *
 * @param {string} str string to strip
 * @returns {string} stripped string
 */
function stripVariation(str) {
    // U+FE00..U+FE0F covers VS1-VS16, which includes the text (VS15, U+FE0E)
    // and emoji (VS16, U+FE0F) presentation selectors. Every other UTF-16
    // code unit, surrogate halves included, is kept as-is.
    return str.replace(/[\uFE00-\uFE0F]/g, '');
}