Add slang map
- .gitattributes +3 -1
- .gitignore +2 -2
- README.md +3 -0
- app/constants.py +3 -0
- app/data.py +100 -3
- data/slang.json +229 -0
.gitattributes
CHANGED
@@ -5,6 +5,7 @@
 # Hide from GitHub's language detection
 *.yaml linguist-documentation
 *.toml linguist-documentation
+*.json linguist-documentation

 # Remove assets from github statistics
 *.yaml linguist-vendored
@@ -12,10 +13,11 @@

 # Set the language for these files to ensure GitHub doesn't show the comments as errors
 .vscode/*.json linguist-language=JSON5
+data/* binary

 # Do not try and merge these files
 poetry.lock -diff
-*.
+*.pkl -diff

 # LFS
 models/** filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -194,6 +194,6 @@ pyrightconfig.json
 # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python

 # Custom
-data
-
+data/*
+!data/slang.json
 flagged/
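The switch from `data` to `data/*` plus `!data/slang.json` is what makes the whitelist work: git cannot re-include a file whose parent directory is itself excluded, so the rules now ignore the directory's contents rather than the directory, and then negate the one file that should stay tracked. A quick way to sanity-check rules like these is sketched below; it uses the third-party `pathspec` package purely as an illustration (the project itself does not depend on it).

```python
# Illustrative only: simulate the new .gitignore rules with pathspec
# (pip install pathspec). Not part of the commit.
import pathspec

spec = pathspec.PathSpec.from_lines("gitwildmatch", ["data/*", "!data/slang.json"])

print(spec.match_file("data/imdb50k.csv"))  # True  -> ignored
print(spec.match_file("data/slang.json"))   # False -> kept, thanks to the negation
```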
README.md
CHANGED
@@ -138,6 +138,9 @@ python -m app evaluate --help
 | imdb50k | `data/imdb50k.csv` | | [IMDB Movie Reviews](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews) |
 | test | `data/test.csv` | required for `evaluate` | [Multiclass Sentiment Analysis](https://huggingface.co/datasets/Sp1786/multiclass-sentiment-analysis-dataset) |

+#### Used for text preprocessing
+- [Slang Map](Https://www.kaggle.com/code/nmaguette/up-to-date-list-of-slangs-for-text-preprocessing)
+

 ### Vectorizers
 | Option | Description | When to Use |
app/constants.py
CHANGED
@@ -19,6 +19,9 @@ IMDB50K_URL = "https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-5
 TEST_DATASET_PATH = DATA_DIR / "test.csv"
 TEST_DATASET_URL = "https://huggingface.co/datasets/Sp1786/multiclass-sentiment-analysis-dataset"

+SLANGMAP_PATH = DATA_DIR / "slang.json"
+SLANGMAP_URL = "Https://www.kaggle.com/code/nmaguette/up-to-date-list-of-slangs-for-text-preprocessing"
+
 CACHE_DIR.mkdir(exist_ok=True, parents=True)
 DATA_DIR.mkdir(exist_ok=True, parents=True)
 MODEL_DIR.mkdir(exist_ok=True, parents=True)
app/data.py
CHANGED
@@ -1,8 +1,12 @@
 from __future__ import annotations

 import bz2
+import json
+import re
+from functools import lru_cache
 from typing import TYPE_CHECKING, Literal, Sequence

+import emoji
 import pandas as pd
 import spacy
 from tqdm import tqdm
@@ -14,11 +18,15 @@ from app.constants import (
     IMDB50K_URL,
     SENTIMENT140_PATH,
     SENTIMENT140_URL,
+    SLANGMAP_PATH,
+    SLANGMAP_URL,
     TEST_DATASET_PATH,
     TEST_DATASET_URL,
 )

 if TYPE_CHECKING:
+    from re import Pattern
+
     from spacy.tokens import Doc

 __all__ = ["load_data", "tokenize"]
@@ -35,6 +43,81 @@
     nlp = spacy.load("en_core_web_sm")


+@lru_cache(maxsize=1)
+def slang() -> tuple[Pattern, dict[str, str]]:
+    """Compile a re pattern for slang terms.
+
+    Returns:
+        Slang pattern and mapping
+
+    Raises:
+        FileNotFoundError: If the file is not found
+    """
+    if not SLANGMAP_PATH.exists():
+        # msg = f"Missing slang mapping file: {SLANG_PATH}"
+        msg = (
+            f"Slang mapping file not found at: '{SLANGMAP_PATH}'\n"
+            "Please download the file from:\n"
+            f"{SLANGMAP_URL}"
+        )  # fmt: off
+        raise FileNotFoundError(msg)
+
+    with SLANGMAP_PATH.open() as f:
+        mapping = json.load(f)
+
+    return re.compile(r"\b(" + "|".join(map(re.escape, mapping.keys())) + r")\b"), mapping
+
+
+def _clean(text: str) -> str:
+    """Perform basic text cleaning.
+
+    Args:
+        text: Text to clean
+
+    Returns:
+        Cleaned text
+    """
+    # Make text lowercase
+    text = text.lower()
+
+    # Remove HTML tags
+    text = re.sub(r"<[^>]*>", "", text)
+
+    # Map slang terms
+    slang_pattern, slang_mapping = slang()
+    text = slang_pattern.sub(lambda x: slang_mapping[x.group()], text)
+
+    # Remove acronyms and abbreviations
+    # text = re.sub(r"(?:[a-z]\.){2,}", "", text)
+    text = re.sub(r"(?:[a-z]\.?)(?:[a-z]\.)", "", text)
+
+    # Remove honorifics
+    text = re.sub(r"\b(?:mr|mrs|ms|dr|prof|sr|jr)\.?\b", "", text)
+
+    # Remove year abbreviations
+    text = re.sub(r"\b(?:\d{3}0|\d0)s?\b", "", text)
+
+    # Remove hashtags
+    text = re.sub(r"#[^\s]+", "", text)
+
+    # Replace mentions with a generic tag
+    text = re.sub(r"@[^\s]+", "user", text)
+
+    # Replace X/Y with X or Y
+    text = re.sub(r"\b([a-z]+)[//]([a-z]+)\b", r"\1 or \2", text)
+
+    # Convert emojis to text
+    text = emoji.demojize(text, delimiters=("emoji_", ""))
+
+    # Remove special characters
+    text = re.sub(r"[^a-z0-9\s]", "", text)
+
+    # EXTRA: imdb50k specific cleaning
+    text = re.sub(r"mst3k", "", text)  # Very common acronym for Mystery Science Theater 3000
+
+    return text.strip()
+
+
 def _lemmatize(doc: Doc, threshold: int = 2) -> Sequence[str]:
     """Lemmatize the provided text using spaCy.

@@ -46,12 +129,15 @@ def _lemmatize(doc: Doc, threshold: int = 2) -> Sequence[str]:
         Sequence of lemmatized tokens
     """
     return [
-
+        tok
         for token in doc
         if not token.is_stop  # Ignore stop words
         and not token.is_punct  # Ignore punctuation
+        and not token.like_email  # Ignore email addresses
+        and not token.like_url  # Ignore URLs
+        and not token.like_num  # Ignore numbers
         and not token.is_alpha  # Ignore non-alphabetic tokens
-        and not (len(token.lemma_) < threshold)  # Ignore short tokens
+        and not (len(tok := token.lemma_.lower().strip()) < threshold)  # Ignore short tokens
     ]


@@ -74,14 +160,25 @@ def tokenize(
     Returns:
         Tokenized text data
     """
+    text_data = [
+        _clean(text)
+        for text in tqdm(
+            text_data,
+            desc="Cleaning",
+            unit="doc",
+            disable=not show_progress,
+        )
+    ]
+
     return pd.Series(
         [
             _lemmatize(doc, character_threshold)
             for doc in tqdm(
                 nlp.pipe(text_data, batch_size=batch_size, n_process=n_jobs, disable=["parser", "ner", "tok2vec"]),
                 total=len(text_data),
-
+                desc="Lemmatization",
                 unit="doc",
+                disable=not show_progress,
             )
         ],
     )
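One detail of the rewritten `_lemmatize` worth noting: the assignment expression in the final filter normalizes the lemma once (`token.lemma_.lower().strip()`), length-checks it, and makes that same normalized form the value the comprehension yields. Below is a minimal sketch of that pattern over plain strings, purely for illustration (no spaCy model is loaded; the sample list and threshold are made up).

```python
# Illustrative only: mirrors the walrus-operator filter from _lemmatize,
# using plain strings instead of spaCy tokens so it runs without a model.
words = ["Running", "a", "  THE  ", "Movies", "ok"]
threshold = 2

kept = [
    tok  # the normalized form captured by the filter below
    for word in words
    if not (len(tok := word.lower().strip()) < threshold)  # normalize once, drop short tokens
]

print(kept)  # ['running', 'the', 'movies', 'ok']
```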
data/slang.json
ADDED
@@ -0,0 +1,229 @@
+{
+    "$": " dollar ",
+    "€": " euro ",
+    "4ao": "for adults only",
+    "a.m": "before midday",
+    "a3": "anytime anywhere anyplace",
+    "aamof": "as a matter of fact",
+    "acct": "account",
+    "adih": "another day in hell",
+    "afaic": "as far as i am concerned",
+    "afaict": "as far as i can tell",
+    "afaik": "as far as i know",
+    "afair": "as far as i remember",
+    "afk": "away from keyboard",
+    "app": "application",
+    "approx": "approximately",
+    "apps": "applications",
+    "asap": "as soon as possible",
+    "asl": "age, sex, location",
+    "atk": "at the keyboard",
+    "ave.": "avenue",
+    "aymm": "are you my mother",
+    "ayor": "at your own risk",
+    "b&b": "bed and breakfast",
+    "b+b": "bed and breakfast",
+    "b.c": "before christ",
+    "b2b": "business to business",
+    "b2c": "business to customer",
+    "b4": "before",
+    "b4n": "bye for now",
+    "b@u": "back at you",
+    "bae": "before anyone else",
+    "bak": "back at keyboard",
+    "bbbg": "bye bye be good",
+    "bbc": "british broadcasting corporation",
+    "bbias": "be back in a second",
+    "bbl": "be back later",
+    "bbs": "be back soon",
+    "be4": "before",
+    "bfn": "bye for now",
+    "blvd": "boulevard",
+    "bout": "about",
+    "brb": "be right back",
+    "bros": "brothers",
+    "brt": "be right there",
+    "bsaaw": "big smile and a wink",
+    "btw": "by the way",
+    "bwl": "bursting with laughter",
+    "c/o": "care of",
+    "cet": "central european time",
+    "cf": "compare",
+    "cia": "central intelligence agency",
+    "csl": "can not stop laughing",
+    "cu": "see you",
+    "cul8r": "see you later",
+    "cv": "curriculum vitae",
+    "cwot": "complete waste of time",
+    "cya": "see you",
+    "cyt": "see you tomorrow",
+    "dae": "does anyone else",
+    "dbmib": "do not bother me i am busy",
+    "diy": "do it yourself",
+    "dm": "direct message",
+    "dwh": "during work hours",
+    "e123": "easy as one two three",
+    "eet": "eastern european time",
+    "eg": "example",
+    "embm": "early morning business meeting",
+    "encl": "enclosed",
+    "encl.": "enclosed",
+    "etc": "and so on",
+    "faq": "frequently asked questions",
+    "fawc": "for anyone who cares",
+    "fb": "facebook",
+    "fc": "fingers crossed",
+    "fig": "figure",
+    "fimh": "forever in my heart",
+    "ft.": "feet",
+    "ft": "featuring",
+    "ftl": "for the loss",
+    "ftw": "for the win",
+    "fwiw": "for what it is worth",
+    "fyi": "for your information",
+    "g9": "genius",
+    "gahoy": "get a hold of yourself",
+    "gal": "get a life",
+    "gcse": "general certificate of secondary education",
+    "gfn": "gone for now",
+    "gg": "good game",
+    "gl": "good luck",
+    "glhf": "good luck have fun",
+    "gmt": "greenwich mean time",
+    "gmta": "great minds think alike",
+    "gn": "good night",
+    "g.o.a.t": "greatest of all time",
+    "goat": "greatest of all time",
+    "goi": "get over it",
+    "gps": "global positioning system",
+    "gr8": "great",
+    "gratz": "congratulations",
+    "gyal": "girl",
+    "h&c": "hot and cold",
+    "hp": "horsepower",
+    "hr": "hour",
+    "hrh": "his royal highness",
+    "ht": "height",
+    "ibrb": "i will be right back",
+    "ic": "i see",
+    "icq": "i seek you",
+    "icymi": "in case you missed it",
+    "idc": "i do not care",
+    "idgadf": "i do not give a damn fuck",
+    "idgaf": "i do not give a fuck",
+    "idk": "i do not know",
+    "ie": "that is",
+    "i.e": "that is",
+    "ifyp": "i feel your pain",
+    "IG": "instagram",
+    "iirc": "if i remember correctly",
+    "ilu": "i love you",
+    "ily": "i love you",
+    "imho": "in my humble opinion",
+    "imo": "in my opinion",
+    "imu": "i miss you",
+    "iow": "in other words",
+    "irl": "in real life",
+    "j4f": "just for fun",
+    "jic": "just in case",
+    "jk": "just kidding",
+    "jsyk": "just so you know",
+    "l8r": "later",
+    "lb": "pound",
+    "lbs": "pounds",
+    "ldr": "long distance relationship",
+    "lmao": "laugh my ass off",
+    "lmfao": "laugh my fucking ass off",
+    "lol": "laughing out loud",
+    "ltd": "limited",
+    "ltns": "long time no see",
+    "m8": "mate",
+    "mf": "motherfucker",
+    "mfs": "motherfuckers",
+    "mfw": "my face when",
+    "mofo": "motherfucker",
+    "mph": "miles per hour",
+    "mr": "mister",
+    "mrw": "my reaction when",
+    "ms": "miss",
+    "mte": "my thoughts exactly",
+    "nagi": "not a good idea",
+    "nbc": "national broadcasting company",
+    "nbd": "not big deal",
+    "nfs": "not for sale",
+    "ngl": "not going to lie",
+    "nhs": "national health service",
+    "nrn": "no reply necessary",
+    "nsfl": "not safe for life",
+    "nsfw": "not safe for work",
+    "nth": "nice to have",
+    "nvr": "never",
+    "nyc": "new york city",
+    "oc": "original content",
+    "og": "original",
+    "ohp": "overhead projector",
+    "oic": "oh i see",
+    "omdb": "over my dead body",
+    "omg": "oh my god",
+    "omw": "on my way",
+    "p.a": "per annum",
+    "p.m": "after midday",
+    "pm": "prime minister",
+    "poc": "people of color",
+    "pov": "point of view",
+    "pp": "pages",
+    "ppl": "people",
+    "prw": "parents are watching",
+    "ps": "postscript",
+    "pt": "point",
+    "ptb": "please text back",
+    "pto": "please turn over",
+    "qpsa": "what happens",
+    "ratchet": "rude",
+    "rbtl": "read between the lines",
+    "rlrt": "real life retweet",
+    "rofl": "rolling on the floor laughing",
+    "roflol": "rolling on the floor laughing out loud",
+    "rotflmao": "rolling on the floor laughing my ass off",
+    "rt": "retweet",
+    "ruok": "are you ok",
+    "sfw": "safe for work",
+    "sk8": "skate",
+    "smh": "shake my head",
+    "sq": "square",
+    "srsly": "seriously",
+    "ssdd": "same stuff different day",
+    "tbh": "to be honest",
+    "tbs": "tablespooful",
+    "tbsp": "tablespooful",
+    "tfw": "that feeling when",
+    "thks": "thank you",
+    "tho": "though",
+    "thx": "thank you",
+    "tia": "thanks in advance",
+    "til": "today i learned",
+    "tl;dr": "too long i did not read",
+    "tldr": "too long i did not read",
+    "tmb": "tweet me back",
+    "tntl": "trying not to laugh",
+    "ttyl": "talk to you later",
+    "u": "you",
+    "u2": "you too",
+    "u4e": "yours for ever",
+    "utc": "coordinated universal time",
+    "w/": "with",
+    "w/o": "without",
+    "w8": "wait",
+    "wassup": "what is up",
+    "wb": "welcome back",
+    "wtf": "what the fuck",
+    "wtg": "way to go",
+    "wtpa": "where the party at",
+    "wuf": "where are you from",
+    "wuzup": "what is up",
+    "wywh": "wish you were here",
+    "yd": "yard",
+    "ygtr": "you got that right",
+    "ynk": "you never know",
+    "zzz": "sleeping bored and tired"
+}
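For reference, a minimal sketch of how the new `slang()` helper in app/data.py consumes this file: every key is escaped and joined into a single word-boundary alternation, and `_clean` replaces each match through the mapping. The three-entry inline dict below stands in for the full data/slang.json; it is an illustration, not the project's code path.

```python
import re

# Illustrative subset of data/slang.json; the real helper loads the full file.
mapping = {"btw": "by the way", "imo": "in my opinion", "gr8": "great"}

# Same construction as slang(): escape each key, join into one \b-anchored alternation.
pattern = re.compile(r"\b(" + "|".join(map(re.escape, mapping.keys())) + r")\b")

text = "btw this movie was gr8 imo"
print(pattern.sub(lambda m: mapping[m.group()], text))
# -> by the way this movie was great in my opinion
```

One caveat that applies to the real mapping as well: keys whose first or last character is not a word character (for example "$", "€", "w/", "ave.") will rarely match inside `\b(...)\b`, since `\b` requires a word character on one side of the boundary.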