From 7cdf7a73b766a1ea952d7518d746b5243eadd7dd Mon Sep 17 00:00:00 2001 From: Adam Phillabaum Date: Wed, 9 Oct 2013 10:18:25 -0700 Subject: [PATCH 1/4] I had some data about common misspellings in an old Excel spreadsheet that I've been meaning to put in here. I did a little reformatting and used JSON. This duplicates data in the domains.txt, and I don't know how you want to rectify that. But, *shrug* --- typomapping.json | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 typomapping.json diff --git a/typomapping.json b/typomapping.json new file mode 100644 index 0000000..eb3ba9f --- /dev/null +++ b/typomapping.json @@ -0,0 +1,37 @@ +[{"domain": "aol.com", "typos": [] }, +{"domain": "anonymous.to", "typos": [] }, +{"domain": "comcast.net", "typos": [ "cocast.net", "comcsat.net" ] }, +{"domain": "dispostable.com", "typos": [] }, +{"domain": "everymail.net", "typos": [] }, +{"domain": "everyone.net", "typos": [] }, +{"domain": "facebook.com", "typos": [] }, +{"domain": "fastmail.fm", "typos": [] }, +{"domain": "flashmail.com", "typos": [] }, +{"domain": "gmail.com", "typos": [] }, +{"domain": "gmx.com", "typos": [] }, +{"domain": "googlemail.com", "typos": [] }, +{"domain": "guerillamail.com", "typos": [] }, +{"domain": "hotmail.com", "typos": [ "homtail.com","HOTAIL.COM","hotamil.com","hotmail.co","hotmail.om","hotmal.com","hotmial.com","hotmsil.com","hotrmail.com" ] }, +{"domain": "hotmail.fr", "typos": [] }, +{"domain": "hotmail.it", "typos": [] }, +{"domain": "hushmail.com", "typos": [] }, +{"domain": "inbox.com", "typos": [] }, +{"domain": "live.com", "typos": [] }, +{"domain": "lycos.com", "typos": [] }, +{"domain": "mail.com", "typos": [] }, +{"domain": "mail.ru", "typos": [] }, +{"domain": "mailinator.com", "typos": [] }, +{"domain": "me.com", "typos": [] }, +{"domain": "msn.com", "typos": [] }, +{"domain": "onebox.com", "typos": [] }, +{"domain": "outlook.com", "typos": [] }, +{"domain": "qmail.com", "typos": [] }, 
+{"domain": "rediff.com", "typos": [ "redifmail.com" ] }, +{"domain": "runbox.com", "typos": [] }, +{"domain": "seznam.cz", "typos": [] }, +{"domain": "spamgourmet.com", "typos": [] }, +{"domain": "trashmail.net", "typos": [] }, +{"domain": "yahoo.com", "typos": [ "tahoo.com", "ahoo.com" ,"ayhoo.com","yahho.com","yaho.cm","yahoo.cm","yahoo.com.com","yahoo.om","yahooo.com", "yhaoo.com", "yohoo.com"] }, +{"domain": "ymail.com", "typos": [] }, +{"domain": "yandex.ru", "typos": [] }, +{"domain": "zoho.com", "typos": [] }] \ No newline at end of file From 5b47885657d80b36f9d8ed2e5e45a8b88fbe5d73 Mon Sep 17 00:00:00 2001 From: Adam Phillabaum Date: Wed, 9 Oct 2013 10:41:49 -0700 Subject: [PATCH 2/4] Moving to a different format for ease of diffing/reading --- typomapping.json => typomapping.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename typomapping.json => typomapping.txt (100%) diff --git a/typomapping.json b/typomapping.txt similarity index 100% rename from typomapping.json rename to typomapping.txt From ff92660fe5901c2907fcc08360843273598d3559 Mon Sep 17 00:00:00 2001 From: Adam Phillabaum Date: Wed, 9 Oct 2013 10:47:38 -0700 Subject: [PATCH 3/4] REALLY updating the file format this time (not just changing the file name) --- typomapping.txt | 66 ++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/typomapping.txt b/typomapping.txt index eb3ba9f..1fe6d37 100644 --- a/typomapping.txt +++ b/typomapping.txt @@ -1,37 +1,29 @@ -[{"domain": "aol.com", "typos": [] }, -{"domain": "anonymous.to", "typos": [] }, -{"domain": "comcast.net", "typos": [ "cocast.net", "comcsat.net" ] }, -{"domain": "dispostable.com", "typos": [] }, -{"domain": "everymail.net", "typos": [] }, -{"domain": "everyone.net", "typos": [] }, -{"domain": "facebook.com", "typos": [] }, -{"domain": "fastmail.fm", "typos": [] }, -{"domain": "flashmail.com", "typos": [] }, -{"domain": "gmail.com", "typos": [] }, -{"domain": "gmx.com", 
"typos": [] }, -{"domain": "googlemail.com", "typos": [] }, -{"domain": "guerillamail.com", "typos": [] }, -{"domain": "hotmail.com", "typos": [ "homtail.com","HOTAIL.COM","hotamil.com","hotmail.co","hotmail.om","hotmal.com","hotmial.com","hotmsil.com","hotrmail.com" ] }, -{"domain": "hotmail.fr", "typos": [] }, -{"domain": "hotmail.it", "typos": [] }, -{"domain": "hushmail.com", "typos": [] }, -{"domain": "inbox.com", "typos": [] }, -{"domain": "live.com", "typos": [] }, -{"domain": "lycos.com", "typos": [] }, -{"domain": "mail.com", "typos": [] }, -{"domain": "mail.ru", "typos": [] }, -{"domain": "mailinator.com", "typos": [] }, -{"domain": "me.com", "typos": [] }, -{"domain": "msn.com", "typos": [] }, -{"domain": "onebox.com", "typos": [] }, -{"domain": "outlook.com", "typos": [] }, -{"domain": "qmail.com", "typos": [] }, -{"domain": "rediff.com", "typos": [ "redifmail.com" ] }, -{"domain": "runbox.com", "typos": [] }, -{"domain": "seznam.cz", "typos": [] }, -{"domain": "spamgourmet.com", "typos": [] }, -{"domain": "trashmail.net", "typos": [] }, -{"domain": "yahoo.com", "typos": [ "tahoo.com", "ahoo.com" ,"ayhoo.com","yahho.com","yaho.cm","yahoo.cm","yahoo.com.com","yahoo.om","yahooo.com", "yhaoo.com", "yohoo.com"] }, -{"domain": "ymail.com", "typos": [] }, -{"domain": "yandex.ru", "typos": [] }, -{"domain": "zoho.com", "typos": [] }] \ No newline at end of file +"ahoo.com","yahoo.com" +"ayhoo.com","yahoo.com" +"cocast.net","comcast.net" +"comcsat.net","com" +"eathlink.net","earthlink.net" +"gmaill.com","gmail.com" +"gmial.com","gmail.com" +"homtail.com","hotmail.com" +"HOTAIL.COM","hotmail.com" +"hotamil.com","hotmail.com" +"hotmail.co","hotmail.com" +"hotmail.om","hotmail.com" +"hotmal.com","hotmail.com" +"hotmial.com","hotmail.com" +"hotmsil.com","hotmail.com" +"hotrmail.com","hotmail.com" +"htomail.com","hotmail.com" +"redifmail.com","rediffmail.com" +"sbcglobl.net","sbcglobal.net" +"sbclobal.net","sbcglobal.net" +"tahoo.com","yahoo.com" 
+"yahho.com","yahoo.com" +"yaho.cm","yahoo.com" +"yahoo.cm","yahoo.com" +"yahoo.com.com","yahoo.com" +"yahoo.om","yahoo.com" +"yahooo.com","yahoo.com" +"yhaoo.com","yahoo.com" +"yohoo.com","yahoo.com" \ No newline at end of file From 7d11c99ca3d53e05534d5fbc8a36def923a0a825 Mon Sep 17 00:00:00 2001 From: Adam Phillabaum Date: Wed, 9 Oct 2013 10:48:38 -0700 Subject: [PATCH 4/4] adding a newline at the end of the file --- typomapping.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/typomapping.txt b/typomapping.txt index 1fe6d37..616b6a2 100644 --- a/typomapping.txt +++ b/typomapping.txt @@ -26,4 +26,4 @@ "yahoo.om","yahoo.com" "yahooo.com","yahoo.com" "yhaoo.com","yahoo.com" -"yohoo.com","yahoo.com" \ No newline at end of file +"yohoo.com","yahoo.com"