Created
September 9, 2017 02:20
-
-
Save marceloszilagyi/74167f1fc89bf50e700071d6c44e2efa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to expand contractions and convert numbers to "num" -----
# Loop only used to control memory/progress
#
# Steps, in order:
#   1. normalise apostrophe look-alikes to a plain '
#   2. lower-case the text
#   3. collapse number-like tokens (and "num ... num" runs) to "num"
#   4. expand every contraction listed in `contrac_repl`
#   5. strip any possessive 's that is still left
#
# Args:
#   text: character vector to clean.
#
# Returns:
#   The cleaned character vector (lower-cased, contractions expanded).
#
# Depends on `contrac_repl` being visible from this function; it is used as a
# named vector whose names are the contractions and whose values are the
# expansions. Both sides are lower-cased before use because the text has
# already been lower-cased at that point.
#
# NOTE(review): keys of `contrac_repl` that are also ordinary words will still
# be expanded when they occur as whole words -- confirm the table only holds
# entries you really want rewritten.
expand_contraction = function (text) {
  # Backtick, left/right curly single quotes and the acute accent all become a
  # straight apostrophe so the contraction keys (written with ') can match.
  # Written as \u escapes so the characters survive copy/paste.
  text <- str_replace_all(text, pattern = "[`\u2018\u2019\u00b4']", replacement = "'")
  text <- tolower(text)

  # the real regex is \b\w*\d,*\.*\w*,*\.*\b
  text <- str_replace_all(text, pattern = "\\b\\w*\\d,*\\.*\\w*,*\\.*\\b", replacement = "num")
  text <- str_replace_all(text, pattern = "\\b(num)\\W*\\S*(num)\\b", replacement = "num")

  # Loop-invariant work hoisted out of the loop.
  contractions <- tolower(names(contrac_repl))
  expansions <- unname(tolower(contrac_repl))

  for (i in seq_along(contractions)) {
    # Match the contraction only as a whole token. Lookarounds are used
    # instead of \b because some keys start with an apostrophe, where \b
    # would require a preceding word character. Without these guards a key
    # such as "cause" would also fire inside "because".
    whole_word <- paste0("(?<!\\w)", contractions[i], "(?!\\w)")
    text <- str_replace_all(text, pattern = whole_word, replacement = expansions[i])
  }

  # after the contractions, remove any 's left at the end of a word
  # (anchored so an opening quote followed by an s-word is left alone)
  text <- str_replace_all(text, "'s\\b", "")
  return(text)
}
# Contraction list: each contraction and the expanded form that replaces it
contraction expanded | |
1 'cause because | |
2 'tis it is | |
3 'twas it was | |
4 ain't am not | |
5 aren't are not | |
6 can't can not | |
7 could've could have | |
8 couldn't could not | |
9 didn't did not | |
10 doesn't does not | |
11 don't do not | |
12 hasn't has not | |
13 he'd he would | |
14 he'll he will | |
15 he's he is | |
16 how'd how did | |
17 how'll how will | |
18 how's how is | |
19 I'd I would | |
20 I'll I will | |
21 I'm I am | |
22 I've I have | |
23 isn't is not | |
24 it'll it will | |
25 it's it is | |
26 let's let us | |
27 might've might have | |
28 mightn't might not | |
29 must've must have | |
30 mustn't must not | |
31 shan't shall not | |
32 she'd she would | |
33 she'll she will | |
34 she's she is | |
35 should've should have | |
36 shouldn't should not | |
37 that'll that will | |
38 that's that is | |
39 there'll there will | |
40 there's there is | |
41 they'd they would | |
42 they'll they will | |
43 they're they are | |
44 they've they have | |
45 wasn't was not | |
46 we'd we would | |
47 we'll we will | |
48 we're we are | |
49 we've we have | |
50 weren't were not | |
51 what's what is | |
52 what'd what did | |
53 when'd when did | |
54 when'll when will | |
55 when's when is | |
56 where'll where will | |
57 where's where is | |
58 who'd who would | |
59 who'll who will | |
60 who's who is | |
61 why'd why did | |
62 why'll why will | |
63 why's why is | |
64 won't will not | |
65 would've would have | |
66 wouldn't would not | |
67 you'd you would | |
68 you'll you will | |
69 you're you are | |
70 you've you have | |
71 cause because | |
72 tis it is | |
73 twas it was | |
74 aint am not | |
75 arent are not | |
76 cant can not | |
77 couldve could have | |
78 couldnt could not | |
79 didnt did not | |
80 doesnt does not | |
81 dont do not | |
82 hasnt has not | |
83 hed he would | |
84 hell he will | |
85 hes he is | |
86 howd how did | |
87 howll how will | |
88 hows how is | |
89 Id I would | |
90 Ill I will | |
91 Im I am | |
92 Ive I have | |
93 isnt is not | |
94 itll it will | |
95 lets let us | |
96 mightve might have | |
97 mightnt might not | |
98 mustve must have | |
99 mustnt must not | |
100 shant shall not | |
101 shed she would | |
102 shell she will | |
103 shes she is | |
104 shouldve should have | |
105 shouldnt should not | |
106 thatll that will | |
107 thats that is | |
108 therell there will | |
109 theres there is | |
110 theyd they would | |
111 theyll they will | |
112 theyre they are | |
113 theyve they have | |
114 wasnt was not | |
115 wed we would | |
116 well we will | |
117 were we are | |
118 weve we have | |
119 werent were not | |
120 whats what is | |
121 whatd what did | |
122 whend when did | |
123 whenll when will | |
124 whens when is | |
125 wherell where will | |
126 wheres where is | |
127 whod who would | |
128 wholl who will | |
129 whos who is | |
130 whyd why did | |
131 whyll why will | |
132 whys why is | |
133 wont will not | |
134 wouldve would have | |
135 wouldnt would not | |
136 youd you would | |
137 youll you will | |
138 youre you are | |
139 youve you have |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment