Skip to content

Instantly share code, notes, and snippets.

@dgrahn
Created July 21, 2021 19:34
Show Gist options
  • Save dgrahn/8fb4b3735754f17aa6fe63536c7829fd to your computer and use it in GitHub Desktop.
Save dgrahn/8fb4b3735754f17aa6fe63536c7829fd to your computer and use it in GitHub Desktop.
C/C++ Token Frequencies
token freq
0 Identifier 0.28975789809994656
1 , 0.11085500513657347
2 IntegerLiteral 0.07681658334963642
3 ( 0.07371915527596774
4 ) 0.07371346442857962
5 ; 0.07120506446978692
6 = 0.03211986303113621
7 -> 0.029400075358314605
8 { 0.02097414346886977
9 } 0.020972327475899624
10 * 0.017773761473940605
11 . 0.016039581633917055
12 if 0.012348802863512129
13 StringLiteral 0.011417610919209918
14 & 0.010554010968825212
15 [ 0.008786112959013884
16 ] 0.008785204270047805
17 struct 0.008649981041622385
18 return 0.007634802230846169
19 Directive 0.007328694156260919
20 int 0.006982866794537391
21 < 0.0060099036264591454
22 - 0.005206501627095519
23 static 0.004545366388250609
24 : 0.004410796631068959
25 == 0.003943534491907844
26 void 0.0037814462704533016
27 :: 0.0034559100641442992
28 + 0.0033367818639992723
29 > 0.0029978779658566294
30 const 0.002879925867759952
31 Not 0.0027847130377939935
32 case 0.00253097937922428
33 else 0.0023321982049886426
34 FloatingLiteral 0.0022200953400748897
35 unsigned 0.0021729338056678525
36 ++ 0.002104277468442321
37 break 0.0019950357811655394
38 char 0.001898146262325379
39 != 0.001895911164461226
40 AndAnd 0.001766705134334138
41 | 0.0016638320794668284
42 BooleanLiteral 0.001322241085718184
43 for 0.001320744942031019
44 sizeof 0.0012710460033290303
45 goto 0.0012687614700151984
46 OrOr 0.0011465693491827663
47 long 0.0010043174385345383
48 CharacterLiteral 0.0009884402724732364
49 += 0.0008778460928471118
50 |= 0.0007968639014632323
51 bool 0.000640555972860234
52 ? 0.0006262089974689497
53 / 0.0005873789328862162
54 >= 0.0005569013049138375
55 while 0.0005059921017671452
56 ~ 0.0005045036138422262
57 switch 0.0004450728933059277
58 EOF 0.0003992373830852928
59 <= 0.00036605361627921497
60 &= 0.0003408760071087381
61 default 0.0003302084526067349
62 this 0.00029528344230811984
63 -- 0.0002926599395412768
64 float 0.0002858186503732425
65 continue 0.00027592009602970516
66 double 0.00027365922248361854
67 enum 0.00021285161887805719
68 new 0.00020547138713060672
69 -= 0.00020360668966295733
70 inline 0.0001989171698112128
71 % 0.00019744514754579771
72 auto 0.00019552155072137154
73 MultiLineMacro 0.0001612895600262687
74 PointerLiteral 0.0001251066195069563
75 short 0.000124903414802518
76 extern 0.00011851700871386241
77 UserDefinedLiteral 0.00011629075921813445
78 typedef 0.00010647857264465849
79 namespace 9.858140382535317e-05
80 class 9.815264266841171e-05
81 ^ 8.51566592509554e-05
82 do 8.462298722142718e-05
83 static_cast 8.070516050984315e-05
84 template 7.647194717065874e-05
85 union 7.576861729437307e-05
86 volatile 7.188418355580161e-05
87 using 6.643181893232157e-05
88 private 5.974022107533789e-05
89 public 5.677063168957646e-05
90 typename 4.849909994355986e-05
91 delete 4.797254507993871e-05
92 *= 4.781323597712179e-05
93 >>= 3.9646538161357816e-05
94 register 3.886607359510646e-05
95 ... 3.6170514332350325e-05
96 virtual 3.269310554132849e-05
97 ^= 3.225314927477107e-05
98 operator 3.223945353929586e-05
99 throw 3.1813654662218426e-05
100 asm 2.9072391652988855e-05
101 <<= 2.820059653612476e-05
102 /= 2.6522715055743893e-05
103 wchar_t 2.564314876313263e-05
104 override 2.525155075357614e-05
105 reinterpret_cast 2.5131405298831575e-05
106 try 1.741370447388534e-05
107 catch 1.600658306730923e-05
108 static_assert 1.5125054745177641e-05
109 signed 1.3128978242000654e-05
110 noexcept 1.2640509833774034e-05
111 constexpr 1.1923330337344905e-05
112 dynamic_cast 9.529269610878605e-06
113 final 7.148750735219252e-06
114 const_cast 5.982535776805313e-06
115 decltype 5.7780230526868046e-06
116 %= 5.5216127241973635e-06
117 protected 4.7412635714472045e-06
118 explicit 2.6733229281924574e-06
119 friend 2.44818965852701e-06
120 export 2.0843678315915803e-06
121 typeid 2.066824979409857e-06
122 mutable 1.2851293358124168e-06
123 ->* 1.2548909984991834e-06
124 char16_t 1.2363094248067004e-06
125 char32_t 8.326314687917419e-07
126 alignof 6.199628572554138e-07
127 .* 3.985612907952893e-07
128 alignas 3.5347308299489553e-07
129 thread_local 2.8872610882419364e-07
@dgrahn
Copy link
Author

dgrahn commented Jul 21, 2021

This data contains the usage frequency of each C/C++ token type. For each row, `freq` is the number of occurrences of that token type divided by the total number of tokens. The data was drawn from over 10M C/C++ files from the top ~36,500 GitHub repositories.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment