ABCDEFGHIJKLMNOPQRSTUVWXYZAAABACAD
1
Our Dialog: multi-round and single-round Instruction FollowingPreTraining TextCommonSense TextCommonSense QA
2
[Dialog]
lmflow_chat_zh_multi
(nll)
[Dialog]
lmflow_chat_zh_single
(nll)
[Dialog]
lmflow_chat_en_multi
(nll)
[Dialog]
lmflow_chat_en_single
(nll)
[Instruction]
gpt4_en_eval
(nll)
[Instruction]
gpt4_zh_eval
(nll)
[PreTrain Text]
wiki_en_eval
(nll)
[PreTrain Text]
wiki_zh_eval
(nll)
[CS_Text]
winogrande
(nll)
[CS_Text]
boolq
(nll)
[CS_Text]
arc_easy
(nll)
[CS_Text]
hellaswag
(nll)
[CS_Text]
piqa
(nll)
[CS_Text]
OBQA
(nll)
[CS_Text]
arc_challenge
(nll)
[CS_QA]
winogrande
(acc)
[CS_QA]
boolq
(acc)
[CS_QA]
arc_easy
(acc)
[CS_QA]
hellaswag
(acc)
[CS_QA]
piqa
(acc)
[CS_QA]
OBQA
(acc)
[CS_QA]
arc_challenge
(acc)
Chat_NLLEn_Chat_NLLInstructionCS_NLL_AVERAGE
CS_ACC_AVERAGE
WikiNLL_AVERAGE
3
llama_13b45046915717720341412721072791537020711957.258370.0168.5374.5476.2379.1142.244.54313.25167203109.7565.02285714308.5339.0227273
4
llama_7b4845031631842144501352112879.516771.5214121588567.973.1567.372.9678.3542.441.38333.5173.5214113.714285763.34857143332358.1818182
5
vicuna_13b4144301501702063681568124895.520788.52621497310566.2279.8564.6573.0477.6441.640.3629116020614063.33714286287397.2727273
6
robin_7b33234413715217228214641080821887621912760.759067.9668.4770.8870.4277.314141.55241.25144.5172120.392857162.51285714227349.1590909
7
redpajama-3b5035311631932244741328115281.517874.522712660.58865.1171.0169.9170.1676.6638.638.4347.5178224119.357142961.40714286349364.8636364
8
vicuna_7b4184351551752113821632126497219892581517210563.6977.3763.1368.8276.3339.638.74295.75165211141.571428661.09714286296.5407.2727273
9
redpajama-3b-chat541561164186219506152012888320078.523513060.59463.7767.3770.0866.3275.6839.637.46363175219125.857142960.04362.5401.2727273
10
dolly_7b498519201223266456163212409124290.52741516810960.6964.6567.2168.875.0339.640.36360.25212266146.559.47714286361419.9545455
11
pythia_deduped_12b430447178201235392141610568319879.5232127619466.1465.8470..7568.8377.0440.238.31314189.5235124.928571459.39333333313.5361.2272727
12
redpajama-3b-inst5535801802072435281424122480.519077.52301276190.562.966.4269.0264.2174.3739.239.25380193.5243122.357142959.33857143385.5388.6818182
13
gptj_6b44747117619923241813761080832328022712660.759564.0965.4466.9266.2576.1738.236.6323.25187.5232129.107142959.09571429325364.5227273
14
pythia_deduped_6.9b440460180203238406143210728320281.523212761.59662.6764.468.5265.7976.8238.835.58320.75191.5238126.142857158.94322366.4545455
15
dolly_12b50252622225030046416481240922529229615768.5112.561.4857.196770.6975.2440.237.37375236300152.857142958.45285714382429.2727273
16
pythia-6.9b4554761842072434221424110483.520382.523712861.759761.1763.5267.1763.8776.3937.235.24330.5195.5243127.535714357.79428571332.5371.4318182
17
dolly_3b5125381992212644761624124889.523890.52681456710959.8357.8364.964.9674.3239.437.12367.5210264143.857142956.90857143370419.9090909
18
bloom_7.1b32335218821325230016487048521581.523713062.59664.462.9165.0359.6473.5635.833.36269200.5252129.571428656.38571429276346.4545455
19
pythia_deduped_2.8b475498188211250442146411288421085.524013062.75100.558.6464.3163.5959.4974.163532.94343199.5250130.392857155.44714286346381.5227273
20
bloom_3b340374202228272322175275286.52288624513464.5101.558.7261.6259.4752.7270.8442.230.55286215272135.071428653.73142857297367.5909091
21
pythia_deduped_1.4b512534201224266480158411928522287.524513363.7510356.0458.0761.754.3572.4733.229.69367.75212.5266134.178571452.21714286373405.5681818
22
codegen_16b_multi5195472192463024981952122498.5270101.528215873.511857.762.8453.4550.765.6731.428.41382.75232.5302157.357142950.02428571400461.5909091
23
codegen_16b_mono4855132262533204662112117699.528410329015974120.556.8362.2952.4444.6264.9130.626.96369.25239.5320161.428571448.37857143393473.0909091
24
gpt-neo_2.7b56157919622026052415121256852188524013262.75100.557.761.7761.0755.872.9133.230.2389208260131.892857153.23571429392406.8409091
25
codegen_6b_mono53556226228837251223681248105314110.53141697812951.0761.2844.1936.1460.1226.223.72411.75275372174.214285743.24571429442519.9545455
26
stablelm-base-alpha-7b6006242262523065721944133691.528498.52461736811850.5560.0650.5541.2267.363227.05425.5239306154.142857146.97439476.0909091
27
codegen_6b_multi58762025128235657222081344104.529811030616977.512854.7862.2647.5241.2262.3529.424.83435266.5356170.428571446.05142857464515.7272727
28
galactica_6.9b7557741942182687121576150492.52158126014365.69657.4666.7970.5451.1368.723738.23485.25206268136.157142955.69571429490455.7363636
29
stablelm-base-alpha-3b683767244266324652206414249230410127416769120.552.6457.6544.7438.3163.8229.425.77490255324161.071428644.61857143488508.3181818
30
opt_6.9b7718001842102427441504149681.520081.5224125619665.1966.0665.5767.1976.537.434.64491.25197242124.142857158.93571429493441.3636364
31
opt_2.7b86489820022526083216081616832308523012861.7510060.7760.3760.7760.6374.7635.231.23546.75212.5260131.107142954.81857143546475.7954545
32
galactica_1.3b88790822223730684017681664972428827815368103.554.362.0562.4241.0463.2830.230.63563.5229.5306147.071428649.13142857573509.7727273
33
gpt2_xl_1.2b100110342212472969681976178485.522887.524313462.75103.558.3361.7758.2950.8970.843228.5625.75234296134.892857151.51714286632542.5681818
34
open-llama-7b hz tune317322
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100