ABCDEFGHIJKLMNOPQRSTUVWXYZ
1
IndexMethodOxfordPetsFlowers102
FGVCAircraft
DTDEuroSAT
StanfordCars
Food101SUN397Caltech101UCF101ImageNetAverage
2
0Handcrafted prompt85.7766.1417.2842.3237.5655.6177.3158.5286.2961.4658.18
3
1
4
2[V]x16, pos=end, shots=185.8968.129.6444.3950.6355.5974.3260.2987.5361.9257.15
5
3[V]x16, pos=end, shots=282.6477.5118.6845.1561.5058.2872.4959.4887.9364.0957.81
6
4[V]x16, pos=end, shots=486.7086.2021.8753.4970.1862.6273.3363.4789.5567.0359.99
7
5[V]x16, pos=end, shots=885.3291.1826.1359.9776.7368.4371.8265.5290.2171.9461.56
8
6[V]x16, pos=end, shots=1687.0194.5131.2663.5883.5373.3674.6769.2691.8375.7162.95
9
7[V]x16, pos=mid, shots=185.5569.486.5841.8653.2656.0272.4359.2886.5261.4757.33
10
8[V]x16, pos=mid, shots=280.5779.4014.1845.9260.6857.0971.0659.5587.3065.0558.42
11
9[V]x16, pos=mid, shots=486.0686.5222.0252.7270.9361.6272.6463.6788.5367.0659.96
12
10[V]x16, pos=mid, shots=883.5891.8128.1859.1477.6567.3272.0465.6490.3372.7462.04
13
11[V]x16, pos=mid, shots=1686.1694.8032.2963.1683.5573.2774.4669.1291.6275.2963.08
14
12[V]x16, pos=end, CSC, shots=164.6562.8914.6131.5449.6734.1450.6336.9571.4045.0432.40
15
13[V]x16, pos=end, CSC, shots=266.8978.8217.7039.6458.4542.3954.8645.1976.5652.9638.80
16
14[V]x16, pos=end, CSC, shots=474.9187.1022.9049.9670.4553.3360.2653.3183.4260.3644.93
17
15[V]x16, pos=end, CSC, shots=876.1292.6129.5258.3378.0063.0664.5258.4186.7167.4250.11
18
16[V]x16, pos=end, CSC, shots=1681.1095.5936.9362.8784.5572.2070.0263.8390.5873.2354.57
19
17[V]x16, pos=mid, CSC, shots=165.0762.5814.0428.9248.7934.2646.9235.6669.2042.7231.01
20
18[V]x16, pos=mid, CSC, shots=266.6976.7918.4839.6658.9942.6153.0644.7776.6552.2838.19
21
19[V]x16, pos=mid, CSC, shots=474.3587.0422.9250.3770.5553.6859.7153.0483.3159.2344.67
22
20[V]x16, pos=mid, CSC, shots=875.9692.5329.4757.8478.3763.2264.5558.3186.3367.9750.29
23
21[V]x16, pos=mid, CSC, shots=1680.8795.5237.4862.5784.8772.3270.0463.7690.5773.3554.65
24
22linear probe CLIP, shots=130.1458.0712.8929.5951.0024.6430.1332.8070.6241.4322.07
25
23linear probe CLIP, shots=243.4773.3517.8539.4861.5836.5342.7944.4478.7253.5531.95
26
24linear probe CLIP, shots=456.3584.8023.5750.0668.2748.4255.1554.5984.3462.2341.29
27
25linear probe CLIP, shots=865.9492.0029.5556.5676.9360.8263.8262.1787.7869.6449.55
28
26linear probe CLIP, shots=1676.4294.9536.3963.9782.7670.0870.1767.1590.6373.7255.87
29
27
30
28
31
29
32
30oxford_pets
oxford_flowers
fgvc_aircraft
describable textures
eurosat
stanford_cars
food101sun397caltech101ucf101imagenet
33
31How many context words to use?OxfordPetsFlowers102
FGVCAircraft
DTDEuroSAT
StanfordCars
Food101SUN397Caltech101UCF101ImageNet
34
32[V]x4, pos=end, shots=1687.1293.1828.4762.9283.0869.3676.5669.1991.4474.5163.3372.65
35
33[V]x8, pos=end, shots=1686.7594.2930.0262.9883.4171.3375.5869.5792.0275.9963.2773.20
36
34[V]x16, pos=end, shots=1687.0194.5131.2663.5883.5373.3674.6769.2691.8375.7162.9573.42
37
35[V]x4, pos=mid, shots=1686.7392.8328.4563.0281.7268.9376.6268.9191.7175.6663.2472.53
38
36[V]x8, pos=mid, shots=1686.4994.2931.1163.6784.0071.4175.6769.1091.7974.9163.2273.24
39
37[V]x16, pos=mid, shots=1686.1694.8032.2963.1683.5573.2774.4669.1291.6275.2963.0873.35
40
38
41
39Context initialization
42
40a photo of a {CLASS}86.3792.5328.1663.2483.3269.3677.1269.0591.4875.3663.1172.65
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100