{"id":527,"date":"2022-07-21T03:51:18","date_gmt":"2022-07-21T03:51:18","guid":{"rendered":"https:\/\/blog.liguanxin.cn\/?p=527"},"modified":"2022-07-21T03:51:18","modified_gmt":"2022-07-21T03:51:18","slug":"%e8%ae%ba%e6%96%87%e7%ac%94%e8%ae%b0-fast-vision-transformers-with-hilo-attention","status":"publish","type":"post","link":"https:\/\/blog.liguanxin.cn\/index.php\/2022\/07\/21\/%e8%ae%ba%e6%96%87%e7%ac%94%e8%ae%b0-fast-vision-transformers-with-hilo-attention\/","title":{"rendered":"\u8bba\u6587\u7b14\u8bb0\u2014\u2014Fast Vision Transformers with HiLo Attention"},"content":{"rendered":"<p><strong>\u521b\u65b0\u70b9\uff1a<br \/>\n\u2460\u9ad8\u9891\u6355\u6349\u5c40\u90e8\u7cbe\u7ec6\u6570\u636e\uff0c\u4f4e\u9891\u805a\u7126\u5168\u5c40\u7ed3\u6784<br \/>\n\u2461\u4e3a\u4e86\u533a\u5206\u4e0d\u540c\u9891\u7387\u7684\u72ec\u7279\u6027\u8d28\uff0c\u8ba9attention\u4e2d\u7684\u4e0d\u540c\u5934\u5206\u4e3a\u4e24\u7ec4\uff0c\u5206\u522b\u8fdb\u5165\u9ad8\/\u4f4e\u9891\u6ce8\u610f\u529b\u6a21\u5757\uff0c\u9ad8\u9891\u901a\u8fc7\u5c40\u90e8\u7a97\u53e3\u8ba1\u7b97\u81ea\u6ce8\u610f\u529b\uff0c\u800c\u4f4e\u9891\u901a\u8fc7\u5e73\u5747\u6c60\u5316K\u548cV\u6765\u8ba1\u7b97\u5168\u5c40\u81ea\u6ce8\u610f\u529b<br \/>\n\u2462\u5728GPU\u4e0a\u7684FLOPs\u3001\u901f\u5ea6\u548c\u5185\u5b58\u6d88\u8017\u4f18\u4e8e\u73b0\u6709\u7684\u6ce8\u610f\u529b\u673a\u5236\u3002<br \/>\n<\/strong><\/p>\n<p>\u73b0\u6709\u65b9\u6cd5\u5b58\u5728\u7684\u95ee\u9898\uff1a\u5c3d\u7ba1\u73b0\u6709\u7684\u81ea\u6ce8\u610f\u529b\u673a\u5236\u5728\u4f4e\u5206\u8fa8\u7387\u56fe\u50cf\u4e0a\u6548\u679c\u5f88\u597d\uff0c\u4f46\u662f\u7531\u4e8e\u4e8c\u6b21\u590d\u6742\u5ea6\uff0c\u5728\u9ad8\u5206\u8fa8\u7387\u56fe\u50cf\u4e0a\u901f\u5ea6\u4f1a\u6162\u5f97\u591a\u3002<\/p>\n<h1>Attention\u7ed3\u6784<\/h1>\n<p><img src=\"https:\/\/blog.liguanxin.cn\/wp-content\/uploads\/2022\/07\/\u5fae\u4fe1\u622a\u56fe_20220720115537.png\" alt=\"\" 
\/><\/p>\n<p><strong>\u9ad8\u9891\u6ce8\u610f\u529b(Hi-Fi)\uff1a<\/strong>\u7531\u4e8e\u9ad8\u9891\u6570\u636e\u5173\u6ce8\u5c40\u90e8\u7ec6\u8282\uff0c\u5e94\u7528\u5168\u5c40\u6ce8\u610f\u529b\u53ef\u80fd\u662f\u5197\u4f59\u7684\uff0c\u4e8e\u662f\u91c7\u7528\u5c40\u90e8\u7a97\u53e3\u81ea\u6ce8\u610f\uff08\u4f8b\u59822\u00d72\u7a97\u53e3\uff09\u6355\u83b7\u7ec6\u7c92\u5ea6\uff0c\u4ece\u800c\u8282\u7701\u4e86\u663e\u8457\u7684\u8ba1\u7b97\u590d\u6742\u5ea6\u3002(\u65e0\u6ed1\u52a8\u6216\u8005\u591a\u5c3a\u5ea6\u7a97\u53e3)<\/p>\n<p><strong>\u4f4e\u9891\u6ce8\u610f\u529b(Lo-Fi)\uff1a<\/strong>\u5168\u5c40\u6ce8\u610f\u529b\u6709\u52a9\u4e8e\u6355\u83b7\u4f4e\u9891\uff0c\u4e3a\u4e86\u51cf\u5c11\u8ba1\u7b97\u590d\u6742\u5ea6\uff0c\u5bf9\u6bcf\u4e2a\u7a97\u53e3\u5e94\u7528\u5e73\u5747\u6c60\u5316\u6765\u83b7\u5f97<span class=\"katex-eq\" data-katex-display=\"false\">\\textbf{K}\\in\\mathbb{R}^{N\/s^2\\times D_h}<\/span>\u548c<span class=\"katex-eq\" data-katex-display=\"false\">\\textbf{V}\\in\\mathbb{R}^{N\/s^2\\times D_h}<\/span>\uff0c\u5176\u4e2d<span class=\"katex-eq\" data-katex-display=\"false\">s^2<\/span>\u662f\u7a97\u53e3\u5927\u5c0f\u3002<\/p>\n<p><strong>\u5206\u5934\u7b56\u7565\uff1a<\/strong><span class=\"katex-eq\" data-katex-display=\"false\">(1\u2212\\alpha )N_h<\/span>\u4e2a\u5934\u5c06\u7528\u4e8e\u9ad8\u9891\uff0c\u5176\u4ed6<span class=\"katex-eq\" data-katex-display=\"false\">\\alpha N_h<\/span>\u4e2a\u5934\u5c06\u7528\u4e8e\u4f4e\u9891\u3002<\/p>\n<p>\u76f8\u6bd4\u4e8eswin\u6216\u8005vit\uff0c\u672c\u6587\u63d0\u51fa\u7684\u65b9\u6cd5\u6bcf\u4e2a\u81ea\u6ce8\u610f\u529b\u6a21\u5757\u90fd\u663e\u8457\u51cf\u5c11\u4e86\u8ba1\u7b97\u91cf\u3002<\/p>\n<h2>\u4f4d\u7f6e\u7f16\u7801<\/h2>\n<p>\u5728\u6bcf\u4e2aFFN\u4e2d\u52a0\u5165\u96f6\u586b\u5145\u76843\u00d73\u7684DW\u5377\u79ef\u4f5c\u4e3a\u76f8\u5bf9\u4f4d\u7f6e\u7f16\u7801\u3002<\/p>\n<h1>\u603b\u4f53\u7ed3\u6784<\/h1>\n<p><img 
src=\"https:\/\/blog.liguanxin.cn\/wp-content\/uploads\/2022\/07\/\u5fae\u4fe1\u622a\u56fe_20220720170352.png\" alt=\"\" \/><\/p>\n<ul>\n<li>ConvFFN\u4ee3\u8868\u5e94\u7528\u4e86DW\u5377\u79ef\u7684FFN\u3002<\/li>\n<li>DTM\u6765\u81ea\u4e8eLITv1\u4e2d\u7684\u53ef\u53d8\u5f62token\u878d\u5408\u6a21\u5757\u3002<\/li>\n<\/ul>\n<h2>\u5b9e\u9a8c\u7ed3\u679c<\/h2>\n<p><img src=\"https:\/\/blog.liguanxin.cn\/wp-content\/uploads\/2022\/07\/\u5fae\u4fe1\u622a\u56fe_20220720170043.png\" alt=\"\" \/><br \/>\n\u53c2\u6570\u91cf\u5dee\u522b\u4e0d\u5927\uff0c\u63d0\u5347\u4e860.2\u4e2a\u70b9<br \/>\n\u76f8\u6bd4\u4e8ev1\uff0c\u63d0\u5347\u4e861\u4e2a\u591a\u70b9<\/p>\n<h1>CODE<\/h1>\n<pre><code class=\"language-python\">class HiLo(nn.Module):\n    &quot;&quot;&quot;\n    HiLo Attention\n    Link: https:\/\/arxiv.org\/abs\/2205.13213\n    &quot;&quot;&quot;\n    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., window_size=2, alpha=0.5):\n        super().__init__()\n        assert dim % num_heads == 0, f&quot;dim {dim} should be divided by num_heads {num_heads}.&quot;\n        head_dim = int(dim\/num_heads)\n        self.dim = dim\n\n        # self-attention heads in Lo-Fi\n        # \u4f4e\u9891\u7279\u5f81\n        self.l_heads = int(num_heads * alpha)\n        # token dimension in Lo-Fi\n        self.l_dim = self.l_heads * head_dim\n\n        # self-attention heads in Hi-Fi\n        # \u9ad8\u9891\u7279\u5f81\n        self.h_heads = num_heads - self.l_heads\n        # token dimension in Hi-Fi\n        self.h_dim = self.h_heads * head_dim\n\n        # local window size. 
The `s` in our paper.\n        self.ws = window_size\n\n        if self.ws == 1:\n            # \u7a97\u53e3\u662f1\u7684\u60c5\u51b5\u7b49\u4e8e\u6807\u51c6\u591a\u5934\u81ea\u6ce8\u610f\u529b\n            self.h_heads = 0\n            self.h_dim = 0\n            self.l_heads = num_heads\n            self.l_dim = dim\n\n        self.scale = qk_scale or head_dim ** -0.5\n\n        # Low frequence attention (Lo-Fi)\n        if self.l_heads &gt; 0:\n            # \u7a97\u53e3\u5927\u5c0f\u4e0d\u7b49\u4e8e1\u5219\u6839\u636e\u7a97\u53e3\u505a\u5e73\u5747\u6c60\u5316\n            if self.ws != 1:\n                self.sr = nn.AvgPool2d(kernel_size=window_size, stride=window_size)\n            self.l_q = nn.Linear(self.dim, self.l_dim, bias=qkv_bias)\n            self.l_kv = nn.Linear(self.dim, self.l_dim * 2, bias=qkv_bias)\n            self.l_proj = nn.Linear(self.l_dim, self.l_dim)\n\n        # High frequence attention (Hi-Fi)\n        if self.h_heads &gt; 0:\n            self.h_qkv = nn.Linear(self.dim, self.h_dim * 3, bias=qkv_bias)\n            self.h_proj = nn.Linear(self.h_dim, self.h_dim)\n\n    # \u9ad8\u9891\u81ea\u6ce8\u610f\u529b\n    def hifi(self, x):\n        B, H, W, C = x.shape\n        h_group, w_group = H \/\/ self.ws, W \/\/ self.ws\n\n        # \u7a97\u53e3\u6570\u91cf\n        total_groups = h_group * w_group\n\n        x = x.reshape(B, h_group, self.ws, w_group, self.ws, C).transpose(2, 3)\n\n        # \u53ea\u5728\u7a97\u53e3\u5185\u505a\u81ea\u6ce8\u610f\u529b\n        qkv = self.h_qkv(x).reshape(B, total_groups, -1, 3, self.h_heads, self.h_dim \/\/ self.h_heads).permute(3, 0, 1, 4, 2, 5)\n        q, k, v = qkv[0], qkv[1], qkv[2]  # B, hw, n_head, ws*ws, head_dim\n\n        attn = (q @ k.transpose(-2, -1)) * self.scale  # B, hw, n_head, ws*ws, ws*ws\n        attn = attn.softmax(dim=-1)\n        attn = (attn @ v).transpose(2, 3).reshape(B, h_group, w_group, self.ws, self.ws, self.h_dim)\n        x = attn.transpose(2, 3).reshape(B, h_group * 
self.ws, w_group * self.ws, self.h_dim)\n\n        x = self.h_proj(x)\n        return x\n\n    def lofi(self, x):\n        B, H, W, C = x.shape\n\n        # \u5168\u5c40\u81ea\u6ce8\u610f\u529b\n        q = self.l_q(x).reshape(B, H * W, self.l_heads, self.l_dim \/\/ self.l_heads).permute(0, 2, 1, 3)\n\n        if self.ws &gt; 1:\n            x_ = x.permute(0, 3, 1, 2)\n            # \u5bf9kv\u8fdb\u884c\u5e73\u5747\u6c60\u5316\n            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)\n            kv = self.l_kv(x_).reshape(B, -1, 2, self.l_heads, self.l_dim \/\/ self.l_heads).permute(2, 0, 3, 1, 4)\n        else:\n            kv = self.l_kv(x).reshape(B, -1, 2, self.l_heads, self.l_dim \/\/ self.l_heads).permute(2, 0, 3, 1, 4)\n        k, v = kv[0], kv[1]\n\n        attn = (q @ k.transpose(-2, -1)) * self.scale\n        attn = attn.softmax(dim=-1)\n\n        x = (attn @ v).transpose(1, 2).reshape(B, H, W, self.l_dim)\n        x = self.l_proj(x)\n        return x\n\n    def forward(self, x):\n        B, N, C = x.shape\n        H = W = int(N ** 0.5)\n\n        x = x.reshape(B, H, W, C)\n\n        if self.h_heads == 0:\n            x = self.lofi(x)\n            return x.reshape(B, N, C)\n\n        if self.l_heads == 0:\n            x = self.hifi(x)\n            return x.reshape(B, N, C)\n\n        hifi_out = self.hifi(x)\n        lofi_out = self.lofi(x)\n\n        x = torch.cat((hifi_out, lofi_out), dim=-1)\n        x = x.reshape(B, N, C)\n        return x<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u521b\u65b0\u70b9\uff1a \u2460\u9ad8\u9891\u6355\u6349\u5c40\u90e8\u7cbe\u7ec6\u6570\u636e\uff0c\u4f4e\u9891\u805a\u7126\u5168\u5c40\u7ed3\u6784 \u2461\u4e3a\u4e86\u533a\u5206\u4e0d\u540c\u9891\u7387\u7684\u72ec\u7279\u6027\u8d28\uff0c\u8ba9attention\u4e2d\u7684\u4e0d\u540c 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[6],"tags":[13,17,11],"_links":{"self":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts\/527"}],"collection":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/comments?post=527"}],"version-history":[{"count":0,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts\/527\/revisions"}],"wp:attachment":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/media?parent=527"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/categories?post=527"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/tags?post=527"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}