{"id":459,"date":"2022-06-30T03:31:05","date_gmt":"2022-06-30T03:31:05","guid":{"rendered":"https:\/\/blog.liguanxin.cn\/?p=459"},"modified":"2022-06-30T03:31:05","modified_gmt":"2022-06-30T03:31:05","slug":"mindspore%e7%ae%97%e5%ad%90%e5%bc%80%e5%8f%91-cuda%e4%bb%a3%e7%a0%81%e4%bc%98%e5%8c%96","status":"publish","type":"post","link":"https:\/\/blog.liguanxin.cn\/index.php\/2022\/06\/30\/mindspore%e7%ae%97%e5%ad%90%e5%bc%80%e5%8f%91-cuda%e4%bb%a3%e7%a0%81%e4%bc%98%e5%8c%96\/","title":{"rendered":"MindSpore\u7b97\u5b50\u5f00\u53d1\u2014\u2014CUDA\u4ee3\u7801\u4f18\u5316"},"content":{"rendered":"<p>CUDA\u7f16\u7a0b\u5165\u95e8\uff1a<a href=\"https:\/\/face2ai.com\/program-blog\/\">\u8c2d\u5347\u7684\u535a\u5ba2<\/a><\/p>\n<p>\u5bf9\u4e8ecuda\u7f16\u7a0b\uff0c\u6838\u5fc3\u662f\u5982\u4f55\u9ad8\u6548\u7387\u5730\u5229\u7528\u591a\u7ebf\u7a0b\uff0c\u6bcf\u4e2a\u7ebf\u7a0b\u5b8c\u6210\u4e00\u4e2a\u5c0f\u4efb\u52a1\uff0c\u6700\u7ec8\u5b9e\u73b0\u5b8c\u6210\u4e00\u4e2a\u7b97\u5b50\u4efb\u52a1\u3002<br \/>\n<img src=\"https:\/\/blog.liguanxin.cn\/wp-content\/uploads\/2022\/06\/4.png\" alt=\"\" \/><\/p>\n<p>\u4e00\u4e2a\u6838\u51fd\u6570\u53ea\u80fd\u6709\u4e00\u4e2agrid\uff0c\u4e00\u4e2agrid\u53ef\u4ee5\u6709\u5f88\u591a\u4e2a\u5757\uff0c\u6bcf\u4e2a\u5757\u53ef\u4ee5\u6709\u5f88\u591a\u7684\u7ebf\u7a0b\u3002<\/p>\n<p>\u6bcf\u4e2a\u7ebf\u7a0b\u901a\u8fc7\u7ebf\u7a0b\u53f7\u552f\u4e00\u6807\u8bc6\uff0c\u7ebf\u7a0b\u53f7\u901a\u8fc7\u4ee5\u4e0b\u65b9\u5f0f\u83b7\u5f97\uff1a<\/p>\n<pre><code class=\"language-c++\">    int64_t ourOutputPoint = threadIdx.x + blockIdx.x * blockDim.x;\n    int64_t plane = blockIdx.y;\n    int64_t batch = blockIdx.z;<\/code><\/pre>\n<p>\u56e0\u4e3a\u4e00\u4e2agrid\u91cc\u9762\u7684block\u662f\u4e09\u7ef4\u7684\uff0c\u5bf9\u4e8eNCHW\u7c7b\u578b\u7684Tensor\uff0c\u901a\u5e38pytorch\u4f1a\u628aN\u7ef4\u653e\u5728blockIdx.z\uff0cC\u7ef4\u653e\u5728blockIdx.y\uff0c\u4ece\u800c\u9ad8\u6548\u5730\u5229\u7528\u7ebf\u7a0b\u3002<\/p>\n<hr 
\/>\n<p>\u58f0\u660egrid\u548cblock\u7684\u65f6\u5019\uff0c\u6709\u53ef\u80fd\u56e0\u4e3ashape\u592a\u5927\u5bfc\u81f4\u58f0\u660e\u7684\u7ebf\u7a0b\u5757\u4e0d\u8db3\uff0c\u5bfc\u81f4\u62a5\u9519\uff0c\u4e8e\u662f\u8981\u8bbe\u7f6e\u4e00\u4e2a\u6700\u5927block\u548cthreads\u6570\uff1a(\u4ee3\u7801\u6765\u81eapytorch)<\/p>\n<pre><code class=\"language-c++\">    dim3 grid(\n      (H*W + 127) \/ 128, \/\/ ceil(outputPlaneSize \/ 128)\n      C,\n      N);\n    dim3 block(H*W &gt; 128 ? 128 : H*W);<\/code><\/pre>\n<hr \/>\n<p>\u6709\u53ef\u80fd\u56e0\u4e3a\u5143\u7d20\u592a\u591a\uff08\u4ebf\u4e07\u7ea7\uff09\uff0c\u5bfc\u81f4\u7ebf\u7a0b\u6570\u4e0d\u8db3\u4ee5\u5b8c\u6210\u5982\u6b64\u591a\u4efb\u52a1\uff0c\u4e8e\u662f\u53ef\u4ee5\u8ba9\u6bcf\u4e2a\u7ebf\u7a0b\u5b8c\u6210\u4e24\u4e2a\u53ca\u4ee5\u4e0a\u7684\u4efb\u52a1\uff1a\uff08\u4ee3\u7801\u6765\u81eamindspore\uff09<\/p>\n<pre><code class=\"language-c++\">template &lt;&gt;\n__global__ void Fractionalmaxpool3dwithfixedksize) {\n  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos &lt; outer_size; pos += blockDim.x * gridDim.x) {\n    \u5355\u4e2a\u7ebf\u7a0b\u7684\u4efb\u52a1\n  }\n  return;\n}<\/code><\/pre>\n<hr \/>\n<p>\u5728\u8ba1\u7b97\u67d0\u4e9b\u7b97\u5b50\u65f6\uff0c\u7531\u4e8eGPU\u7684\u5730\u5740\u7a7a\u95f4\u4e0d\u4f1a\u521d\u59cb\u5316\u4e3a0\uff0c\u6240\u4ee5\u53ef\u80fd\u9700\u8981\u624b\u52a8\u521d\u59cb\u5316\uff1a\uff08\u4ee3\u7801\u6765\u81eamindspore\uff09<\/p>\n<pre><code class=\"language-c++\">template &lt;typename T&gt;\n__global__ void InitOutput(T *output, const int64_t outer_size) {\n  T zero = 0;\n  for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id &lt; outer_size; id += blockDim.x * gridDim.x) {\n    output[id] = zero;\n  }\n  return;\n}<\/code><\/pre>\n<hr 
\/>\n<p>\u5728\u591a\u4e2a\u7ebf\u7a0b\u4e89\u593a\u540c\u4e00\u4e2a\u5730\u5740\u65f6\uff0c\u5584\u7528\u539f\u5b50\u52a0\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>CUDA\u7f16\u7a0b\u5165\u95e8\uff1a\u8c2d\u5347\u7684\u535a\u5ba2 \u5bf9\u4e8ecuda\u7f16\u7a0b\uff0c\u6838\u5fc3\u662f\u5982\u4f55\u9ad8\u6548\u7387\u5730\u5229\u7528\u591a\u7ebf\u7a0b\uff0c\u6bcf\u4e2a\u7ebf\u7a0b\u5b8c\u6210\u4e00\u4e2a\u5c0f\u4efb\u52a1\uff0c\u6700\u7ec8\u5b9e\u73b0 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[29],"tags":[30],"_links":{"self":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts\/459"}],"collection":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/comments?post=459"}],"version-history":[{"count":0,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/posts\/459\/revisions"}],"wp:attachment":[{"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/media?parent=459"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/categories?post=459"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.liguanxin.cn\/index.php\/wp-json\/wp\/v2\/tags?post=459"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}