1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/CallSite.h"
#include <vector>
using namespace clang;
using namespace CodeGen;
namespace {
class CGNVCUDARuntime : public CGCUDARuntime {
private:
llvm::Type *IntTy, *SizeTy;
llvm::PointerType *CharPtrTy, *VoidPtrTy;
llvm::Constant *getSetupArgumentFn() const;
llvm::Constant *getLaunchFn() const;
public:
CGNVCUDARuntime(CodeGenModule &CGM);
void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
};
}
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
IntTy = Types.ConvertType(Ctx.IntTy);
SizeTy = Types.ConvertType(Ctx.getSizeType());
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
}
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
std::vector<llvm::Type*> Params;
Params.push_back(VoidPtrTy);
Params.push_back(SizeTy);
Params.push_back(SizeTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaSetupArgument");
}
llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
// cudaError_t cudaLaunch(char *)
std::vector<llvm::Type*> Params;
Params.push_back(CharPtrTy);
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaLaunch");
}
void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Build the argument value list and the argument stack struct type.
llvm::SmallVector<llvm::Value *, 16> ArgValues;
std::vector<llvm::Type *> ArgTypes;
for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
ArgValues.push_back(V);
assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
}
llvm::StructType *ArgStackTy = llvm::StructType::get(
CGF.getLLVMContext(), ArgTypes);
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
// Emit the calls to cudaSetupArgument
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
for (unsigned I = 0, E = Args.size(); I != E; ++I) {
llvm::Value *Args[3];
llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
Args[1] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
SizeTy, false);
Args[2] = CGF.Builder.CreateIntCast(
llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
SizeTy, false);
llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
CGF.EmitBlock(NextBlock);
}
// Emit the call to cudaLaunch
llvm::Constant *cudaLaunchFn = getLaunchFn();
llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
CGF.EmitBranch(EndBlock);
CGF.EmitBlock(EndBlock);
}
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}
|