From 82d20201d05e39b03c3844c85429cb8796ad0580 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 4 Sep 2018 15:13:17 +0000 Subject: [PATCH] [libomptarget][NVPTX] Drop dead code and data structures, NFCI. * cg and HasCancel in WorkDescr were never read and can be removed. * This eliminates the last use of priv in ThreadPrivateContext. * CounterGroup is unused afterwards. * Remove duplicate external declares in omptarget-nvptx.cu that are already in the header omptarget-nvptx.h. Differential Revision: https://reviews.llvm.org/D51622 llvm-svn: 341370 --- .../deviceRTLs/nvptx/src/counter_group.h | 51 ------------ .../deviceRTLs/nvptx/src/counter_groupi.h | 82 ------------------- .../deviceRTLs/nvptx/src/interface.h | 2 + .../deviceRTLs/nvptx/src/omptarget-nvptx.cu | 19 ----- .../deviceRTLs/nvptx/src/omptarget-nvptx.h | 12 --- .../deviceRTLs/nvptx/src/omptarget-nvptxi.h | 14 ---- .../deviceRTLs/nvptx/src/option.h | 7 -- .../deviceRTLs/nvptx/src/parallel.cu | 4 - 8 files changed, 2 insertions(+), 189 deletions(-) delete mode 100644 openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h delete mode 100644 openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h b/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h deleted file mode 100644 index b183871e73a5..000000000000 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h +++ /dev/null @@ -1,51 +0,0 @@ -//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -// -// Interface to implement OpenMP loop scheduling -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_ -#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_ - -#include "option.h" - -// counter group type for synchronizations -class omptarget_nvptx_CounterGroup { -public: - // getters and setters - INLINE Counter &Event() { return v_event; } - INLINE volatile Counter &Start() { return v_start; } - INLINE Counter &Init() { return v_init; } - - // Synchronization Interface - - INLINE void Clear(); // first time start=event - INLINE void Reset(); // init = first - INLINE void Init(Counter &priv); // priv = init - INLINE Counter Next(); // just counts number of events - - // set priv to n, to be used in later waitOrRelease - INLINE void Complete(Counter &priv, Counter n); - - // check priv and decide if we have to wait or can free the other warps - INLINE void Release(Counter priv, Counter current_event_value); - INLINE void WaitOrRelease(Counter priv, Counter current_event_value); - -private: - Counter v_event; // counter of events (atomic) - - // volatile is needed to force loads to read from global - // memory or L2 cache and see the write by the last master - volatile Counter v_start; // signal when events registered are finished - - Counter v_init; // used to initialize local thread variables -}; - -#endif /* SRC_COUNTER_GROUP_H_ */ diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h b/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h deleted file mode 100644 index f34de3e46b85..000000000000 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h +++ /dev/null @@ -1,82 +0,0 @@ -//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -// -// Interface implementation for OpenMP loop scheduling -// -//===----------------------------------------------------------------------===// - -#include "option.h" - -INLINE void omptarget_nvptx_CounterGroup::Clear() { - PRINT0(LD_SYNCD, "clear counters\n") - v_event = 0; - v_start = 0; - // v_init does not need to be reset (its value is dead) -} - -INLINE void omptarget_nvptx_CounterGroup::Reset() { - // done by master before entering parallel - ASSERT(LT_FUSSY, v_event == v_start, - "error, entry %lld !=start %lld at reset\n", P64(v_event), - P64(v_start)); - v_init = v_start; -} - -INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) { - PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv), - P64(v_start)); - priv = v_start; -} - -// just counts number of events -INLINE Counter omptarget_nvptx_CounterGroup::Next() { - Counter oldVal = atomicAdd(&v_event, (Counter)1); - PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n", - P64(&v_event), P64(oldVal), P64(oldVal + 1)); - - return oldVal; -} - -// set priv to n, to be used in later waitOrRelease -INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) { - PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n", - P64(&priv), P64(priv), P64(priv + n), n); - priv += n; -} - -INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv, - Counter current_event_value) { - if (priv - 1 == current_event_value) { - PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n", - P64(&v_start), P64(v_start), P64(priv)); - v_start = priv; - } -} - -// check priv and decide if we have to wait or can free the other warps -INLINE void -omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv, - Counter current_event_value) { - if (priv - 1 == current_event_value) { - PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n", - P64(&v_start), P64(v_start), P64(priv)); - v_start = priv; - } else { - PRINT(LD_SYNCD, - "Start waiting while start counter 0x%llx with val %lld < %lld\n", - P64(&v_start), P64(v_start), P64(priv)); - while (priv > v_start) { - // IDLE LOOP - // start is volatile: it will be re-loaded at each while loop - } - PRINT(LD_SYNCD, - "Done waiting as start counter 0x%llx with val %lld >= %lld\n", - P64(&v_start), P64(v_start), P64(priv)); - } -} diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h index d958a074ab0c..c3f9f702295b 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -20,6 +20,8 @@ #ifndef _INTERFACES_H_ #define _INTERFACES_H_ +#include "option.h" + //////////////////////////////////////////////////////////////////////////////// // OpenMP interface //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu index 0f7deae51e3e..fb28c4c589f9 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -21,25 +21,10 @@ extern __device__ omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; -extern __device__ __shared__ - omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; - extern __device__ omptarget_nvptx_Queue< omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT> omptarget_nvptx_device_simpleState[MAX_SM]; -extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext - *omptarget_nvptx_simpleThreadPrivateContext; - -// -// The team master sets the outlined function and its arguments in these -// variables to communicate with the workers. Since they are in shared memory, -// there is one copy of these variables for each kernel, instance, and team. -// -extern volatile __device__ __shared__ omptarget_nvptx_WorkFn - omptarget_nvptx_workFn; -extern __device__ __shared__ uint32_t execution_param; - //////////////////////////////////////////////////////////////////////////////// // init entry points //////////////////////////////////////////////////////////////////////////////// @@ -146,8 +131,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor(); // init team context currTeamDescr.InitTeamDescr(); - // init counters (copy start to init) - workDescr.CounterGroup().Reset(); } __syncthreads(); @@ -168,8 +151,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, newTaskDescr); // init thread private from init value - workDescr.CounterGroup().Init( - omptarget_nvptx_threadPrivateContext->Priv(threadId)); PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n", diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 7058d6050f43..f6e35a4b1208 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -26,7 +26,6 @@ #include // local includes -#include "counter_group.h" #include "debug.h" // debug #include "interface.h" // interfaces with omp, compiler, and user #include "option.h" // choices we have @@ -242,15 +241,10 @@ class omptarget_nvptx_WorkDescr { public: // access to data - INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; } INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; } - // init - INLINE void InitWorkDescr(); private: - omptarget_nvptx_CounterGroup cg; // for barrier (no other needed) omptarget_nvptx_TaskDescr masterTaskICV; - bool hasCancel; }; //////////////////////////////////////////////////////////////////////////////// @@ -347,9 +341,6 @@ public: INLINE uint16_t &SimdLimitForNextSimd(int tid) { return nextRegion.slim[tid]; } - // sync - INLINE Counter &Priv(int tid) { return priv[tid]; } - INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; } // schedule (for dispatch) INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; } INLINE int64_t &Chunk(int tid) { return chunk[tid]; } @@ -377,8 +368,6 @@ private: // simd limit uint16_t slim[MAX_THREADS_PER_TEAM]; } nextRegion; - // sync - Counter priv[MAX_THREADS_PER_TEAM]; // schedule (for dispatch) kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for int64_t chunk[MAX_THREADS_PER_TEAM]; @@ -469,7 +458,6 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId); // inlined implementation //////////////////////////////////////////////////////////////////////////////// -#include "counter_groupi.h" #include "omptarget-nvptxi.h" #include "supporti.h" diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h index 086f4c5d2c95..1cca8201b35f 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h @@ -168,31 +168,17 @@ omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) { topTaskDescr[tid] = NULL; // no num threads value has been pushed nextRegion.tnum[tid] = 0; - // priv counter init to zero - priv[tid] = 0; // the following don't need to be init here; they are init when using dyn // sched // current_Event, events_Number, chunk, num_Iterations, schedule } -//////////////////////////////////////////////////////////////////////////////// -// Work Descriptor -//////////////////////////////////////////////////////////////////////////////// - -INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() { - cg.Clear(); // start and stop to zero too - // threadsInParallelTeam does not need to be init (done in start parallel) - hasCancel = FALSE; -} - //////////////////////////////////////////////////////////////////////////////// // Team Descriptor //////////////////////////////////////////////////////////////////////////////// INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() { levelZeroTaskDescr.InitLevelZeroTaskDescr(); - workDescrForActiveParallel.InitWorkDescr(); - // omp_init_lock(criticalLock); } //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h index 43172ad45d07..791d6f3917fe 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h @@ -46,13 +46,6 @@ // algo options //////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////// -// data options -//////////////////////////////////////////////////////////////////////////////// - -// decide if counters are 32 or 64 bit -#define Counter unsigned long long - //////////////////////////////////////////////////////////////////////////////// // misc options (by def everythig here is device) //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 6157aa20794f..f0ba41bd18ec 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -306,8 +306,6 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor(); workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr, CudaThreadsForParallel / NumLanes); - // init counters (copy start to init) - workDescr.CounterGroup().Reset(); } // All workers call this function. Deactivate those not needed. @@ -345,8 +343,6 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId, newTaskDescr); // init private from int value - workDescr.CounterGroup().Init( - omptarget_nvptx_threadPrivateContext->Priv(threadId)); PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n", -- GitLab