Skip to content

Commit

Permalink
Merge pull request #340 from jjfumero/feat/api/info
Browse files Browse the repository at this point in the history
New calls to profile the Execution Plan: withThreadInfo and withPrintKernel
  • Loading branch information
jjfumero authored Feb 26, 2024
2 parents 77dfc9b + 3f94c3a commit c36582a
Show file tree
Hide file tree
Showing 30 changed files with 364 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,27 @@ void disableProfiler(ProfilerMode profilerMode) {
taskGraph.disableProfiler(profilerMode);
}

public void withConcurrentDevices() {
void withConcurrentDevices() {
taskGraph.withConcurrentDevices();
}

public void withoutConcurrentDevices() {
void withoutConcurrentDevices() {
taskGraph.withoutConcurrentDevices();
}

/**
 * Forwards the call to {@code taskGraph.withThreadInfo()}.
 * NOTE(review): presumably switches on thread-information reporting for this graph — confirm
 * against the {@code taskGraph} implementation.
 */
void withThreadInfo() {
taskGraph.withThreadInfo();
}

/**
 * Forwards the call to {@code taskGraph.withoutThreadInfo()}.
 * NOTE(review): presumably switches off thread-information reporting for this graph — confirm
 * against the {@code taskGraph} implementation.
 */
void withoutThreadInfo() {
taskGraph.withoutThreadInfo();
}

/**
 * Forwards the call to {@code taskGraph.withPrintKernel()}.
 * NOTE(review): presumably enables printing of the generated kernel source — confirm against
 * the {@code taskGraph} implementation.
 */
void withPrintKernel() {
taskGraph.withPrintKernel();
}

/**
 * Forwards the call to {@code taskGraph.withoutPrintKernel()}.
 * NOTE(review): presumably disables printing of the generated kernel source — confirm against
 * the {@code taskGraph} implementation.
 */
void withoutPrintKernel() {
taskGraph.withoutPrintKernel();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -897,12 +897,27 @@ void disableProfiler(ProfilerMode profilerMode) {
taskGraphImpl.disableProfiler(profilerMode);
}

public void withConcurrentDevices() {
void withConcurrentDevices() {
taskGraphImpl.withConcurrentDevices();
}

public void withoutConcurrentDevices() {
void withoutConcurrentDevices() {
taskGraphImpl.withoutConcurrentDevices();
}

/**
 * Forwards the call to {@code taskGraphImpl.withThreadInfo()}.
 * NOTE(review): presumably switches on thread-information reporting — confirm against the
 * {@code taskGraphImpl} implementation.
 */
void withThreadInfo() {
taskGraphImpl.withThreadInfo();
}

/**
 * Forwards the call to {@code taskGraphImpl.withoutThreadInfo()}.
 * NOTE(review): presumably switches off thread-information reporting — confirm against the
 * {@code taskGraphImpl} implementation.
 */
void withoutThreadInfo() {
taskGraphImpl.withoutThreadInfo();
}

/**
 * Forwards the call to {@code taskGraphImpl.withPrintKernel()}.
 * NOTE(review): presumably enables printing of the generated kernel source — confirm against
 * the {@code taskGraphImpl} implementation.
 */
void withPrintKernel() {
taskGraphImpl.withPrintKernel();
}

/**
 * Forwards the call to {@code taskGraphImpl.withoutPrintKernel()}.
 * NOTE(review): presumably disables printing of the generated kernel source — confirm against
 * the {@code taskGraphImpl} implementation.
 */
void withoutPrintKernel() {
taskGraphImpl.withoutPrintKernel();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,26 @@ public TornadoExecutionPlan clearProfiles() {
return this;
}

/**
 * Calls {@code withThreadInfo()} on the {@code tornadoExecutor} of this plan.
 * NOTE(review): presumably enables thread-information reporting for later executions — confirm.
 *
 * @return this execution plan, so that calls can be chained
 */
public TornadoExecutionPlan withThreadInfo() {
tornadoExecutor.withThreadInfo();
return this;
}

/**
 * Calls {@code withoutThreadInfo()} on the {@code tornadoExecutor} of this plan.
 * NOTE(review): presumably disables thread-information reporting for later executions — confirm.
 *
 * @return this execution plan, so that calls can be chained
 */
public TornadoExecutionPlan withoutThreadInfo() {
tornadoExecutor.withoutThreadInfo();
return this;
}

/**
 * Calls {@code withPrintKernel()} on the {@code tornadoExecutor} of this plan.
 * NOTE(review): presumably enables printing of the generated kernel source — confirm.
 *
 * @return this execution plan, so that calls can be chained
 */
public TornadoExecutionPlan withPrintKernel() {
tornadoExecutor.withPrintKernel();
return this;
}

/**
 * Calls {@code withoutPrintKernel()} on the {@code tornadoExecutor} of this plan.
 * NOTE(review): presumably disables printing of the generated kernel source — confirm.
 *
 * @return this execution plan, so that calls can be chained
 */
public TornadoExecutionPlan withoutPrintKernel() {
tornadoExecutor.withoutPrintKernel();
return this;
}

static class TornadoExecutor {

private List<ImmutableTaskGraph> immutableTaskGraphList;
Expand Down Expand Up @@ -497,5 +517,21 @@ void enableProfiler(ProfilerMode profilerMode) {
/**
 * Calls {@code disableProfiler(profilerMode)} on every task graph in
 * {@code immutableTaskGraphList}, in list order.
 *
 * @param profilerMode
 *     mode passed unchanged to each graph
 */
void disableProfiler(ProfilerMode profilerMode) {
    for (ImmutableTaskGraph graph : immutableTaskGraphList) {
        graph.disableProfiler(profilerMode);
    }
}

/**
 * Calls {@code withThreadInfo()} on every task graph in {@code immutableTaskGraphList},
 * in list order.
 */
void withThreadInfo() {
    for (ImmutableTaskGraph graph : immutableTaskGraphList) {
        graph.withThreadInfo();
    }
}

/**
 * Calls {@code withoutThreadInfo()} on every task graph in {@code immutableTaskGraphList},
 * in list order.
 */
void withoutThreadInfo() {
    for (ImmutableTaskGraph graph : immutableTaskGraphList) {
        graph.withoutThreadInfo();
    }
}

/**
 * Calls {@code withPrintKernel()} on every task graph in {@code immutableTaskGraphList},
 * in list order.
 */
void withPrintKernel() {
    for (ImmutableTaskGraph graph : immutableTaskGraphList) {
        graph.withPrintKernel();
    }
}

/**
 * Calls {@code withoutPrintKernel()} on every task graph in {@code immutableTaskGraphList},
 * in list order.
 */
void withoutPrintKernel() {
    for (ImmutableTaskGraph graph : immutableTaskGraphList) {
        graph.withoutPrintKernel();
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,12 @@ public interface TornadoTaskGraphInterface extends ProfileInterface {
void withConcurrentDevices();

void withoutConcurrentDevices();

void withThreadInfo();

void withoutThreadInfo();

void withPrintKernel();

void withoutPrintKernel();
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,8 @@ public interface TaskMetaDataInterface {
int getDeviceIndex();

void setDevice(TornadoDevice device);

boolean isPrintKernelEnabled();

void setPrintKernelFlag(boolean printKernelEnabled);
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
public class OCLCodeCache {

private static final String FALSE = "False";
private static final String TRUE = "True";
private static final int SPIRV_MAGIC_NUMBER = 119734787;
private static final String OPENCL_SOURCE_SUFFIX = ".cl";
private final boolean OPENCL_CACHE_ENABLE = Boolean.parseBoolean(getProperty("tornado.opencl.codecache.enable", FALSE));
Expand Down Expand Up @@ -478,15 +477,17 @@ private String getDeviceVendor() {
return deviceContext.getPlatformContext().getPlatform().getVendor().toLowerCase().split("\\(")[0];
}

OCLInstalledCode installFPGASource(String id, String entryPoint, byte[] source, boolean shouldCompile) { // TODO Override this method for each FPGA backend
OCLInstalledCode installFPGASource(String id, String entryPoint, byte[] source, boolean shouldCompile, boolean printKernel) { // TODO Override this method for each FPGA backend
String[] compilationCommand;
final String inputFile = fpgaSourceDir + entryPoint + OPENCL_SOURCE_SUFFIX;
final String outputFile = fpgaSourceDir + entryPoint;
File fpgaBitStreamFile = new File(outputFile);

appendSourceToFile(source, entryPoint);

RuntimeUtilities.maybePrintSource(source);
if (printKernel) {
RuntimeUtilities.dumpKernel(source);
}

String[] commandRename;
String[] linkCommand = null;
Expand Down Expand Up @@ -583,7 +584,9 @@ public OCLInstalledCode installSource(TaskMetaData meta, String id, String entry
appendSourceToFile(source, entryPoint);
}

RuntimeUtilities.maybePrintSource(source);
if (meta.isPrintKernelEnabled()) {
RuntimeUtilities.dumpKernel(source);
}

final long t0 = System.nanoTime();
program.build(meta.getCompilerFlags());
Expand All @@ -599,16 +602,16 @@ public OCLInstalledCode installSource(TaskMetaData meta, String id, String entry
debug(log);
}
final Path outDir = resolveLogDirectory();
final String identifier = id + "-" + entryPoint;
final String identifier = STR."\{id}-\{entryPoint}";
error("Unable to compile task %s: check logs at %s/%s.log", identifier, outDir.toAbsolutePath(), identifier);

File file = new File(outDir + "/" + identifier + ".log");
File file = new File(STR."\{outDir}/\{identifier}.log");
try (FileOutputStream fos = new FileOutputStream(file)) {
fos.write(log.getBytes());
} catch (IOException e) {
error("unable to write error log: ", e.getMessage());
}
file = new File(outDir + "/" + identifier + OPENCL_SOURCE_SUFFIX);
file = new File(STR."\{outDir}/\{identifier}\{OPENCL_SOURCE_SUFFIX}");
try (FileOutputStream fos = new FileOutputStream(file)) {
fos.write(source);
} catch (IOException e) {
Expand All @@ -633,13 +636,13 @@ public OCLInstalledCode installSource(TaskMetaData meta, String id, String entry
if (meta.shouldPrintCompileTimes()) {
debug("compile: kernel %s opencl %.9f\n", entryPoint, (t1 - t0) * 1e-9f);
}
cache.put(id + "-" + entryPoint, code);
cache.put(STR."\{id}-\{entryPoint}", code);

// BUG Apple does not seem to like implementing the OpenCL spec
// properly, this causes a sigfault.
// properly, this causes a segmentation fault (SIGSEGV).
if ((OPENCL_CACHE_ENABLE || OPENCL_DUMP_BINS) && !deviceContext.getPlatformContext().getPlatform().getVendor().equalsIgnoreCase("Apple")) {
final Path outDir = resolveCacheDirectory();
program.dumpBinaries(outDir.toAbsolutePath() + "/" + entryPoint);
program.dumpBinaries(STR."\{outDir.toAbsolutePath()}/\{entryPoint}");
}
} else {
warn("\tunable to compile %s", entryPoint);
Expand Down Expand Up @@ -672,11 +675,11 @@ private OCLInstalledCode installBinary(String id, String entryPoint, byte[] bina
long afterLoad = (TornadoOptions.TIME_IN_NANOSECONDS) ? System.nanoTime() : System.currentTimeMillis();

if (PRINT_LOAD_TIME) {
System.out.println("Binary load time: " + (afterLoad - beforeLoad) + (TornadoOptions.TIME_IN_NANOSECONDS ? " ns" : " ms") + " \n");
System.out.println(STR."Binary load time: \{afterLoad - beforeLoad}\{TornadoOptions.TIME_IN_NANOSECONDS ? " ns" : " ms"} \n");
}

if (program == null) {
throw new OCLException("unable to load binary for " + entryPoint);
throw new OCLException(STR."unable to load binary for \{entryPoint}");
}

program.build("");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,16 +394,19 @@ public int readBuffer(long bufferId, long offset, long bytes, long hostPointer,
: null), EventDescriptor.DESC_READ_SEGMENT, queue);
}

/**
 * Enqueues a barrier on {@code queue} and registers the resulting event.
 *
 * <p>The wait list handed to the queue is {@code oclEventPool.waitEventsBuffer} when
 * {@code oclEventPool.serialiseEvents(events, queue)} returns {@code true}, and {@code null}
 * otherwise. The barrier is enqueued regardless of the OpenCL version; only the event
 * registration is skipped for versions below 120.
 *
 * @param events
 *     event ids passed to {@code serialiseEvents}
 * @return {@code -1} when {@code queue.getOpenclVersion()} is below 120, otherwise the value
 *     returned by {@code oclEventPool.registerEvent} for the barrier event
 */
@Override
public int enqueueBarrier(int[] events) {
    long barrierEvent = queue.enqueueBarrier(oclEventPool.serialiseEvents(events, queue) ? oclEventPool.waitEventsBuffer : null);
    if (queue.getOpenclVersion() < 120) {
        return -1;
    }
    return oclEventPool.registerEvent(barrierEvent, EventDescriptor.DESC_SYNC_BARRIER, queue);
}

/**
 * Enqueues a marker on {@code queue} and registers the resulting event.
 *
 * <p>The wait list handed to the queue is {@code oclEventPool.waitEventsBuffer} when
 * {@code oclEventPool.serialiseEvents(events, queue)} returns {@code true}, and {@code null}
 * otherwise. The marker is enqueued regardless of the OpenCL version; only the event
 * registration is skipped for versions below 120.
 *
 * @param events
 *     event ids passed to {@code serialiseEvents}
 * @return {@code -1} when {@code queue.getOpenclVersion()} is below 120, otherwise the value
 *     returned by {@code oclEventPool.registerEvent} for the marker event
 */
@Override
public int enqueueMarker(int[] events) {
    long markerEvent = queue.enqueueMarker(oclEventPool.serialiseEvents(events, queue) ? oclEventPool.waitEventsBuffer : null);
    if (queue.getOpenclVersion() < 120) {
        return -1;
    }
    return oclEventPool.registerEvent(markerEvent, EventDescriptor.DESC_SYNC_MARKER, queue);
}

@Override
public void reset() {
oclEventPool.reset();
codeCache.reset();
Expand Down Expand Up @@ -483,13 +486,15 @@ public void retainEvent(int localEventId) {
oclEventPool.retainEvent(localEventId);
}

/**
 * Maps a local event id to an {@code Event} object.
 *
 * @param event
 *     local event id; {@code -1} stands for "no event"
 * @return {@code EMPTY_EVENT} when {@code event} is {@code -1}; otherwise a new
 *     {@code OCLEvent} built from the pool descriptor's name description, {@code queue},
 *     the id itself and the value of {@code oclEventPool.getOCLEvent(event)}
 */
@Override
public Event resolveEvent(int event) {
    return event == -1
            ? EMPTY_EVENT
            : new OCLEvent(oclEventPool.getDescriptor(event).getNameDescription(), queue, event, oclEventPool.getOCLEvent(event));
}

/**
 * Delegates to {@code queue.flush()}.
 */
@Override
public void flush() {
queue.flush();
}
Expand All @@ -498,10 +503,12 @@ public void finish() {
queue.finish();
}

/**
 * Delegates to {@code queue.flushEvents()}.
 */
@Override
public void flushEvents() {
queue.flushEvents();
}

/**
 * Reports kernel availability as seen by the code cache.
 *
 * @return the value returned by {@code codeCache.isKernelAvailable()}
 */
@Override
public boolean isKernelAvailable() {
return codeCache.isKernelAvailable();
}
Expand All @@ -510,15 +517,18 @@ public OCLInstalledCode installCode(OCLCompilationResult result) {
return installCode(result.getMeta(), result.getId(), result.getName(), result.getTargetCode());
}

/**
 * Installs kernel source through the code cache.
 *
 * <p>The entry point is first passed through {@code checkKernelName}; the name it returns is
 * the one handed to {@code codeCache.installSource}.
 *
 * @param meta
 *     task metadata forwarded to the code cache
 * @param id
 *     identifier forwarded to the code cache
 * @param entryPoint
 *     kernel entry-point name before {@code checkKernelName} is applied
 * @param code
 *     source bytes forwarded to the code cache
 * @return the installed code returned by {@code codeCache.installSource}
 */
@Override
public OCLInstalledCode installCode(TaskMetaData meta, String id, String entryPoint, byte[] code) {
    final String kernelName = checkKernelName(entryPoint);
    return codeCache.installSource(meta, id, kernelName, code);
}

public OCLInstalledCode installCode(String id, String entryPoint, byte[] code, boolean shouldCompile) {
return codeCache.installFPGASource(id, entryPoint, code, shouldCompile);
@Override
public OCLInstalledCode installCode(String id, String entryPoint, byte[] code, boolean shouldCompile, boolean printKernel) {
return codeCache.installFPGASource(id, entryPoint, code, shouldCompile, printKernel);
}

@Override
public boolean isCached(String id, String entryPoint) {
entryPoint = checkKernelName(entryPoint);
return codeCache.isCached(id + "-" + entryPoint);
Expand All @@ -535,6 +545,7 @@ public OCLInstalledCode getInstalledCode(String id, String entryPoint) {
return codeCache.getInstalledCode(id, entryPoint);
}

/**
 * Exposes the code cache used by this context.
 *
 * @return the {@code codeCache} field of this instance
 */
@Override
public OCLCodeCache getCodeCache() {
return this.codeCache;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
Expand Down Expand Up @@ -42,7 +42,7 @@ public interface OCLDeviceContextInterface extends TornadoDeviceContext {

OCLInstalledCode getInstalledCode(String id, String entryPoint);

OCLInstalledCode installCode(String id, String entryPoint, byte[] code, boolean shouldCompile);
OCLInstalledCode installCode(String id, String entryPoint, byte[] code, boolean shouldCompile, boolean printKernel);

OCLInstalledCode installCode(OCLCompilationResult result);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ private TornadoInstalledCode compileTask(SchedulableTask task) {
OCLInstalledCode installedCode;
if (OCLBackend.isDeviceAnFPGAAccelerator(deviceContext)) {
// A) for FPGA
installedCode = deviceContext.installCode(result.getId(), result.getName(), result.getTargetCode(), task.shouldCompile());
installedCode = deviceContext.installCode(result.getId(), result.getName(), result.getTargetCode(), task.shouldCompile(), task.meta().isPrintKernelEnabled());
} else {
// B) for CPU multi-core or GPU
installedCode = deviceContext.installCode(result);
Expand Down Expand Up @@ -318,7 +318,7 @@ private TornadoInstalledCode compilePreBuiltTask(SchedulableTask task) {
OCLInstalledCode installedCode;
if (OCLBackend.isDeviceAnFPGAAccelerator(deviceContext)) {
// A) for FPGA
installedCode = deviceContext.installCode(task.getId(), executable.getEntryPoint(), source, task.shouldCompile());
installedCode = deviceContext.installCode(task.getId(), executable.getEntryPoint(), source, task.shouldCompile(), task.meta().isPrintKernelEnabled());
} else {
// B) for CPU multi-core or GPU
installedCode = deviceContext.installCode(executable.meta(), task.getId(), executable.getEntryPoint(), source);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* This file is part of Tornado: A heterogeneous programming framework:
* This file is part of Tornado: A heterogeneous programming framework:
* https://github.com/beehive-lab/tornadovm
*
* Copyright (c) 2013-2020, APT Group, Department of Computer Science,
Expand All @@ -12,7 +12,7 @@
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
Expand Down Expand Up @@ -71,7 +71,7 @@ public static void main(String[] args) {
OCLInstalledCode code = codeCache.installSource(meta, "saxpy", "saxpy", source);

String generatedSourceCode = code.getGeneratedSourceCode();
if (TornadoOptions.PRINT_SOURCE) {
if (meta.isPrintKernelEnabled()) {
System.out.println("Compiled code: " + generatedSourceCode);
}
}
Expand Down
Loading

0 comments on commit c36582a

Please sign in to comment.