# -*- coding: utf-8 -*-
################################ Begin license #################################
# Copyright (C) Laboratory of Imaging technologies,
# Faculty of Electrical Engineering,
# University of Ljubljana.
#
# This file is part of PyXOpto.
#
# PyXOpto is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PyXOpto is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyXOpto. If not, see <https://www.gnu.org/licenses/>.
################################# End license ##################################
from typing import List, Tuple, Dict
import os
import time
import ctypes
import numpy as np
import pyopencl as cl
import jinja2
from xopto import ROOT_PATH, USER_DATA_PATH
from xopto.cl import clrng, clinfo, cloptions
from xopto.mccyl.mcobject import McObject
from xopto.mccyl import cltypes
from xopto.mccyl import mctypes
from xopto.mccyl import mclayer
from xopto.mccyl import mcsurface
from xopto.mccyl import mcdetector
from xopto.mccyl import mctrace
from xopto.mccyl import mcsv
from xopto.mccyl import mcfluence
from xopto.mccyl import mcsource
from xopto.mccyl import mcpf
from xopto.mccyl import mcprogress
from xopto.mccyl import mcoptions
from xopto.mccyl.mcutil.buffer import NumpyAllocators
from xopto import MCBASE_PATH, MCCYL_PATH
from xopto.mcbase import mcsrc
from xopto.mcbase import mcworker
from xopto.mcvox.mcfluence import FLUENCE_TYPE
MC_RESULT_TYPE = Tuple[
mctrace.Trace or None,
mcfluence.FLUENCE_TYPE or None,
mcdetector.Detectors or None
]
# Instructions for fusing OpenCL source files into a single file.
_MCML_FUSION_SPEC = {
'inputs': [
os.path.join(MCBASE_PATH, 'kernel', 'mcbase.template.h'),
os.path.join(MCBASE_PATH, 'kernel', 'mcbase.template.c'),
os.path.join(MCBASE_PATH, 'kernel', 'mcsv.template.h'),
os.path.join(MCBASE_PATH, 'kernel', 'mcsv.template.c'),
os.path.join(MCCYL_PATH, 'kernel', 'mccyl.template.h'),
os.path.join(MCCYL_PATH, 'kernel', 'mccyl.template.c'),
],
'output': os.path.join(MCCYL_PATH, 'kernel', 'mccyl.fusion.template.h')
}
[docs]class Mc(mcworker.ClWorkerStandardBufferLutMixin, mcworker.ClWorkerRngMixin,
mcworker.ClWorker):
MC_CL_TEMPLATE_FILE = os.path.join(
ROOT_PATH, 'mccyl', 'kernel', 'mc.template.h')
''' Location of the OpenCL source template file. '''
AUTOGEN_PATH = os.path.join(USER_DATA_PATH, 'mccyl', 'auto')
''' Directory that will be used to save the rendered OpenCL source code. '''
if not os.path.exists(AUTOGEN_PATH):
try:
os.makedirs(AUTOGEN_PATH, exist_ok=True)
except OSError:
pass
def __init__(self, layers: mclayer.Layers or List[mclayer.Layer],
source: mcsource.Source,
detectors: mcdetector.Detectors = None,
trace: mctrace.Trace = None,
fluence: mcfluence.FLUENCE_TYPE = None,
surface: mcsurface.SurfaceLayouts = None,
types: mctypes.McDataTypesBase = mctypes.McDataTypesSingle,
options: List[mcoptions.McOption] = None,
rnginit: np.uint64 = None,
cl_devices: str or cl.Device or List[cl.Device] or
cl.Context or cl.CommandQueue = None,
cl_build_options: List[str or cloptions.ClBuildOption] = None,
cl_profiling : bool = False):
'''
Cylindrical Multilayer Monte Carlo light propagation simulator object
constructor.
The object uses existing OpenCL kernel code in the mccyl.h and mccyl.c
files and adds additional code and kernel configuration options
generated by the source, detector, trace, fluence and other objects
passed to the constructor.
The generated OpenCL code is compiled just before the first call to
the kernel/simulation invoked by the run method.
All publically accessible properties of the objects
(layers, source, detector, trace, fluence, ...) can be changed
between simulations.
Parameters
----------
layers: mclayer.Layers or list[mclayer.Layer]
A python list of layers. The list must contain at
least two layers. The first layer represent the
medium that surrounds the sample and the remaining layers represent
the sample.
The layers must be specified with monotonically decreasing diameter.
Note that the diameter of the outermost (first) layer that
represents the surrounding medium will be automatically set to
infinity.
source: mcsource.Source
One of the available photon packet source object:
- Line - Infinity thin beam.
- UniformBeam - Collimated uniform beam.
- GaussianBeam - Collimated beam with a Gaussian crosssection.
- IsotropicPoint - Isotropic point source.
See the :py:mod:`xopto.mcml.mcsource` module for further sources.
detectors: mcdetector.Detectors
A set of detectors for the outer sample surface and
specular reflections.
The Detector object will set the following simulator options:
- MC_USE_DETECTORS
- MC_USE_OUTER_DETECTOR
- MC_USE_SPECULAR_DETECTOR
- MC_USE_FP_LUT (if a lookup table is used)
trace: mctrace.Trace
Trace object that can collect the various states of the photon
packet including the initial, all intermediate and/or the final.
The Trace object will set the following simulator options:
- MC_USE_TRACE
fluence: mcfluence.Fluence or mcfluence.FluenceCyl or mcfluence.FluenceRz
Fluence object that collects the energy deposited by the photon
packets on a regular 3D grid.
The Fluence object will set the following simulator options:
- MC_USE_FLUENCE
surface: mcsurface.SurfaceLayouts
Surface geometry that is applied to the outer sample surface
and / or internal sample layers.
The SurfaceLayouts object will set the following simulator options
as required:
- MC_USE_OUTER_SURFACE_LAYOUT
- MC_USE_INTERNAL_SURFACE_LAYOUT
- MC_USE_SURFACE_LAYOUTS
types: mctypes.McDataTypes
A class that defines all the simulator data types. Use one
of the following predefined type classes derived from
mctypes.McDataTypes:
- mctypes.McDataTypesSingle
- 32-bit size type
- 32-bit default integers,
- 64-bit detector accumulators,
- 32-bit single precision floating-point arithmetics,
- 32-bit photon packet counter (maximum number of photon
packets per OpenCL kernel call limited to 4,294,967,295)
- mctypes.McDataTypesDouble
- 32-bit size type
- 32-bit default integers,
- 64-bit detector accumulators,
- 64-bit double precision floating-point arithmetics,
- 32-bit photon packet counter (maximum number of photon
packets per OpenCL kernel call limited to 4,294,967,295)
- mctypes.McDataTypesSingleCnt64
- 64-bit size type
- 32-bit default integers,
- 64-bit detector accumulators,
- 32-bit single precision floating-point arithmetics,
- 64-bit photon packet counter (maximum number of photon
packets per OpenCL kernel call virtually unlimited)
- mctypes.McDataTypesDoubleCnt64
- 64-bit size type
- 32-bit default integers,
- 64-bit detector accumulators,
- 64-bit double precision floating-point arithmetics,
- 64-bit photon packet counter (maximum number of photon
packets per OpenCL kernel call virtually unlimited)
A custom combination of data types can be defined by the
:py:meth:`mctypes.McDataTypes.custom` method.
options: List[mcoptions.McOption]
A list of compile-time options for the simulator core.
- mcoptions.McUseNativeMath: mcoptions.McUseNativeMath.on or mcoptions.McUseNativeMath.off
Use of native math functions.
Native functions are usually faster but less accurate.
Default value is mcoptions.McUseNativeMath.off.
- mcoptions.McMinimumPacketWeight: mcoptions.McMinimumPacketWeight(float)
Minimum photon packet weight
before going through photon packet termination or lottery.
Default value is mcoptions.McMinimumPacketWeight(1e-4).
- mcoptions.McUseLottery: mcoptions.McUseLottery(float)
Terminate photon packet by lottery.
Default value is mcoptions.McUseLottery.on.
- mcoptions.McPacketLotteryChance: mcoptions.McPacketLotteryChance(float)
Used if photon packets are terminated by lottery.
If the value of a uniform random number exceeds the specified
chance, the photon packet is terminated.
Default value is mcoptions.McPacketLotteryChance(0.01).
- mcoptions.McLutMemory: mcoptions.McLutMemory.global_mem or mcoptions.McLutMemory.constant_mem
- McFpLutMemory: mcoptions.McFpLutMemory.global_mem or mcoptions.McFpLutMemory.constant_mem
Set the OpenCL memory type for integer and floating-point
data lookup tables that are used by some scattering phase
functions, accumulators with custom angular sensitivity and
photon packet sources with custom emission characteristics.
Note that the default setting is mcoptions.McFpLutMemory.global_mem.
Using the constant memory (mcoptions.McFpLutMemory.constant_mem)
can be significantly (10x or more) faster, in particular on
older GPUs. However, note that the amount of constant
memory available on GPUs is typically limited to about 64k.
If using lookup table-based scattering phase
functions that can be fit into the constant memory, then
set this option to mcoptions.McFpLutMemory.constant_mem.
This can significantly speed up the MC simulations (~ 10x).
To further reduce the amount of constant GPU memory required
by the lookup table data arrays, set the scattering phase
functions of the first and last layer (the sourrounding medium)
to one that is used by the internal (sample) layers.
- mcoptions.McDebugMode: mcoptions.McDebugMode.on or mcoptions.McDebugMode.off
Enables stdout debug information on some devices.
Default value is mcoptions.McDebugMode.off.
rnginit: np.uint64
OpenCL random number generator initializer as a 64-bit unsigned
integer.
Use this initializer if there is a need to put the random, number
generator into a known state.
cl_devices: str or cl.Device or List[cl.Device] or cl.Context cl.CommnadQueue
A python list of OpenCL devices that are used for
conducting the simulation.See the clGpuDevices and clCpuDevices
functions of the clinfo module for details on obtaining a
list of OpenCl capable devices. If None is provided, the
first available device is used (GPU devices have priority over
CPU devices). Use function clinfo.device to get a desired
device by simple keywords, e.g. a call
clinfo.device(['amd', 'nvidia', 'hd', 'cpu'], that will search
for an AMD GPU, Nvidia GPU, Intel Hd GPU, any CPU and return
the first device found.
The value of this input argument can also be an
instance of OpenCL Context or an instance of OpenCL CommandQueue.
Note that in case an instance of CommandQueue is passed, the value
of parameter cl_profiling is ignored since it is not possible to
enable or disable profiling on an existing OpenCL CommandQueue.
cl_build_options: List[str]
A list of OpenCL build option as specified by the OpenCl manuals at
https://www.khronos.org/.
An example of commonly used build options:
.. code-block:: python
cloptions=['-cl-opt-disable', '-Werror',
'-cl-fast-relaxed-math', '-cl-mad-enable']
Note that the "-cl-fast-relaxed-math" build option will likely
lead to a significant performance improvement but may on some
platforms lead to errors in the simulated quantities.
cl_profiling: bool
Enables OpenCL command queue profiling if set to True.
Note that in case an instance of CommandQueue is passed as the
cl_devices parameter, the value of parameter cl_profiling is
ignored since it is not possible to enable or disable profiling
on an existing OpenCL CommandQueue.
Examples
--------
See the test/example.py for additional examples.
>>> source = mcsource.GaussianBeam(0.001, 0.22)
>>> l0 = mclayer.Layer(d=0.000, n=1.0, mua=0.0, mus=0.0, pf=Hg(0.5))
>>> l1 = mclayer.Layer(d=0.001, n=1.3, mua=1e2, mus=100e2, pf=Hg(0.8))
>>> l2 = mclayer.Layer(d=0.010, n=1.3, mua=10e2, mus=30e2, pf=Hg(0.7))
>>> l3 = mclayer.Layer(d=0.000, n=1.0, mua=0.0, mus=0.0, pf=Hg(0.5))
>>> layers = mclayer.Layers([l0, l1, l2, l3])
>>> trace = mctrace.Trace(maxlen=1000, options=mctrace.Trace.TRACE_ALL)
>>> mc = Mc(layers, source, trace)
>>> trace_data, _, _ = mc.run(100)
'''
super().__init__(types=types, options=options, cl_devices=cl_devices,
cl_build_options=cl_build_options,
cl_profiling=cl_profiling, rnginit=rnginit)
# Initialization of members for later use.
self._cl_exec = self._cl_src = self._cl_src_options = None
# the latest kernel timing report and the number of used threads
self._run_report = {'upload': None, 'download': None,
'execution': None, 'threads': None,
'items': None}
# Size of structures on the target OpenCL device
self._sizeof_types = {}
# Data types of the simulator objects.
self._mc_obj_types = {'pf': None, 'source': None, 'detectors': None,
'fluence': None, 'trace': None,
'surface_layouts': None}
# Prepare the sample layer stack.
self._layers = mclayer.Layers(layers)
self._layers.check()
# Save the type of scattering phase function.
self._mc_obj_types['pf'] = type(self._layers[0].pf)
# Photon packet source.
self._source = source
self._mc_obj_types['source'] = type(source)
# Packed buffers of simulator objects ready for use/transfer to the
# OpenCL device.
self._packed = {'source': None,
'surface_layouts': None,
'layers': None, 'detectors': None,
'trace': None, 'fluence': None}
# Sample surface reflectance/transmittance object
self._detectors = detectors
if self._detectors is not None:
self._mc_obj_types['detectors'] = \
type(self._detectors), self._detectors.types()
# Photon packet trace object.
self._trace = trace
if self._trace is not None:
self._mc_obj_types['trace'] = type(self._trace)
# Fluence object.
self._fluence = fluence
if self._fluence is not None:
self._mc_obj_types['fluence'] = type(self._fluence)
# Surface geometry object
self._surface_layouts = surface
if self._surface_layouts is not None:
self._mc_obj_types['surface_layouts'] = \
type(self._surface_layouts), self._surface_layouts.types()
# Monte Carlo simulator options.
if options is None:
options = []
self._mc_options = options
# Default maximum simulation radius used by the Monte Carlo kernel.
self._rmax = float('inf')
[docs] def layer(self, index:int) -> mclayer.Layer:
'''
Returns the layer at the specified index.
Layers can be also accessed by the :py:attr:`~xopto.mccyl.mc.Mc.layers`
property and [] operator.
Parameters
----------
index: int
Index of the layer.
Returns
-------
layer: xopto.mccyl.mclayer.layer.Layer
Layer at the specified index.
'''
return self._layers.layer(index)
[docs] def layer_index(self, r: float) -> int:
'''
Returns the index of the layer that contains the specified r coordinate.
Parameters
----------
r: float
R coordinate for which to find the layer index.
Returns
-------
index: int
Layer index that contains the specified r coordinate.
'''
return self._layers.layer_index(r)
[docs] def append_r_lut(self, data: np.ndarray, force: bool = False) \
-> mcworker.LutEntry:
'''
Append a read-only lookup table data array to the list of managed
lookup table arrays.
Parameters
----------
data: np.ndarray
Lookup table data to be added to the managed list. The numpy
array must be of integer or floating-point type.
force: bool
Add the array to the managed list even if an existing entry for the
given data array is found.
Returns
-------
lutetry: LutEntry
Lookup table entry. Use the offset property to locate the
first element of the entry in the lookup table buffer.
'''
kind = data.dtype.kind
if kind in ('i', 'u'):
# integer data
lutetry, new_item = self.append_r_int_lut(data, force=force)
elif kind == 'f':
# floating point data
lutetry, new_item = self.append_r_float_lut(data, force=force)
else:
raise TypeError('Lookup tables only support integer and '
'floating-point data types!')
return lutetry
[docs] def clear_r_luts(self):
'''
Clear all the read-only lookup table managers
'''
self.clear_r_float_lut()
self.clear_r_int_lut()
def _pack(self, nphotons: int):
'''
Pack the data into buffers.
Parameters
----------
nphotons: int
The number of photon packets that will be run by the simulator.
'''
# Clear existing allocations of detector and fluence.
self.cl_rw_accumulator_allocator.clear()
self.cl_rw_float_allocator.clear()
self.cl_rw_int_allocator.clear()
# Clear the lookup table managers
self.clear_r_luts()
# Finalize and pack the sample layer stack.
self._packed['layers'] = self._layers.cl_pack(self, self._packed['layers'])
if self._mc_obj_types['pf'] != type(self._layers[1].pf):
raise ValueError('The scattering phase function kind/type must not' \
'change simulation calls!')
# Finalize and pack the photon packet source.
self._packed['source'], _, _ = \
self._source.cl_pack(self, self._packed['source'])
if self._mc_obj_types['source'] != type(self._source):
raise ValueError('The photon packet source type/kind must not' \
'change between simulation calls!')
# pack the detector configuration
if self._detectors is not None:
self._packed['detectors'] = self._detectors.cl_pack(
self, self._packed['detectors'])
if self._mc_obj_types['detectors'] != \
(type(self._detectors), self._detectors.types()):
raise ValueError(
'Detector types must not change between simulation calls!')
# Finalize and pack the fluence configuration data.
if self._fluence is not None:
self._packed['fluence'] = self._fluence.cl_pack(
self, self._packed['fluence'])
# Finalize and pack the photon packet trace configuration data.
if self._trace is not None:
self._packed['trace'] = self._trace.cl_pack(
self, self._packed['trace'], nphotons)
if self._surface_layouts:
self._packed['surface_layouts'] = self._surface_layouts.cl_pack(
self, self._packed['surface_layouts'])
if self._mc_obj_types['surface_layouts'] != \
(type(self._surface_layouts), self._surface_layouts.types()):
raise ValueError(
'Surface layout types must not change between simulation calls!')
def _build_src(self, verbose=False):
'''
Generate and build the OpenCL source from a template.
'''
# Load the OpenCL kernel template from a file
#with open(Mc.MC_CL_TEMPLATE_FILE, 'rt') as fp:
# template = jinja2.Template(fp.read())
template = jinja2.Template(mcsrc.fuse(**_MCML_FUSION_SPEC, verbose=verbose))
context = {'source':{}, 'trace':{}, 'fluence':{}, 'detectors': {},
'surface_layouts':{}, 'mc':{}}
# Collect the photon packet source type compile context.
context['source'] = {
'options': self._source.fetch_cl_options(self),
'declaration': self._source.fetch_cl_declaration(self),
'implementation': self._source.fetch_cl_implementation(self)
}
# Collect the layer stack and scattering phase function compile context.
context['pf'] = {
'options': self._layers.fetch_cl_options(self),
'declaration': self._layers.fetch_cl_declaration(self),
'implementation': self._layers.fetch_cl_implementation(self),
}
# Collect the photon packet trace type compile context.
if self._trace is not None:
context['trace'] = {
'options': self._trace.fetch_cl_options(self),
'declaration': self._trace.fetch_cl_declaration(self),
'implementation': self._trace.fetch_cl_implementation(self)
}
# Collect the fluence type compile context.
if self._fluence is not None:
context['fluence'] = {
'options': self._fluence.fetch_cl_options(self),
'declaration': self._fluence.fetch_cl_declaration(self),
'implementation': self._fluence.fetch_cl_implementation(self)
}
# Collect the surface detector type build context.
if self._detectors is not None:
context['detectors'] = {
'options': self._detectors.fetch_cl_options(self),
'declaration': self._detectors.fetch_cl_declaration(self),
'implementation': self._detectors.fetch_cl_implementation(self)
}
# Collect the top and bottom sample surface geometry.
if self._surface_layouts is not None:
context['surface_layouts'] = {
'options': self._surface_layouts.fetch_cl_options(self),
'declaration': self._surface_layouts.fetch_cl_declaration(self),
'implementation': self._surface_layouts.fetch_cl_implementation(self)
}
mc_options = []
for item in self._mc_options:
if isinstance(item, mcoptions.McOption):
mc_options.extend(item.fetch_cl_options(self))
elif isinstance(item, (tuple, list)):
mc_options.append(tuple(item))
else:
raise ValueError(
'Simulator options must be a list of McOption or a list '
'of tuples!')
# Resolve/join all the simulator options.
resolved_options = mcoptions.resolve_cl_options(
self._types().fetch_cl_options(self),
context['source'].get('options', []),
context['pf'].get('options', []),
context['detectors'].get('options', []),
context['fluence'].get('options', []),
context['trace'].get('options', []),
context['surface_layouts'].get('options', []),
mc_options
)
# Convert the resolved simulator options to defines.
context['mc'] = {'options': mcoptions.make_defines(resolved_options)}
# save all the kernel options for later use
self._cl_src_options = dict(resolved_options)
# Render the Monte Carlo OpenCL source file.
self._cl_src = template.render(context)
[docs] def export_src(self, filename: str = None) -> str:
'''
Export the OpenCL source to a file.
Parameters
----------
filename: str
The OpenCl source code will be saved to this file. If a file
name is not provided, the OpenCL source will be saved to
:py:attr:`Mc.AUTOGEN_PATH` directory with a name
"mcml_<random>.cl"
Returns
-------
filename: str
The filename that was used to save the OpenCL source code.
'''
if filename is None:
suffix = str(time.perf_counter()).replace('.', '')
filename = os.path.join(Mc.AUTOGEN_PATH, 'mcml_{}.cl'.format(suffix))
with open(filename, 'wt') as fp:
fp.write(self._cl_src)
def _build_exec(self, exportsrc=None, verbose=False):
'''
Build the OpenCL executable.
'''
# Rebuild from source if required - first call of the run method.
tb = time.perf_counter()
# Generate / render the source code for this simulator instance.
self._build_src(verbose=verbose)
if exportsrc is not None and exportsrc:
filename = None
if isinstance(exportsrc, str):
filename = exportsrc
self.export_src(filename)
# Compile the OpenCL source code.
self._cl_exec = self.cl_build(self._cl_src, verbose)
# options=['-cl-opt-disable', '-Werror']
buildtime = time.perf_counter() - tb
if verbose:
print('OpenCL engine created in {:.3f} ms.'.format(buildtime*1e3))
return buildtime
def _fetch_typesizes(self, validate=True):
'''
Fetch the size of data types / structures on the target OpenCL device.
'''
np_sizes = np.zeros(10, dtype=np.uint32)
cl_type_sizes = self.cl_rw_buffer('cl_type_sizes', np_sizes)
self._cl_exec.sizeof_datatypes(
self.cl_queue, (1,), None, cl_type_sizes).wait()
cl.enqueue_copy(self.cl_queue, np_sizes, cl_type_sizes).wait()
siz = self._sizeof_types = dict(
zip(('layer', 'source', 'surface_layouts',
'detectors', 'trace', 'fluence'), np_sizes))
if validate:
n = cltypes.sizeof(self.layers[0].fetch_cl_type(self))
if siz['layer'] != n:
raise RuntimeError(
'The host and device size of the McLayer structure '
'are not the same!'.format(n, siz['layer']))
n = cltypes.sizeof(self.source.fetch_cl_type(self))
if siz['source'] != n:
raise RuntimeError(
'The host ({}) and device ({}) size of the McSource '
'structure are not the same!'.format(n, siz['source']))
if self.detectors is not None:
n = cltypes.sizeof(self.detectors.fetch_cl_type(self))
if siz['detectors'] != n:
raise RuntimeError(
'The host ({}) and device ({}) size of the McDetectors '
'structure are not the same!'.format(n, siz['detectors']))
if self.trace is not None:
n = cltypes.sizeof(self.trace.fetch_cl_type(self))
if siz['trace'] != n:
raise RuntimeError(
'The host ({}) and device ({}) size of the McTrace '
'structure are not the same!'.format(n, siz['trace']))
if self.fluence is not None:
n = cltypes.sizeof(self.fluence.fetch_cl_type(self))
if siz['fluence'] != n:
raise RuntimeError(
'The host ({}) and device ({}) size of the McFluence '
'structure are not the same!'.format(n, siz['fluence']))
if self.surface is not None:
n = cltypes.sizeof(self.surface.fetch_cl_type(self))
if siz['surface_layouts'] != n:
raise RuntimeError(
'The host ({}) and device ({}) size of the '
'McSurfaceLayouts structure are not the same!'.format(
n, siz['surface_layouts']))
[docs] def run(self, nphotons: int,
out: MC_RESULT_TYPE or None = None,
wgsize: int = None, maxthreads: int = None,
copyseeds: bool = False, exportsrc: bool = None,
verbose: bool = False) -> MC_RESULT_TYPE:
'''
Run a simulation with the specified number of photon packets.
Parameters
----------
nphotons: int
Number of photon packets to simulate. Note that the maximum number
of photon packets is limited to :py:attr:`Mc.types.mc_cnt_max`.
out: MC_RESULT_TYPE or None
Existing trace, fluence and/or detector objects that will be
updated with the simulation results if not None. Pass objects
that were returned after the previous/first call of the
:py:meth:`run` method.
Do NOT pass any of the objects that were used as input arguments
of the :py:meth:`Mc` constructor!.
wgsize: int
Local work group size. If None (default), the optimal work group
size is determined automatically.
maxthreads: int
Total number of threads to use. Limited by the
number of available random number generators (150,000).
copyseeds: bool
If True, copy the mutable seeds of the random number generator
from the OpenCL kernel to the host. This is only required if the
seeds need to be reused or inspected (for debug purposes).
exportsrc: str or bool
Optional location to save the OpenCL source code. The source file
is saved only on the first run of the simulator.
verbose: bool
If set to True, additional debug information is displayed.
Returns
-------
trace: mctrace.Trace
Trace object instance with simulation results or None if not used
by the simulator.
fluence: mcfluence.Fluence or mcfluence.FluenceCyl or mcfluence.FluenceRz
Fluence object instance with simulation results or None if not used
by the simulator.
detector: mcdetector.Detectors
Detectors object instance with simulation results or None if not
used by the simulator.
Note
----
The returned :py:class:`mctrace.Trace`,
:py:class:`mcfluence.Fluence` and :py:class:`mcdetector.Detector`
objects are not the same instances as were passed to the
constructor :py:meth:`Mc`. A new set of instances is created
for the results of each run of the simulator.
The maximum number of photon packets that can be simulated in a
single run is limited by the kernel data type used as the photon
packet counter. The maximum value is limited to
:py:attr:`Mc.types.mc_cnt_max`.
'''
if out is None:
out = (None, None, None)
nphotons = int(nphotons)
# finalize and pack the configuration data
self._pack(nphotons)
# shortcut to memory flags
mf = cl.mem_flags
# predefined combinations of memory access flags
mf_rw = mf.READ_WRITE
mf_r = mf.READ_ONLY
mf_w = mf.WRITE_ONLY
mf_cp_host = mf.COPY_HOST_PTR
# shortcuts to buffers
cl_buffers = self.cl_buffers
np_buffers = self.np_buffers
packed = self._packed
# check if the maximum number of photon packets is not exceeded
if nphotons > self._types.mc_cnt_max:
raise ValueError(
'Maximum number of photon packets that can be '
'simulated in a single run is limited to {:,d}!'.format(
self._types.mc_cnt_max)
)
# start timing this run
t0 = time.perf_counter()
if self._cl_exec is None:
# Rebuild from source if required - first call of the run method.
self._build_exec(exportsrc=exportsrc, verbose=verbose)
# fetch the data type sizes on the OpenCL device and validate /
# compare to the size of the data types on the host
self._fetch_typesizes(validate=True)
# allocate a global counter of processed photon packets
np_buffers['num_processed_packets'] = \
np.zeros([1], dtype=self._types.np_cnt)
# allocate a counter of OpenCL kernels
np_buffers['num_kernels'] = np.zeros([1], dtype=np.uint32)
# allocate and initialize an OpenCL buffer with the mutable seeds
# of the random number generator
self.cl_rw_buffer('rng_seeds_x', self.rng_seeds_x)
# allocate and initialize an OpenCL buffer with the immutable seeds
# of the random number generator
self.cl_r_buffer('rng_seeds_a', self.rng_seeds_a)
data = np_buffers['num_processed_packets']
data[0] = 0
self.cl_rw_buffer('num_processed_packets', data)
# initialize the number of used OpenCL kernels to 0
data = np_buffers['num_kernels']
data[0] = 0
self.cl_rw_buffer('num_kernels', data)
# allocate an OpenCL buffer for the sample layer stack
self.cl_r_buffer('layers', packed['layers'])
# allocate an OpenCL buffer for the photon packet source
self.cl_r_buffer('source', packed['source'])
# allocate and initialize an OpenCL buffer for the surface layouts
self.cl_r_buffer('surface_layouts', packed['surface_layouts'], size=8)
# allocate and intitalize an OpenCL buffer for the trace
self.cl_r_buffer('trace', packed.get('trace'), size=4)
# allocate and intitalize an OpenCL buffer for the fluence
self.cl_r_buffer('fluence', packed.get('fluence'), size=4)
# allocate and intitalize an OpenCL buffer for the surface detector
self.cl_r_buffer('detectors', packed.get('detectors'), size=4)
# allocate and initialize an OpenCL buffer for the floating-point
# lookup-table
self.cl_r_float_lut()
# allocate and initialize the read-write accumulator type buffer
self.cl_rw_accumulator_buffer(fill=0)
# allocate and intitalize the read-write floating-point type buffer
self.cl_rw_float_buffer(fill=0.0)
# allocate and intitalize the read-write integer type buffer
self.cl_rw_int_buffer(fill=0)
# Check if floating-point lookup tables are properly used.
if len(self.float_r_lut_manager) and 'MC_USE_FP_LUT' not in self._cl_src_options:
raise ValueError(
'Floating-point lookup tables are used by one or more modules '
'but the compile-time kernel option MC_USE_FP_LUT was not set '
'to True!')
# Check if integer lookup tables are properly used.
if len(self.int_r_lut_manager) and 'MC_USE_INT_LUT' not in self._cl_src_options:
raise ValueError(
'Integer lookup tables are used by one or more modules but '
'the compile-time kernel option MC_USE_INT_LUT was not '
'set to True!')
cl_queue = self.cl_queue
# limit the maximum number of threads that can be concurrently run on
# the OpenCL device - limited by the available number of rng seeds
if maxthreads is None:
maxthreads = self.cl_max_threads
else:
maxthreads = min(maxthreads, self.cl_max_threads)
# configure the local and global work group size
if wgsize is None:
localWG = None
globalWG = [maxthreads]
else:
wgsize = int(wgsize)
localWG = [wgsize]
globalWG = [(maxthreads//wgsize)*wgsize]
# start timing the kernel call
t1 = time.perf_counter()
# call the kernel
self._cl_exec.McKernel(
cl_queue, globalWG, localWG,
np.dtype(self._types.np_cnt).type(nphotons),
cl_buffers['num_processed_packets'],
cl_buffers['num_kernels'],
np.dtype(self._types.np_float).type(self._rmax),
cl_buffers['rng_seeds_x'],
cl_buffers['rng_seeds_a'],
np.uint32(len(self._layers)),
cl_buffers['layers'],
cl_buffers['source'],
cl_buffers['surface_layouts'],
cl_buffers['trace'],
cl_buffers['fluence'],
cl_buffers['detectors'],
self.cl_r_float_lut(False),
self.cl_rw_int_buffer(),
self.cl_rw_float_buffer(),
self.cl_rw_accumulator_buffer(),
).wait()
t2 = time.perf_counter()
# get the number of concurrent kernels that were used to process the job
cl.enqueue_copy(
cl_queue,
np_buffers['num_kernels'], cl_buffers['num_kernels']).wait()
# copy seeds from the kernel - actually not required but good for debug
if copyseeds:
cl.enqueue_copy(
cl_queue,
self.rng_seeds_x, cl_buffers['rng_seeds_x']).wait()
# download the trace data and update the trace object
trace_res = out[0]
if self._trace is not None:
with self.np_allocators as np_allocators:
all_data = self._download_allocated_buffers(
self._trace, np_allocators, nphotons)
if all_data:
if trace_res is None:
trace_res = type(self._trace)(self._trace)
trace_res.update_data(self, all_data, nphotons=nphotons)
# download the fluence data and update the fluence object
fluence_res = out[1]
if self._fluence is not None:
with self.np_allocators as np_allocators:
all_data = self._download_allocated_buffers(
self._fluence, np_allocators, nphotons)
if all_data:
if fluence_res is None:
fluence_res = type(self._fluence)(self._fluence)
fluence_res.update_data(self, all_data, nphotons=nphotons)
# download the detector data and update the detectors
detectors_res = out[2]
if self._detectors is not None:
for detector in self._detectors:
with self.np_allocators as np_allocators:
all_data = self._download_allocated_buffers(
detector, np_allocators, nphotons)
if all_data:
if detectors_res is None:
detectors_res = type(self._detectors)(self._detectors)
detectors_res.update_data(
self, detector, all_data, nphotons=nphotons)
uploadtime = t1 - t0
exectime = t2 - t1
downloadtime = time.perf_counter() - t2
self._run_report.update(upload=uploadtime, download=downloadtime,
execution=exectime, items=nphotons,
threads=int(np_buffers['num_kernels'][0]))
if verbose:
print('McKernel processed {:,d} packets in {:,d} threads:\n'
' uploaded/built in: {:10.3f} ms\n'
' executed in : {:10.3f} ms\n'
' downloaded in : {:10.3f} ms'.format(
nphotons, int(np_buffers['num_kernels'][0]),
uploadtime*1e3, exectime*1e3, downloadtime*1e3)
)
return trace_res, fluence_res, detectors_res
def _download_allocated_buffers(
self, owner: any, np_allocators: NumpyAllocators, nphotons) -> \
Dict[np.dtype, List[np.array]]:
all_data = {}
for allocator in self.cl_rw_allocators:
for allocation in allocator.allocations(owner):
if allocation.download:
buffer = None
if hasattr(owner, 'np_buffer'):
buffer = owner.np_buffer(
self, allocation, nphotons=nphotons)
if buffer is None:
buffer = np_allocators.allocate(
allocation.dtype, allocation.size)
self.cl_allocation_download(allocation, buffer)
data_of_type = all_data.get(allocation.dtype, [])
data_of_type.append(buffer)
all_data[allocation.dtype] = data_of_type
return all_data
[docs] def sampling_volume(self, trace: mctrace.Trace, sv: mcsv.SamplingVolume,
wgsize: int = None, maxthreads: int = None,
exportsrc: bool = None, verbose: bool = False) \
-> mcsv.SamplingVolume:
'''
Compute the sampling volume from the given simulation trace.
Parameters
----------
trace: mctrace.Trace
Simulation trace object to process.
sv: mcsv.SamplingVolume
Target sampling volume that will be updated with the results.
wgsize: int
Local work group size. If None (default), the optimal work group
size is determined automatically.
maxthreads: int
Total number of parallel threads to use. Limited by the
number of available random number generators (150,000).
exportsrc: str or bool
Optional location to save the OpenCL source code. The source file
is saved only on the first run of the simulator.
verbose: bool
If set to True, additional debug information is displayed.
Returns
-------
sv: mcsv.SamplingVolume
Input sampling volume updated with the data from the trace.
'''
if self._trace is None:
raise RuntimeError(
'This Monte Carlo simulator was build without the Trace '
'functionallity (the trace argument of :py:meth:Mc.__init__) '
'was None! Sampling volume analysis requires trace '
'functionality!')
if self._cl_exec is None:
# Rebuild from source if required - first call of the run method.
self._build_exec(exportsrc=exportsrc, verbose=verbose)
t0 = time.perf_counter()
nphotons = trace.nphotons
# allocate some numpy buffers
if self.np_buffers.get('num_processed_packets') is None:
self.np_buffers['num_processed_packets'] = np.zeros((1,),
dtype=self.types.np_cnt)
if self.np_buffers.get('num_kernels') is None:
self.np_buffers['num_kernels'] = np.zeros((1,),
dtype=np.uint32)
if self.np_buffers.get('sv_total_weight') is None:
self.np_buffers['sv_total_weight'] = np.zeros((1,),
dtype=self.types.np_accu)
# Clear existing allocations of detector and fluence.
self.cl_rw_accumulator_allocator.clear()
self.cl_rw_float_allocator.clear()
self.cl_rw_int_allocator.clear()
data = self.np_buffers['num_processed_packets']
data.fill(0)
self.cl_rw_buffer('num_processed_packets', data)
# initialize the number of used OpenCL kernels to 0
data = self.np_buffers['num_kernels']
data.fill(0)
self.cl_rw_buffer('num_kernels', data)
# initialize the total weight of photon packets
data = self.np_buffers['sv_total_weight']
data.fill(0)
self.cl_rw_buffer('sv_total_weight', data)
# Pack the data for OpenCL
packed = self._packed
packed['sv_trace'] = trace.cl_pack(
self, packed.get('sv_trace'), nphotons=nphotons)
packed['sv'] = sv.cl_pack(self, packed.get('sv'))
cl_sv_trace = self.cl_r_buffer('sv_trace', packed['sv_trace'])
cl_sv = self.cl_r_buffer('sv', packed['sv'])
# allocate and initialize the read-write accumulator buffer
self.cl_rw_accumulator_buffer(fill=0)
# allocate and intitalize the read-write floating-point buffer
self.cl_rw_float_buffer()
# allocate and intitalize the read-write integer buffer
self.cl_rw_int_buffer()
# upload the trace data
trace_len = self.cl_rw_int_allocator.allocations(trace)[0]
self.cl_allocation_upload(trace_len, trace.n_to_cl(self))
trace_data = self.cl_rw_float_allocator.allocations(trace)[0]
self.cl_allocation_upload(trace_data, trace.data_to_cl(self))
# limit the maximum number of threads that can be concurrently run on
# the OpenCL device - limited by the available number of rng seeds
if maxthreads is None:
maxthreads = self.cl_max_threads
else:
maxthreads = min(maxthreads, self.cl_max_threads)
# configure the local and global work group size
if wgsize is None:
localWG = None
globalWG = [maxthreads]
else:
wgsize = int(wgsize)
localWG = [wgsize]
globalWG = [(maxthreads//wgsize)*wgsize]
t1 = time.perf_counter()
self._cl_exec.SamplingVolume(
self.cl_queue, globalWG, localWG,
np.dtype(self._types.np_cnt).type(nphotons),
self.cl_buffers['num_processed_packets'],
self.cl_buffers['num_kernels'],
cl_sv_trace,
cl_sv,
self.cl_buffers['sv_total_weight'],
self.cl_rw_int_buffer(),
self.cl_rw_float_buffer(),
self.cl_rw_accumulator_buffer()
).wait()
t2 = time.perf_counter()
# Get the number of concurrent kernels that were used to process the job.
cl.enqueue_copy(
self.cl_queue,
self.np_buffers['num_kernels'],
self.cl_buffers['num_kernels']).wait()
cl.enqueue_copy(
self.cl_queue,
self.np_buffers['sv_total_weight'],
self.cl_buffers['sv_total_weight']).wait()
accus = []
sv_out = sv
# sv_out = type(sv)(sv)
with self.np_allocators as np_allocator:
for allocation in self.cl_rw_accumulator_allocator.allocations(sv):
buffer = np_allocator.allocate(
self.types.np_accu, allocation.size)
accus.append(self.cl_allocation_download(allocation, buffer))
if accus:
sv_out.update_data(
self, accumulators=accus, nphotons=nphotons,
total_weight=self.np_buffers['sv_total_weight'])
uploadtime = t1 - t0
exectime = t2 - t1
downloadtime = time.perf_counter() - t2
self._run_report.update(upload=uploadtime, download=downloadtime,
execution=exectime, items=nphotons,
threads=int(self.np_buffers['num_kernels'][0]))
if verbose:
print('SamplingVolume processed {:d} packets in {:d} threads:\n'
' uploaded/built in: {:10.3f} ms\n'
' executed in : {:10.3f} ms\n'
' downloaded in : {:10.3f} ms'.format(
nphotons, int(self.np_buffers['num_kernels'][0]),
uploadtime*1e3, exectime*1e3, downloadtime*1e3)
)
return sv_out
def _get_types(self) -> mctypes.McDataTypes:
return self._types
types = property(_get_types, None, None, 'Simulator data types.')
def _get_rmax(self) -> float:
return self._rmax
def _set_rmax(self, rmax: float):
rmax = float(rmax)
if rmax == 0.0:
raise ValueError('Simulation radius must be > 0.0 m!')
self._rmax = rmax
rmax = property(_get_rmax, _set_rmax, None,
'Maximum simulation radius measured from '\
'the photon packet source position (m).')
def _get_cl_exec(self) -> cl.Program:
return self._cl_exec
cl_exec = property(_get_cl_exec, None, None,
'Compiled OpenCL code / executable with kernels.')
def _get_layers(self) -> mclayer.Layers:
return self._layers
def _set_layers(self, layers: mclayer.Layers or List[mclayer.Layer]):
self._layers = mclayer.Layers(layers)
layers = property(_get_layers, _set_layers, None, 'Sample layer stack.')
def _get_source(self) -> mcsource.Source:
return self._source
source = property(_get_source, None, None, 'Photon packet source object.')
def _get_detectors(self) -> mcdetector.Detectors:
return self._detectors
detectors = property(_get_detectors, None, None,
'Detectors object if available.')
def _get_fluence(self) -> mcfluence.Fluence:
return self._fluence
fluence = property(_get_fluence, None, None,
'Fluence object if available.')
def _get_trace(self) -> mctrace.Trace:
return self._trace
trace = property(_get_trace, None, None, 'Trace object if available.')
def _get_surface(self) -> mcsurface.SurfaceLayouts:
return self._surface_layouts
surface = property(_get_surface, None, None, 'Sample surface layout.')
def _get_run_report(self) -> dict:
return self._run_report
run_report = property(_get_run_report, None, None,
'Timing and performance data of the latest kernel '
'run as a dict with keys "upload", "download", '
'"execution", "threads" and "items". '
'The timing values with keys "upload", "download" '
'and "execution" are given in seconds. '
'The number of OpenCL threads with key "threads" '
'and the number of processed items with key "items" '
'are integer values. '
'All values are initialized to None.')
############################# Testing code ###########################
[docs] def dbg(self):
data = self._npDbg
res = {}
res['nPhotons'] = int(data[0])
res['nPhotonsDone'] = int(data[1])
res['nLayers'] = int(data[2])
res['layers'] = []
pos = 3
for l in range(res['nLayers']):
layer = {}
layer['thickness'] = data[pos]
layer['top'] = data[pos + 1]
layer['bottom'] = data[pos + 2]
layer['n'] = data[pos + 3]
layer['cosCriticalTop'] = data[pos + 4]
layer['cosCriticalBottom'] = data[pos + 5]
layer['muS'] = data[pos + 6]
layer['muA'] = data[pos + 7]
layer['muAS'] = data[pos + 8]
layer['invMuAS'] = data[pos + 9]
layer['muAinvMuAS'] = data[pos + 10]
layer['pfparams'] = data[pos + 11: pos + 16]
res['layers'].append(layer)
pos += 16
res['source'] = {'origin':data[pos:pos + 3], 'layer':data[pos + 3],
'params':data[pos + 4: pos + 8]}
res['fluenceStride'] = data[pos + 8: pos + 11]
res['fluenceTopLeft'] = data[pos + 11: pos + 14]
res['fluenceInvVoxelSize'] = data[pos + 14: pos + 17]
res['rtStride'] = data[pos + 17: pos + 19]
res['rtTopLeft'] = data[pos + 19: pos + 21]
res['rtInvPixelSize'] = data[pos + 21: pos + 23]
res['rtAcceptanceCosMin'] = data[pos + 23: pos + 25]
return res
[docs] def rng_test(self, n: int, x: int = None, a: int = None) -> np.ndarray:
'''
Generate random numbers on the OpenCL device. The random number
generator is the same as used by the Monte Carlo kernel. The
precision is inherited from the Simulator.
The states / seeds x and a can be optionally generated with the
:py:attr:`rng` property that is an instance of
:py:class:`xopto.cl.clrng.Random`.
Parameters
----------
n: int
The number of random numbers to generate.
x: np.uint64
Mutable state of the random number generator.
a: np.uint32
Immutable state of the random number generator.
Returns
-------
data: np.ndarray
A vector of n random numbers.
'''
if x is None or a is None:
x, a = self.rng.seeds(1)
n = int(n)
mf = cl.mem_flags
cl_buffer = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
np.dtype(self.types.np_float).itemsize*n)
self._cl_exec.RngKernel(
self._cl_queue, [1], None,
np.uint64(x), np.uint32(a),
np.dtype(self._types.np_size).type(n),
cl_buffer
).wait()
result = np.zeros((n,), dtype=self.types.np_float)
cl.enqueue_copy(self._cl_queue, result, cl_buffer).wait()
return result
[docs] def testMcSinCos(self, x):
mf = cl.mem_flags
x = np.asarray(x, dtype=self._fptype)
cl_x = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=x)
cl_sin = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
cl_cos = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
self._cl_exec.testMcSinCos(
self._cl_queue, [x.size], None,
self._uitype.type(x.size), cl_x, cl_sin, cl_cos).wait()
sinout = np.zeros_like(x)
cosout = np.zeros_like(x)
cl.enqueue_copy(self._cl_queue, sinout, cl_sin).wait()
cl.enqueue_copy(self._cl_queue, cosout, cl_cos).wait()
return sinout, cosout
[docs] def testMcSqrt(self, x, native=True):
x = np.asarray(x, dtype=self._fptype)
mf = cl.mem_flags
cl_x = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=x)
cl_out = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
if native:
fun = self._cl_exec.testMcSqrtNative
else:
fun = self._cl_exec.testMcSqrt
fun(self._cl_queue, [x.size], None,
self._uitype.type(x.size), cl_x, cl_out).wait()
out = np.zeros_like(x)
cl.enqueue_copy(self._cl_queue, out, cl_out).wait()
return out
[docs] def testMcRSqrt(self, x, native=True):
x = np.asarray(x, dtype=self._fptype)
mf = cl.mem_flags
cl_x = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=x)
cl_out = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
if native:
fun = self._cl_exec.testMcRSqrtNative
else:
fun = self._cl_exec.testMcRSqrt
fun(self._cl_queue, [x.size], None,
self._uitype.type(x.size), cl_x, cl_out).wait()
out = np.zeros_like(x)
cl.enqueue_copy(self._cl_queue, out, cl_out).wait()
return out
[docs] def testMcFpDiv(self, x, y, native=True):
mf = cl.mem_flags
x = np.asarray(x, dtype=self._fptype)
y = np.asarray(y, dtype=self._fptype)
if x.size != y.size:
raise ValueError('Division parameters must be of the same size!')
cl_x = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=x)
cl_y = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=y)
cl_out = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
if native:
fun = self._cl_exec.testMcFpDivNative
else:
fun = self._cl_exec.testMcFpDiv
fun(self._cl_queue, [x.size], None,
self._uitype.type(x.size), cl_x, cl_y, cl_out).wait()
out = np.zeros_like(x)
cl.enqueue_copy(self._cl_queue, out, cl_out).wait()
return out
[docs] def testMcLog(self, x, native=True):
x = np.asarray(x, dtype=self._fptype)
mf = cl.mem_flags
cl_x = cl.Buffer(self._cl_context, mf.READ_ONLY |
mf.COPY_HOST_PTR, hostbuf=x)
cl_out = cl.Buffer(self._cl_context, mf.WRITE_ONLY,
self._fptype.itemsize*x.size)
if native:
fun = self._cl_exec.testMcFpLogNative
else:
fun = self._cl_exec.testMcFpLog
fun(self._cl_queue, [x.size], None,
self._uitype.type(x.size), cl_x, cl_out).wait()
out = np.zeros_like(x)
cl.enqueue_copy(self._cl_queue, out, cl_out).wait()
return out
[docs] def devDataTypes(self):
mf = cl.mem_flags
s = np.zeros([16], dtype=np.uint32)
cl_s = cl.Buffer(self._cl_context, mf.WRITE_ONLY |
mf.COPY_HOST_PTR, hostbuf=s)
self._cl_exec.dataTypes(
self._cl_queue, [1], None,
cl_s)
cl.enqueue_copy(self._cl_queue, s, cl_s).wait()
return {'McInt':s[0], 'McUInt':s[1],
'McUInt32':s[2], 'McUInt64':s[3],
'McFluenceInt':s[4], 'McRtInt':s[5],
'McFloat':s[6], 'McSim':s[7], 'McPf':s[8]}
if __name__ == '__main__':
import os.path
import matplotlib.pyplot as pp
from xopto import USER_TMP_PATH
from xopto.mccyl.mcutil.fiber import MultimodeFiber, MultimodeFiberLut, FiberLayout
from xopto.mccyl.mcutil.lut import EmissionLut
from xopto.cl import clinfo
from xopto.pf import Hg, Mie
from xopto import make_user_dirs
class Coating(mcsurface.SurfaceLayoutAny):
def cl_type(self, mc: McObject) -> cltypes.Structure:
T = mc.types
class ClCoating(cltypes.Structure):
_fields_ = [
('absorption', T.mc_fp_t),
('window_height', T.mc_fp_t),
('beam_block_r', T.mc_fp_t),
('specular_fraction', T.mc_fp_t)
]
return ClCoating
def cl_declaration(self, mc: McObject) -> str:
'''
Structure that defines the source in the Monte Carlo simulator.
'''
Loc = self.location.capitalize()
return '\n'.join((
'struct MC_STRUCT_ATTRIBUTES Mc{}SurfaceLayout {{'.format(Loc),
' mc_fp_t absorption;',
' mc_fp_t window_height;',
' mc_fp_t beam_block_r;',
' mc_fp_t specular_fraction;',
'};'
))
def fetch_cl_implementation(self, mc: 'McObject') -> str:
loc = self.location
Loc = loc.capitalize()
return '\n'.join((
'void dbg_print_{}_surface_layout('.format(loc),
' __mc_surface_mem const Mc{}SurfaceLayout *layout){{'.format(Loc),
' dbg_print("Mc{}SurfaceLayout:");'.format(Loc),
' dbg_print_float(INDENT "absorption:", layout->absorption);',
' dbg_print_float(INDENT "window_height:", layout->window_height);',
' dbg_print_float(INDENT "beam_block_r:", layout->beam_block_r);',
' dbg_print_float(INDENT "specular_fraction:", layout->specular_fraction);',
'};',
'',
'inline int mcsim_{}_surface_layout_handler('.format(loc),
' McSim *psim, mc_int_t next_layer_index, ',
' mc_fp_t *n2, mc_fp_t *cc){',
'',
' __mc_surface_mem const Mc{}SurfaceLayout *layout = '.format(Loc),
' mcsim_{}_surface_layout(psim);'.format(loc),
' mc_point3f_t *pos = mcsim_position(psim);',
'',
' mc_int_t current_layer_index = mcsim_current_layer_index(psim);',
' mc_size_t num_layers = mcsim_layer_count(psim);',
'',
' if (!((next_layer_index == num_layers - 2 &&',
' current_layer_index == num_layers - 1) ||',
' (next_layer_index == num_layers - 1 &&',
' current_layer_index == num_layers - 2) || next_layer_index == 0)) {',
' return MC_SURFACE_LAYOUT_CONTINUE;',
' }',
'',
' mc_fp_t offset = pos->y*pos->y + pos->z*pos->z;'
' if (offset < mc_fsquare(layout->beam_block_r) && pos->x > FP_0) {',
' mcsim_adjust_weight(psim, mcsim_weight(psim)*layout->absorption);',
' } else if (mc_fabs(pos->z) < layout->window_height*0.5 && pos->y >= FP_0) {',
' return MC_SURFACE_LAYOUT_CONTINUE;',
' } else {',
' mcsim_adjust_weight(psim, mcsim_weight(psim)*layout->absorption);',
' }',
'',
' if (mcsim_weight(psim) <= FP_0)',
' return MC_SURFACE_LAYOUT_CONTINUE;'
'',
' mc_point3f_t new_dir;',
' mc_point3f_t normal;',
' radial_normal(pos, &normal);',
' reflect(mcsim_direction(psim), &normal, &new_dir);',
'',
' if (mcsim_random(psim) > layout->specular_fraction) {',
' scatter_direction(&new_dir, ',
' mc_sqrt(FP_1 - mcsim_random(psim)),',
' FP_2PI*mcsim_random(psim));',
' };',
'',
' mcsim_set_direction(psim, &new_dir);',
'',
' return MC_REFLECTED;',
'};'
))
def __init__(self, absorption: float = 0.99,
window_height: float = 1e-3,
beam_block_diameter: float = 1e-3,
specular: float = 1.0):
self._absorption = 1.0
self._window_height = 1e-3
self._beam_block_diameter = 1e-3
self._specular = 0.0
self._set_absorption(absorption)
self._set_specular(specular)
self._set_window_height(window_height)
self._set_beam_block_diameter(beam_block_diameter)
super().__init__()
def _get_window_height(self) -> float:
return self._window_height
def _set_window_height(self, height: float):
self._window_height = max(0.0, float(height))
window_height = property(_get_window_height, _set_window_height, None,
'Clear window height (m).')
def _get_beam_block_diameter(self) -> float:
return self._beam_block_diameter
def _set_beam_block_diameter(self, diameter: float):
self._beam_block_diameter = max(0.0, float(diameter))
beam_block_diameter = property(
_get_beam_block_diameter, _set_beam_block_diameter, None,
'Beam block coating spot diameter (m).')
def _get_absorption(self) -> float:
return self._absorption
def _set_absorption(self, absorption: float):
self._absorption = min(max(0.0, float(absorption)), 1.0)
absorption = property(_get_absorption, _set_absorption, None,
'Coating absorption as a fraction from 0 to 1.')
def _get_specular(self) -> float:
return self._specular
def _set_specular(self, absorption: float):
self._specular = min(max(0.0, float(absorption)), 1.0)
specular = property(_get_specular, _set_specular, None,
'Fraction of unabsorbed light that is reflected '
'specularly.')
def cl_pack(self, mc: McObject, target: cltypes.Structure = None) \
-> cltypes.Structure:
if target is None:
target_type = self.cl_type(mc)
target = target_type()
target.absorption = self._absorption
target.window_height = self._window_height
target.beam_block_r = self._beam_block_diameter*0.5
target.specular = self._specular
return target
def __str__(self):
return 'Coating(absorption={}, window_height={}, '\
'beam_block_diameter={}, specular={})'.format(
self._absorption, self._window_height,
self._beam_block_diameter, self._specular)
def __repr__(self):
return '{} #{}'.format(self.__str__(), id(self))
exportsrc = os.path.join(USER_TMP_PATH, 'mccyl.h')
make_user_dirs()
cl_device = clinfo.gpus()[0]
cl_device = clinfo.cl.CommandQueue(clinfo.cl.Context([cl_device]))
cl_build_options = [cloptions.FastMath]
pf = mcpf.Hg(0.7)
mie_pf = Mie(1.6, 1.33, 1.0e-6, 500e-9)
pf = mcpf.LutEx(Mie, (1.6, 1.33, 1.0e-6, 500e-9), lutsize=8000)
layers = mclayer.Layers([
mclayer.Layer(d=0.0, n=1.00, mua=0.0, mus=0.0, pf=pf),
mclayer.Layer(d=100.0e-3, n=1.00, mua=0.0, mus=0.0, pf=pf),
mclayer.Layer(d=2.0e-3, n=1.45, mua=0.0, mus=0.0e2, pf=pf),
mclayer.Layer(d=1.0e-3, n=1.33, mua=0.0, mus=0.25e2, pf=pf),
])
source = mcsource.Line()
#source = mcsource.GaussianBeam(50e-6)
#source = mcsource.UniformBeam(1e-6)
#source = mcsource.IsotropicPoint(0.0)
detectors = mcdetector.Detectors(
outer=mcdetector.FiZ(
mcdetector.Axis(-np.pi, np.pi, 1001),
mcdetector.Axis(-1.5e-3, 1.5e-3, 3),
),
specular=mcdetector.Total()
)
surface = mcsurface.SurfaceLayouts(
internal=Coating(beam_block_diameter=5.0e-6, absorption=1.0,
specular=0.0, window_height=1e-3),
outer=Coating(beam_block_diameter=5.0e-6, absorption=1.0,
specular=0.0, window_height=1e-3)
)
mc_obj = Mc(layers, source, detectors, surface=surface,
cl_devices=cl_device, cl_build_options=cl_build_options,
options=[mcoptions.McDebugMode.off, mcoptions.McUseSoft64Atomics.off,
mcoptions.McMinimumPacketWeight.default],
types=mctypes.McDataTypes)
mc_obj.rmax = 1000e-3
out = None
num_packets = 100e6
batch_packets = 100e6
num_packets_done = 0
while (num_packets_done < num_packets):
out = mc_obj.run(batch_packets, out, exportsrc=exportsrc, verbose=True)
num_packets_done += batch_packets
print('Processed {}/{}'.format(num_packets_done, num_packets))
_, _, detectors_res = out
outer = detectors_res.outer
reflectance = outer.reflectance[1, ...]
reflectance[-1] = reflectance[-2]
reflectance[0] = reflectance[1]
#center = int(reflectance.size//2)
#reflectance[center] = 0.5*(reflectance[center - 1] + reflectance[center + 1])
pf_ref = mie_pf(np.cos(outer.fi))
fig = pp.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax1.semilogy(np.rad2deg(outer.fi), reflectance/reflectance.max())
ax2.semilogy(np.rad2deg(outer.fi), reflectance/reflectance.max(), '-r', label='MC')
ax2.semilogy(np.rad2deg(outer.fi), pf_ref/pf_ref.max(), '-g', label='Mie')
ax2.legend()
k = pf_ref.max()/reflectance.max()
ax3.plot(np.rad2deg(outer.fi), (k*reflectance - pf_ref)/pf_ref)
r_sample = mc_obj.layers[1].d*0.5
eps = np.finfo(mc_obj.types.np_float).eps
f = mctrace.Filter(
dir=(np.cos(np.deg2rad(10.0)), 1.0 - eps, (-1.0, 0.0, 0.0)),
r=(r_sample - eps, r_sample + eps, (0.0, 0.0)),
z=(-0.5e-3, 0.5e-3), y=(0.0, np.inf)
)
trace = mctrace.Trace(maxlen=8, filter=f)
mc_obj = Mc(layers, source, trace=trace, surface=surface,
options=[mcoptions.McDebugMode.off])
mc_obj.rmax = 1000e-3
out = None
n = 0
batch_packets = 1000000
while out is None or len(out[0]) < 10:
out = mc_obj.run(batch_packets, out)
n += batch_packets
print('Processed {:,} packets, Collected traces: {:d}'.format(
n, len(out[0])))
trace_res = out[0]
ax = fig.add_subplot(2, 2, 4)
#fig = pp.figure()
#ax = fig.add_subplot(1, 1, 1)
for layer in mc_obj.layers[1:]:
c = pp.Circle((0.0, 0.0), layer.d*0.5, fill=False)
ax.add_artist(c)
ax.set_xlim(-r_sample, r_sample)
ax.set_ylim(-r_sample, r_sample)
trace_res.plot(ax=ax, view='xy')
ax.set_aspect('equal')
pp.show()