提交 88f771ca 作者：notoraptor

Update specific tests for runtime algorithms.

上级 771a39c4
...@@ -832,25 +832,31 @@ class BaseTestDnnConv(object): ...@@ -832,25 +832,31 @@ class BaseTestDnnConv(object):
def run_fwd_runtime_algorithm(algo): def run_fwd_runtime_algorithm(algo):
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = theano.tensor.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = theano.tensor.TensorType(dtype, _broadcastable)()
inputs /= 10 # Scale down the input values to prevent very large absolute errors
filters /= 10 # due to float rounding
conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype, lower_inputs = inputs / 10
lower_filters = filters / 10
conv = dnn_conv(img=lower_inputs, kerns=lower_filters, algo=algo, precision=dtype,
subsample=unit_shape, dilation=unit_shape) subsample=unit_shape, dilation=unit_shape)
f = theano.function([inputs, filters], conv, mode=mode_with_gpu) f = theano.function([inputs, filters], conv, mode=mode_with_gpu)
if self.ndim == 3: if self.ndim == 3:
flipped_filters = filters[:, :, ::-1, ::-1, ::-1] flipped_filters = lower_filters[:, :, ::-1, ::-1, ::-1]
else: else:
flipped_filters = filters[:, :, ::-1, ::-1] flipped_filters = lower_filters[:, :, ::-1, ::-1]
conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters) conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(lower_inputs), flipped_filters)
f_ref = theano.function([inputs, filters], conv_ref, mode='FAST_RUN') f_ref = theano.function([inputs, filters], conv_ref, mode='FAST_RUN')
for ntimes, (inputs_shape, filters_shape) in self.runtime_shapes: runtime_shapes = self.runtime_shapes
if algo in ('time_once', 'guess_once'):
runtime_shapes = [list(runtime_shapes[0])]
runtime_shapes[0][0] = 5
for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
print('Shapes:', inputs_shape, filters_shape) print('Shapes:', inputs_shape, filters_shape)
for i in range(ntimes): for i in range(ntimes):
inputs_val = np.random.random(inputs_shape).astype(dtype) inputs_val = np.random.random(inputs_shape).astype(dtype)
filters_val = np.random.random(filters_shape).astype(dtype) filters_val = np.random.random(filters_shape).astype(dtype)
gpu_res = f(inputs_val, filters_val) gpu_res = f(inputs_val, filters_val)
cpu_res = f_ref(inputs_val, filters_val) cpu_res = f_ref(inputs_val, filters_val)
utt.assert_allclose(cpu_res, gpu_res) utt.assert_allclose(cpu_res, np.asarray(gpu_res))
for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME: for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
yield (run_fwd_runtime_algorithm, algo) yield (run_fwd_runtime_algorithm, algo)
...@@ -864,8 +870,6 @@ class BaseTestDnnConv(object): ...@@ -864,8 +870,6 @@ class BaseTestDnnConv(object):
theano.config.dnn.conv.algo_bwd_data = algo theano.config.dnn.conv.algo_bwd_data = algo
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = theano.tensor.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = theano.tensor.TensorType(dtype, _broadcastable)()
inputs /= 10
filters /= 10
conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype, conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype,
subsample=unit_shape, dilation=unit_shape) subsample=unit_shape, dilation=unit_shape)
grad_i = theano.tensor.grad(conv.sum(), [inputs]) grad_i = theano.tensor.grad(conv.sum(), [inputs])
...@@ -880,14 +884,18 @@ class BaseTestDnnConv(object): ...@@ -880,14 +884,18 @@ class BaseTestDnnConv(object):
conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters) conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters)
grad_i_ref = theano.tensor.grad(conv_ref.sum(), [inputs]) grad_i_ref = theano.tensor.grad(conv_ref.sum(), [inputs])
f_ref = theano.function([inputs, filters], grad_i_ref, mode='FAST_RUN') f_ref = theano.function([inputs, filters], grad_i_ref, mode='FAST_RUN')
for ntimes, (inputs_shape, filters_shape) in self.runtime_shapes: runtime_shapes = self.runtime_shapes
if algo in ('time_once', 'guess_once'):
runtime_shapes = [list(runtime_shapes[0])]
runtime_shapes[0][0] = 5
for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
print('Shapes:', inputs_shape, filters_shape) print('Shapes:', inputs_shape, filters_shape)
for i in range(ntimes): for i in range(ntimes):
inputs_val = np.random.random(inputs_shape).astype(dtype) inputs_val = np.random.random(inputs_shape).astype(dtype)
filters_val = np.random.random(filters_shape).astype(dtype) filters_val = np.random.random(filters_shape).astype(dtype)
gpu_res = f(inputs_val, filters_val) gpu_res = f(inputs_val, filters_val)
cpu_res = f_ref(inputs_val, filters_val) cpu_res = f_ref(inputs_val, filters_val)
utt.assert_allclose(cpu_res, gpu_res) utt.assert_allclose(cpu_res, np.asarray(gpu_res))
for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME: for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
yield (run_gradinput_runtime_algorithm, algo) yield (run_gradinput_runtime_algorithm, algo)
...@@ -901,8 +909,6 @@ class BaseTestDnnConv(object): ...@@ -901,8 +909,6 @@ class BaseTestDnnConv(object):
theano.config.dnn.conv.algo_bwd_filter = algo theano.config.dnn.conv.algo_bwd_filter = algo
inputs = theano.tensor.TensorType(dtype, _broadcastable)() inputs = theano.tensor.TensorType(dtype, _broadcastable)()
filters = theano.tensor.TensorType(dtype, _broadcastable)() filters = theano.tensor.TensorType(dtype, _broadcastable)()
inputs /= 10
filters /= 10
conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype, conv = dnn_conv(img=inputs, kerns=filters, algo=algo, precision=dtype,
subsample=unit_shape, dilation=unit_shape) subsample=unit_shape, dilation=unit_shape)
grad_w = theano.tensor.grad(conv.sum(), [filters]) grad_w = theano.tensor.grad(conv.sum(), [filters])
...@@ -917,14 +923,18 @@ class BaseTestDnnConv(object): ...@@ -917,14 +923,18 @@ class BaseTestDnnConv(object):
conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters) conv_ref = self.cpu_conv_class(subsample=unit_shape)(ref_cast(inputs), flipped_filters)
grad_w_ref = theano.tensor.grad(conv_ref.sum(), [filters]) grad_w_ref = theano.tensor.grad(conv_ref.sum(), [filters])
f_ref = theano.function([inputs, filters], grad_w_ref, mode='FAST_RUN') f_ref = theano.function([inputs, filters], grad_w_ref, mode='FAST_RUN')
for ntimes, (inputs_shape, filters_shape) in self.runtime_shapes: runtime_shapes = self.runtime_shapes
if algo in ('time_once', 'guess_once'):
runtime_shapes = [list(runtime_shapes[0])]
runtime_shapes[0][0] = 5
for ntimes, (inputs_shape, filters_shape) in runtime_shapes:
print('Shapes:', inputs_shape, filters_shape) print('Shapes:', inputs_shape, filters_shape)
for i in range(ntimes): for i in range(ntimes):
inputs_val = np.random.random(inputs_shape).astype(dtype) inputs_val = np.random.random(inputs_shape).astype(dtype)
filters_val = np.random.random(filters_shape).astype(dtype) filters_val = np.random.random(filters_shape).astype(dtype)
gpu_res = f(inputs_val, filters_val) gpu_res = f(inputs_val, filters_val)
cpu_res = f_ref(inputs_val, filters_val) cpu_res = f_ref(inputs_val, filters_val)
utt.assert_allclose(cpu_res, gpu_res) utt.assert_allclose(cpu_res, np.asarray(gpu_res))
for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME: for algo in SUPPORTED_DNN_CONV_ALGO_RUNTIME:
yield (run_gradweight_runtime_algorithm, algo) yield (run_gradweight_runtime_algorithm, algo)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论