Skip to content

Commit

Permalink
Merge pull request #1190 from dogvane/master
Browse files Browse the repository at this point in the history
Fix the bug where GPU memory is exhausted when training on GPU in Keras mode. (解决keras模式下,使用GPU训练时会爆显存的bug。)
  • Loading branch information
Oceania2018 authored Oct 8, 2023
2 parents 43c3705 + baf620a commit 090dc1e
Show file tree
Hide file tree
Showing 20 changed files with 983 additions and 20 deletions.
Binary file added data/img001.bmp
Binary file not shown.
7 changes: 7 additions & 0 deletions src/TensorFlowNET.Core/APIs/tf.image.cs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,13 @@ public Tensor decode_image(Tensor contents, int channels = 0, TF_DataType dtype
=> image_ops_impl.decode_image(contents, channels: channels, dtype: dtype,
name: name, expand_animations: expand_animations);

/// <summary>
/// PNG-encode an image tensor. Thin passthrough to
/// <c>image_ops_impl.encode_png</c>.
/// </summary>
/// <param name="contents">Image tensor to encode. Assumes shape/dtype accepted by the EncodePng op (presumably [height, width, channels], uint8/uint16) — TODO confirm.</param>
/// <param name="name">Optional name for the operation.</param>
/// <returns>A scalar string tensor holding the PNG-encoded bytes.</returns>
public Tensor encode_png(Tensor contents, string name = null)
=> image_ops_impl.encode_png(contents, name: name);

/// <summary>
/// JPEG-encode an image tensor. Thin passthrough to
/// <c>image_ops_impl.encode_jpeg</c>.
/// </summary>
/// <param name="contents">Image tensor to encode. Assumes a 3-D uint8 image — TODO confirm against the EncodeJpeg op contract.</param>
/// <param name="name">Optional name for the operation.</param>
/// <returns>A scalar string tensor holding the JPEG-encoded bytes.</returns>
public Tensor encode_jpeg(Tensor contents, string name = null)
=> image_ops_impl.encode_jpeg(contents, name: name);


/// <summary>
/// Convenience function to check if the 'contents' encodes a JPEG image.
/// </summary>
Expand Down
7 changes: 7 additions & 0 deletions src/TensorFlowNET.Core/APIs/tf.io.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.

using System.Collections.Generic;
using Tensorflow.IO;
using Tensorflow.Operations;

namespace Tensorflow
{
Expand Down Expand Up @@ -46,6 +47,12 @@ public Operation save_v2(Tensor prefix, string[] tensor_names,
public Tensor[] restore_v2(Tensor prefix, string[] tensor_names,
string[] shape_and_slices, TF_DataType[] dtypes, string name = null)
=> ops.restore_v2(prefix, tensor_names, shape_and_slices, dtypes, name: name);

/// <summary>
/// Write <paramref name="contents"/> to the file at <paramref name="filename"/>,
/// creating the file (and overwriting it) via the TensorFlow WriteFile op.
/// Convenience overload that converts the path string to a string tensor.
/// </summary>
/// <param name="filename">Path of the file to write.</param>
/// <param name="contents">Scalar string tensor with the bytes to write (e.g. output of encode_png/encode_jpeg).</param>
/// <param name="name">Optional name for the operation.</param>
// NOTE(review): parameter renamed from the misspelled "conentes" to "contents";
// safe here because these overloads are introduced in this same commit.
public Operation write_file(string filename, Tensor contents, string name = null)
    => write_file(Tensorflow.ops.convert_to_tensor(filename, TF_DataType.TF_STRING), contents, name);

/// <summary>
/// Write <paramref name="contents"/> to the file named by the scalar string
/// tensor <paramref name="filename"/> via the TensorFlow WriteFile op.
/// </summary>
/// <param name="filename">Scalar string tensor holding the target path.</param>
/// <param name="contents">Scalar string tensor with the bytes to write.</param>
/// <param name="name">Optional name for the operation.</param>
public Operation write_file(Tensor filename, Tensor contents, string name = null)
    => gen_ops.write_file(filename, contents, name);
}

public GFile gfile = new GFile();
Expand Down
5 changes: 5 additions & 0 deletions src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ BackwardFunction GetGradientFunction(string op_name,
Tensor[] op_outputs)
=> (out_grads, unneeded_gradients) =>
{
if(!ops.gradientFunctions.ContainsKey(op_name))
{
throw new Exception($"gradientFunctions not find op_name: {op_name}");
}

if (ops.gradientFunctions[op_name] == null)
return new Tensor[op_inputs.Length];

Expand Down
31 changes: 31 additions & 0 deletions src/TensorFlowNET.Core/Gradients/nn_grad.cs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,37 @@ public static Tensor[] _Conv2DGrad(Operation op, Tensor[] grads)
};
}

/// <summary>
/// Gradient function for DepthwiseConv2dNative (not plain Conv2D).
/// Returns the gradients with respect to the input and the filter.
/// </summary>
/// <param name="op">The forward DepthwiseConv2dNative op; attrs (dilations,
/// strides, padding, explicit_paddings, data_format) are read back from it.</param>
/// <param name="grads">Incoming gradients; grads[0] is dL/d(output).</param>
/// <returns>[grad_wrt_input, grad_wrt_filter], matching op.inputs order.</returns>
[RegisterGradient("DepthwiseConv2dNative")]
public static Tensor[] _DepthwiseConv2DGrad(Operation op, Tensor[] grads)
{
var dilations = op.get_attr_list<int>("dilations");
var strides = op.get_attr_list<int>("strides");
var padding = op.get_attr<string>("padding");
var explicit_paddings = op.get_attr_list<int>("explicit_paddings");
var data_format = op.get_attr<string>("data_format");
// Dynamic shapes of the forward input and filter, needed by the backprop ops.
var shape = gen_array_ops.shape_n(new Tensor[] { op.inputs[0], op.inputs[1] });

return new Tensor[]
{
// dL/d(input): uses input shape, forward filter, and upstream gradient.
gen_nn_ops.depthwise_conv2d_native_backprop_input(
shape[0], op.inputs[1], grads[0],
strides, padding, explicit_paddings,
dilations: dilations,
data_format: data_format),
// dL/d(filter): uses forward input, filter shape, and upstream gradient.
gen_nn_ops.depthwise_conv2d_native_backprop_filter(op.inputs[0], shape[1], grads[0],
strides, padding,
dilations: dilations,
explicit_paddings: explicit_paddings,
data_format: data_format)
};
}

[RegisterGradient("FusedBatchNorm")]
public static Tensor[] _FusedBatchNormGrad(Operation op, Tensor[] grads)
=> _BaseFusedBatchNormGrad(op, 0, grads);
Expand Down
23 changes: 23 additions & 0 deletions src/TensorFlowNET.Core/Keras/Engine/IModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ ICallback fit(NDArray x, NDArray y,
List<ICallback> callbacks = null,
float validation_split = 0f,
ValidationDataPack validation_data = null,
int validation_step = 10,
bool shuffle = true,
Dictionary<int, float> class_weight = null,
NDArray sample_weight = null,
Expand All @@ -47,6 +48,20 @@ ICallback fit(IEnumerable<NDArray> x, NDArray y,
int workers = 1,
bool use_multiprocessing = false);

/// <summary>
/// Train the model on a <see cref="IDatasetV2"/>, optionally validating
/// against a second dataset.
/// </summary>
/// <param name="dataset">Training data as a tf.data dataset.</param>
/// <param name="validation_data">Optional dataset used for validation.</param>
/// <param name="validation_step">Run validation every <c>validation_step</c> intervals (translated from the original Chinese comment; epochs vs. steps — TODO confirm against Model.Fit).</param>
/// <returns>A callback (history) object recording training progress.</returns>
public ICallback fit(IDatasetV2 dataset,
int batch_size = -1,
int epochs = 1,
int verbose = 1,
List<ICallback> callbacks = null,
IDatasetV2 validation_data = null,
int validation_step = 10, // run validation every `validation_step` intervals — TODO confirm unit
bool shuffle = true,
Dictionary<int, float> class_weight = null,
int initial_epoch = 0,
int max_queue_size = 10,
int workers = 1,
bool use_multiprocessing = false);

void save(string filepath,
bool overwrite = true,
bool include_optimizer = true,
Expand Down Expand Up @@ -85,6 +100,14 @@ Tensors predict(Tensors x,
int workers = 1,
bool use_multiprocessing = false);

/// <summary>
/// Generate output predictions for the samples in a tf.data dataset.
/// </summary>
/// <param name="dataset">Input samples as a <see cref="IDatasetV2"/>.</param>
/// <param name="steps">Number of batches to draw; -1 presumably means "until the dataset is exhausted" — TODO confirm against Model.Predict.</param>
/// <returns>Model outputs as <see cref="Tensors"/>.</returns>
public Tensors predict(IDatasetV2 dataset,
int batch_size = -1,
int verbose = 0,
int steps = -1,
int max_queue_size = 10,
int workers = 1,
bool use_multiprocessing = false);

void summary(int line_length = -1, float[] positions = null);

IKerasConfig get_config();
Expand Down
19 changes: 19 additions & 0 deletions src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ public ILayer Conv1D(int filters,
string kernel_initializer = "glorot_uniform",
string bias_initializer = "zeros");

/// <summary>
/// Minimal 2-D convolution layer overload: filters, kernel size, strides and
/// padding only; all other options take the implementation's defaults.
/// </summary>
public ILayer Conv2D(int filters,
Shape kernel_size = null,
Shape strides = null,
string padding = "valid"
);

public ILayer Conv2D(int filters,
Shape kernel_size = null,
Shape strides = null,
Expand Down Expand Up @@ -95,6 +101,19 @@ public ILayer Conv2D(int filters,
bool use_bias = true,
string kernel_initializer = "glorot_uniform",
string bias_initializer = "zeros");
/// <summary>
/// Depthwise 2-D convolution layer: each input channel is convolved with its
/// own set of <paramref name="depth_multiplier"/> filters.
/// </summary>
/// <param name="depth_multiplier">Number of depthwise filters per input channel.</param>
/// <param name="use_bias">NOTE(review): defaults to <c>false</c> here, unlike the
/// Conv2D overloads in this interface which default to <c>true</c> — confirm intentional.</param>
/// <param name="depthwise_initializer">Initializer for the depthwise kernel.</param>
public ILayer DepthwiseConv2D(Shape kernel_size = null,
Shape strides = null,
string padding = "valid",
string data_format = null,
Shape dilation_rate = null,
int groups = 1,
int depth_multiplier = 1,
string activation = null,
bool use_bias = false,
string kernel_initializer = "glorot_uniform",
string bias_initializer = "zeros",
string depthwise_initializer = "glorot_uniform"
);

public ILayer Dense(int units);
public ILayer Dense(int units,
Expand Down
43 changes: 32 additions & 11 deletions src/TensorFlowNET.Core/Operations/image_ops_impl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat
{
throw new ValueError("\'image\' must be fully defined.");
}
var dims = image_shape["-3:"];
var dims = new Shape(new[] {
image_shape.dims[image_shape.dims.Length - 3],
image_shape.dims[image_shape.dims.Length - 2],
image_shape.dims[image_shape.dims.Length - 1]});
foreach (var dim in dims.dims)
{
if (dim == 0)
Expand All @@ -112,16 +115,18 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat
}

var image_shape_last_three_elements = new Shape(new[] {
image_shape.dims[image_shape.dims.Length - 1],
image_shape.dims[image_shape.dims.Length - 3],
image_shape.dims[image_shape.dims.Length - 2],
image_shape.dims[image_shape.dims.Length - 3]});
image_shape.dims[image_shape.dims.Length - 1]});
if (!image_shape_last_three_elements.IsFullyDefined)
{
Tensor image_shape_ = array_ops.shape(image);
var image_shape_return = tf.constant(new[] {
image_shape_.dims[image_shape.dims.Length - 1],
image_shape_.dims[image_shape.dims.Length - 2],
image_shape_.dims[image_shape.dims.Length - 3]});
var image_shape_return = tf.slice(image_shape_, new[] { Math.Max(image_shape.dims.Length - 3, 0) }, new[] { 3 });

//var image_shape_return = tf.constant(new[] {
// image_shape_.dims[image_shape_.dims.Length - 3],
// image_shape_.dims[image_shape_.dims.Length - 2],
// image_shape_.dims[image_shape_.dims.Length - 1]});

return new Operation[] {
check_ops.assert_positive(
Expand Down Expand Up @@ -209,10 +214,10 @@ internal static Tensor _random_flip(Tensor image, int flip_index, int seed, stri
}

public static Tensor flip_left_right(Tensor image)
=> _flip(image, 0, "flip_left_right");
=> _flip(image, 1, "flip_left_right");

public static Tensor flip_up_down(Tensor image)
=> _flip(image, 1, "flip_up_down");
=> _flip(image, 0, "flip_up_down");

internal static Tensor _flip(Tensor image, int flip_index, string scope_name)
{
Expand All @@ -223,11 +228,11 @@ internal static Tensor _flip(Tensor image, int flip_index, string scope_name)
Shape shape = image.shape;
if (shape.ndim == 3 || shape.ndim == Unknown)
{
return fix_image_flip_shape(image, gen_array_ops.reverse(image, ops.convert_to_tensor(new int[] { flip_index })));
return fix_image_flip_shape(image, gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new int[] { flip_index })));
}
else if (shape.ndim == 4)
{
return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { (flip_index + 1) % 2 }));
return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { flip_index + 1 }));
}
else
{
Expand Down Expand Up @@ -2047,6 +2052,22 @@ internal static (Tensor, Tensor) non_max_suppression_padded_v1(Tensor boxes, Ten
});
}

/// <summary>
/// JPEG-encode an image tensor via the generated EncodeJpeg op, inside an
/// "encode_jpeg" name scope. No quality/format options are exposed; the op's
/// defaults apply.
/// </summary>
/// <param name="contents">Image tensor to encode (presumably 3-D uint8 — TODO confirm op contract).</param>
/// <param name="name">Optional name for the operation.</param>
/// <returns>Scalar string tensor of JPEG bytes.</returns>
public static Tensor encode_jpeg(Tensor contents, string name = null)
{
return tf_with(ops.name_scope(name, "encode_jpeg"), scope =>
{
return gen_ops.encode_jpeg(contents, name:name);
});
}

/// <summary>
/// PNG-encode an image tensor via the generated EncodePng op, inside an
/// "encode_png" name scope. Compression level is left at the op default.
/// </summary>
/// <param name="contents">Image tensor to encode (presumably [height, width, channels], uint8/uint16 — TODO confirm op contract).</param>
/// <param name="name">Optional name for the operation.</param>
/// <returns>Scalar string tensor of PNG bytes.</returns>
public static Tensor encode_png(Tensor contents, string name = null)
{
return tf_with(ops.name_scope(name, "encode_png"), scope =>
{
return gen_ops.encode_png(contents, name: name);
});
}

public static Tensor is_jpeg(Tensor contents, string name = null)
{
return tf_with(ops.name_scope(name, "is_jpeg"), scope =>
Expand Down
5 changes: 4 additions & 1 deletion src/TensorFlowNET.Core/Tensors/tensor_util.cs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
case sbyte val:
tensor_proto.IntVal.AddRange(new[] { (int)val });
break;
case byte val:
tensor_proto.IntVal.AddRange(new[] { (int)val });
break;
case int val:
tensor_proto.IntVal.AddRange(new[] { val });
break;
Expand All @@ -262,7 +265,7 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
tensor_proto.DoubleVal.AddRange(new[] { val });
break;
default:
throw new Exception("make_tensor_proto Not Implemented");
throw new Exception($"make_tensor_proto Not Implemented {values.GetType().Name}");
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ Dictionary<string, float> evaluate(DataHandler data_handler, CallbackList callba
var end_step = step + data_handler.StepIncrement;
if (!is_val)
callbacks.on_test_batch_end(end_step, logs);
GC.Collect();
}
}
callbacks.on_test_end(logs);
Expand Down Expand Up @@ -167,7 +168,9 @@ Dictionary<string, float> test_step_multi_inputs_function(DataHandler data_handl
Dictionary<string, float> test_step(DataHandler data_handler, Tensors x, Tensors y)
{
(x,y) = data_handler.DataAdapter.Expand1d(x, y);

var y_pred = Apply(x, training: false);

var loss = compiled_loss.Call(y, y_pred);
compiled_metrics.update_state(y, y_pred);
return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2);
Expand Down
12 changes: 6 additions & 6 deletions src/TensorFlowNET.Keras/Engine/Model.Fit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public ICallback fit(NDArray x, NDArray y,
List<ICallback> callbacks = null,
float validation_split = 0f,
ValidationDataPack validation_data = null,
int validation_step = 10,
bool shuffle = true,
Dictionary<int, float> class_weight = null,
NDArray sample_weight = null,
Expand Down Expand Up @@ -147,7 +148,7 @@ public ICallback fit(IEnumerable<NDArray> x, NDArray y,
}
}

public History fit(IDatasetV2 dataset,
public ICallback fit(IDatasetV2 dataset,
int batch_size = -1,
int epochs = 1,
int verbose = 1,
Expand All @@ -156,7 +157,6 @@ public History fit(IDatasetV2 dataset,
int validation_step = 10,
bool shuffle = true,
Dictionary<int, float> class_weight = null,
NDArray sample_weight = null,
int initial_epoch = 0,
int max_queue_size = 10,
int workers = 1,
Expand All @@ -170,7 +170,7 @@ public History fit(IDatasetV2 dataset,
InitialEpoch = initial_epoch,
Epochs = epochs,
Shuffle = shuffle,
SampleWeight = sample_weight,
ClassWeight = class_weight,
MaxQueueSize = max_queue_size,
Workers = workers,
UseMultiprocessing = use_multiprocessing,
Expand Down Expand Up @@ -218,6 +218,7 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i
var end_step = step + data_handler.StepIncrement;
End_step = end_step;
callbacks.on_train_batch_end(end_step, logs);
GC.Collect();
}

if (validation_data != null)
Expand All @@ -233,11 +234,10 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i
callbacks.on_train_batch_end(End_step, logs);
}

GC.Collect();

callbacks.on_epoch_end(epoch, logs);

GC.Collect();
GC.WaitForPendingFinalizers();
if (stop_training)
{
break;
Expand Down Expand Up @@ -282,6 +282,7 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICal
var end_step = step + data_handler.StepIncrement;
End_step = end_step;
callbacks.on_train_batch_end(end_step, logs);
GC.Collect();
}

if (validation_data != null)
Expand All @@ -301,7 +302,6 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICal
callbacks.on_epoch_end(epoch, logs);

GC.Collect();
GC.WaitForPendingFinalizers();
if (stop_training)
{
break;
Expand Down
2 changes: 1 addition & 1 deletion src/TensorFlowNET.Keras/Engine/Model.Predict.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@ Tensors PredictInternal(DataHandler data_handler, int verbose)
for (int i = 0; i < batch_outputs.Length; i++)
batch_outputs[i] = tf.concat(new Tensor[] { batch_outputs[i], tmp_batch_outputs[i] }, axis: 0);
}

var end_step = step + data_handler.StepIncrement;
callbacks.on_predict_batch_end(end_step, new Dictionary<string, Tensors> { { "outputs", batch_outputs } });
GC.Collect();
}
}

Expand Down
Loading

0 comments on commit 090dc1e

Please sign in to comment.