forked from abhi-kumar/Caffe-Python-Basic-Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: try_h5.jl
More file actions
61 lines (50 loc) · 1.98 KB
/
try_h5.jl
File metadata and controls
61 lines (50 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# convert binary into HDF5 data
using HDF5
# Convert raw binary image batches into HDF5 files ("train.hdf5", "test.hdf5").
#
# Each source file is read as `batch_size` fixed-length records of
# 1 + width*height*channels bytes: the first byte of a record is used as the
# label and the remaining bytes as the pixel data. Every output file gets two
# Float32 datasets, "data" (width x height x channels x N) and "label" (1 x N),
# where N = batch_size * number of source files. The per-pixel mean of the
# training images is subtracted from the "data" of both splits.

# (output name, list of input files). "train" must stay ahead of "test":
# the test split is centred with the mean accumulated while converting "train".
datasets = [("train", ["Dataset/Train"]),
            ("test", ["Dataset/Test"])]

const width = 28
const height = 28
const channels = 3
const batch_size = 10000

# Running per-pixel mean over all training images seen so far.
mean_model = zeros(Float32, width, height, channels, 1)

for (key, sources) in datasets
    h5open("$key.hdf5", "w") do h5
        dset_data = d_create(h5, "data", datatype(Float32),
            dataspace(width, height, channels, batch_size * length(sources)))
        dset_label = d_create(h5, "label", datatype(Float32),
            dataspace(1, batch_size * length(sources)))

        for n = 1:length(sources)
            open("$(sources[n])") do f
                println("Processing $(sources[n])...")

                record_len = 1 + width*height*channels
                nbytes = record_len * batch_size
                mat = readbytes(f, nbytes)
                # readbytes returns fewer bytes on a short file; fail with a
                # clear message instead of an opaque reshape error.
                length(mat) == nbytes ||
                    error("$(sources[n]): expected $nbytes bytes, got $(length(mat))")
                mat = reshape(mat, record_len, batch_size)

                # random shuffle within batch
                rp = randperm(batch_size)

                label = convert(Array{Float32}, mat[1, rp])
                # If I divide by 256 as in the MNIST example, then
                # training on the given DNN gives me random
                # performance: objective function not changing,
                # and test performance is always 10%...
                # The same results could be observed when
                # running Caffe, as our HDF5 dataset is
                # compatible with Caffe.
                img = convert(Array{Float32}, mat[2:end, rp])
                img = reshape(img, width, height, channels, batch_size)

                if key == "train"
                    # only accumulate mean from the training data
                    global mean_model
                    # Incremental mean: the previous mean already covers
                    # (n-1)*batch_size images, so it must be weighted by that
                    # count (not by a single batch) before adding this batch.
                    mean_model = ((n-1)*batch_size*mean_model + sum(img, 4)) /
                                 (n*batch_size)
                end

                index = (n-1)*batch_size+1:n*batch_size
                dset_data[:,:,:,index] = img
                dset_label[:,index] = label
            end
        end

        # but apply mean subtraction for both training and testing data
        println("Subtracting the mean...")
        for n = 1:length(sources)
            index = (n-1)*batch_size+1:n*batch_size
            dset_data[:,:,:,index] = dset_data[:,:,:,index] .- mean_model
        end
    end
end