diff --git a/thesis/Main.pdf b/thesis/Main.pdf index 682a17d..89b8e17 100644 Binary files a/thesis/Main.pdf and b/thesis/Main.pdf differ diff --git a/thesis/Main.tex b/thesis/Main.tex index 45d860e..dd4afdf 100755 --- a/thesis/Main.tex +++ b/thesis/Main.tex @@ -1027,7 +1027,7 @@ The LeNet-inspired autoencoder can be split into an encoder network (figure~\ref The LeNet-inspired encoder network (see figure~\ref{fig:setup_arch_lenet_encoder}) is a compact convolutional neural network that reduces image data into a lower-dimensional latent space. It consists of two stages of convolution, normalization, non-linear activation, and pooling, followed by a dense layer that defines the latent representation. Conceptually, the convolutional layers learn small filters that detect visual patterns in the input (such as edges or textures). Batch normalization ensures that these learned signals remain numerically stable during training, while a LeakyReLU activation introduces non-linearity, allowing the network to capture more complex relationships. Pooling operations then downsample the feature maps, which reduces the spatial size of the data and emphasizes the most important features. Finally, a dense layer transforms the extracted feature maps into the latent space, which serves as the datas' representation in the reduced dimensionality latent space. -Concretely, the first convolutional layer uses a $3\times 3$ kernel with 8 output channels, corresponding to 8 learnable filters. For input images of size $1\times 2048\times 32$, this produces an intermediate representation of shape $8\times 2048\times 32$, which is reduced to $8\times 1024\times 16$ by a $2\times 2$ pooling layer. The second convolutional layer again applies a $3\times 3$ kernel but outputs 4 channels, followed by another pooling step, resulting in a feature map of shape $4\times 512\times 8$. This feature map is flattened and passed into a fully connected layer. The dimensionality of the output of this layer corresponds to the latent space, whose size is a tunable hyperparameter chosen according to the needs of the application. +%Concretely, the first convolutional layer uses a $5\times 5$ kernel with 8 output channels, corresponding to 8 learnable filters. For input images of size $1\times 2048\times 32$, this produces an intermediate representation of shape $8\times 2048\times 32$, which is reduced to $8\times 1024\times 16$ by a $2\times 2$ pooling layer. The second convolutional layer again applies a $5\times 5$ kernel but outputs 4 channels, followed by another pooling step, resulting in a feature map of shape $4\times 512\times 8$. This feature map is flattened and passed into a fully connected layer. The dimensionality of the output of this layer corresponds to the latent space, whose size is a tunable hyperparameter chosen according to the needs of the application. % Its decoder network (see figure~\ref{fig:setup_arch_lenet_decoder}) is a mirrored version of the encoder, with a dense layer after the latent space and two pairs of 2x2 upsampling and transpose convolution layers which use 4 and 8 input channels respectively with the second one reducing its output to one channel resulting in the 2048x32x1 output dimensionality, equal to the input's, which is required for the autoencoding objective to be possible. diff --git a/thesis/diagrams/arch_ef_decoder.pdf b/thesis/diagrams/arch_ef_decoder.pdf index b1fe401..c0771bd 100644 Binary files a/thesis/diagrams/arch_ef_decoder.pdf and b/thesis/diagrams/arch_ef_decoder.pdf differ diff --git a/thesis/diagrams/arch_ef_encoder.pdf b/thesis/diagrams/arch_ef_encoder.pdf index bbc4d1b..aad9c33 100644 Binary files a/thesis/diagrams/arch_ef_encoder.pdf and b/thesis/diagrams/arch_ef_encoder.pdf differ diff --git a/thesis/diagrams/arch_lenet_decoder.pdf b/thesis/diagrams/arch_lenet_decoder.pdf index 4da78ab..0f05f99 100644 Binary files a/thesis/diagrams/arch_lenet_decoder.pdf and b/thesis/diagrams/arch_lenet_decoder.pdf differ diff --git a/thesis/diagrams/arch_lenet_encoder.pdf b/thesis/diagrams/arch_lenet_encoder.pdf index 48fa4c6..3967660 100644 Binary files a/thesis/diagrams/arch_lenet_encoder.pdf and b/thesis/diagrams/arch_lenet_encoder.pdf differ diff --git a/thesis/third_party/PlotNeuralNet/deepsad/Makefile b/thesis/third_party/PlotNeuralNet/deepsad/Makefile index 1efed0a..8309bfc 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/Makefile +++ b/thesis/third_party/PlotNeuralNet/deepsad/Makefile @@ -1,6 +1,6 @@ # ====== CONFIG ====== # Add names (without extension). Example: NAMES = report thesis notes -NAMES = subter_lenet_arch subter_ef_arch arch_ef_encoder arch_ef_decoder arch_lenet_encoder arch_lenet_decoder +NAMES = arch_ef_encoder arch_ef_decoder arch_lenet_encoder arch_lenet_decoder TEX = $(NAMES:%=%.tex) PDF = $(NAMES:%=%.pdf) @@ -10,7 +10,7 @@ PDF = $(NAMES:%=%.pdf) .PRECIOUS: %.tex # Default: build all PDFs -all: $(PDF) +all: $(PDF) $(TEX) # ====== Rules ====== # Generate {name}.tex from {name}.py diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.py b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.py index cb44f02..ea32b65 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.py +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.py @@ -38,6 +38,7 @@ arch = [ to_fc( "fc3", n_filer="{{8×128×8}}", + zlabeloffset=0.5, offset="(2,0,0)", to="(latent-east)", height=H1, @@ -47,20 +48,22 @@ arch = [ ), to_Conv( "unsqueeze", - s_filer="", + s_filer="{{128×8}}", + zlabeloffset=0.4, n_filer=32, offset="(2,0,0)", to="(fc3-east)", height=H8, depth=D128, width=W32, - caption="unsqueeze", + caption="Unsqueeze", ), # to_connection("latent", "fc3"), # Reshape to 4×8×512 to_UnPool( "up1", offset="(2,0,0)", + n_filer=32, to="(unsqueeze-east)", height=H16, depth=D256, @@ -76,11 +79,12 @@ arch = [ height=H16, depth=D256, width=W1, - caption="deconv1", + caption="Deconv1", ), to_Conv( "dwdeconv2", s_filer="{{256×16}}", + zlabeloffset=0.4, n_filer=32, offset="(0,0,0)", to="(dwdeconv1-east)", @@ -93,6 +97,7 @@ arch = [ "up2", offset="(2,0,0)", to="(dwdeconv2-east)", + n_filer=32, height=H16, depth=D1024, width=W32, @@ -107,11 +112,12 @@ arch = [ height=H16, depth=D1024, width=W1, - caption="deconv2", + caption="Deconv2", ), to_Conv( "dwdeconv4", s_filer="{{1024×16}}", + zlabeloffset=0.17, n_filer=16, offset="(0,0,0)", to="(dwdeconv3-east)", @@ -123,6 +129,7 @@ arch = [ to_UnPool( "up3", offset="(2,0,0)", + n_filer=16, to="(dwdeconv4-east)", height=H32, depth=D2048, @@ -138,11 +145,12 @@ arch = [ height=H32, depth=D2048, width=W1, - caption="deconv3", + caption="Deconv3", ), to_Conv( "dwdeconv6", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=8, offset="(0,0,0)", to="(dwdeconv5-east)", @@ -154,26 +162,28 @@ arch = [ to_Conv( "outconv", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=1, offset="(2,0,0)", to="(dwdeconv6-east)", height=H32, depth=D2048, width=W1, - caption="deconv4", + caption="Deconv4", ), # to_connection("up2", "deconv2"), # Output to_Conv( "out", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=1, offset="(2,0,0)", to="(outconv-east)", height=H32, depth=D2048, width=W1, - caption="output", + caption="Output", ), # to_connection("deconv2", "out"), to_end(), diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.tex b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.tex index 254c5ba..86b682d 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.tex +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_decoder.tex @@ -29,6 +29,7 @@ name=latent, caption=Latent Space, xlabel={{, }}, + zlabeloffset=0.3, zlabel=latent dim, fill=\ConvColor, height=19.200000000000003, @@ -43,6 +44,7 @@ name=fc3, caption=FC, xlabel={{" ","dummy"}}, + zlabeloffset=0.5, zlabel={{8×128×8}}, fill=\FcColor, opacity=0.8, @@ -56,9 +58,10 @@ \pic[shift={(2,0,0)}] at (fc3-east) {Box={ name=unsqueeze, - caption=unsqueeze, + caption=Unsqueeze, xlabel={{32, }}, - zlabel=, + zlabeloffset=0.4, + zlabel={{128×8}}, fill=\ConvColor, height=12, width=8, @@ -73,6 +76,7 @@ caption=, fill=\UnpoolColor, opacity=0.5, + xlabel={{32, }}, height=18, width=8, depth=12 @@ -83,8 +87,9 @@ \pic[shift={(0,0,0)}] at (up1-east) {Box={ name=dwdeconv1, - caption=deconv1, + caption=Deconv1, xlabel={{1, }}, + zlabeloffset=0.3, zlabel=, fill=\ConvColor, height=18, @@ -99,6 +104,7 @@ name=dwdeconv2, caption=, xlabel={{32, }}, + zlabeloffset=0.4, zlabel={{256×16}}, fill=\ConvColor, height=18, @@ -114,6 +120,7 @@ caption=, fill=\UnpoolColor, opacity=0.5, + xlabel={{32, }}, height=18, width=8, depth=36 @@ -124,8 +131,9 @@ \pic[shift={(0,0,0)}] at (up2-east) {Box={ name=dwdeconv3, - caption=deconv2, + caption=Deconv2, xlabel={{1, }}, + zlabeloffset=0.3, zlabel=, fill=\ConvColor, height=18, @@ -140,6 +148,7 @@ name=dwdeconv4, caption=, xlabel={{16, }}, + zlabeloffset=0.17, zlabel={{1024×16}}, fill=\ConvColor, height=18, @@ -155,6 +164,7 @@ caption=, fill=\UnpoolColor, opacity=0.5, + xlabel={{16, }}, height=26, width=4, depth=52 @@ -165,8 +175,9 @@ \pic[shift={(0,0,0)}] at (up3-east) {Box={ name=dwdeconv5, - caption=deconv3, + caption=Deconv3, xlabel={{1, }}, + zlabeloffset=0.3, zlabel=, fill=\ConvColor, height=26, @@ -181,6 +192,7 @@ name=dwdeconv6, caption=, xlabel={{8, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, @@ -193,8 +205,9 @@ \pic[shift={(2,0,0)}] at (dwdeconv6-east) {Box={ name=outconv, - caption=deconv4, + caption=Deconv4, xlabel={{1, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, @@ -207,8 +220,9 @@ \pic[shift={(2,0,0)}] at (outconv-east) {Box={ name=out, - caption=output, + caption=Output, xlabel={{1, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.py b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.py index 1bb7b26..4cbb489 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.py +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.py @@ -25,6 +25,7 @@ arch = [ # Input 1×32×2048 (caption carries H×W; s_filer is numeric) to_Conv( "input", + zlabeloffset=0.2, s_filer="{{2048×32}}", n_filer=1, offset="(0,0,0)", @@ -32,7 +33,7 @@ arch = [ height=H32, depth=D2048, width=W1, - caption="input", + caption="Input", ), # Conv1 (5x5, same): 1->8, 32×2048 to_Conv( @@ -48,20 +49,23 @@ arch = [ ), to_Conv( "dwconv2", - s_filer="", + s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=16, offset="(0,0,0)", to="(dwconv1-east)", height=H32, depth=D2048, width=W16, - caption="conv1", + caption="Conv1", ), # Pool1 2×2: 32×2048 -> 16×1024 # to_connection("input", "conv1"), to_Pool( "pool1", offset="(0,0,0)", + zlabeloffset=0.3, + s_filer="{{512×32}}", to="(dwconv2-east)", height=H32, depth=D512, @@ -82,20 +86,23 @@ arch = [ ), to_Conv( "dwconv4", - s_filer="", n_filer=32, + zlabeloffset=0.3, + s_filer="{{512×32}}", offset="(0,0,0)", to="(dwconv3-east)", height=H32, depth=D512, width=W32, - caption="conv2", + caption="Conv2", ), # Pool2 2×2: 16×1024 -> 8×512 # to_connection("pool1", "conv2"), to_Pool( "pool2", offset="(0,0,0)", + zlabeloffset=0.45, + s_filer="{{256×16}}", to="(dwconv4-east)", height=H16, depth=D256, @@ -105,6 +112,8 @@ arch = [ to_Pool( "pool3", offset="(0,0,0)", + zlabeloffset=0.45, + s_filer="{{128×8}}", to="(pool2-east)", height=H8, depth=D128, @@ -113,19 +122,21 @@ arch = [ ), to_Conv( "squeeze", - s_filer="", n_filer=8, + zlabeloffset=0.45, + s_filer="{{128×8}}", offset="(2,0,0)", to="(pool3-east)", height=H8, depth=D128, width=W8, - caption="squeeze", + caption="Squeeze", ), # FC -> rep_dim (use numeric n_filer) to_fc( "fc1", n_filer="{{8×128×8}}", + zlabeloffset=0.5, offset="(2,0,0)", to="(squeeze-east)", height=H1, diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.tex b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.tex index 7f84007..7e812f2 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.tex +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_ef_encoder.tex @@ -27,8 +27,9 @@ \pic[shift={(0,0,0)}] at (0,0,0) {Box={ name=input, - caption=input, + caption=Input, xlabel={{1, }}, + zlabeloffset=0.2, zlabel={{2048×32}}, fill=\ConvColor, height=26, @@ -43,6 +44,7 @@ name=dwconv1, caption=, xlabel={{1, }}, + zlabeloffset=0.3, zlabel=, fill=\ConvColor, height=26, @@ -55,9 +57,10 @@ \pic[shift={(0,0,0)}] at (dwconv1-east) {Box={ name=dwconv2, - caption=conv1, + caption=Conv1, xlabel={{16, }}, - zlabel=, + zlabeloffset=0.15, + zlabel={{2048×32}}, fill=\ConvColor, height=26, width=4, @@ -69,6 +72,9 @@ \pic[shift={ (0,0,0) }] at (dwconv2-east) {Box={ name=pool1, + xlabel={{, }}, + zlabeloffset=0.3, + zlabel={{512×32}}, caption=, fill=\PoolColor, opacity=0.5, @@ -84,6 +90,7 @@ name=dwconv3, caption=, xlabel={{1, }}, + zlabeloffset=0.3, zlabel=, fill=\ConvColor, height=26, @@ -96,9 +103,10 @@ \pic[shift={(0,0,0)}] at (dwconv3-east) {Box={ name=dwconv4, - caption=conv2, + caption=Conv2, xlabel={{32, }}, - zlabel=, + zlabeloffset=0.3, + zlabel={{512×32}}, fill=\ConvColor, height=26, width=8, @@ -110,6 +118,9 @@ \pic[shift={ (0,0,0) }] at (dwconv4-east) {Box={ name=pool2, + xlabel={{, }}, + zlabeloffset=0.45, + zlabel={{256×16}}, caption=, fill=\PoolColor, opacity=0.5, @@ -123,6 +134,9 @@ \pic[shift={ (0,0,0) }] at (pool2-east) {Box={ name=pool3, + xlabel={{, }}, + zlabeloffset=0.45, + zlabel={{128×8}}, caption=, fill=\PoolColor, opacity=0.5, @@ -136,9 +150,10 @@ \pic[shift={(2,0,0)}] at (pool3-east) {Box={ name=squeeze, - caption=squeeze, + caption=Squeeze, xlabel={{8, }}, - zlabel=, + zlabeloffset=0.45, + zlabel={{128×8}}, fill=\ConvColor, height=12, width=2, @@ -152,6 +167,7 @@ name=fc1, caption=FC, xlabel={{" ","dummy"}}, + zlabeloffset=0.5, zlabel={{8×128×8}}, fill=\FcColor, opacity=0.8, @@ -167,6 +183,7 @@ name=latent, caption=Latent Space, xlabel={{, }}, + zlabeloffset=0.3, zlabel=latent dim, fill=\ConvColor, height=19.200000000000003, diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.py b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.py index 1bfa877..50f1400 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.py +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.py @@ -38,6 +38,7 @@ arch = [ to_fc( "fc3", n_filer="{{4×512×8}}", + zlabeloffset=0.35, offset="(2,0,0)", to="(latent-east)", height=1.3, @@ -49,6 +50,7 @@ arch = [ # Reshape to 4×8×512 to_UnPool( "up1", + n_filer=4, offset="(2,0,0)", to="(fc3-east)", height=H16, @@ -61,6 +63,7 @@ arch = [ to_Conv( "deconv1", s_filer="{{1024×16}}", + zlabeloffset=0.2, n_filer=8, offset="(0,0,0)", to="(up1-east)", @@ -74,6 +77,7 @@ arch = [ to_UnPool( "up2", offset="(2,0,0)", + n_filer=8, to="(deconv1-east)", height=H32, depth=D2048, @@ -85,6 +89,7 @@ arch = [ to_Conv( "deconv2", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=1, offset="(0,0,0)", to="(up2-east)", @@ -98,6 +103,7 @@ arch = [ to_Conv( "out", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=1, offset="(2,0,0)", to="(deconv2-east)", diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.tex b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.tex index 81ffeff..e56e824 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.tex +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_decoder.tex @@ -29,6 +29,7 @@ name=latent, caption=Latent Space, xlabel={{, }}, + zlabeloffset=0.3, zlabel=latent dim, fill=\ConvColor, height=19.200000000000003, @@ -43,6 +44,7 @@ name=fc3, caption=FC, xlabel={{" ","dummy"}}, + zlabeloffset=0.35, zlabel={{4×512×8}}, fill=\FcColor, opacity=0.8, @@ -59,6 +61,7 @@ caption=, fill=\UnpoolColor, opacity=0.5, + xlabel={{4, }}, height=18, width=2, depth=36 @@ -71,6 +74,7 @@ name=deconv1, caption=Deconv1, xlabel={{8, }}, + zlabeloffset=0.2, zlabel={{1024×16}}, fill=\ConvColor, height=18, @@ -86,6 +90,7 @@ caption=, fill=\UnpoolColor, opacity=0.5, + xlabel={{8, }}, height=26, width=4, depth=52 @@ -98,6 +103,7 @@ name=deconv2, caption=Deconv2, xlabel={{1, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, @@ -112,6 +118,7 @@ name=out, caption=Output, xlabel={{1, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.py b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.py index 35ece2c..401d8a3 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.py +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.py @@ -26,6 +26,7 @@ arch = [ to_Conv( "input", s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=1, offset="(0,0,0)", to="(0,0,0)", @@ -37,7 +38,8 @@ arch = [ # Conv1 (5x5, same): 1->8, 32×2048 to_Conv( "conv1", - s_filer="{{1024×16}}", + s_filer="{{2048×32}}", + zlabeloffset=0.15, n_filer=8, offset="(2,0,0)", to="(input-east)", @@ -51,6 +53,8 @@ arch = [ to_Pool( "pool1", offset="(0,0,0)", + s_filer="{{1024×16}}", + zlabeloffset=0.3, to="(conv1-east)", height=H16, depth=D1024, @@ -60,7 +64,8 @@ arch = [ # Conv2 (5x5, same): 8->4, stays 16×1024 to_Conv( "conv2", - s_filer="{{512×8}}", + s_filer="{{1024×16\hspace{2.5em}512×8}}", + zlabeloffset=0.4, n_filer=4, offset="(2,0,0)", to="(pool1-east)", @@ -73,7 +78,9 @@ arch = [ # to_connection("pool1", "conv2"), to_Pool( "pool2", + s_filer="{{}}", offset="(0,0,0)", + zlabeloffset=0.3, to="(conv2-east)", height=H8, depth=D512, @@ -85,6 +92,7 @@ arch = [ "fc1", n_filer="{{4×512×8}}", offset="(2,0,0)", + zlabeloffset=0.5, to="(pool2-east)", height=1.3, depth=D512, diff --git a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.tex b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.tex index c85a2d6..478f525 100644 --- a/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.tex +++ b/thesis/third_party/PlotNeuralNet/deepsad/arch_lenet_encoder.tex @@ -29,6 +29,7 @@ name=input, caption=Input, xlabel={{1, }}, + zlabeloffset=0.15, zlabel={{2048×32}}, fill=\ConvColor, height=26, @@ -43,7 +44,8 @@ name=conv1, caption=Conv1, xlabel={{8, }}, - zlabel={{1024×16}}, + zlabeloffset=0.15, + zlabel={{2048×32}}, fill=\ConvColor, height=26, width=4, @@ -55,6 +57,9 @@ \pic[shift={ (0,0,0) }] at (conv1-east) {Box={ name=pool1, + xlabel={{, }}, + zlabeloffset=0.3, + zlabel={{1024×16}}, caption=, fill=\PoolColor, opacity=0.5, @@ -70,7 +75,8 @@ name=conv2, caption=Conv2, xlabel={{4, }}, - zlabel={{512×8}}, + zlabeloffset=0.4, + zlabel={{1024×16\hspace{2.5em}512×8}}, fill=\ConvColor, height=18, width=2, @@ -82,6 +88,9 @@ \pic[shift={ (0,0,0) }] at (conv2-east) {Box={ name=pool2, + xlabel={{, }}, + zlabeloffset=0.3, + zlabel={{}}, caption=, fill=\PoolColor, opacity=0.5, @@ -97,6 +106,7 @@ name=fc1, caption=FC, xlabel={{" ","dummy"}}, + zlabeloffset=0.5, zlabel={{4×512×8}}, fill=\FcColor, opacity=0.8, @@ -112,6 +122,7 @@ name=latent, caption=Latent Space, xlabel={{, }}, + zlabeloffset=0.3, zlabel=latent dim, fill=\ConvColor, height=19.200000000000003, diff --git a/thesis/third_party/PlotNeuralNet/layers/Box.sty b/thesis/third_party/PlotNeuralNet/layers/Box.sty index 87c40b5..9c9609e 100644 --- a/thesis/third_party/PlotNeuralNet/layers/Box.sty +++ b/thesis/third_party/PlotNeuralNet/layers/Box.sty @@ -42,7 +42,16 @@ \coordinate (a1) at (0 , \y/2 , \z/2); \coordinate (b1) at (0 ,-\y/2 , \z/2); -\tikzstyle{depthlabel}=[pos=0.2,text width=14*\z,text centered,sloped] + +\tikzset{depthlabel/.style={pos=\zlabeloffset, text width=14*\z, text centered, sloped}} + +%\tikzstyle{depthlabel}=[pos=0.3,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel0}=[pos=0,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel1}=[pos=0.1,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel2}=[pos=0.2,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel3}=[pos=0.3,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel4}=[pos=0.4,text width=14*\z,text centered,sloped] +%\tikzstyle{depthlabel5}=[pos=0.5,text width=14*\z,text centered,sloped] \path (c) edge ["\small\zlabel"',depthlabel](f); %depth label \path (b1) edge ["\ylabel",midway] (a1); %height label @@ -92,6 +101,7 @@ scale/.store in=\scale, xlabel/.store in=\boxlabels, ylabel/.store in=\ylabel, zlabel/.store in=\zlabel, +zlabeloffset/.store in=\zlabeloffset, caption/.store in=\caption, name/.store in=\name, fill/.store in=\fill, @@ -105,6 +115,7 @@ scale=.2, xlabel={{"","","","","","","","","",""}}, ylabel=, zlabel=, +zlabeloffset=0.3, caption=, name=, } diff --git a/thesis/third_party/PlotNeuralNet/pycore/tikzeng.py b/thesis/third_party/PlotNeuralNet/pycore/tikzeng.py index 51c17ba..0d67102 100644 --- a/thesis/third_party/PlotNeuralNet/pycore/tikzeng.py +++ b/thesis/third_party/PlotNeuralNet/pycore/tikzeng.py @@ -69,6 +69,7 @@ def to_Conv( s_filer=256, n_filer=64, offset="(0,0,0)", + zlabeloffset=0.3, to="(0,0,0)", width=1, height=40, @@ -92,6 +93,9 @@ def to_Conv( xlabel={{""" + str(n_filer) + """, }}, + zlabeloffset=""" + + str(zlabeloffset) + + """, zlabel=""" + str(s_filer) + """, @@ -168,7 +172,10 @@ def to_ConvConvRelu( # Pool def to_Pool( name, + n_filer="", + s_filer="", offset="(0,0,0)", + zlabeloffset=0.3, to="(0,0,0)", width=1, height=32, @@ -187,6 +194,15 @@ def to_Pool( name=""" + name + """, + xlabel={{""" + + str(n_filer) + + """, }}, + zlabeloffset=""" + + str(zlabeloffset) + + """, + zlabel=""" + + str(s_filer) + + """, caption=""" + caption + r""", @@ -212,6 +228,7 @@ def to_Pool( # unpool4, def to_UnPool( name, + n_filer="", offset="(0,0,0)", to="(0,0,0)", width=1, @@ -238,6 +255,9 @@ def to_UnPool( opacity=""" + str(opacity) + """, + xlabel={{""" + + str(n_filer) + + """, }}, height=""" + str(height) + """, @@ -360,6 +380,7 @@ def to_SoftMax( depth=25, opacity=0.8, caption=" ", + z_label_offset=0, ): return ( r""" @@ -428,6 +449,7 @@ def to_fc( name, n_filer=120, offset="(0,0,0)", + zlabeloffset=0.3, to="(0,0,0)", width=2, height=2, @@ -450,6 +472,9 @@ def to_fc( + caption + """, xlabel={{" ","dummy"}}, + zlabeloffset=""" + + str(zlabeloffset) + + """, zlabel=""" + str(n_filer) + """,