import os
os.environ['KAGGLE_USERNAME'] = 'yunseopsong'
os.environ['KAGGLE_KEY'] = '****************'  # redacted -- never publish a real Kaggle API key
!kaggle datasets download -d thedagger/pokemon-generation-one
Dataset URL: https://www.kaggle.com/datasets/thedagger/pokemon-generation-one
License(s): GPL-2.0
pokemon-generation-one.zip: Skipping, found more recently modified local copy (use --force to force download)
!unzip -q /content/pokemon-generation-one.zip
!kaggle datasets download -d hlrhegemony/pokemon-image-dataset
Dataset URL: https://www.kaggle.com/datasets/hlrhegemony/pokemon-image-dataset
License(s): CC0-1.0
pokemon-image-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)
!unzip -q /content/pokemon-image-dataset.zip
!mv dataset train
!rm -rf train/dataset
!mv images validation
train_labels = os.listdir('train')
print(train_labels)
print(len(train_labels))
['Arcanine', 'Gyarados', 'Ponyta', 'Jigglypuff', 'Gloom', 'Lickitung', 'Hitmonlee', 'MrMime', 'Chansey', 'Weezing', 'Arbok', 'Kadabra', 'Exeggutor', 'Rhydon', 'Hypno', 'Oddish', 'Scyther', 'Abra', 'Graveler', 'Starmie', 'Ditto', 'Zapdos', 'Golem', 'Pinsir', 'Rhyhorn', 'Jolteon', 'Gengar', 'Articuno', 'Venomoth', 'Nidoqueen', 'Seaking', 'Onix', 'Persian', 'Aerodactyl', 'Tentacool', 'Charizard', 'Snorlax', 'Drowzee', 'Sandslash', 'Hitmonchan', 'Growlithe', 'Magneton', 'Flareon', 'Farfetchd', 'Clefable', 'Clefairy', 'Kabutops', 'Poliwhirl', 'Charmeleon', 'Spearow', 'Venonat', 'Slowpoke', 'Wigglytuff', 'Omanyte', 'Lapras', 'Wartortle', 'Cloyster', 'Gastly', 'Machamp', 'Nidoking', 'Haunter', 'Magmar', 'Krabby', 'Mewtwo', 'Porygon', 'Bellsprout', 'Pidgeotto', 'Geodude', 'Meowth', 'Kabuto', 'Golbat', 'Weedle', 'Seel', 'Dodrio', 'Dewgong', 'Butterfree', 'Pidgeot', 'Psyduck', 'Jynx', 'Victreebel', 'Slowbro', 'Grimer', 'Mew', 'Mankey', 'Shellder', 'Raticate', 'Fearow', 'Dragonair', 'Marowak', 'Parasect', 'Metapod', 'Venusaur', 'Muk', 'Tauros', 'Eevee', 'Exeggcute', 'Raichu', 'Voltorb', 'Magikarp', 'Pidgey', 'Pikachu', 'Poliwrath', 'Staryu', 'Tangela', 'Vileplume', 'Machoke', 'Koffing', 'Doduo', 'Kakuna', 'Electrode', 'Machop', 'Goldeen', 'Blastoise', 'Primeape', 'Alakazam', 'Seadra', 'Diglett', 'Tentacruel', 'Nidorina', 'Weepinbell', 'Ninetales', 'Vulpix', 'Rattata', 'Rapidash', 'Charmander', 'Golduck', 'Kingler', 'Moltres', 'Sandshrew', 'Caterpie', 'Dugtrio', 'Electabuzz', 'Nidorino', 'Squirtle', 'Kangaskhan', 'Vaporeon', 'Zubat', 'Magnemite', 'Paras', 'Dratini', 'Ekans', 'Ivysaur', 'Horsea', 'Beedrill', 'Omastar', 'Cubone', 'Dragonite', 'Bulbasaur', 'Poliwag'] 149
val_labels = os.listdir('validation')
print(val_labels)
print(len(val_labels))
['Arcanine', 'Gyarados', 'Ponyta', 'Jigglypuff', 'Gloom', 'Lickitung', 'Hitmonlee', 'MrMime', 'Chansey', 'Weezing', 'Arbok', 'Kadabra', 'Exeggutor', 'Rhydon', 'Hypno', 'Oddish', 'Scyther', 'Abra', 'Graveler', 'Starmie', 'Ditto', 'Zapdos', 'Golem', 'Pinsir', 'Rhyhorn', 'Jolteon', 'Gengar', 'Articuno', 'Venomoth', 'Nidoqueen', 'Seaking', 'Onix', 'Persian', 'Aerodactyl', 'Tentacool', 'Charizard', 'Snorlax', 'Drowzee', 'Sandslash', 'Hitmonchan', 'Growlithe', 'Magneton', 'Flareon', 'Farfetchd', 'Clefable', 'Clefairy', 'Kabutops', 'Poliwhirl', 'Charmeleon', 'Spearow', 'Venonat', 'Slowpoke', 'Wigglytuff', 'Omanyte', 'Lapras', 'images', 'Wartortle', 'Cloyster', 'Gastly', 'Machamp', 'Nidoking', 'Haunter', 'Magmar', 'Krabby', 'Mewtwo', 'Porygon', 'Bellsprout', 'Pidgeotto', 'Geodude', 'Meowth', 'Kabuto', 'Golbat', 'Weedle', 'Seel', 'Dodrio', 'Dewgong', 'Butterfree', 'Pidgeot', 'Psyduck', 'Jynx', 'Victreebel', 'Slowbro', 'Grimer', 'Mew', 'Mankey', 'Shellder', 'Raticate', 'Fearow', 'Dragonair', 'Marowak', 'Parasect', 'Metapod', 'Venusaur', 'Muk', 'Tauros', 'Eevee', 'Exeggcute', 'Raichu', 'Voltorb', 'Magikarp', 'Pidgey', 'Pikachu', 'Poliwrath', 'Staryu', 'Tangela', 'Vileplume', 'Machoke', 'Koffing', 'Doduo', 'Kakuna', 'Electrode', 'Machop', 'Goldeen', 'Blastoise', 'Primeape', 'Alakazam', 'Seadra', 'Diglett', 'Tentacruel', 'Nidorina', 'Weepinbell', 'Ninetales', 'Vulpix', 'Rattata', 'Rapidash', 'Charmander', 'Golduck', 'Kingler', 'Moltres', 'Sandshrew', 'Caterpie', 'Dugtrio', 'Electabuzz', 'Nidorino', 'Squirtle', 'Kangaskhan', 'Vaporeon', 'Zubat', 'Magnemite', 'Paras', 'Dratini', 'Ekans', 'Ivysaur', 'Horsea', 'Beedrill', 'Omastar', 'Cubone', 'Dragonite', 'Bulbasaur', 'Poliwag'] 150
import shutil
for val_label in val_labels:
    # Remove validation folders (e.g. the stray 'images' directory) that have no matching train class
    if val_label not in train_labels:
        shutil.rmtree(os.path.join('validation', val_label))
val_labels = os.listdir('validation')
print(val_labels)
print(len(val_labels))
['Arcanine', 'Gyarados', 'Ponyta', 'Jigglypuff', 'Gloom', 'Lickitung', 'Hitmonlee', 'MrMime', 'Chansey', 'Weezing', 'Arbok', 'Kadabra', 'Exeggutor', 'Rhydon', 'Hypno', 'Oddish', 'Scyther', 'Abra', 'Graveler', 'Starmie', 'Ditto', 'Zapdos', 'Golem', 'Pinsir', 'Rhyhorn', 'Jolteon', 'Gengar', 'Articuno', 'Venomoth', 'Nidoqueen', 'Seaking', 'Onix', 'Persian', 'Aerodactyl', 'Tentacool', 'Charizard', 'Snorlax', 'Drowzee', 'Sandslash', 'Hitmonchan', 'Growlithe', 'Magneton', 'Flareon', 'Farfetchd', 'Clefable', 'Clefairy', 'Kabutops', 'Poliwhirl', 'Charmeleon', 'Spearow', 'Venonat', 'Slowpoke', 'Wigglytuff', 'Omanyte', 'Lapras', 'Wartortle', 'Cloyster', 'Gastly', 'Machamp', 'Nidoking', 'Haunter', 'Magmar', 'Krabby', 'Mewtwo', 'Porygon', 'Bellsprout', 'Pidgeotto', 'Geodude', 'Meowth', 'Kabuto', 'Golbat', 'Weedle', 'Seel', 'Dodrio', 'Dewgong', 'Butterfree', 'Pidgeot', 'Psyduck', 'Jynx', 'Victreebel', 'Slowbro', 'Grimer', 'Mew', 'Mankey', 'Shellder', 'Raticate', 'Fearow', 'Dragonair', 'Marowak', 'Parasect', 'Metapod', 'Venusaur', 'Muk', 'Tauros', 'Eevee', 'Exeggcute', 'Raichu', 'Voltorb', 'Magikarp', 'Pidgey', 'Pikachu', 'Poliwrath', 'Staryu', 'Tangela', 'Vileplume', 'Machoke', 'Koffing', 'Doduo', 'Kakuna', 'Electrode', 'Machop', 'Goldeen', 'Blastoise', 'Primeape', 'Alakazam', 'Seadra', 'Diglett', 'Tentacruel', 'Nidorina', 'Weepinbell', 'Ninetales', 'Vulpix', 'Rattata', 'Rapidash', 'Charmander', 'Golduck', 'Kingler', 'Moltres', 'Sandshrew', 'Caterpie', 'Dugtrio', 'Electabuzz', 'Nidorino', 'Squirtle', 'Kangaskhan', 'Vaporeon', 'Zubat', 'Magnemite', 'Paras', 'Dratini', 'Ekans', 'Ivysaur', 'Horsea', 'Beedrill', 'Omastar', 'Cubone', 'Dragonite', 'Bulbasaur', 'Poliwag'] 149
# First, list the train classes that are missing from validation
for train_label in train_labels:
    if train_label not in val_labels:
        print(train_label)
# Then create an empty validation folder for each of them so both splits share the same class list
for train_label in train_labels:
    if train_label not in val_labels:
        print(train_label)
        os.makedirs(os.path.join('validation', train_label), exist_ok=True)
val_labels = os.listdir('validation')
len(val_labels)
149
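Because ImageFolder builds its class-to-index mapping from the sorted folder names, the two splits need identical folder sets (not just the same count) for their label indices to line up. A quick optional check, not in the original notebook:

assert sorted(os.listdir('train')) == sorted(os.listdir('validation'))  # identical class folders in both splits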
import torch
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from torchvision import transforms, models, datasets
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cpu
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    'validation': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
}
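Note that torchvision's pretrained EfficientNet weights were trained on ImageNet-normalized inputs, while the pipelines above feed raw [0, 1] tensors; adding normalization typically helps transfer learning. A minimal sketch of the extra transform (ImageNet mean/std), to be appended after ToTensor() in both Compose pipelines if desired:

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet channel means
                                 std=[0.229, 0.224, 0.225])   # ImageNet channel stds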
image_datasets = {
    'train': datasets.ImageFolder('train', data_transforms['train']),
    'validation': datasets.ImageFolder('validation', data_transforms['validation'])
}
dataloaders = {
    'train': DataLoader(
        image_datasets['train'],
        batch_size=32,
        shuffle=True
    ),
    'validation': DataLoader(
        image_datasets['validation'],
        batch_size=32,
        shuffle=False
    )
}
print(len(image_datasets['train']), len(image_datasets['validation']))
10657 661
imgs, labels = next(iter(dataloaders['train']))
fig, axes = plt.subplots(4, 8, figsize=(16, 8))
for ax, img, label in zip(axes.flatten(), imgs, labels):
    ax.imshow(img.permute(1, 2, 0))
    ax.set_title(label.item())
    ax.axis('off')
image_datasets['train'].classes[61]
'Kabutops'
2. EfficientNet
- A neural network model developed by a Google research team that delivers strong performance on computer vision tasks such as image classification and object detection
- Its key idea is to scale the network's depth, width, and input resolution together (compound scaling) to maximize both efficiency and accuracy; see the scaling rule below
- EfficientNet-B4 is a mid-sized model in the EfficientNet family
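For reference, the compound-scaling rule from the EfficientNet paper (Tan & Le, 2019) ties all three factors to a single coefficient $\phi$:

$$ \text{depth } d = \alpha^{\phi}, \qquad \text{width } w = \beta^{\phi}, \qquad \text{resolution } r = \gamma^{\phi}, \qquad \text{subject to } \alpha \cdot \beta^{2} \cdot \gamma^{2} \approx 2,\ \ \alpha, \beta, \gamma \ge 1 $$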
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
from torchvision.models._api import WeightsEnum
from torch.hub import load_state_dict_from_url
# Workaround for a weight-download issue seen with some torchvision versions:
# dropping 'check_hash' makes load_state_dict_from_url skip the hash verification
# that otherwise fails for some EfficientNet checkpoints.
def get_state_dict(self, *args, **kwargs):
    kwargs.pop("check_hash")
    return load_state_dict_from_url(self.url, *args, **kwargs)
WeightsEnum.get_state_dict = get_state_dict
model = efficientnet_b4(weights=EfficientNet_B4_Weights.IMAGENET1K_V1).to(device)
model
EfficientNet( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.00625, mode=row) ) ) (2): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0125, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 
1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.018750000000000003, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.025, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.03125, mode=row) ) ) (3): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.037500000000000006, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.043750000000000004, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05625, mode=row) ) ) (4): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0625, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): 
Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06875, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.07500000000000001, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08125, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): 
SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08750000000000001, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09375, mode=row) ) ) (5): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10625000000000001, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, 
kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1125, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11875000000000001, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.125, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) 
(stochastic_depth): StochasticDepth(p=0.13125, mode=row) ) ) (6): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1375, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.14375000000000002, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15000000000000002, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): 
AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15625, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1625, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.16875, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.17500000000000002, mode=row) ) (7): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): 
BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.18125000000000002, mode=row) ) ) (7): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 448, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1875, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(448, 2688, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2688, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2688, 2688, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2688, bias=False) (1): BatchNorm2d(2688, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2688, 112, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(112, 2688, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2688, 448, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.19375, mode=row) ) ) (8): Conv2dNormActivation( (0): Conv2d(448, 1792, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1792, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Sequential( (0): Dropout(p=0.4, inplace=True) (1): Linear(in_features=1792, out_features=1000, bias=True) ) )
# Freeze the pretrained backbone
for param in model.parameters():
    param.requires_grad = False
# Replace the classifier head: 1792 backbone features -> 149 Pokemon classes
model.classifier = nn.Sequential(
    nn.Linear(1792, 512),
    nn.ReLU(),
    nn.Linear(512, 149)
).to(device)
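An optional sanity check, not in the original notebook, that the freeze worked as intended and only the new head will receive gradients:

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)  # new classifier head only
total = sum(p.numel() for p in model.parameters())
print(f'trainable parameters: {trainable:,} / {total:,}')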
print(model)
EfficientNet( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.00625, mode=row) ) ) (2): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0125, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 
1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.018750000000000003, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.025, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.03125, mode=row) ) ) (3): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.037500000000000006, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.043750000000000004, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.05625, mode=row) ) ) (4): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(56, 336, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(336, 336, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=336, bias=False) (1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(336, 14, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(14, 336, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(336, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0625, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): 
Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.06875, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.07500000000000001, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08125, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): 
SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.08750000000000001, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.09375, mode=row) ) ) (5): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(672, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.10625000000000001, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, 
kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1125, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.11875000000000001, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.125, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) 
(stochastic_depth): StochasticDepth(p=0.13125, mode=row) ) ) (6): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(960, 960, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=960, bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(960, 40, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(40, 960, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(960, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1375, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.14375000000000002, mode=row) ) (2): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15000000000000002, mode=row) ) (3): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): 
AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.15625, mode=row) ) (4): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1625, mode=row) ) (5): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.16875, mode=row) ) (6): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.17500000000000002, mode=row) ) (7): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): 
BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 272, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(272, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.18125000000000002, mode=row) ) ) (7): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(272, 1632, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(1632, 1632, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1632, bias=False) (1): BatchNorm2d(1632, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(1632, 68, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(68, 1632, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(1632, 448, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.1875, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(448, 2688, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(2688, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(2688, 2688, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2688, bias=False) (1): BatchNorm2d(2688, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(2688, 112, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(112, 2688, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(2688, 448, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.19375, mode=row) ) ) (8): Conv2dNormActivation( (0): Conv2d(448, 1792, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(1792, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Sequential( (0): Linear(in_features=1792, out_features=512, bias=True) (1): ReLU() (2): Linear(in_features=512, out_features=149, bias=True) ) )
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
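Only the classifier parameters are handed to Adam, so the pretrained backbone is never updated; gradients are still computed through it during backprop, though. An optional sketch (not in the original notebook) that freezes the feature extractor to save memory and compute:
# Optional: freeze the EfficientNet backbone so no gradients flow through it.
# Assumes `model` is the torchvision EfficientNet built in the cells above.
for param in model.features.parameters():
    param.requires_grad = False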
epochs = 10
for epoch in range(epochs):
    for phase in ['train', 'validation']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        sum_losses = 0
        sum_accs = 0
        for x_batch, y_batch in dataloaders[phase]:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            y_pred = model(x_batch)
            loss = nn.CrossEntropyLoss()(y_pred, y_batch)
            if phase == 'train':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # accumulate scalar values so the computation graph is not kept alive
            sum_losses = sum_losses + loss.item()
            y_prob = nn.Softmax(dim=1)(y_pred)
            y_pred_index = torch.argmax(y_prob, dim=1)
            acc = (y_batch == y_pred_index).float().sum() / len(y_batch) * 100
            sum_accs = sum_accs + acc.item()
        # per-phase averages over the number of batches
        avg_loss = sum_losses / len(dataloaders[phase])
        avg_acc = sum_accs / len(dataloaders[phase])
        print(f'{phase:10s}: Epoch {epoch+1:4d}/{epochs} Loss: {avg_loss:.4f} Accuracy: {avg_acc:.2f}%')
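In the 'validation' phase above, gradients are still tracked even though they are never used. A small helper, sketched with the same `dataloaders` and `device` names, that evaluates a model under torch.no_grad():
# Sketch: average loss and accuracy over one dataloader without gradient tracking.
def evaluate(model, loader):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.no_grad():
        for x_batch, y_batch in loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            y_pred = model(x_batch)
            total_loss += criterion(y_pred, y_batch).item() * len(y_batch)
            total_correct += (y_pred.argmax(dim=1) == y_batch).sum().item()
            total_seen += len(y_batch)
    return total_loss / total_seen, total_correct / total_seen * 100

# e.g. val_loss, val_acc = evaluate(model, dataloaders['validation'])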
# Save the trained model weights (state_dict)
torch.save(model.state_dict(), 'model.pth')  # cf. model.h5 in Keras
torch.save(model.state_dict(), '/content/drive/MyDrive/KDT/model.pth')  # backup copy on Google Drive (model.pt is also a common extension)
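Writing to /content/drive only succeeds if Google Drive is mounted in the Colab session; if it was not mounted in an earlier cell, a minimal sketch:
# Mount Google Drive (Colab) so the checkpoint path above exists.
from google.colab import drive
drive.mount('/content/drive')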
model = models.efficientnet_b4().to(device)
model.classifier = nn.Sequential(
    nn.Linear(1792, 512),
    nn.ReLU(),
    nn.Linear(512, 149)
).to(device)
print(model)
(print(model) output omitted: the same EfficientNet-B4 architecture dump as shown above, ending with the replaced classifier Linear(1792, 512) -> ReLU -> Linear(512, 149).)
model.load_state_dict(torch.load('/content/drive/MyDrive/KDT/model.pth', map_location=torch.device('cpu')))
<All keys matched successfully>
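With the weights restored, a minimal single-image prediction sketch; the 224x224 resize, the ImageNet mean/std, the sample path, and the sorted-directory class order are all assumptions and should match whatever preprocessing the earlier cells actually used:
# Sketch: classify one image with the reloaded model.
from PIL import Image
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),          # assumed training resolution
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

class_names = sorted(train_labels)          # ImageFolder sorts class folders alphabetically
img = Image.open('validation/Pikachu/0.jpg').convert('RGB')  # hypothetical sample path
x = preprocess(img).unsqueeze(0).to(device)

model.eval()                                # switch to eval mode (also done in the next cell)
with torch.no_grad():
    probs = torch.softmax(model(x), dim=1)
idx = probs.argmax(dim=1).item()
print(class_names[idx], f'{probs[0, idx].item() * 100:.2f}%')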
model.eval()
EfficientNet( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(24, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 24, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (2): Conv2dNormActivation( (0): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.00625, mode=row) ) ) (2): Sequential( (0): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False) (1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.0125, mode=row) ) (1): MBConv( (block): Sequential( (0): Conv2dNormActivation( (0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (1): Conv2dNormActivation( (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False) (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): SiLU(inplace=True) ) (2): SqueezeExcitation( (avgpool): AdaptiveAvgPool2d(output_size=1) (fc1): Conv2d(192, 8, kernel_size=(1, 1), stride=(1, 1)) (fc2): Conv2d(8, 192, kernel_size=(1, 1), stride=(1, 1)) (activation): SiLU(inplace=True) (scale_activation): Sigmoid() ) (3): Conv2dNormActivation( (0): Conv2d(192, 32, kernel_size=(1, 
1), stride=(1, 1), bias=False) (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (stochastic_depth): StochasticDepth(p=0.018750000000000003, mode=row) )
      ... (remaining MBConv blocks of stages (2)–(7) omitted — each repeats the same expand Conv2dNormActivation → depthwise Conv2dNormActivation → SqueezeExcitation → project Conv2dNormActivation pattern, with output channels growing 32 → 56 → 112 → 160 → 272 → 448 and StochasticDepth p rising from 0.025 to 0.19375) ...
    (8): Conv2dNormActivation(
      (0): Conv2d(448, 1792, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(1792, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=1)
  (classifier): Sequential(
    (0): Linear(in_features=1792, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=149, bias=True)
  )
)
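The printout ends with the custom head: the stock 1792-dimensional classifier has been replaced by Linear(1792, 512) → ReLU → Linear(512, 149), matching the 149 training classes. For reference, a minimal sketch of how such a head can be attached to torchvision's pretrained EfficientNet-B4 (the weights string assumes torchvision ≥ 0.13, and freezing the backbone is an assumption, not something visible in the printout; device was defined earlier in the notebook):
import torch.nn as nn
from torchvision import models

# Pretrained EfficientNet-B4 backbone.
model = models.efficientnet_b4(weights='DEFAULT')

# Freezing the feature extractor is an assumption; only the new head would train.
for param in model.features.parameters():
    param.requires_grad = False

# Swap in the head shown at the end of the printout: 1792 -> 512 -> 149 classes.
model.classifier = nn.Sequential(
    nn.Linear(1792, 512),
    nn.ReLU(),
    nn.Linear(512, 149)
)
model = model.to(device)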
from PIL import Image
img1 = Image.open('/content/validation/Snorlax/4.jpg')
img2 = Image.open('/content/validation/Diglett/0.jpg')
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].imshow(img1)
axes[0].axis('off')
axes[1].imshow(img2)
axes[1].axis('off')
plt.show()
img1_input = data_transforms['validation'](img1)
img2_input = data_transforms['validation'](img2)
print(img1_input.shape, img2_input.shape)
torch.Size([3, 224, 224]) torch.Size([3, 224, 224])
test_batch = torch.stack([img1_input, img2_input])
test_batch = test_batch.to(device)
test_batch.shape
torch.Size([2, 3, 224, 224])
y_pred = model(test_batch)
y_pred
tensor([[-12.3355, -19.1657, -17.3220, -11.4766, -17.5933, -19.7273, -16.4533, -12.4838, -10.6494, -6.0002, -14.4120, -10.5674, -4.8804, -12.8456, -11.0897, -9.9797, -13.7729, -9.7524, -13.7067, -12.2937, -8.7874, -7.7965, -5.9157, -14.0524, -14.4868, -12.1902, -9.1547, -9.5367, -13.0657, -9.9462, -15.0470, -10.8846, -22.7273, -10.3076, -12.3195, -8.3360, -14.5196, -17.5471, -14.5633, -11.3744, -9.8083, -18.2458, -8.6211, -14.4517, -10.5110, -11.5894, -15.1230, -18.1716, -9.6653, -12.4287, -15.7958, -14.6574, -15.1740, -13.2210, -12.2254, -13.4726, -12.2067, -8.5887, -16.4669, -8.4709, -12.0692, -11.1317, -16.4824, -17.5232, -9.0371, -13.0496, -7.2943, -10.1757, -7.9661, -7.5792, -15.7986, -14.2540, -10.8185, -10.2317, -12.9899, -9.9184, -15.4790, -14.5658, -12.4241, -11.7794, -15.0605, -10.9818, -9.5735, -15.1559, -4.9230, -10.2876, -11.0175, -7.3400, -10.0570, -12.5725, -16.4764, -10.8594, -8.7421, -8.0056, -18.1494, -8.9928, -4.9692, -13.9577, -14.0431, -12.7145, -12.2622, -13.6984, -14.4218, -12.2230, -12.8535, -10.0103, -15.4253, -9.9938, -16.9759, -6.2636, -10.2474, -14.5638, -11.7147, -13.6770, -11.2658, -11.6618, -13.9839, -15.4932, -15.6473, -11.5801, -11.8283, -9.2160, -5.9149, -7.9887, -2.2132, 3.8272, -12.3023, -8.1957, -14.3656, -14.7489, -6.6971, -13.1221, -10.0852, -9.1065, -6.3734, -15.0059, -12.8915, -6.7055, -7.7615, -6.4845, -14.5464, -11.7838, -12.5831, -7.7781, -7.5725, -13.5920, -3.5962, -25.9976, -14.0492], [-16.2727, -18.7167, -18.7392, -12.5516, -11.9426, -20.8614, -17.8482, -10.3764, -10.1265, -7.1094, -14.3529, -7.3418, -7.3802, -17.1600, -7.1597, -12.7398, -18.7406, -13.2550, -15.3226, -10.3470, -7.7511, 7.9072, -4.8184, -12.9666, -12.5754, -9.2445, -12.1475, -11.9683, -13.3296, -0.1591, -15.4851, -5.0147, -21.6745, -10.7952, -11.3993, -7.6683, -12.4426, -13.3459, -14.0375, -10.8187, -9.0651, -11.9760, -10.9302, -14.7082, -10.3312, -8.8412, -10.2700, -13.5210, -7.3868, -10.6135, -12.9469, -16.6278, -12.0151, -11.2764, -9.7771, -12.9919, -7.4512, -11.7525, -14.9059, -5.4688, -3.8530, -9.5220, -15.3787, -9.0475, -10.0005, -15.5271, -7.4470, -7.8919, -8.8868, -8.1974, -14.5496, -12.0078, -6.0108, -10.0669, -13.0927, -10.1803, -16.2601, -12.8575, -13.5189, -13.7591, -11.1196, -11.2202, -12.8063, -16.2829, -12.0218, -11.0037, -14.2206, -10.5580, -10.1511, -13.6731, -12.1477, -6.8230, -3.9422, -8.9773, -12.2872, -7.2390, -5.7074, -14.3909, -14.9422, -13.8911, -9.6644, -14.8214, -15.1606, -8.9261, -10.5647, -11.5769, -14.4764, -7.2245, -13.3629, -4.0707, -11.3441, -16.5330, -14.3366, -10.8749, -12.8245, -11.0989, -13.5967, -12.7565, -18.7619, -10.5617, -15.2602, -7.3629, -6.2198, -9.6861, -5.1062, -7.2331, -11.1115, -2.9232, -11.9054, -10.0559, -5.9422, -10.6976, -7.0246, -7.2119, -11.4718, -15.5052, -6.0530, -5.4094, -9.4900, -6.9022, -10.9662, -15.0237, -10.3787, -5.2442, -7.5502, -14.6810, -8.0357, -21.8233, -15.0981]], grad_fn=<AddmmBackward0>)
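The grad_fn=&lt;AddmmBackward0&gt; in the output shows that this forward pass still tracked gradients. For pure inference it is safer (and lighter on memory) to switch to eval mode and disable autograd; a small sketch of the same step with those guards added:
# Inference-only pass: eval() puts BatchNorm/StochasticDepth into inference mode,
# no_grad() skips gradient bookkeeping (the grad_fn above shows it was being tracked).
model.eval()
with torch.no_grad():
    y_pred = model(test_batch)
print(y_pred.shape)   # torch.Size([2, 149])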
y_prob = nn.Softmax(1)(y_pred)
y_prob
tensor([[9.5259e-08, 1.0294e-10, 6.5055e-10, 2.2486e-07, 4.9600e-10, 5.8709e-11, 1.5509e-09, 8.2128e-08, 5.1423e-07, 5.3740e-05, 1.1942e-08, 5.5818e-07, 1.6467e-04, 5.7196e-08, 3.3109e-07, 1.0046e-06, 2.2628e-08, 1.2610e-06, 2.4178e-08, 9.9330e-08, 3.3100e-06, 8.9163e-06, 5.8476e-05, 1.7110e-08, 1.1082e-08, 1.1016e-07, 2.2924e-06, 1.5646e-06, 4.5898e-08, 1.0389e-06, 6.3290e-09, 4.0648e-07, 2.9229e-12, 7.2379e-07, 9.6799e-08, 5.1983e-06, 1.0724e-08, 5.1943e-10, 1.0266e-08, 2.4906e-07, 1.1925e-06, 2.5828e-10, 3.9088e-06, 1.1478e-08, 5.9059e-07, 2.0088e-07, 5.8657e-09, 2.7817e-10, 1.3758e-06, 8.6782e-08, 2.9931e-09, 9.3438e-09, 5.5740e-09, 3.9297e-08, 1.0635e-07, 3.0556e-08, 1.0835e-07, 4.0376e-06, 1.5299e-09, 4.5424e-06, 1.2433e-07, 3.1747e-07, 1.5064e-09, 5.3203e-10, 2.5785e-06, 4.6644e-08, 1.4732e-05, 8.2580e-07, 7.5251e-06, 1.1080e-05, 2.9847e-09, 1.3987e-08, 4.3426e-07, 7.8086e-07, 4.9513e-08, 1.0682e-06, 4.1085e-09, 1.0240e-08, 8.7182e-08, 1.6612e-07, 6.2441e-09, 3.6883e-07, 1.5081e-06, 5.6758e-09, 1.5780e-04, 7.3840e-07, 3.5589e-07, 1.4075e-05, 9.2996e-07, 7.5158e-08, 1.5155e-09, 4.1686e-07, 3.4634e-06, 7.2336e-06, 2.8443e-10, 2.6954e-06, 1.5068e-04, 1.8811e-08, 1.7271e-08, 6.5209e-08, 1.0251e-07, 2.4379e-08, 1.1826e-08, 1.0660e-07, 5.6749e-08, 9.7439e-07, 4.3352e-09, 9.9054e-07, 9.1960e-10, 4.1295e-05, 7.6869e-07, 1.0260e-08, 1.7722e-07, 2.4906e-08, 2.7764e-07, 1.8685e-07, 1.8324e-08, 4.0508e-09, 3.4721e-09, 2.0276e-07, 1.5820e-07, 2.1563e-06, 5.8525e-05, 7.3572e-06, 2.3712e-03, 9.9606e-01, 9.8479e-08, 5.9811e-06, 1.2510e-08, 8.5270e-09, 2.6769e-05, 4.3379e-08, 9.0405e-07, 2.4058e-06, 3.7000e-05, 6.5946e-09, 5.4633e-08, 2.6545e-05, 9.2340e-06, 3.3111e-05, 1.0441e-08, 1.6539e-07, 7.4367e-08, 9.0817e-06, 1.1155e-05, 2.7117e-08, 5.9473e-04, 1.1105e-13, 1.7165e-08], [3.1524e-11, 2.7367e-12, 2.6759e-12, 1.3023e-09, 2.3943e-09, 3.2047e-13, 6.5223e-12, 1.1465e-08, 1.4720e-08, 3.0073e-07, 2.1498e-10, 2.3837e-07, 2.2941e-07, 1.2980e-11, 2.8598e-07, 1.0789e-09, 2.6720e-12, 6.4444e-10, 8.1522e-11, 1.1807e-08, 1.5831e-07, 9.9962e-01, 2.9727e-06, 8.5993e-10, 1.2716e-09, 3.5557e-08, 1.9506e-09, 2.3334e-09, 5.9812e-10, 3.1383e-04, 6.9293e-11, 2.4431e-06, 1.4213e-13, 7.5422e-09, 4.1222e-09, 1.7199e-07, 1.4522e-09, 5.8848e-10, 2.9469e-10, 7.3667e-09, 4.2545e-08, 2.3155e-09, 6.5898e-09, 1.5069e-10, 1.1995e-08, 5.3225e-08, 1.2752e-08, 4.9393e-10, 2.2790e-07, 9.0449e-09, 8.7701e-10, 2.2101e-11, 2.2268e-09, 4.6613e-09, 2.0875e-08, 8.3843e-10, 2.1368e-07, 2.8956e-09, 1.2366e-10, 1.5514e-06, 7.8064e-06, 2.6942e-08, 7.7071e-11, 4.3301e-08, 1.6696e-08, 6.6443e-11, 2.1458e-07, 1.3752e-07, 5.0850e-08, 1.0132e-07, 1.7659e-10, 2.2432e-09, 9.0226e-07, 1.5623e-08, 7.5803e-10, 1.3948e-08, 3.1922e-11, 9.5902e-10, 4.9497e-10, 3.8931e-10, 5.4523e-09, 4.9307e-09, 1.0094e-09, 3.1204e-11, 2.2120e-09, 6.1228e-09, 2.4538e-10, 9.5609e-09, 1.4361e-08, 4.2426e-10, 1.9504e-09, 4.0048e-07, 7.1399e-06, 4.6450e-08, 1.6963e-09, 2.6420e-07, 1.2221e-06, 2.0696e-10, 1.1925e-10, 3.4114e-10, 2.3365e-08, 1.3456e-10, 9.5858e-11, 4.8890e-08, 9.4966e-09, 3.4515e-09, 1.9001e-10, 2.6805e-07, 5.7853e-10, 6.2791e-06, 4.3561e-09, 2.4299e-11, 2.1850e-10, 6.9640e-09, 9.9124e-10, 5.5666e-09, 4.5793e-10, 1.0610e-09, 2.6158e-12, 9.5257e-09, 8.6769e-11, 2.3341e-07, 7.3204e-07, 2.2864e-08, 2.2295e-06, 2.6576e-07, 5.4969e-09, 1.9781e-05, 2.4849e-09, 1.5796e-08, 9.6633e-07, 8.3148e-09, 3.2738e-07, 2.7146e-07, 3.8341e-09, 6.7913e-11, 8.6500e-07, 1.6462e-06, 2.7818e-08, 3.6998e-07, 6.3562e-09, 1.0991e-10, 1.1439e-08, 1.9420e-06, 1.9354e-07, 1.5485e-10, 
1.1910e-07, 1.2247e-13, 1.0203e-10]], grad_fn=<SoftmaxBackward0>)
probs, idx = torch.topk(y_prob, k=3)
print(probs)
print(idx)
tensor([[9.9606e-01, 2.3712e-03, 5.9473e-04], [9.9962e-01, 3.1383e-04, 1.9781e-05]], grad_fn=<TopkBackward0>) tensor([[125, 124, 146], [ 21, 29, 127]])
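The indices returned by topk are positions in the class list, so they map to Pokémon names through image_datasets['validation'].classes, which is the same lookup the plot titles below use. A quick text-only way to inspect the top-3 predictions before plotting:
# Text-only view of the top-3 predictions per image.
classes = image_datasets['validation'].classes
for i in range(test_batch.size(0)):
    top3 = ', '.join('{} ({:.2f}%)'.format(classes[idx[i][j].item()], probs[i][j].item() * 100)
                     for j in range(3))
    print('image {}: {}'.format(i, top3))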
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
axes[0].set_title('{:.2f}% {}, {:.2f}% {}, {:.2f}% {}'.format(
probs[0][0] * 100,
image_datasets['validation'].classes[idx[0][0]],
probs[0][1] * 100,
image_datasets['validation'].classes[idx[0][1]],
probs[0][2] * 100,
image_datasets['validation'].classes[idx[0][2]],
))
axes[0].imshow(img1)
axes[0].axis('off')
axes[1].set_title('{:.2f}% {}, {:.2f}% {}, {:.2f}% {}'.format(
probs[1][0] * 100,
image_datasets['validation'].classes[idx[1][0]],
probs[1][1] * 100,
image_datasets['validation'].classes[idx[1][1]],
probs[1][2] * 100,
image_datasets['validation'].classes[idx[1][2]],
))
axes[1].imshow(img2)
axes[1].axis('off')
(-0.5, 359.5, 332.5, -0.5)
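The same load → transform → forward → topk → plot steps can be wrapped into one helper so any validation image can be checked with a single call. A sketch (the function name predict_image is ours; everything it uses — model, device, data_transforms, image_datasets — is already defined above):
def predict_image(path, k=3):
    # Load, preprocess, classify a single image and show its top-k predictions.
    img = Image.open(path).convert('RGB')
    x = data_transforms['validation'](img).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        prob = nn.Softmax(dim=1)(model(x))
    top_p, top_i = torch.topk(prob, k=k)
    classes = image_datasets['validation'].classes
    title = ', '.join('{} {:.2f}%'.format(classes[i.item()], p.item() * 100)
                      for p, i in zip(top_p[0], top_i[0]))
    plt.imshow(img)
    plt.title(title)
    plt.axis('off')
    plt.show()

predict_image('/content/validation/Snorlax/4.jpg')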