FatemehT committed · Commit 8e6512c · 1 Parent(s): a8ab7ac

style: run pre-commit

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
Files changed (50)
  1. README.md +9 -0
  2. angioPyFunctions.py +70 -61
  3. normalize_k1.py +3 -1
  4. predict.py +30 -14
  5. segmentation_models_pytorch/.github/workflows/tests.yml +2 -2
  6. segmentation_models_pytorch/.gitignore +1 -1
  7. segmentation_models_pytorch/HALLOFFAME.md +30 -31
  8. segmentation_models_pytorch/README.md +8 -8
  9. segmentation_models_pytorch/__init__.py +1 -1
  10. segmentation_models_pytorch/docker/Dockerfile +1 -1
  11. segmentation_models_pytorch/docs/conf.py +35 -26
  12. segmentation_models_pytorch/docs/insights.rst +8 -8
  13. segmentation_models_pytorch/docs/install.rst +1 -1
  14. segmentation_models_pytorch/docs/losses.rst +1 -1
  15. segmentation_models_pytorch/docs/models.rst +0 -2
  16. segmentation_models_pytorch/docs/quickstart.rst +1 -1
  17. segmentation_models_pytorch/docs/requirements.txt +1 -1
  18. segmentation_models_pytorch/misc/generate_table.py +6 -2
  19. segmentation_models_pytorch/segmentation_models_pytorch/__init__.py +34 -23
  20. segmentation_models_pytorch/segmentation_models_pytorch/__version__.py +1 -1
  21. segmentation_models_pytorch/segmentation_models_pytorch/base/__init__.py +2 -11
  22. segmentation_models_pytorch/segmentation_models_pytorch/base/heads.py +20 -9
  23. segmentation_models_pytorch/segmentation_models_pytorch/base/initialization.py +0 -1
  24. segmentation_models_pytorch/segmentation_models_pytorch/base/model.py +1 -1
  25. segmentation_models_pytorch/segmentation_models_pytorch/base/modules.py +55 -34
  26. segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/__init__.py +1 -1
  27. segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/decoder.py +19 -14
  28. segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/model.py +40 -45
  29. segmentation_models_pytorch/segmentation_models_pytorch/efficientunetplusplus/decoder.py +96 -61
  30. segmentation_models_pytorch/segmentation_models_pytorch/efficientunetplusplus/model.py +21 -19
  31. segmentation_models_pytorch/segmentation_models_pytorch/encoders/__init__.py +23 -14
  32. segmentation_models_pytorch/segmentation_models_pytorch/encoders/_base.py +5 -4
  33. segmentation_models_pytorch/segmentation_models_pytorch/encoders/_preprocessing.py +0 -1
  34. segmentation_models_pytorch/segmentation_models_pytorch/encoders/densenet.py +20 -11
  35. segmentation_models_pytorch/segmentation_models_pytorch/encoders/dpn.py +7 -6
  36. segmentation_models_pytorch/segmentation_models_pytorch/encoders/efficientnet.py +8 -10
  37. segmentation_models_pytorch/segmentation_models_pytorch/encoders/inceptionresnetv2.py +8 -5
  38. segmentation_models_pytorch/segmentation_models_pytorch/encoders/inceptionv4.py +13 -9
  39. segmentation_models_pytorch/segmentation_models_pytorch/encoders/mobilenet.py +1 -2
  40. segmentation_models_pytorch/segmentation_models_pytorch/encoders/resnet.py +10 -13
  41. segmentation_models_pytorch/segmentation_models_pytorch/encoders/senet.py +2 -2
  42. segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_regnet.py +189 -178
  43. segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_res2net.py +82 -81
  44. segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_resnest.py +130 -129
  45. segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_sknet.py +57 -44
  46. segmentation_models_pytorch/segmentation_models_pytorch/encoders/vgg.py +5 -4
  47. segmentation_models_pytorch/segmentation_models_pytorch/encoders/xception.py +31 -18
  48. segmentation_models_pytorch/segmentation_models_pytorch/fpn/__init__.py +1 -1
  49. segmentation_models_pytorch/segmentation_models_pytorch/fpn/decoder.py +35 -21
  50. segmentation_models_pytorch/segmentation_models_pytorch/fpn/model.py +6 -5
README.md CHANGED
@@ -24,3 +24,12 @@ This software allows single arteries to be segmented given a few clicks on a sin
 ...a website should pop up in your browser!
 
 You need to create a /Dicom folder and put some angiography DICOMs in there
+
+
+# How to run the project
+## Create virtual environment and activate it
+```bash
+uv venv
+source .venv/bin/activate
+uv pip install -r requirements.txt
+```
angioPyFunctions.py CHANGED
@@ -1,90 +1,98 @@
 import os
+
 os.environ.setdefault("ASTROPY_SKIP_CONFIG_UPDATE", "1")
 
+import astropy.config.configuration as _astro_config
 import numpy
 import scipy.interpolate
-import skimage.filters
-import skimage.morphology
 import scipy.ndimage
 import scipy.optimize
-import predict
+import skimage.filters
+import skimage.morphology
 from PIL import Image
-import astropy.config.configuration as _astro_config
+
+import predict
 
 if not hasattr(_astro_config, "update_default_config"):
+
     def _noop_update_default_config(*args, **kwargs):
         return None
+
     _astro_config.update_default_config = _noop_update_default_config
 
-from fil_finder import FilFinder2D
 import astropy.units as u
-from tqdm import tqdm
-import pooch
-import utils.dataset
 import cv2
+import pooch
+from fil_finder import FilFinder2D
+from tqdm import tqdm
 
+import utils.dataset
 
 colourTableHex = {
-    'LAD': "#f03b20",
-    'D': "#fd8d3c",
-    'CX': "#31a354",
-    'OM': "#74c476",
-    'RCA': "#08519c",
-    'AM': "#3182bd",
-    'LM': "#984ea3",
-    }
+    "LAD": "#f03b20",
+    "D": "#fd8d3c",
+    "CX": "#31a354",
+    "OM": "#74c476",
+    "RCA": "#08519c",
+    "AM": "#3182bd",
+    "LM": "#984ea3",
+}
 
 colourTableList = {}
 
 for item in colourTableHex.keys():
     ### WARNING HACK: The colours go in backwards here for some reason perhaps related to RGBA?
-    colourTableList[item] = [int(colourTableHex[item][5:7], 16),
-                             int(colourTableHex[item][3:5], 16),
-                             int(colourTableHex[item][1:3], 16)]
+    colourTableList[item] = [
+        int(colourTableHex[item][5:7], 16),
+        int(colourTableHex[item][3:5], 16),
+        int(colourTableHex[item][1:3], 16),
+    ]
 
 
 def skeletonise(maskArray):
     # if len(maskArray.shape) == 3:
     maskArray = cv2.cvtColor(maskArray, cv2.COLOR_BGR2GRAY)
 
-    skeleton = skimage.morphology.skeletonize(maskArray.astype('bool'))
+    skeleton = skimage.morphology.skeletonize(maskArray.astype("bool"))
 
     # Process the skeleton and find the longest path
-    fil = FilFinder2D(skeleton.astype('uint8'),
-                      distance=250 * u.pc, mask=skeleton, beamwidth=10.0*u.pix)
+    fil = FilFinder2D(
+        skeleton.astype("uint8"),
+        distance=250 * u.pc,
+        mask=skeleton,
+        beamwidth=10.0 * u.pix,
+    )
     fil.preprocess_image(flatten_percent=85)
-    fil.create_mask(border_masking=True, verbose=False,
-                    use_existing_mask=True)
+    fil.create_mask(border_masking=True, verbose=False, use_existing_mask=True)
     fil.medskel(verbose=False)
-    fil.analyze_skeletons(branch_thresh=400 * u.pix,
-                          skel_thresh=10 * u.pix, prune_criteria='length')
+    fil.analyze_skeletons(
+        branch_thresh=400 * u.pix, skel_thresh=10 * u.pix, prune_criteria="length"
+    )
 
     # add image arrays dictionary
     # tifffile.imwrite(os.path.join(arteryFolder, "skel.tif"), fil.skeleton.astype('<u1')*255)
 
-    skel = fil.skeleton.astype('<u1')*255
+    skel = fil.skeleton.astype("<u1") * 255
 
     return skel
 
 
 def skelEndpoints(skel):
-    #skel[skel!=0] = 1
-    skel = numpy.uint8(skel>0)
+    # skel[skel!=0] = 1
+    skel = numpy.uint8(skel > 0)
 
     # Apply the convolution.
-    kernel = numpy.uint8([[1, 1, 1],
-                          [1, 10, 1],
-                          [1, 1, 1]])
+    kernel = numpy.uint8([[1, 1, 1], [1, 10, 1], [1, 1, 1]])
     src_depth = -1
-    filtered = cv2.filter2D(skel,src_depth,kernel)
+    filtered = cv2.filter2D(skel, src_depth, kernel)
 
     # Look through to find the value of 11.
     # This returns a mask of the endpoints, but if you
     # just want the coordinates, you could simply
     # return np.where(filtered==11)
     out = numpy.zeros_like(skel)
-    out[numpy.where(filtered==11)] = 1
-    endCoords = numpy.where(filtered==11)
+    out[numpy.where(filtered == 11)] = 1
+    endCoords = numpy.where(filtered == 11)
     endCoords = list(zip(*endCoords))
     startPoint = endCoords[0]
     endPoint = endCoords[1]
@@ -109,16 +117,15 @@ def skelPointsInOrder(skel, startPoint=None):
     skelLength = len(skelPoints)
 
     # Loop through the skeleton starting with startPoint, deleting the starting point from the skelPoints list, and finding the closest pixel. This is appended to orderedPoints. startPoint now becomes the last point to be appended.
-    startPointCopy = startPoint # copied as we are going to loop and overwrite, but want to also keep the original startPoint
+    startPointCopy = startPoint  # copied as we are going to loop and overwrite, but want to also keep the original startPoint
     orderedPoints = []
 
     while len(skelPoints) > 1:
-
         skelPoints.remove(startPointCopy)
 
         # Calculate the point that is closest to the start point
-        diffs = numpy.abs(numpy.array(skelPoints)-numpy.array(startPointCopy))
-        dists = numpy.sum(diffs,axis=1) #l1-distance
+        diffs = numpy.abs(numpy.array(skelPoints) - numpy.array(startPointCopy))
+        dists = numpy.sum(diffs, axis=1)  # l1-distance
        closest_point_index = numpy.argmin(dists)
        closestPoint = skelPoints[closest_point_index]
        orderedPoints.append(closestPoint)
@@ -145,7 +152,7 @@ def skelSplinerWithThickness(skel, EDT, smoothing=50, order=3, decimation=2):
     x = x[::decimation]
     y = y[::decimation]
 
-    #NOTE: Should the EDT be median filtered? I wonder in fact if doing so will reduce the accuracy of the model.
+    # NOTE: Should the EDT be median filtered? I wonder in fact if doing so will reduce the accuracy of the model.
     # EDT = skimage.filters.median(EDT)
 
     t = EDT[y, x]
@@ -156,8 +163,7 @@
 
     print(x.shape, y.shape, t.shape)
 
-    tcko, uo = scipy.interpolate.splprep(
-        [y, x, t], s=smoothing, k=order, per=False)
+    tcko, uo = scipy.interpolate.splprep([y, x, t], s=smoothing, k=order, per=False)
 
     return tcko
 
@@ -192,8 +198,12 @@ def arterySegmentation(inputImage, groundTruthPoints, segmentationModelWeights=N
     )
 
     if inputImage.shape[0] != 512 and inputImage.shape[1] != 512:
-        ratioYX = numpy.array([512./inputImage.shape[0], 512./inputImage.shape[1]])
-        print(f"arterySegmentation(): Rescaling image to 512x512 by {ratioYX=}, and also applying this to input points")
+        ratioYX = numpy.array(
+            [512.0 / inputImage.shape[0], 512.0 / inputImage.shape[1]]
+        )
+        print(
+            f"arterySegmentation(): Rescaling image to 512x512 by {ratioYX=}, and also applying this to input points"
+        )
         inputImage = scipy.ndimage.zoom(inputImage, ratioYX)
         points = groundTruthPoints.copy() * ratioYX
         print(inputImage.shape)
@@ -202,33 +212,32 @@ def arterySegmentation(inputImage, groundTruthPoints, segmentationModelWeights=N
 
     imageSize = inputImage.shape
 
-    n_classes = 2 # binary output
+    n_classes = 2  # binary output
 
     net = predict.smp.Unet(
-        encoder_name='inceptionresnetv2',
+        encoder_name="inceptionresnetv2",
         encoder_weights="imagenet",
         in_channels=3,
-        classes=n_classes
+        classes=n_classes,
     )
 
     net = predict.nn.DataParallel(net)
 
-    device = predict.torch.device('cuda' if predict.torch.cuda.is_available() else 'cpu')
+    device = predict.torch.device(
+        "cuda" if predict.torch.cuda.is_available() else "cpu"
+    )
     net.to(device=device)
 
     net.load_state_dict(
-        predict.torch.load(
-            segmentationModelWeights,
-            map_location=device
-        )
+        predict.torch.load(segmentationModelWeights, map_location=device)
     )
 
     orig_image = Image.fromarray(inputImage)
 
-    image = predict.Image.new('RGB', imageSize, (0, 0, 0))
+    image = predict.Image.new("RGB", imageSize, (0, 0, 0))
     image.paste(orig_image, (0, 0))
 
-    imageArray = numpy.array(image).astype('uint8')
+    imageArray = numpy.array(image).astype("uint8")
 
     # Clear last channels
     imageArray[:, :, -1] = 0
@@ -242,13 +251,13 @@ def arterySegmentation(inputImage, groundTruthPoints, segmentationModelWeights=N
     for y, x in [startPoint, endPoint]:
         y = int(numpy.round(y))
         x = int(numpy.round(x))
-        imageArray[y-2:y+2, x-2:x+2, 1] = 255
+        imageArray[y - 2 : y + 2, x - 2 : x + 2, 1] = 255
 
     # All other points on Channel 2
     for y, x in points[1:-1]:
         y = int(numpy.round(y))
         x = int(numpy.round(x))
-        imageArray[y-2:y+ 2, x-2:x+2, 2] = 255
+        imageArray[y - 2 : y + 2, x - 2 : x + 2, 2] = 255
 
     image = Image.fromarray(imageArray.astype(numpy.uint8))
 
@@ -257,19 +266,19 @@ def arterySegmentation(inputImage, groundTruthPoints, segmentationModelWeights=N
         dataset_class=utils.dataset.CoronaryDataset,
         full_img=image,
         scale_factor=1,
-        device=device
+        device=device,
     )
 
     return mask
 
 
-
 def maskOutliner(labelledArtery, outlineThickness=3):
-
     # Compute the boundary of the mask
-    contours, _ = cv2.findContours(labelledArtery, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+    contours, _ = cv2.findContours(
+        labelledArtery, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
+    )
     tmp = numpy.zeros_like(labelledArtery)
-    boundary = cv2.drawContours(tmp, contours, -1, (255,255,255), outlineThickness)
+    boundary = cv2.drawContours(tmp, contours, -1, (255, 255, 255), outlineThickness)
    boundary = boundary > 0

    return boundary
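For orientation, a minimal usage sketch of the centreline helpers reformatted above (not part of this commit). The synthetic mask, the EDT construction and the `skelEndpoints()` return signature are assumptions based only on the code shown in this diff.

```python
# Sketch: chain the angioPyFunctions helpers on a synthetic vessel-like mask.
import cv2
import numpy
import scipy.ndimage

import angioPyFunctions

# Synthetic 3-channel mask containing one thick, curved "vessel"
mask = numpy.zeros((512, 512, 3), dtype="uint8")
pts = numpy.array([[60, 60], [200, 150], [300, 350], [450, 420]], dtype=numpy.int32)
cv2.polylines(mask, [pts.reshape(-1, 1, 2)], False, (255, 255, 255), 9)

skel = angioPyFunctions.skeletonise(mask)  # pruned skeleton image, values {0, 255}
startPoint, endPoint = angioPyFunctions.skelEndpoints(skel)  # assumed (y, x) endpoint pair

# Assumption: the Euclidean distance transform of the mask is used as the thickness map
EDT = scipy.ndimage.distance_transform_edt(mask[:, :, 0] > 0)
tck = angioPyFunctions.skelSplinerWithThickness(skel, EDT)  # spline over (y, x, thickness)
```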
normalize_k1.py CHANGED
@@ -17,7 +17,9 @@ def normalize_image(
     img = img.resize(target_size, Image.Resampling.BICUBIC)
 
     arr = np.array(img, dtype=np.float32)
-    arr = exposure.rescale_intensity(arr, in_range="image", out_range=(png_low, png_high))
+    arr = exposure.rescale_intensity(
+        arr, in_range="image", out_range=(png_low, png_high)
+    )
     arr = np.clip(arr, png_low, png_high)
     arr = ((arr - png_low) / (png_high - png_low) * 255.0).astype(np.uint8)
 
predict.py CHANGED
@@ -5,17 +5,17 @@ import os
 import torch
 import torch.nn as nn
 from PIL import Image
+from torch.backends import cudnn
 from torchvision import transforms
 
-from utils.dataset import CoronaryDataset
 import segmentation_models_pytorch.segmentation_models_pytorch as smp
+from utils.dataset import CoronaryDataset
 
-from torch.backends import cudnn
-
-'''
+"""
 This uses a pytorch coronary segmentation model (EfficientNetPLusPlus) that has been trained using a freely available dataset of labelled coronary angiograms from: http://personal.cimat.mx:8181/~ivan.cruz/DB_Angiograms.html
-The input is a raw angiogram image, and the output is a segmentation mask of all the arteries. This output will be used as the 'first guess' to speed up artery annotation.
-'''
+The input is a raw angiogram image, and the output is a segmentation mask of all the arteries. This output will be used as the 'first guess' to speed up artery annotation.
+"""
+
 
 def predict_img(net, dataset_class, full_img, device, scale_factor=1, n_classes=3):
     # NOTE n_classes is the number of possible values that can be predicted for a given pixel. In a standard binary segmentation task, this will be 2 i.e. black or white
@@ -41,11 +41,11 @@ def predict_img(net, dataset_class, full_img, device, scale_factor=1, n_classes=
         [
             transforms.ToPILImage(),
             transforms.Resize(full_img.size[1]),
-            transforms.ToTensor()
+            transforms.ToTensor(),
         ]
     )
 
-    full_mask = tf(probs.cpu())
+    full_mask = tf(probs.cpu())
 
     if n_classes > 1:
         return dataset_class.one_hot2mask(full_mask)
@@ -54,12 +54,28 @@ def predict_img(net, dataset_class, full_img, device, scale_factor=1, n_classes=
 
 
 def get_args():
-    parser = argparse.ArgumentParser(description='Predict masks from input images', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser = argparse.ArgumentParser(
+        description="Predict masks from input images",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
     # parser.add_argument('-d', '--dataset', type=str, help='Specifies the dataset to be used', dest='dataset', required=True)
-    parser.add_argument('--model', '-m', default='MODEL.pth', metavar='FILE', help="Specify the file in which the model is stored")
-    parser.add_argument('--input', '-i', metavar='INPUT', nargs='+', help='filenames of input images', required=True)
-    parser.add_argument('--output', '-o', metavar='INPUT', nargs='+', help='Filenames of output images')
+    parser.add_argument(
+        "--model",
+        "-m",
+        default="MODEL.pth",
+        metavar="FILE",
+        help="Specify the file in which the model is stored",
+    )
+    parser.add_argument(
+        "--input",
+        "-i",
+        metavar="INPUT",
+        nargs="+",
+        help="filenames of input images",
+        required=True,
+    )
+    parser.add_argument(
+        "--output", "-o", metavar="INPUT", nargs="+", help="Filenames of output images"
+    )
 
     return parser.parse_args()
-
-
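For reference, a sketch of driving `predict_img()` the same way `angioPyFunctions.arterySegmentation()` does (not part of this commit); the checkpoint path and input image path are placeholders.

```python
# Sketch: load the vendored SMP Unet and run a single-frame prediction.
import torch
import torch.nn as nn
from PIL import Image

import predict
import segmentation_models_pytorch.segmentation_models_pytorch as smp
from utils.dataset import CoronaryDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = smp.Unet(
    encoder_name="inceptionresnetv2",
    encoder_weights="imagenet",
    in_channels=3,
    classes=2,
)
net = nn.DataParallel(net)
net.to(device=device)
net.load_state_dict(torch.load("MODEL.pth", map_location=device))  # placeholder checkpoint

img = Image.open("angiogram.png").convert("RGB")  # placeholder input frame

mask = predict.predict_img(
    net,
    dataset_class=CoronaryDataset,
    full_img=img,
    device=device,
    scale_factor=1,
)
```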
 
 
segmentation_models_pytorch/.github/workflows/tests.yml CHANGED
@@ -17,12 +17,12 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-
+
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
         python-version: 3.6
-
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
segmentation_models_pytorch/.gitignore CHANGED
@@ -102,4 +102,4 @@ venv.bak/
 /site
 
 # mypy
-.mypy_cache/
+.mypy_cache/
segmentation_models_pytorch/HALLOFFAME.md CHANGED
@@ -5,7 +5,7 @@ Here you can find competitions, names of the winners and links to their solution
 
 Please, follow these rules, when adding a solution to the "Hall of Fame":
 
-1. Solution should be high rated (e.g. for Kaggle gold or silver medal)
+1. Solution should be high rated (e.g. for Kaggle gold or silver medal)
 2. There should be a description of the solution (post at the forum / code / blog post / paper / pre-print)
 
 
@@ -13,78 +13,77 @@ Please, follow these rules, when adding a solution to the "Hall of Fame":
 
 ### [Severstal: Steel Defect Detection](https://www.kaggle.com/c/severstal-steel-defect-detection)
 
-- 1st place.
-[Wuxi Jiangsu](https://www.kaggle.com/rguo97),
-[Hongbo Zhu](https://www.kaggle.com/zhuhongbo),
-[Yizhuo Yu](https://www.kaggle.com/paffpaffyu)
+- 1st place.
+[Wuxi Jiangsu](https://www.kaggle.com/rguo97),
+[Hongbo Zhu](https://www.kaggle.com/zhuhongbo),
+[Yizhuo Yu](https://www.kaggle.com/paffpaffyu)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114254#latest-675874)]
 
-- 5th place.
-[Guanshuo Xu](https://www.kaggle.com/wowfattie)
+- 5th place.
+[Guanshuo Xu](https://www.kaggle.com/wowfattie)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/117208#latest-675385)]
 
-- 9th place.
-[Jacek Poplawski](https://www.linkedin.com/in/jacekpoplawski/)
+- 9th place.
+[Jacek Poplawski](https://www.linkedin.com/in/jacekpoplawski/)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114297#latest-660842)]
 
 - 10th place.
-[Alexey Rozhkov](https://www.linkedin.com/in/alexisrozhkov)
+[Alexey Rozhkov](https://www.linkedin.com/in/alexisrozhkov)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114465#latest-659615)]
 
-- 12th place.
-[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/),
-[Ilya Dobrynin](https://www.linkedin.com/in/ilya-dobrynin-79a89b106/),
-[Denis Kolpakov](https://www.linkedin.com/in/denis-kolpakov-ab3137197/)
+- 12th place.
+[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/),
+[Ilya Dobrynin](https://www.linkedin.com/in/ilya-dobrynin-79a89b106/),
+[Denis Kolpakov](https://www.linkedin.com/in/denis-kolpakov-ab3137197/)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114309#latest-661404)]
 
-- 31st place.
-[Insaf Ashrapov](https://www.linkedin.com/in/iashrapov/),
-[Igor Krashenyi](https://www.linkedin.com/in/igor-krashenyi-38b89b98),
-[Pavel Pleskov](https://www.linkedin.com/in/ppleskov),
-[Anton Zakharenkov](https://www.linkedin.com/in/anton-zakharenkov/),
-[Nikolai Popov](https://www.linkedin.com/in/nikolai-popov-b2157370/)
+- 31st place.
+[Insaf Ashrapov](https://www.linkedin.com/in/iashrapov/),
+[Igor Krashenyi](https://www.linkedin.com/in/igor-krashenyi-38b89b98),
+[Pavel Pleskov](https://www.linkedin.com/in/ppleskov),
+[Anton Zakharenkov](https://www.linkedin.com/in/anton-zakharenkov/),
+[Nikolai Popov](https://www.linkedin.com/in/nikolai-popov-b2157370/)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114383#latest-658438)]
 [[code](https://github.com/Diyago/Severstal-Steel-Defect-Detection)]
 
-- 55th place.
-[Karl Hornlund](https://www.linkedin.com/in/karl-hornlund/)
+- 55th place.
+[Karl Hornlund](https://www.linkedin.com/in/karl-hornlund/)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114410#latest-672682)]
 [[code](https://github.com/khornlund/severstal-steel-defect-detection)]
 
 - Efficiency round 1st place.
-[Stefan Stefanov](https://www.linkedin.com/in/stefan-stefanov-63a77b1)
+[Stefan Stefanov](https://www.linkedin.com/in/stefan-stefanov-63a77b1)
 [[description](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/117486#latest-674229)]
 
 
 ### [Understanding Clouds from Satellite Images](https://www.kaggle.com/c/understanding_cloud_organization)
 
 - 2nd place.
-[Andrey Kiryasov](https://www.kaggle.com/ekydna)
+[Andrey Kiryasov](https://www.kaggle.com/ekydna)
 [[description](https://www.kaggle.com/c/understanding_cloud_organization/discussion/118255#latest-678189)]
 
 - 4th place.
-[Ching-Loong Seow](https://www.linkedin.com/in/clseow/)
+[Ching-Loong Seow](https://www.linkedin.com/in/clseow/)
 [[description](https://www.kaggle.com/c/understanding_cloud_organization/discussion/118016#latest-677333)]
 
 - 34th place.
-[Karl Hornlund](https://www.linkedin.com/in/karl-hornlund/)
+[Karl Hornlund](https://www.linkedin.com/in/karl-hornlund/)
 [[description](https://www.kaggle.com/c/understanding_cloud_organization/discussion/118250#latest-678176)]
 [[code](https://github.com/khornlund/understanding-cloud-organization)]
 
 - 55th place.
-[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/)
+[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/)
 [[description](https://www.kaggle.com/c/understanding_cloud_organization/discussion/118019#latest-678626)]
 
 ## Other platforms
 
-### [MICCAI 2020 TN-SCUI challenge](https://tn-scui2020.grand-challenge.org/Home/)
+### [MICCAI 2020 TN-SCUI challenge](https://tn-scui2020.grand-challenge.org/Home/)
 - 1st place.
-[Mingyu Wang](https://github.com/WAMAWAMA)
+[Mingyu Wang](https://github.com/WAMAWAMA)
 [[description](https://github.com/WAMAWAMA/TNSCUI2020-Seg-Rank1st)]
 [[code](https://github.com/WAMAWAMA/TNSCUI2020-Seg-Rank1st)]
 
 ### [Open Cities AI Challenge: Segmenting Buildings for Disaster Resilience](https://www.drivendata.org/competitions/60/building-segmentation-disaster-resilience/)
 - 1st place.
-[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/).
+[Pavel Yakubovskiy](https://www.linkedin.com/in/pavel-yakubovskiy/).
 [[code and description](https://github.com/qubvel/open-cities-challenge)]
-
segmentation_models_pytorch/README.md CHANGED
@@ -1,8 +1,8 @@
 <div align="center">
-
-![logo](https://i.ibb.co/dc1XdhT/Segmentation-Models-V2-Side-1-1.png)
-**Python library with Neural Networks for Image
-Segmentation based on [PyTorch](https://pytorch.org/).**
+
+![logo](https://i.ibb.co/dc1XdhT/Segmentation-Models-V2-Side-1-1.png)
+**Python library with Neural Networks for Image
+Segmentation based on [PyTorch](https://pytorch.org/).**
 
 [![Documentation Status](https://readthedocs.org/projects/smp/badge/?version=latest)](https://segmentation-models-pytorch.readthedocs.io/en/latest/?badge=latest) <br> [![Generic badge](https://img.shields.io/badge/License-MIT-<COLOR>.svg)](https://shields.io/)
 
@@ -14,7 +14,7 @@ The main features of this library are:
 - 12 models architectures for binary and multi class segmentation (including legendary Unet)
 - 104 available encoders
 - All encoders have pre-trained weights for faster and better convergence
-
+
 ### [📚 Project Documentation 📚](http://smp.readthedocs.io/)
 
 Visit [Read The Docs Project Page](https://segmentation-models-pytorch.readthedocs.io/en/latest/) or read following README to know more about Segmentation Models Pytorch (SMP for short) library
@@ -346,11 +346,11 @@ model = smp.FPN('resnet34', in_channels=1)
 mask = model(torch.ones([1, 1, 64, 64]))
 ```
 
-##### Auxiliary classification output
-All models support `aux_params` parameters, which is default set to `None`.
+##### Auxiliary classification output
+All models support `aux_params` parameters, which is default set to `None`.
 If `aux_params = None` then classification auxiliary output is not created, else
 model produce not only `mask`, but also `label` output with shape `NC`.
-Classification head consists of GlobalPooling->Dropout(optional)->Linear->Activation(optional) layers, which can be
+Classification head consists of GlobalPooling->Dropout(optional)->Linear->Activation(optional) layers, which can be
 configured by `aux_params` as follows:
 ```python
 aux_params=dict(
segmentation_models_pytorch/__init__.py CHANGED
@@ -1 +1 @@
-from segmentation_models_pytorch import *
+from segmentation_models_pytorch import *
segmentation_models_pytorch/docker/Dockerfile CHANGED
@@ -1,3 +1,3 @@
 FROM anibali/pytorch:cuda-9.0
 
-RUN pip install segmentation-models-pytorch
+RUN pip install segmentation-models-pytorch
segmentation_models_pytorch/docs/conf.py CHANGED
@@ -14,24 +14,28 @@
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))
 
+import datetime
 import os
 import re
 import sys
-import datetime
-sys.path.append('..')
+
+sys.path.append("..")
 
 # -- Project information -----------------------------------------------------
 
-project = 'Segmentation Models'
-copyright = '{}, Pavel Yakubovskiy'.format(datetime.datetime.now().year)
-author = 'Pavel Yakubovskiy'
+project = "Segmentation Models"
+copyright = "{}, Pavel Yakubovskiy".format(datetime.datetime.now().year)
+author = "Pavel Yakubovskiy"
+
 
 def get_version():
-    sys.path.append('../segmentation_models_pytorch')
+    sys.path.append("../segmentation_models_pytorch")
     from __version__ import __version__ as version
+
     sys.path.pop(-1)
     return version
 
+
 version = get_version()
 
 # -- General configuration ---------------------------------------------------
@@ -41,15 +45,15 @@ version = get_version()
 # ones.
 
 extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.coverage',
-    'sphinx.ext.napoleon',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.mathjax',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.coverage",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.mathjax",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -64,12 +68,14 @@ exclude_patterns = []
 #
 
 import sphinx_rtd_theme
+
 html_theme = "sphinx_rtd_theme"
 html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
 # import karma_sphinx_theme
 # html_theme = "karma_sphinx_theme"
 import faculty_sphinx_theme
+
 html_theme = "faculty_sphinx_theme"
 
 # import catalyst_sphinx_theme
@@ -81,7 +87,7 @@ html_logo = "logo.png"
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # -- Extension configuration -------------------------------------------------
 
@@ -91,30 +97,33 @@ napoleon_include_init_with_doc = True
 napoleon_numpy_docstring = False
 
 autodoc_mock_imports = [
-    'torch',
-    'tqdm',
-    'numpy',
-    'timm',
-    'pretrainedmodels',
-    'torchvision',
-    'efficientnet-pytorch',
-    'segmentation_models_pytorch.encoders',
-    'segmentation_models_pytorch.utils',
+    "torch",
+    "tqdm",
+    "numpy",
+    "timm",
+    "pretrainedmodels",
+    "torchvision",
+    "efficientnet-pytorch",
+    "segmentation_models_pytorch.encoders",
+    "segmentation_models_pytorch.utils",
     # 'segmentation_models_pytorch.base',
 ]
 
-autoclass_content = 'both'
-autodoc_typehints = 'description'
+autoclass_content = "both"
+autodoc_typehints = "description"
 
 # --- Work around to make autoclass signatures not (*args, **kwargs) ----------
 
-class FakeSignature():
+
+class FakeSignature:
     def __getattribute__(self, *args):
         raise ValueError
 
+
 def f(app, obj, bound_method):
     if "__new__" in obj.__name__:
         obj.__signature__ = FakeSignature()
 
+
 def setup(app):
-    app.connect('autodoc-before-process-signature', f)
+    app.connect("autodoc-before-process-signature", f)
segmentation_models_pytorch/docs/insights.rst CHANGED
@@ -21,20 +21,20 @@ Each encoder should have following attributes and methods and be inherited from
 .. code-block:: python
 
     class MyEncoder(torch.nn.Module, EncoderMixin):
-
+
         def __init__(self, **kwargs):
             super().__init__()
-
+
             # A number of channels for each encoder feature tensor, list of integers
             self._out_channels: List[int] = [3, 16, 64, 128, 256, 512]
 
             # A number of stages in decoder (in other words number of downsampling operations), integer
             # use in in forward pass to reduce number of returning features
-            self._depth: int = 5
+            self._depth: int = 5
 
             # Default number of input channels in first Conv2d layer for encoder (usually 3)
-            self._in_channels: int = 3
-
+            self._in_channels: int = 3
+
             # Define encoder modules below
             ...
 
@@ -90,12 +90,12 @@ For better understanding see more examples of encoder in smp.encoders module.
 3. Aux classification output
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-All models support ``aux_params`` parameter, which is default set to ``None``.
+All models support ``aux_params`` parameter, which is default set to ``None``.
 If ``aux_params = None`` than classification auxiliary output is not created, else
 model produce not only ``mask``, but also ``label`` output with shape ``(N, C)``.
 
 Classification head consist of following layers:
-
+
 1. GlobalPooling
 2. Dropout (optional)
 3. Linear
@@ -104,7 +104,7 @@ Classification head consist of following layers:
 Example:
 
 .. code-block:: python
-
+
     aux_params=dict(
         pooling='avg',             # one of 'avg', 'max'
         dropout=0.5,               # dropout ratio, default is None
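To make the custom-encoder contract from `docs/insights.rst` concrete, a minimal sketch (not part of this commit). It assumes `EncoderMixin` lives in `encoders/_base.py` as in upstream SMP; the conv stages are purely illustrative.

```python
# Sketch: an encoder that exposes the attributes SMP expects and returns one
# feature map per stage (resolutions 1, 1/2, 1/4, ... of the input).
from typing import List

import torch
import torch.nn as nn

from segmentation_models_pytorch.segmentation_models_pytorch.encoders._base import EncoderMixin


class MyEncoder(nn.Module, EncoderMixin):
    def __init__(self, **kwargs):
        super().__init__()
        self._out_channels: List[int] = [3, 16, 64, 128, 256, 512]
        self._depth: int = 5
        self._in_channels: int = 3
        # Illustrative downsampling stages; a real encoder defines its own blocks
        self.stages = nn.ModuleList(
            nn.Conv2d(self._out_channels[i], self._out_channels[i + 1], 3, stride=2, padding=1)
            for i in range(self._depth)
        )

    def forward(self, x):
        features = [x]
        for stage in self.stages:
            x = stage(x)
            features.append(x)
        return features
```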
segmentation_models_pytorch/docs/install.rst CHANGED
@@ -5,4 +5,4 @@ Latest version from source:
 
 .. code-block:: bash
 
-    $ pip install -U git+https://github.com/jlcsilva/segmentation_models.pytorch
+    $ pip install -U git+https://github.com/jlcsilva/segmentation_models.pytorch
segmentation_models_pytorch/docs/losses.rst CHANGED
@@ -1,7 +1,7 @@
 📉 Losses
 =========
 
-Collection of popular semantic segmentation losses. Adapted from
+Collection of popular semantic segmentation losses. Adapted from
 an awesome repo with pytorch utils https://github.com/BloodAxe/pytorch-toolbelt
 
 Constants
segmentation_models_pytorch/docs/models.rst CHANGED
@@ -48,5 +48,3 @@ DeepLabV3
 DeepLabV3+
 ~~~~~~~~~~
 .. autoclass:: segmentation_models_pytorch.DeepLabV3Plus
-
-
segmentation_models_pytorch/docs/quickstart.rst CHANGED
@@ -6,7 +6,7 @@
 Segmentation model is just a PyTorch nn.Module, which can be created as easy as:
 
 .. code-block:: python
-
+
     import segmentation_models_pytorch as smp
 
     model = smp.Unet(
segmentation_models_pytorch/docs/requirements.txt CHANGED
@@ -1,2 +1,2 @@
 faculty-sphinx-theme==0.2.2
-six==1.15.0
+six==1.15.0
segmentation_models_pytorch/misc/generate_table.py CHANGED
@@ -10,11 +10,15 @@ COLUMNS = [
     "Params, M",
 ]
 
+
 def wrap_row(r):
     return "|{}|".format(r)
 
-header = "|".join([column.ljust(WIDTH, ' ') for column in COLUMNS])
-separator = "|".join(["-" * WIDTH] + [":" + "-" * (WIDTH - 2) + ":"] * (len(COLUMNS) - 1))
+
+header = "|".join([column.ljust(WIDTH, " ") for column in COLUMNS])
+separator = "|".join(
+    ["-" * WIDTH] + [":" + "-" * (WIDTH - 2) + ":"] * (len(COLUMNS) - 1)
+)
 
 print(wrap_row(header))
 print(wrap_row(separator))
segmentation_models_pytorch/segmentation_models_pytorch/__init__.py CHANGED
@@ -1,23 +1,20 @@
-from .unet import Unet
-from .unetplusplus import UnetPlusPlus
-from .manet import MAnet
-from .linknet import Linknet
-from .fpn import FPN
-from .pspnet import PSPNet
+from typing import Optional
+
+import torch
+
+from . import encoders, losses, utils
+from .__version__ import __version__
 from .deeplabv3 import DeepLabV3, DeepLabV3Plus
+from .efficientunetplusplus import EfficientUnetPlusPlus
+from .fpn import FPN
+from .linknet import Linknet
+from .manet import MAnet
 from .pan import PAN
+from .pspnet import PSPNet
 from .resunet import ResUnet
 from .resunetplusplus import ResUnetPlusPlus
-from .efficientunetplusplus import EfficientUnetPlusPlus
-
-from . import encoders
-from . import utils
-from . import losses
-
-from .__version__ import __version__
-
-from typing import Optional
-import torch
+from .unet import Unet
+from .unetplusplus import UnetPlusPlus
 
 
 def create_model(
@@ -28,18 +25,32 @@ def create_model(
     classes: int = 1,
     **kwargs,
 ) -> torch.nn.Module:
-    """Models wrapper. Allows to create any model just with parametes
-
-    """
+    """Models wrapper. Allows to create any model just with parametes"""
 
-    archs = [Unet, UnetPlusPlus, MAnet, Linknet, FPN, PSPNet, DeepLabV3, DeepLabV3Plus, PAN, ResUnet, EfficientUnetPlusPlus, ResUnetPlusPlus]
+    archs = [
+        Unet,
+        UnetPlusPlus,
+        MAnet,
+        Linknet,
+        FPN,
+        PSPNet,
+        DeepLabV3,
+        DeepLabV3Plus,
+        PAN,
+        ResUnet,
+        EfficientUnetPlusPlus,
+        ResUnetPlusPlus,
+    ]
     archs_dict = {a.__name__.lower(): a for a in archs}
     try:
         model_class = archs_dict[arch.lower()]
     except KeyError:
-        raise KeyError("Wrong architecture type `{}`. Avalibale options are: {}".format(
-            arch, list(archs_dict.keys()),
-        ))
+        raise KeyError(
+            "Wrong architecture type `{}`. Avalibale options are: {}".format(
+                arch,
+                list(archs_dict.keys()),
+            )
+        )
     return model_class(
         encoder_name=encoder_name,
         encoder_weights=encoder_weights,
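A quick, hedged usage sketch for the reorganised `create_model()` wrapper (not part of this commit); `unet`, `resnet34` and `imagenet` are just the usual SMP example values.

```python
# Sketch: build a model by architecture name and run a dummy forward pass.
import torch

import segmentation_models_pytorch.segmentation_models_pytorch as smp

model = smp.create_model(
    arch="unet",              # any archs_dict key, matched case-insensitively
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=2,
)

with torch.no_grad():
    out = model(torch.ones([1, 3, 64, 64]))
print(out.shape)  # torch.Size([1, 2, 64, 64])
```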
segmentation_models_pytorch/segmentation_models_pytorch/__version__.py CHANGED
@@ -1,3 +1,3 @@
 VERSION = (0, 1, 3)
 
-__version__ = '.'.join(map(str, VERSION))
+__version__ = ".".join(map(str, VERSION))
segmentation_models_pytorch/segmentation_models_pytorch/base/__init__.py CHANGED
@@ -1,12 +1,3 @@
+from .heads import ClassificationHead, SegmentationHead
 from .model import SegmentationModel
-
-from .modules import (
-    PreActivatedConv2dReLU,
-    Conv2dReLU,
-    Attention,
-)
-
-from .heads import (
-    SegmentationHead,
-    ClassificationHead,
-)
+from .modules import Attention, Conv2dReLU, PreActivatedConv2dReLU
segmentation_models_pytorch/segmentation_models_pytorch/base/heads.py CHANGED
@@ -1,22 +1,33 @@
 import torch.nn as nn
-from .modules import Flatten, Activation
+
+from .modules import Activation, Flatten
 
 
-class SegmentationHead(nn.Sequential):
-
-    def __init__(self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1):
-        conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
-        upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
+class SegmentationHead(nn.Sequential):
+    def __init__(
+        self, in_channels, out_channels, kernel_size=3, activation=None, upsampling=1
+    ):
+        conv2d = nn.Conv2d(
+            in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2
+        )
+        upsampling = (
+            nn.UpsamplingBilinear2d(scale_factor=upsampling)
+            if upsampling > 1
+            else nn.Identity()
+        )
         activation = Activation(activation)
         super().__init__(conv2d, upsampling, activation)
 
 
 class ClassificationHead(nn.Sequential):
-
-    def __init__(self, in_channels, classes, pooling="avg", dropout=0.2, activation=None):
+    def __init__(
+        self, in_channels, classes, pooling="avg", dropout=0.2, activation=None
+    ):
         if pooling not in ("max", "avg"):
-            raise ValueError("Pooling should be one of ('max', 'avg'), got {}.".format(pooling))
-        pool = nn.AdaptiveAvgPool2d(1) if pooling == 'avg' else nn.AdaptiveMaxPool2d(1)
+            raise ValueError(
+                "Pooling should be one of ('max', 'avg'), got {}.".format(pooling)
+            )
+        pool = nn.AdaptiveAvgPool2d(1) if pooling == "avg" else nn.AdaptiveMaxPool2d(1)
         flatten = Flatten()
         dropout = nn.Dropout(p=dropout, inplace=True) if dropout else nn.Identity()
         linear = nn.Linear(in_channels, classes, bias=True)
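For context, a small sketch exercising the two heads reformatted above in isolation (not part of this commit); the channel sizes are arbitrary examples.

```python
# Sketch: run dummy feature maps through SegmentationHead and ClassificationHead.
import torch

from segmentation_models_pytorch.segmentation_models_pytorch.base import (
    ClassificationHead,
    SegmentationHead,
)

decoder_output = torch.randn(2, 16, 128, 128)   # N, C, H, W decoder feature map
encoder_output = torch.randn(2, 512, 4, 4)      # deepest encoder feature map

seg_head = SegmentationHead(in_channels=16, out_channels=1, kernel_size=3, upsampling=2)
cls_head = ClassificationHead(in_channels=512, classes=3, pooling="avg", dropout=0.2)

print(seg_head(decoder_output).shape)  # torch.Size([2, 1, 256, 256])
print(cls_head(encoder_output).shape)  # torch.Size([2, 3])
```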
segmentation_models_pytorch/segmentation_models_pytorch/base/initialization.py CHANGED
@@ -3,7 +3,6 @@ import torch.nn as nn
 
 def initialize_decoder(module):
     for m in module.modules():
-
         if isinstance(m, nn.Conv2d):
             nn.init.kaiming_uniform_(m.weight, mode="fan_in", nonlinearity="relu")
             if m.bias is not None:
segmentation_models_pytorch/segmentation_models_pytorch/base/model.py CHANGED
@@ -1,9 +1,9 @@
 import torch
+
 from . import initialization as init
 
 
 class SegmentationModel(torch.nn.Module):
-
     def initialize(self):
         init.initialize_decoder(self.decoder)
         init.initialize_head(self.segmentation_head)
segmentation_models_pytorch/segmentation_models_pytorch/base/modules.py CHANGED
@@ -6,22 +6,23 @@ try:
 except ImportError:
     InPlaceABN = None
 
+
 class PreActivatedConv2dReLU(nn.Sequential):
     """
-    Pre-activated 2D convolution, as proposed in https://arxiv.org/pdf/1603.05027.pdf. Feature maps are processed by a normalization layer,
+    Pre-activated 2D convolution, as proposed in https://arxiv.org/pdf/1603.05027.pdf. Feature maps are processed by a normalization layer,
     followed by a ReLU activation and a 3x3 convolution.
     normalization
     """
+
     def __init__(
-        self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        padding=0,
-        stride=1,
-        use_batchnorm=True,
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        padding=0,
+        stride=1,
+        use_batchnorm=True,
     ):
-
         if use_batchnorm == "inplace" and InPlaceABN is None:
             raise RuntimeError(
                 "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. "
@@ -47,20 +48,21 @@ class PreActivatedConv2dReLU(nn.Sequential):
         )
         super(PreActivatedConv2dReLU, self).__init__(conv, bn, relu)
 
+
 class Conv2dReLU(nn.Sequential):
     """
     Block composed of a 3x3 convolution followed by a normalization layer and ReLU activation.
     """
     def __init__(
-        self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        padding=0,
-        stride=1,
-        use_batchnorm=True,
     ):
-
         if use_batchnorm == "inplace" and InPlaceABN is None:
             raise RuntimeError(
                 "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. "
@@ -87,20 +89,33 @@ class Conv2dReLU(nn.Sequential):
 
         super(Conv2dReLU, self).__init__(conv, bn, relu)
 
 class DepthWiseConv2d(nn.Conv2d):
     "Depth-wise convolution operation"
     def __init__(self, channels, kernel_size=3, stride=1):
-        super().__init__(channels, channels, kernel_size, stride=stride, padding=kernel_size//2, groups=channels)
 
 class PointWiseConv2d(nn.Conv2d):
     "Point-wise (1x1) convolution operation"
     def __init__(self, in_channels, out_channels):
         super().__init__(in_channels, out_channels, kernel_size=1, stride=1)
 
 class SEModule(nn.Module):
     """
     Spatial squeeze & channel excitation attention module, as proposed in https://arxiv.org/abs/1709.01507.
     """
     def __init__(self, in_channels, reduction=16):
         super().__init__()
         self.cSE = nn.Sequential(
@@ -114,10 +129,12 @@ class SEModule(nn.Module):
     def forward(self, x):
         return x * self.cSE(x)
 
 class sSEModule(nn.Module):
     """
     Channel squeeze & spatial excitation attention module, as proposed in https://arxiv.org/abs/1808.08127.
     """
     def __init__(self, in_channels):
         super().__init__()
         self.sSE = nn.Sequential(nn.Conv2d(in_channels, 1, 1), nn.Sigmoid())
@@ -125,10 +142,12 @@ class sSEModule(nn.Module):
     def forward(self, x):
         return x * self.sSE(x)
 
 class SCSEModule(nn.Module):
     """
     Concurrent spatial and channel squeeze & excitation attention module, as proposed in https://arxiv.org/pdf/1803.02579.pdf.
     """
     def __init__(self, in_channels, reduction=16):
         super().__init__()
         self.cSE = nn.Sequential(
@@ -143,8 +162,8 @@ class SCSEModule(nn.Module):
     def forward(self, x):
         return x * self.cSE(x) + x * self.sSE(x)
 
-class ArgMax(nn.Module):
 
     def __init__(self, dim=None):
         super().__init__()
         self.dim = dim
@@ -154,46 +173,47 @@ class ArgMax(nn.Module):
 
 
 class Activation(nn.Module):
-
     def __init__(self, name, **params):
-
         super().__init__()
 
-        if name is None or name == 'identity':
             self.activation = nn.Identity(**params)
-        elif name == 'sigmoid':
             self.activation = nn.Sigmoid()
-        elif name == 'softmax2d':
             self.activation = nn.Softmax(dim=1, **params)
-        elif name == 'softmax':
             self.activation = nn.Softmax(**params)
-        elif name == 'logsoftmax':
             self.activation = nn.LogSoftmax(**params)
-        elif name == 'tanh':
             self.activation = nn.Tanh()
-        elif name == 'argmax':
             self.activation = ArgMax(**params)
-        elif name == 'argmax2d':
             self.activation = ArgMax(dim=1, **params)
         elif callable(name):
             self.activation = name(**params)
         else:
-            raise ValueError('Activation should be callable/sigmoid/softmax/logsoftmax/tanh/None; got {}'.format(name))
 
     def forward(self, x):
         return self.activation(x)
 
 
 class Attention(nn.Module):
-
     def __init__(self, name, **params):
         super().__init__()
 
         if name is None:
             self.attention = nn.Identity(**params)
-        elif name == 'scse':
             self.attention = SCSEModule(**params)
-        elif name == 'se':
             self.attention = SEModule(**params)
         else:
             raise ValueError("Attention {} is not implemented".format(name))
@@ -201,6 +221,7 @@ class Attention(nn.Module):
     def forward(self, x):
         return self.attention(x)
 
 class Flatten(nn.Module):
     def forward(self, x):
-        return x.view(x.shape[0], -1)
55
  """
56
+
57
  def __init__(
58
+ self,
59
+ in_channels,
60
+ out_channels,
61
+ kernel_size,
62
+ padding=0,
63
+ stride=1,
64
+ use_batchnorm=True,
65
  ):
 
66
  if use_batchnorm == "inplace" and InPlaceABN is None:
67
  raise RuntimeError(
68
  "In order to use `use_batchnorm='inplace'` inplace_abn package must be installed. "
 
89
 
90
  super(Conv2dReLU, self).__init__(conv, bn, relu)
91
 
92
+
93
  class DepthWiseConv2d(nn.Conv2d):
94
  "Depth-wise convolution operation"
95
+
96
  def __init__(self, channels, kernel_size=3, stride=1):
97
+ super().__init__(
98
+ channels,
99
+ channels,
100
+ kernel_size,
101
+ stride=stride,
102
+ padding=kernel_size // 2,
103
+ groups=channels,
104
+ )
105
+
106
 
107
  class PointWiseConv2d(nn.Conv2d):
108
  "Point-wise (1x1) convolution operation"
109
+
110
  def __init__(self, in_channels, out_channels):
111
  super().__init__(in_channels, out_channels, kernel_size=1, stride=1)
112
 
113
+
114
  class SEModule(nn.Module):
115
  """
116
  Spatial squeeze & channel excitation attention module, as proposed in https://arxiv.org/abs/1709.01507.
117
  """
118
+
119
  def __init__(self, in_channels, reduction=16):
120
  super().__init__()
121
  self.cSE = nn.Sequential(
 
129
  def forward(self, x):
130
  return x * self.cSE(x)
131
 
132
+
133
  class sSEModule(nn.Module):
134
  """
135
  Channel squeeze & spatial excitation attention module, as proposed in https://arxiv.org/abs/1808.08127.
136
  """
137
+
138
  def __init__(self, in_channels):
139
  super().__init__()
140
  self.sSE = nn.Sequential(nn.Conv2d(in_channels, 1, 1), nn.Sigmoid())
 
142
  def forward(self, x):
143
  return x * self.sSE(x)
144
 
145
+
146
  class SCSEModule(nn.Module):
147
  """
148
  Concurrent spatial and channel squeeze & excitation attention module, as proposed in https://arxiv.org/pdf/1803.02579.pdf.
149
  """
150
+
151
  def __init__(self, in_channels, reduction=16):
152
  super().__init__()
153
  self.cSE = nn.Sequential(
 
162
  def forward(self, x):
163
  return x * self.cSE(x) + x * self.sSE(x)
164
 
 
165
 
166
+ class ArgMax(nn.Module):
167
  def __init__(self, dim=None):
168
  super().__init__()
169
  self.dim = dim
 
173
 
174
 
175
  class Activation(nn.Module):
 
176
  def __init__(self, name, **params):
 
177
  super().__init__()
178
 
179
+ if name is None or name == "identity":
180
  self.activation = nn.Identity(**params)
181
+ elif name == "sigmoid":
182
  self.activation = nn.Sigmoid()
183
+ elif name == "softmax2d":
184
  self.activation = nn.Softmax(dim=1, **params)
185
+ elif name == "softmax":
186
  self.activation = nn.Softmax(**params)
187
+ elif name == "logsoftmax":
188
  self.activation = nn.LogSoftmax(**params)
189
+ elif name == "tanh":
190
  self.activation = nn.Tanh()
191
+ elif name == "argmax":
192
  self.activation = ArgMax(**params)
193
+ elif name == "argmax2d":
194
  self.activation = ArgMax(dim=1, **params)
195
  elif callable(name):
196
  self.activation = name(**params)
197
  else:
198
+ raise ValueError(
199
+ "Activation should be callable/sigmoid/softmax/logsoftmax/tanh/None; got {}".format(
200
+ name
201
+ )
202
+ )
203
 
204
  def forward(self, x):
205
  return self.activation(x)
206
 
207
 
208
  class Attention(nn.Module):
 
209
  def __init__(self, name, **params):
210
  super().__init__()
211
 
212
  if name is None:
213
  self.attention = nn.Identity(**params)
214
+ elif name == "scse":
215
  self.attention = SCSEModule(**params)
216
+ elif name == "se":
217
  self.attention = SEModule(**params)
218
  else:
219
  raise ValueError("Attention {} is not implemented".format(name))
 
221
  def forward(self, x):
222
  return self.attention(x)
223
 
224
+
225
  class Flatten(nn.Module):
226
  def forward(self, x):
227
+ return x.view(x.shape[0], -1)
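
To make the attention and activation factories above concrete, a short usage sketch (it assumes the vendored segmentation_models_pytorch package in this repository is importable; the class names are taken directly from the file above):

import torch
from segmentation_models_pytorch.base.modules import SCSEModule, Activation, Attention

x = torch.randn(2, 64, 32, 32)

# scSE gates the input along channels (cSE) and spatial positions (sSE) and
# sums the two re-weighted maps, so the output keeps the input shape.
scse = SCSEModule(in_channels=64, reduction=16)
assert scse(x).shape == x.shape

# The factories map a string name to a module; unknown names raise ValueError.
softmax2d = Activation("softmax2d")            # resolves to nn.Softmax over the channel dim
attention = Attention("scse", in_channels=64)  # resolves to SCSEModule
assert attention(x).shape == x.shape
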
segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/__init__.py CHANGED
@@ -1 +1 @@
1
- from .model import DeepLabV3, DeepLabV3Plus
 
1
+ from .model import DeepLabV3, DeepLabV3Plus
segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/decoder.py CHANGED
@@ -61,14 +61,18 @@ class DeepLabV3PlusDecoder(nn.Module):
61
  ):
62
  super().__init__()
63
  if output_stride not in {8, 16}:
64
- raise ValueError("Output stride should be 8 or 16, got {}.".format(output_stride))
 
 
65
 
66
  self.out_channels = out_channels
67
  self.output_stride = output_stride
68
 
69
  self.aspp = nn.Sequential(
70
  ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True),
71
- SeparableConv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
 
 
72
  nn.BatchNorm2d(out_channels),
73
  nn.ReLU(),
74
  )
@@ -77,9 +81,11 @@ class DeepLabV3PlusDecoder(nn.Module):
77
  self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor)
78
 
79
  highres_in_channels = encoder_channels[-4]
80
- highres_out_channels = 48 # proposed by authors of paper
81
  self.block1 = nn.Sequential(
82
- nn.Conv2d(highres_in_channels, highres_out_channels, kernel_size=1, bias=False),
 
 
83
  nn.BatchNorm2d(highres_out_channels),
84
  nn.ReLU(),
85
  )
@@ -149,7 +155,7 @@ class ASPPPooling(nn.Sequential):
149
  size = x.shape[-2:]
150
  for mod in self:
151
  x = mod(x)
152
- return F.interpolate(x, size=size, mode='bilinear', align_corners=False)
153
 
154
 
155
  class ASPP(nn.Module):
@@ -190,16 +196,15 @@ class ASPP(nn.Module):
190
 
191
 
192
  class SeparableConv2d(nn.Sequential):
193
-
194
  def __init__(
195
- self,
196
- in_channels,
197
- out_channels,
198
- kernel_size,
199
- stride=1,
200
- padding=0,
201
- dilation=1,
202
- bias=True,
203
  ):
204
  dephtwise_conv = nn.Conv2d(
205
  in_channels,
 
61
  ):
62
  super().__init__()
63
  if output_stride not in {8, 16}:
64
+ raise ValueError(
65
+ "Output stride should be 8 or 16, got {}.".format(output_stride)
66
+ )
67
 
68
  self.out_channels = out_channels
69
  self.output_stride = output_stride
70
 
71
  self.aspp = nn.Sequential(
72
  ASPP(encoder_channels[-1], out_channels, atrous_rates, separable=True),
73
+ SeparableConv2d(
74
+ out_channels, out_channels, kernel_size=3, padding=1, bias=False
75
+ ),
76
  nn.BatchNorm2d(out_channels),
77
  nn.ReLU(),
78
  )
 
81
  self.up = nn.UpsamplingBilinear2d(scale_factor=scale_factor)
82
 
83
  highres_in_channels = encoder_channels[-4]
84
+ highres_out_channels = 48 # proposed by authors of paper
85
  self.block1 = nn.Sequential(
86
+ nn.Conv2d(
87
+ highres_in_channels, highres_out_channels, kernel_size=1, bias=False
88
+ ),
89
  nn.BatchNorm2d(highres_out_channels),
90
  nn.ReLU(),
91
  )
 
155
  size = x.shape[-2:]
156
  for mod in self:
157
  x = mod(x)
158
+ return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
159
 
160
 
161
  class ASPP(nn.Module):
 
196
 
197
 
198
  class SeparableConv2d(nn.Sequential):
 
199
  def __init__(
200
+ self,
201
+ in_channels,
202
+ out_channels,
203
+ kernel_size,
204
+ stride=1,
205
+ padding=0,
206
+ dilation=1,
207
+ bias=True,
208
  ):
209
  dephtwise_conv = nn.Conv2d(
210
  in_channels,
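
For reference, the separable convolution this decoder relies on can be sketched with two plain Conv2d layers: a depthwise 3x3 with groups equal to the channel count, followed by a 1x1 pointwise projection. This is an illustration of the pattern, not the file's exact class:

import torch
import torch.nn as nn

in_ch, out_ch = 256, 256
separable = nn.Sequential(
    # depthwise: one 3x3 filter per channel (groups == in_channels)
    nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1, groups=in_ch, bias=False),
    # pointwise: 1x1 convolution mixes channels
    nn.Conv2d(in_ch, out_ch, kernel_size=1, bias=False),
)
x = torch.randn(1, in_ch, 16, 16)
assert separable(x).shape == (1, out_ch, 16, 16)
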
segmentation_models_pytorch/segmentation_models_pytorch/deeplabv3/model.py CHANGED
@@ -1,9 +1,10 @@
 
 
1
  import torch.nn as nn
2
 
3
- from typing import Optional
4
- from .decoder import DeepLabV3Decoder, DeepLabV3PlusDecoder
5
- from ..base import SegmentationModel, SegmentationHead, ClassificationHead
6
  from ..encoders import get_encoder
 
7
 
8
 
9
  class DeepLabV3(SegmentationModel):
@@ -12,11 +13,11 @@ class DeepLabV3(SegmentationModel):
12
  Args:
13
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
14
  to extract features of different spatial resolution
15
- encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
16
  two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
17
  with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
18
  Default is 5
19
- encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
20
  other pretrained weights (see table with available weights for each encoder_name)
21
  decoder_channels: A number of convolution filters in ASPP module. Default is 256
22
  in_channels: A number of input channels for the model, default is 3 (RGB images)
@@ -25,7 +26,7 @@ class DeepLabV3(SegmentationModel):
25
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
26
  Default is **None**
27
  upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity
28
- aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
29
  on top of encoder if **aux_params** is not **None** (default). Supported params:
30
  - classes (int): A number of classes
31
  - pooling (str): One of "max", "avg". Default is "avg"
@@ -42,16 +43,16 @@ class DeepLabV3(SegmentationModel):
42
  """
43
 
44
  def __init__(
45
- self,
46
- encoder_name: str = "resnet34",
47
- encoder_depth: int = 5,
48
- encoder_weights: Optional[str] = "imagenet",
49
- decoder_channels: int = 256,
50
- in_channels: int = 3,
51
- classes: int = 1,
52
- activation: Optional[str] = None,
53
- upsampling: int = 8,
54
- aux_params: Optional[dict] = None,
55
  ):
56
  super().__init__()
57
 
@@ -61,10 +62,7 @@ class DeepLabV3(SegmentationModel):
61
  depth=encoder_depth,
62
  weights=encoder_weights,
63
  )
64
- self.encoder.make_dilated(
65
- stage_list=[4, 5],
66
- dilation_list=[2, 4]
67
- )
68
 
69
  self.decoder = DeepLabV3Decoder(
70
  in_channels=self.encoder.out_channels[-1],
@@ -90,15 +88,15 @@ class DeepLabV3(SegmentationModel):
90
  class DeepLabV3Plus(SegmentationModel):
91
  """DeepLabV3+ implementation from "Encoder-Decoder with Atrous Separable
92
  Convolution for Semantic Image Segmentation"
93
-
94
  Args:
95
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
96
  to extract features of different spatial resolution
97
- encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
98
  two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
99
  with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
100
  Default is 5
101
- encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
102
  other pretrained weights (see table with available weights for each encoder_name)
103
  encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation)
104
  decoder_atrous_rates: Dilation rates for ASPP module (should be a tuple of 3 integer values)
@@ -109,7 +107,7 @@ class DeepLabV3Plus(SegmentationModel):
109
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
110
  Default is **None**
111
  upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity
112
- aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
113
  on top of encoder if **aux_params** is not **None** (default). Supported params:
114
  - classes (int): A number of classes
115
  - pooling (str): One of "max", "avg". Default is "avg"
@@ -121,19 +119,20 @@ class DeepLabV3Plus(SegmentationModel):
121
  Reference:
122
  https://arxiv.org/abs/1802.02611v3
123
  """
 
124
  def __init__(
125
- self,
126
- encoder_name: str = "resnet34",
127
- encoder_depth: int = 5,
128
- encoder_weights: Optional[str] = "imagenet",
129
- encoder_output_stride: int = 16,
130
- decoder_channels: int = 256,
131
- decoder_atrous_rates: tuple = (12, 24, 36),
132
- in_channels: int = 3,
133
- classes: int = 1,
134
- activation: Optional[str] = None,
135
- upsampling: int = 4,
136
- aux_params: Optional[dict] = None,
137
  ):
138
  super().__init__()
139
 
@@ -145,19 +144,15 @@ class DeepLabV3Plus(SegmentationModel):
145
  )
146
 
147
  if encoder_output_stride == 8:
148
- self.encoder.make_dilated(
149
- stage_list=[4, 5],
150
- dilation_list=[2, 4]
151
- )
152
 
153
  elif encoder_output_stride == 16:
154
- self.encoder.make_dilated(
155
- stage_list=[5],
156
- dilation_list=[2]
157
- )
158
  else:
159
  raise ValueError(
160
- "Encoder output stride should be 8 or 16, got {}".format(encoder_output_stride)
 
 
161
  )
162
 
163
  self.decoder = DeepLabV3PlusDecoder(
 
1
+ from typing import Optional
2
+
3
  import torch.nn as nn
4
 
5
+ from ..base import ClassificationHead, SegmentationHead, SegmentationModel
 
 
6
  from ..encoders import get_encoder
7
+ from .decoder import DeepLabV3Decoder, DeepLabV3PlusDecoder
8
 
9
 
10
  class DeepLabV3(SegmentationModel):
 
13
  Args:
14
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
15
  to extract features of different spatial resolution
16
+ encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
17
  two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
18
  with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
19
  Default is 5
20
+ encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
21
  other pretrained weights (see table with available weights for each encoder_name)
22
  decoder_channels: A number of convolution filters in ASPP module. Default is 256
23
  in_channels: A number of input channels for the model, default is 3 (RGB images)
 
26
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
27
  Default is **None**
28
  upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity
29
+ aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
30
  on top of encoder if **aux_params** is not **None** (default). Supported params:
31
  - classes (int): A number of classes
32
  - pooling (str): One of "max", "avg". Default is "avg"
 
43
  """
44
 
45
  def __init__(
46
+ self,
47
+ encoder_name: str = "resnet34",
48
+ encoder_depth: int = 5,
49
+ encoder_weights: Optional[str] = "imagenet",
50
+ decoder_channels: int = 256,
51
+ in_channels: int = 3,
52
+ classes: int = 1,
53
+ activation: Optional[str] = None,
54
+ upsampling: int = 8,
55
+ aux_params: Optional[dict] = None,
56
  ):
57
  super().__init__()
58
 
 
62
  depth=encoder_depth,
63
  weights=encoder_weights,
64
  )
65
+ self.encoder.make_dilated(stage_list=[4, 5], dilation_list=[2, 4])
 
 
 
66
 
67
  self.decoder = DeepLabV3Decoder(
68
  in_channels=self.encoder.out_channels[-1],
 
88
  class DeepLabV3Plus(SegmentationModel):
89
  """DeepLabV3+ implementation from "Encoder-Decoder with Atrous Separable
90
  Convolution for Semantic Image Segmentation"
91
+
92
  Args:
93
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
94
  to extract features of different spatial resolution
95
+ encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
96
  two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
97
  with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
98
  Default is 5
99
+ encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
100
  other pretrained weights (see table with available weights for each encoder_name)
101
  encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation)
102
  decoder_atrous_rates: Dilation rates for ASPP module (should be a tuple of 3 integer values)
 
107
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
108
  Default is **None**
109
  upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity
110
+ aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
111
  on top of encoder if **aux_params** is not **None** (default). Supported params:
112
  - classes (int): A number of classes
113
  - pooling (str): One of "max", "avg". Default is "avg"
 
119
  Reference:
120
  https://arxiv.org/abs/1802.02611v3
121
  """
122
+
123
  def __init__(
124
+ self,
125
+ encoder_name: str = "resnet34",
126
+ encoder_depth: int = 5,
127
+ encoder_weights: Optional[str] = "imagenet",
128
+ encoder_output_stride: int = 16,
129
+ decoder_channels: int = 256,
130
+ decoder_atrous_rates: tuple = (12, 24, 36),
131
+ in_channels: int = 3,
132
+ classes: int = 1,
133
+ activation: Optional[str] = None,
134
+ upsampling: int = 4,
135
+ aux_params: Optional[dict] = None,
136
  ):
137
  super().__init__()
138
 
 
144
  )
145
 
146
  if encoder_output_stride == 8:
147
+ self.encoder.make_dilated(stage_list=[4, 5], dilation_list=[2, 4])
 
 
 
148
 
149
  elif encoder_output_stride == 16:
150
+ self.encoder.make_dilated(stage_list=[5], dilation_list=[2])
 
 
 
151
  else:
152
  raise ValueError(
153
+ "Encoder output stride should be 8 or 16, got {}".format(
154
+ encoder_output_stride
155
+ )
156
  )
157
 
158
  self.decoder = DeepLabV3PlusDecoder(
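
A hedged construction example for the model defined above (encoder weights set to None to avoid a download; passing aux_params enables the optional classification head, in which case the forward pass returns a mask/label pair):

import torch
import segmentation_models_pytorch as smp  # assumes the vendored package is importable

model = smp.DeepLabV3Plus(
    encoder_name="resnet34",
    encoder_weights=None,        # random init, no download
    encoder_output_stride=16,
    classes=2,
    aux_params={"classes": 2, "pooling": "avg"},
)
with torch.no_grad():
    mask, label = model(torch.randn(1, 3, 256, 256))
print(mask.shape, label.shape)   # expected: (1, 2, 256, 256) and (1, 2)
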
segmentation_models_pytorch/segmentation_models_pytorch/efficientunetplusplus/decoder.py CHANGED
@@ -1,76 +1,92 @@
1
  import torch
2
- from torch.functional import norm
3
  import torch.nn as nn
4
  import torch.nn.functional as F
 
5
 
6
  from ..base import modules as md
7
 
 
8
  class InvertedResidual(nn.Module):
9
  """
10
- Inverted bottleneck residual block with an scSE block embedded into the residual layer, after the
11
  depthwise convolution. By default, uses batch normalization and Hardswish activation.
12
  """
13
- def __init__(self, in_channels, out_channels, kernel_size = 3, stride = 1, expansion_ratio = 1, squeeze_ratio = 1, \
14
- activation = nn.Hardswish(True), normalization = nn.BatchNorm2d):

15
  super().__init__()
16
  self.same_shape = in_channels == out_channels
17
- self.mid_channels = expansion_ratio*in_channels
18
  self.block = nn.Sequential(
19
  md.PointWiseConv2d(in_channels, self.mid_channels),
20
  normalization(self.mid_channels),
21
  activation,
22
- md.DepthWiseConv2d(self.mid_channels, kernel_size=kernel_size, stride=stride),
 
 
23
  normalization(self.mid_channels),
24
  activation,
25
- #md.sSEModule(self.mid_channels),
26
- md.SCSEModule(self.mid_channels, reduction = squeeze_ratio),
27
- #md.SEModule(self.mid_channels, reduction = squeeze_ratio),
28
  md.PointWiseConv2d(self.mid_channels, out_channels),
29
- normalization(out_channels)
30
  )
31
-
32
  if not self.same_shape:
33
- # 1x1 convolution used to match the number of channels in the skip feature maps with that
34
  # of the residual feature maps
35
  self.skip_conv = nn.Sequential(
36
- nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
37
- normalization(out_channels)
 
 
38
  )
39
-
40
  def forward(self, x):
41
  residual = self.block(x)
42
-
43
  if not self.same_shape:
44
  x = self.skip_conv(x)
45
  return x + residual
46
-
 
47
  class DecoderBlock(nn.Module):
48
  def __init__(
49
- self,
50
- in_channels,
51
- skip_channels,
52
- out_channels,
53
- squeeze_ratio=1,
54
- expansion_ratio=1
55
  ):
56
  super().__init__()
57
 
58
  # Inverted Residual block convolutions
59
  self.conv1 = InvertedResidual(
60
- in_channels=in_channels+skip_channels,
61
- out_channels=out_channels,
62
- kernel_size=3,
63
- stride=1,
64
- expansion_ratio=expansion_ratio,
65
- squeeze_ratio=squeeze_ratio
66
  )
67
  self.conv2 = InvertedResidual(
68
- in_channels=out_channels,
69
- out_channels=out_channels,
70
- kernel_size=3,
71
- stride=1,
72
- expansion_ratio=expansion_ratio,
73
- squeeze_ratio=squeeze_ratio
74
  )
75
 
76
  def forward(self, x, skip=None):
@@ -82,14 +98,15 @@ class DecoderBlock(nn.Module):
82
  x = self.conv2(x)
83
  return x
84
 
 
85
  class EfficientUnetPlusPlusDecoder(nn.Module):
86
  def __init__(
87
- self,
88
- encoder_channels,
89
- decoder_channels,
90
- n_blocks=5,
91
- squeeze_ratio=1,
92
- expansion_ratio=1
93
  ):
94
  super().__init__()
95
  if n_blocks != len(decoder_channels):
@@ -99,8 +116,12 @@ class EfficientUnetPlusPlusDecoder(nn.Module):
99
  )
100
  )
101
 
102
- encoder_channels = encoder_channels[1:] # remove first skip with same spatial resolution
103
- encoder_channels = encoder_channels[::-1] # reverse channels to start from head of encoder
 
 
 
 
104
  # computing blocks input and output channels
105
  head_channels = encoder_channels[0]
106
  self.in_channels = [head_channels] + list(decoder_channels[:-1])
@@ -112,37 +133,51 @@ class EfficientUnetPlusPlusDecoder(nn.Module):
112
 
113
  blocks = {}
114
  for layer_idx in range(len(self.in_channels) - 1):
115
- for depth_idx in range(layer_idx+1):
116
  if depth_idx == 0:
117
  in_ch = self.in_channels[layer_idx]
118
- skip_ch = self.skip_channels[layer_idx] * (layer_idx+1)
119
  out_ch = self.out_channels[layer_idx]
120
  else:
121
  out_ch = self.skip_channels[layer_idx]
122
- skip_ch = self.skip_channels[layer_idx] * (layer_idx+1-depth_idx)
 
 
123
  in_ch = self.skip_channels[layer_idx - 1]
124
- blocks[f'x_{depth_idx}_{layer_idx}'] = DecoderBlock(in_ch, skip_ch, out_ch, **kwargs)
125
- blocks[f'x_{0}_{len(self.in_channels)-1}'] =\
126
- DecoderBlock(self.in_channels[-1], 0, self.out_channels[-1], **kwargs)
 
 
 
127
  self.blocks = nn.ModuleDict(blocks)
128
  self.depth = len(self.in_channels) - 1
129
 
130
  def forward(self, *features):
131
-
132
- features = features[1:] # remove first skip with same spatial resolution
133
  features = features[::-1] # reverse channels to start from head of encoder
134
  # start building dense connections
135
  dense_x = {}
136
- for layer_idx in range(len(self.in_channels)-1):
137
- for depth_idx in range(self.depth-layer_idx):
138
  if layer_idx == 0:
139
- output = self.blocks[f'x_{depth_idx}_{depth_idx}'](features[depth_idx], features[depth_idx+1])
140
- dense_x[f'x_{depth_idx}_{depth_idx}'] = output
 
 
141
  else:
142
  dense_l_i = depth_idx + layer_idx
143
- cat_features = [dense_x[f'x_{idx}_{dense_l_i}'] for idx in range(depth_idx+1, dense_l_i+1)]
144
- cat_features = torch.cat(cat_features + [features[dense_l_i+1]], dim=1)
145
- dense_x[f'x_{depth_idx}_{dense_l_i}'] =\
146
- self.blocks[f'x_{depth_idx}_{dense_l_i}'](dense_x[f'x_{depth_idx}_{dense_l_i-1}'], cat_features)
147
- dense_x[f'x_{0}_{self.depth}'] = self.blocks[f'x_{0}_{self.depth}'](dense_x[f'x_{0}_{self.depth-1}'])
148
- return dense_x[f'x_{0}_{self.depth}']

1
  import torch
 
2
  import torch.nn as nn
3
  import torch.nn.functional as F
4
+ from torch.functional import norm
5
 
6
  from ..base import modules as md
7
 
8
+
9
  class InvertedResidual(nn.Module):
10
  """
11
+ Inverted bottleneck residual block with an scSE block embedded into the residual layer, after the
12
  depthwise convolution. By default, uses batch normalization and Hardswish activation.
13
  """
14
+
15
+ def __init__(
16
+ self,
17
+ in_channels,
18
+ out_channels,
19
+ kernel_size=3,
20
+ stride=1,
21
+ expansion_ratio=1,
22
+ squeeze_ratio=1,
23
+ activation=nn.Hardswish(True),
24
+ normalization=nn.BatchNorm2d,
25
+ ):
26
  super().__init__()
27
  self.same_shape = in_channels == out_channels
28
+ self.mid_channels = expansion_ratio * in_channels
29
  self.block = nn.Sequential(
30
  md.PointWiseConv2d(in_channels, self.mid_channels),
31
  normalization(self.mid_channels),
32
  activation,
33
+ md.DepthWiseConv2d(
34
+ self.mid_channels, kernel_size=kernel_size, stride=stride
35
+ ),
36
  normalization(self.mid_channels),
37
  activation,
38
+ # md.sSEModule(self.mid_channels),
39
+ md.SCSEModule(self.mid_channels, reduction=squeeze_ratio),
40
+ # md.SEModule(self.mid_channels, reduction = squeeze_ratio),
41
  md.PointWiseConv2d(self.mid_channels, out_channels),
42
+ normalization(out_channels),
43
  )
44
+
45
  if not self.same_shape:
46
+ # 1x1 convolution used to match the number of channels in the skip feature maps with that
47
  # of the residual feature maps
48
  self.skip_conv = nn.Sequential(
49
+ nn.Conv2d(
50
+ in_channels=in_channels, out_channels=out_channels, kernel_size=1
51
+ ),
52
+ normalization(out_channels),
53
  )
54
+
55
  def forward(self, x):
56
  residual = self.block(x)
57
+
58
  if not self.same_shape:
59
  x = self.skip_conv(x)
60
  return x + residual
61
+
62
+
63
  class DecoderBlock(nn.Module):
64
  def __init__(
65
+ self,
66
+ in_channels,
67
+ skip_channels,
68
+ out_channels,
69
+ squeeze_ratio=1,
70
+ expansion_ratio=1,
71
  ):
72
  super().__init__()
73
 
74
  # Inverted Residual block convolutions
75
  self.conv1 = InvertedResidual(
76
+ in_channels=in_channels + skip_channels,
77
+ out_channels=out_channels,
78
+ kernel_size=3,
79
+ stride=1,
80
+ expansion_ratio=expansion_ratio,
81
+ squeeze_ratio=squeeze_ratio,
82
  )
83
  self.conv2 = InvertedResidual(
84
+ in_channels=out_channels,
85
+ out_channels=out_channels,
86
+ kernel_size=3,
87
+ stride=1,
88
+ expansion_ratio=expansion_ratio,
89
+ squeeze_ratio=squeeze_ratio,
90
  )
91
 
92
  def forward(self, x, skip=None):
 
98
  x = self.conv2(x)
99
  return x
100
 
101
+
102
  class EfficientUnetPlusPlusDecoder(nn.Module):
103
  def __init__(
104
+ self,
105
+ encoder_channels,
106
+ decoder_channels,
107
+ n_blocks=5,
108
+ squeeze_ratio=1,
109
+ expansion_ratio=1,
110
  ):
111
  super().__init__()
112
  if n_blocks != len(decoder_channels):
 
116
  )
117
  )
118
 
119
+ encoder_channels = encoder_channels[
120
+ 1:
121
+ ] # remove first skip with same spatial resolution
122
+ encoder_channels = encoder_channels[
123
+ ::-1
124
+ ] # reverse channels to start from head of encoder
125
  # computing blocks input and output channels
126
  head_channels = encoder_channels[0]
127
  self.in_channels = [head_channels] + list(decoder_channels[:-1])
 
133
 
134
  blocks = {}
135
  for layer_idx in range(len(self.in_channels) - 1):
136
+ for depth_idx in range(layer_idx + 1):
137
  if depth_idx == 0:
138
  in_ch = self.in_channels[layer_idx]
139
+ skip_ch = self.skip_channels[layer_idx] * (layer_idx + 1)
140
  out_ch = self.out_channels[layer_idx]
141
  else:
142
  out_ch = self.skip_channels[layer_idx]
143
+ skip_ch = self.skip_channels[layer_idx] * (
144
+ layer_idx + 1 - depth_idx
145
+ )
146
  in_ch = self.skip_channels[layer_idx - 1]
147
+ blocks[f"x_{depth_idx}_{layer_idx}"] = DecoderBlock(
148
+ in_ch, skip_ch, out_ch, **kwargs
149
+ )
150
+ blocks[f"x_{0}_{len(self.in_channels)-1}"] = DecoderBlock(
151
+ self.in_channels[-1], 0, self.out_channels[-1], **kwargs
152
+ )
153
  self.blocks = nn.ModuleDict(blocks)
154
  self.depth = len(self.in_channels) - 1
155
 
156
  def forward(self, *features):
157
+ features = features[1:] # remove first skip with same spatial resolution
 
158
  features = features[::-1] # reverse channels to start from head of encoder
159
  # start building dense connections
160
  dense_x = {}
161
+ for layer_idx in range(len(self.in_channels) - 1):
162
+ for depth_idx in range(self.depth - layer_idx):
163
  if layer_idx == 0:
164
+ output = self.blocks[f"x_{depth_idx}_{depth_idx}"](
165
+ features[depth_idx], features[depth_idx + 1]
166
+ )
167
+ dense_x[f"x_{depth_idx}_{depth_idx}"] = output
168
  else:
169
  dense_l_i = depth_idx + layer_idx
170
+ cat_features = [
171
+ dense_x[f"x_{idx}_{dense_l_i}"]
172
+ for idx in range(depth_idx + 1, dense_l_i + 1)
173
+ ]
174
+ cat_features = torch.cat(
175
+ cat_features + [features[dense_l_i + 1]], dim=1
176
+ )
177
+ dense_x[f"x_{depth_idx}_{dense_l_i}"] = self.blocks[
178
+ f"x_{depth_idx}_{dense_l_i}"
179
+ ](dense_x[f"x_{depth_idx}_{dense_l_i-1}"], cat_features)
180
+ dense_x[f"x_{0}_{self.depth}"] = self.blocks[f"x_{0}_{self.depth}"](
181
+ dense_x[f"x_{0}_{self.depth-1}"]
182
+ )
183
+ return dense_x[f"x_{0}_{self.depth}"]
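
The decoder block above is easier to follow with a stripped-down sketch of the inverted-residual pattern it uses (pointwise expansion, depthwise 3x3, pointwise projection; the scSE attention is omitted here to keep the shapes in focus):

import torch
import torch.nn as nn

in_ch = out_ch = 32
expansion = 2
mid = expansion * in_ch
block = nn.Sequential(
    nn.Conv2d(in_ch, mid, kernel_size=1),                        # pointwise expansion
    nn.BatchNorm2d(mid), nn.Hardswish(),
    nn.Conv2d(mid, mid, kernel_size=3, padding=1, groups=mid),   # depthwise 3x3
    nn.BatchNorm2d(mid), nn.Hardswish(),
    nn.Conv2d(mid, out_ch, kernel_size=1),                       # pointwise projection
    nn.BatchNorm2d(out_ch),
)
x = torch.randn(1, in_ch, 64, 64)
out = x + block(x)   # residual add, valid because in_ch == out_ch
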
segmentation_models_pytorch/segmentation_models_pytorch/efficientunetplusplus/model.py CHANGED
@@ -1,28 +1,30 @@
1
- from typing import Optional, Union, List
2
- from .decoder import EfficientUnetPlusPlusDecoder
3
- from ..encoders import get_encoder
4
- from ..base import SegmentationModel
5
- from ..base import SegmentationHead, ClassificationHead
6
  from torchvision import transforms
7
 
 
 
 
 
 
8
  class EfficientUnetPlusPlus(SegmentationModel):
9
- """The EfficientUNet++ is a fully convolutional neural network for ordinary and medical image semantic segmentation.
10
- Consists of an *encoder* and a *decoder*, connected by *skip connections*. The encoder extracts features of
11
- different spatial resolutions, which are fed to the decoder through skip connections. The decoder combines its
12
- own feature maps with the ones from skip connections to produce accurate segmentations masks. The EfficientUNet++
13
- decoder architecture is based on the UNet++, a model composed of nested U-Net-like decoder sub-networks. To
14
- increase performance and computational efficiency, the EfficientUNet++ replaces the UNet++'s blocks with
15
  inverted residual blocks with depthwise convolutions and embedded spatial and channel attention mechanisms.
16
  Synergizes well with EfficientNet encoders. Due to their efficient visual representations (i.e., using few channels
17
  to represent extracted features), EfficientNet encoders require few computation from the decoder.
18
 
19
  Args:
20
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) to extract features
21
- encoder_depth: Number of stages of the encoder, in range [3 ,5]. Each stage generate features two times smaller,
22
- in spatial dimensions, than the previous one (e.g., for depth=0 features will haves shapes [(N, C, H, W)]),
23
  for depth 1 features will have shapes [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
24
  Default is 5
25
- encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
26
  other pretrained weights (see table with available weights for each encoder_name)
27
  decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in the decoder.
28
  Length of the list should be the same as **encoder_depth**
@@ -31,7 +33,7 @@ class EfficientUnetPlusPlus(SegmentationModel):
31
  activation: An activation function to apply after the final convolution layer.
32
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
33
  Default is **None**
34
- aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is built
35
  on top of encoder if **aux_params** is not **None** (default). Supported params:
36
  - classes (int): A number of classes
37
  - pooling (str): One of "max", "avg". Default is "avg"
@@ -71,7 +73,7 @@ class EfficientUnetPlusPlus(SegmentationModel):
71
  decoder_channels=decoder_channels,
72
  n_blocks=encoder_depth,
73
  squeeze_ratio=squeeze_ratio,
74
- expansion_ratio=expansion_ratio
75
  )
76
 
77
  self.segmentation_head = SegmentationHead(
@@ -117,9 +119,9 @@ class EfficientUnetPlusPlus(SegmentationModel):
117
  [
118
  transforms.ToPILImage(),
119
  transforms.Resize(x.size[1]),
120
- transforms.ToTensor()
121
  ]
122
  )
123
- full_mask = tf(probs.cpu())
124
 
125
- return full_mask
 
1
+ from typing import List, Optional, Union
2
+
 
 
 
3
  from torchvision import transforms
4
 
5
+ from ..base import ClassificationHead, SegmentationHead, SegmentationModel
6
+ from ..encoders import get_encoder
7
+ from .decoder import EfficientUnetPlusPlusDecoder
8
+
9
+
10
  class EfficientUnetPlusPlus(SegmentationModel):
11
+ """The EfficientUNet++ is a fully convolutional neural network for ordinary and medical image semantic segmentation.
12
+ Consists of an *encoder* and a *decoder*, connected by *skip connections*. The encoder extracts features of
13
+ different spatial resolutions, which are fed to the decoder through skip connections. The decoder combines its
14
+ own feature maps with the ones from skip connections to produce accurate segmentations masks. The EfficientUNet++
15
+ decoder architecture is based on the UNet++, a model composed of nested U-Net-like decoder sub-networks. To
16
+ increase performance and computational efficiency, the EfficientUNet++ replaces the UNet++'s blocks with
17
  inverted residual blocks with depthwise convolutions and embedded spatial and channel attention mechanisms.
18
  Synergizes well with EfficientNet encoders. Due to their efficient visual representations (i.e., using few channels
19
  to represent extracted features), EfficientNet encoders require few computation from the decoder.
20
 
21
  Args:
22
  encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone) to extract features
23
+ encoder_depth: Number of stages of the encoder, in range [3 ,5]. Each stage generate features two times smaller,
24
+ in spatial dimensions, than the previous one (e.g., for depth=0 features will haves shapes [(N, C, H, W)]),
25
  for depth 1 features will have shapes [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
26
  Default is 5
27
+ encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
28
  other pretrained weights (see table with available weights for each encoder_name)
29
  decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in the decoder.
30
  Length of the list should be the same as **encoder_depth**
 
33
  activation: An activation function to apply after the final convolution layer.
34
  Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
35
  Default is **None**
36
+ aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is built
37
  on top of encoder if **aux_params** is not **None** (default). Supported params:
38
  - classes (int): A number of classes
39
  - pooling (str): One of "max", "avg". Default is "avg"
 
73
  decoder_channels=decoder_channels,
74
  n_blocks=encoder_depth,
75
  squeeze_ratio=squeeze_ratio,
76
+ expansion_ratio=expansion_ratio,
77
  )
78
 
79
  self.segmentation_head = SegmentationHead(
 
119
  [
120
  transforms.ToPILImage(),
121
  transforms.Resize(x.size[1]),
122
+ transforms.ToTensor(),
123
  ]
124
  )
125
+ full_mask = tf(probs.cpu())
126
 
127
+ return full_mask
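
A minimal, hedged usage sketch for the model above (the encoder name and input size are illustrative choices, and the top-level import assumes the vendored fork exports EfficientUnetPlusPlus from its package __init__):

import torch
from segmentation_models_pytorch import EfficientUnetPlusPlus  # assumed export of the vendored fork

model = EfficientUnetPlusPlus(
    encoder_name="efficientnet-b0",  # illustrative encoder choice
    encoder_weights=None,
    classes=1,
)
with torch.no_grad():
    mask = model(torch.randn(1, 3, 256, 256))
print(mask.shape)   # expected: (1, 1, 256, 256)
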
segmentation_models_pytorch/segmentation_models_pytorch/encoders/__init__.py CHANGED
@@ -1,22 +1,24 @@
1
  import functools
 
2
  import torch.utils.model_zoo as model_zoo
3
 
4
- from .resnet import resnet_encoders
5
- from .dpn import dpn_encoders
6
- from .vgg import vgg_encoders
7
- from .senet import senet_encoders
8
  from .densenet import densenet_encoders
 
 
9
  from .inceptionresnetv2 import inceptionresnetv2_encoders
10
  from .inceptionv4 import inceptionv4_encoders
11
- from .efficientnet import efficient_net_encoders
12
  from .mobilenet import mobilenet_encoders
13
- from .xception import xception_encoders
 
 
 
 
14
  # from .timm_efficientnet import timm_efficientnet_encoders
15
  from .timm_resnest import timm_resnest_encoders
16
- from .timm_res2net import timm_res2net_encoders
17
- from .timm_regnet import timm_regnet_encoders
18
  from .timm_sknet import timm_sknet_encoders
19
- from ._preprocessing import preprocess_input
 
20
 
21
  encoders = {}
22
  encoders.update(resnet_encoders)
@@ -37,11 +39,14 @@ encoders.update(timm_sknet_encoders)
37
 
38
 
39
  def get_encoder(name, in_channels=3, depth=5, weights=None):
40
-
41
  try:
42
  Encoder = encoders[name]["encoder"]
43
  except KeyError:
44
- raise KeyError("Wrong encoder name `{}`, supported encoders: {}".format(name, list(encoders.keys())))
 
 
 
 
45
 
46
  params = encoders[name]["params"]
47
  params.update(depth=depth)
@@ -51,9 +56,13 @@ def get_encoder(name, in_channels=3, depth=5, weights=None):
51
  try:
52
  settings = encoders[name]["pretrained_settings"][weights]
53
  except KeyError:
54
- raise KeyError("Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format(
55
- weights, name, list(encoders[name]["pretrained_settings"].keys()),
56
- ))
 
 
 
 
57
  encoder.load_state_dict(model_zoo.load_url(settings["url"]))
58
 
59
  encoder.set_in_channels(in_channels)
 
1
  import functools
2
+
3
  import torch.utils.model_zoo as model_zoo
4
 
5
+ from ._preprocessing import preprocess_input
 
 
 
6
  from .densenet import densenet_encoders
7
+ from .dpn import dpn_encoders
8
+ from .efficientnet import efficient_net_encoders
9
  from .inceptionresnetv2 import inceptionresnetv2_encoders
10
  from .inceptionv4 import inceptionv4_encoders
 
11
  from .mobilenet import mobilenet_encoders
12
+ from .resnet import resnet_encoders
13
+ from .senet import senet_encoders
14
+ from .timm_regnet import timm_regnet_encoders
15
+ from .timm_res2net import timm_res2net_encoders
16
+
17
  # from .timm_efficientnet import timm_efficientnet_encoders
18
  from .timm_resnest import timm_resnest_encoders
 
 
19
  from .timm_sknet import timm_sknet_encoders
20
+ from .vgg import vgg_encoders
21
+ from .xception import xception_encoders
22
 
23
  encoders = {}
24
  encoders.update(resnet_encoders)
 
39
 
40
 
41
  def get_encoder(name, in_channels=3, depth=5, weights=None):
 
42
  try:
43
  Encoder = encoders[name]["encoder"]
44
  except KeyError:
45
+ raise KeyError(
46
+ "Wrong encoder name `{}`, supported encoders: {}".format(
47
+ name, list(encoders.keys())
48
+ )
49
+ )
50
 
51
  params = encoders[name]["params"]
52
  params.update(depth=depth)
 
56
  try:
57
  settings = encoders[name]["pretrained_settings"][weights]
58
  except KeyError:
59
+ raise KeyError(
60
+ "Wrong pretrained weights `{}` for encoder `{}`. Available options are: {}".format(
61
+ weights,
62
+ name,
63
+ list(encoders[name]["pretrained_settings"].keys()),
64
+ )
65
+ )
66
  encoder.load_state_dict(model_zoo.load_url(settings["url"]))
67
 
68
  encoder.set_in_channels(in_channels)
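
The registry lookup above is used like this (a short sketch; weights=None skips the state-dict download):

import torch
from segmentation_models_pytorch.encoders import get_encoder

encoder = get_encoder("resnet34", in_channels=3, depth=5, weights=None)
features = encoder(torch.randn(1, 3, 224, 224))
# One feature map per encoder stage, each half the resolution of the previous one.
print([tuple(f.shape) for f in features])

# An unknown name raises the KeyError built above, listing the supported encoders.
try:
    get_encoder("not-an-encoder")
except KeyError as err:
    print(err)
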
segmentation_models_pytorch/segmentation_models_pytorch/encoders/_base.py CHANGED
@@ -1,15 +1,16 @@
 
 
 
1
  import torch
2
  import torch.nn as nn
3
- from typing import List
4
- from collections import OrderedDict
5
 
6
  from . import _utils as utils
7
 
8
 
9
  class EncoderMixin:
10
  """Add encoder functionality such as:
11
- - output channels specification of feature tensors (produced by encoder)
12
- - patching first convolution for arbitrary input channels
13
  """
14
 
15
  @property
 
1
+ from collections import OrderedDict
2
+ from typing import List
3
+
4
  import torch
5
  import torch.nn as nn
 
 
6
 
7
  from . import _utils as utils
8
 
9
 
10
  class EncoderMixin:
11
  """Add encoder functionality such as:
12
+ - output channels specification of feature tensors (produced by encoder)
13
+ - patching first convolution for arbitrary input channels
14
  """
15
 
16
  @property
segmentation_models_pytorch/segmentation_models_pytorch/encoders/_preprocessing.py CHANGED
@@ -4,7 +4,6 @@ import numpy as np
4
  def preprocess_input(
5
  x, mean=None, std=None, input_space="RGB", input_range=None, **kwargs
6
  ):
7
-
8
  if input_space == "BGR":
9
  x = x[..., ::-1].copy()
10
 
 
4
  def preprocess_input(
5
  x, mean=None, std=None, input_space="RGB", input_range=None, **kwargs
6
  ):
 
7
  if input_space == "BGR":
8
  x = x[..., ::-1].copy()
9
 
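
For completeness, a sketch of calling the preprocessing function above directly. Only the BGR flip is visible in this hunk; the mean/std values below are the usual ImageNet statistics supplied here as assumptions, whereas in normal use they come from an encoder's pretrained settings:

import numpy as np
from segmentation_models_pytorch.encoders._preprocessing import preprocess_input

image = np.random.randint(0, 255, size=(224, 224, 3)).astype("float32")
x = preprocess_input(
    image,
    input_space="RGB",
    input_range=[0, 1],           # target range; rescaling happens inside the full function
    mean=[0.485, 0.456, 0.406],   # ImageNet statistics (assumed here)
    std=[0.229, 0.224, 0.225],
)
print(x.shape)
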
segmentation_models_pytorch/segmentation_models_pytorch/encoders/densenet.py CHANGED
@@ -24,8 +24,8 @@ Methods:
24
  """
25
 
26
  import re
27
- import torch.nn as nn
28
 
 
29
  from pretrainedmodels.models.torchvision_models import pretrained_settings
30
  from torchvision.models.densenet import DenseNet
31
 
@@ -33,7 +33,6 @@ from ._base import EncoderMixin
33
 
34
 
35
  class TransitionWithSkip(nn.Module):
36
-
37
  def __init__(self, module):
38
  super().__init__()
39
  self.module = module
@@ -55,22 +54,32 @@ class DenseNetEncoder(DenseNet, EncoderMixin):
55
  del self.classifier
56
 
57
  def make_dilated(self, stage_list, dilation_list):
58
- raise ValueError("DenseNet encoders do not support dilated mode "
59
- "due to pooling operation for downsampling!")
 
 
60
 
61
  def get_stages(self):
62
  return [
63
  nn.Identity(),
64
- nn.Sequential(self.features.conv0, self.features.norm0, self.features.relu0),
65
- nn.Sequential(self.features.pool0, self.features.denseblock1,
66
- TransitionWithSkip(self.features.transition1)),
67
- nn.Sequential(self.features.denseblock2, TransitionWithSkip(self.features.transition2)),
68
- nn.Sequential(self.features.denseblock3, TransitionWithSkip(self.features.transition3)),
69
- nn.Sequential(self.features.denseblock4, self.features.norm5)

70
  ]
71
 
72
  def forward(self, x):
73
-
74
  stages = self.get_stages()
75
 
76
  features = []
 
24
  """
25
 
26
  import re
 
27
 
28
+ import torch.nn as nn
29
  from pretrainedmodels.models.torchvision_models import pretrained_settings
30
  from torchvision.models.densenet import DenseNet
31
 
 
33
 
34
 
35
  class TransitionWithSkip(nn.Module):
 
36
  def __init__(self, module):
37
  super().__init__()
38
  self.module = module
 
54
  del self.classifier
55
 
56
  def make_dilated(self, stage_list, dilation_list):
57
+ raise ValueError(
58
+ "DenseNet encoders do not support dilated mode "
59
+ "due to pooling operation for downsampling!"
60
+ )
61
 
62
  def get_stages(self):
63
  return [
64
  nn.Identity(),
65
+ nn.Sequential(
66
+ self.features.conv0, self.features.norm0, self.features.relu0
67
+ ),
68
+ nn.Sequential(
69
+ self.features.pool0,
70
+ self.features.denseblock1,
71
+ TransitionWithSkip(self.features.transition1),
72
+ ),
73
+ nn.Sequential(
74
+ self.features.denseblock2, TransitionWithSkip(self.features.transition2)
75
+ ),
76
+ nn.Sequential(
77
+ self.features.denseblock3, TransitionWithSkip(self.features.transition3)
78
+ ),
79
+ nn.Sequential(self.features.denseblock4, self.features.norm5),
80
  ]
81
 
82
  def forward(self, x):
 
83
  stages = self.get_stages()
84
 
85
  features = []
segmentation_models_pytorch/segmentation_models_pytorch/encoders/dpn.py CHANGED
@@ -26,9 +26,7 @@ Methods:
26
  import torch
27
  import torch.nn as nn
28
  import torch.nn.functional as F
29
-
30
- from pretrainedmodels.models.dpn import DPN
31
- from pretrainedmodels.models.dpn import pretrained_settings
32
 
33
  from ._base import EncoderMixin
34
 
@@ -46,15 +44,18 @@ class DPNEncoder(DPN, EncoderMixin):
46
  def get_stages(self):
47
  return [
48
  nn.Identity(),
49
- nn.Sequential(self.features[0].conv, self.features[0].bn, self.features[0].act),
50
- nn.Sequential(self.features[0].pool, self.features[1 : self._stage_idxs[0]]),
 
 
 
 
51
  self.features[self._stage_idxs[0] : self._stage_idxs[1]],
52
  self.features[self._stage_idxs[1] : self._stage_idxs[2]],
53
  self.features[self._stage_idxs[2] : self._stage_idxs[3]],
54
  ]
55
 
56
  def forward(self, x):
57
-
58
  stages = self.get_stages()
59
 
60
  features = []
 
26
  import torch
27
  import torch.nn as nn
28
  import torch.nn.functional as F
29
+ from pretrainedmodels.models.dpn import DPN, pretrained_settings
 
 
30
 
31
  from ._base import EncoderMixin
32
 
 
44
  def get_stages(self):
45
  return [
46
  nn.Identity(),
47
+ nn.Sequential(
48
+ self.features[0].conv, self.features[0].bn, self.features[0].act
49
+ ),
50
+ nn.Sequential(
51
+ self.features[0].pool, self.features[1 : self._stage_idxs[0]]
52
+ ),
53
  self.features[self._stage_idxs[0] : self._stage_idxs[1]],
54
  self.features[self._stage_idxs[1] : self._stage_idxs[2]],
55
  self.features[self._stage_idxs[2] : self._stage_idxs[3]],
56
  ]
57
 
58
  def forward(self, x):
 
59
  stages = self.get_stages()
60
 
61
  features = []
segmentation_models_pytorch/segmentation_models_pytorch/encoders/efficientnet.py CHANGED
@@ -24,14 +24,13 @@ Methods:
24
  """
25
  import torch.nn as nn
26
  from efficientnet_pytorch import EfficientNet
27
- from efficientnet_pytorch.utils import url_map, url_map_advprop, get_model_params
28
 
29
  from ._base import EncoderMixin
30
 
31
 
32
  class EfficientNetEncoder(EfficientNet, EncoderMixin):
33
  def __init__(self, stage_idxs, out_channels, model_name, depth=5):
34
-
35
  blocks_args, global_params = get_model_params(model_name, override_params=None)
36
  super().__init__(blocks_args, global_params)
37
 
@@ -46,21 +45,20 @@ class EfficientNetEncoder(EfficientNet, EncoderMixin):
46
  return [
47
  nn.Identity(),
48
  nn.Sequential(self._conv_stem, self._bn0, self._swish),
49
- self._blocks[:self._stage_idxs[0]],
50
- self._blocks[self._stage_idxs[0]:self._stage_idxs[1]],
51
- self._blocks[self._stage_idxs[1]:self._stage_idxs[2]],
52
- self._blocks[self._stage_idxs[2]:],
53
  ]
54
 
55
  def forward(self, x):
56
  stages = self.get_stages()
57
 
58
- block_number = 0.
59
  drop_connect_rate = self._global_params.drop_connect_rate
60
 
61
  features = []
62
  for i in range(self._depth + 1):
63
-
64
  # Identity and Sequential stages
65
  if i < 2:
66
  x = stages[i](x)
@@ -69,7 +67,7 @@ class EfficientNetEncoder(EfficientNet, EncoderMixin):
69
  else:
70
  for module in stages[i]:
71
  drop_connect = drop_connect_rate * block_number / len(self._blocks)
72
- block_number += 1.
73
  x = module(x, drop_connect)
74
 
75
  features.append(x)
@@ -97,7 +95,7 @@ def _get_pretrained_settings(encoder):
97
  "url": url_map_advprop[encoder],
98
  "input_space": "RGB",
99
  "input_range": [0, 1],
100
- }
101
  }
102
  return pretrained_settings
103
 
 
24
  """
25
  import torch.nn as nn
26
  from efficientnet_pytorch import EfficientNet
27
+ from efficientnet_pytorch.utils import get_model_params, url_map, url_map_advprop
28
 
29
  from ._base import EncoderMixin
30
 
31
 
32
  class EfficientNetEncoder(EfficientNet, EncoderMixin):
33
  def __init__(self, stage_idxs, out_channels, model_name, depth=5):
 
34
  blocks_args, global_params = get_model_params(model_name, override_params=None)
35
  super().__init__(blocks_args, global_params)
36
 
 
45
  return [
46
  nn.Identity(),
47
  nn.Sequential(self._conv_stem, self._bn0, self._swish),
48
+ self._blocks[: self._stage_idxs[0]],
49
+ self._blocks[self._stage_idxs[0] : self._stage_idxs[1]],
50
+ self._blocks[self._stage_idxs[1] : self._stage_idxs[2]],
51
+ self._blocks[self._stage_idxs[2] :],
52
  ]
53
 
54
  def forward(self, x):
55
  stages = self.get_stages()
56
 
57
+ block_number = 0.0
58
  drop_connect_rate = self._global_params.drop_connect_rate
59
 
60
  features = []
61
  for i in range(self._depth + 1):
 
62
  # Identity and Sequential stages
63
  if i < 2:
64
  x = stages[i](x)
 
67
  else:
68
  for module in stages[i]:
69
  drop_connect = drop_connect_rate * block_number / len(self._blocks)
70
+ block_number += 1.0
71
  x = module(x, drop_connect)
72
 
73
  features.append(x)
 
95
  "url": url_map_advprop[encoder],
96
  "input_space": "RGB",
97
  "input_range": [0, 1],
98
+ },
99
  }
100
  return pretrained_settings
101
 
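
The drop-connect bookkeeping in the forward pass above follows a simple linear schedule; a tiny standalone sketch of that arithmetic:

# Linear drop-connect schedule, as used in the encoder forward above:
# block i gets rate * i / num_blocks, so later blocks are dropped more often.
drop_connect_rate = 0.2
num_blocks = 16
schedule = [drop_connect_rate * i / num_blocks for i in range(num_blocks)]
print(schedule[0], schedule[-1])   # 0.0 for the first block, 0.1875 for the last
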
segmentation_models_pytorch/segmentation_models_pytorch/encoders/inceptionresnetv2.py CHANGED
@@ -24,8 +24,10 @@ Methods:
24
  """
25
 
26
  import torch.nn as nn
27
- from pretrainedmodels.models.inceptionresnetv2 import InceptionResNetV2
28
- from pretrainedmodels.models.inceptionresnetv2 import pretrained_settings
 
 
29
 
30
  from ._base import EncoderMixin
31
 
@@ -51,8 +53,10 @@ class InceptionResNetV2Encoder(InceptionResNetV2, EncoderMixin):
51
  del self.last_linear
52
 
53
  def make_dilated(self, stage_list, dilation_list):
54
- raise ValueError("InceptionResnetV2 encoder does not support dilated mode "
55
- "due to pooling operation for downsampling!")
 
 
56
 
57
  def get_stages(self):
58
  return [
@@ -65,7 +69,6 @@ class InceptionResNetV2Encoder(InceptionResNetV2, EncoderMixin):
65
  ]
66
 
67
  def forward(self, x):
68
-
69
  stages = self.get_stages()
70
 
71
  features = []
 
24
  """
25
 
26
  import torch.nn as nn
27
+ from pretrainedmodels.models.inceptionresnetv2 import (
28
+ InceptionResNetV2,
29
+ pretrained_settings,
30
+ )
31
 
32
  from ._base import EncoderMixin
33
 
 
53
  del self.last_linear
54
 
55
  def make_dilated(self, stage_list, dilation_list):
56
+ raise ValueError(
57
+ "InceptionResnetV2 encoder does not support dilated mode "
58
+ "due to pooling operation for downsampling!"
59
+ )
60
 
61
  def get_stages(self):
62
  return [
 
69
  ]
70
 
71
  def forward(self, x):
 
72
  stages = self.get_stages()
73
 
74
  features = []
segmentation_models_pytorch/segmentation_models_pytorch/encoders/inceptionv4.py CHANGED
@@ -24,8 +24,11 @@ Methods:
24
  """
25
 
26
  import torch.nn as nn
27
- from pretrainedmodels.models.inceptionv4 import InceptionV4, BasicConv2d
28
- from pretrainedmodels.models.inceptionv4 import pretrained_settings
 
 
 
29
 
30
  from ._base import EncoderMixin
31
 
@@ -50,21 +53,22 @@ class InceptionV4Encoder(InceptionV4, EncoderMixin):
50
  del self.last_linear
51
 
52
  def make_dilated(self, stage_list, dilation_list):
53
- raise ValueError("InceptionV4 encoder does not support dilated mode "
54
- "due to pooling operation for downsampling!")
 
 
55
 
56
  def get_stages(self):
57
  return [
58
  nn.Identity(),
59
  self.features[: self._stage_idxs[0]],
60
- self.features[self._stage_idxs[0]: self._stage_idxs[1]],
61
- self.features[self._stage_idxs[1]: self._stage_idxs[2]],
62
- self.features[self._stage_idxs[2]: self._stage_idxs[3]],
63
- self.features[self._stage_idxs[3]:],
64
  ]
65
 
66
  def forward(self, x):
67
-
68
  stages = self.get_stages()
69
 
70
  features = []
 
24
  """
25
 
26
  import torch.nn as nn
27
+ from pretrainedmodels.models.inceptionv4 import (
28
+ BasicConv2d,
29
+ InceptionV4,
30
+ pretrained_settings,
31
+ )
32
 
33
  from ._base import EncoderMixin
34
 
 
53
  del self.last_linear
54
 
55
  def make_dilated(self, stage_list, dilation_list):
56
+ raise ValueError(
57
+ "InceptionV4 encoder does not support dilated mode "
58
+ "due to pooling operation for downsampling!"
59
+ )
60
 
61
  def get_stages(self):
62
  return [
63
  nn.Identity(),
64
  self.features[: self._stage_idxs[0]],
65
+ self.features[self._stage_idxs[0] : self._stage_idxs[1]],
66
+ self.features[self._stage_idxs[1] : self._stage_idxs[2]],
67
+ self.features[self._stage_idxs[2] : self._stage_idxs[3]],
68
+ self.features[self._stage_idxs[3] :],
69
  ]
70
 
71
  def forward(self, x):
 
72
  stages = self.get_stages()
73
 
74
  features = []
segmentation_models_pytorch/segmentation_models_pytorch/encoders/mobilenet.py CHANGED
@@ -23,14 +23,13 @@ Methods:
23
  depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled).
24
  """
25
 
26
- import torchvision
27
  import torch.nn as nn
 
28
 
29
  from ._base import EncoderMixin
30
 
31
 
32
  class MobileNetV2Encoder(torchvision.models.MobileNetV2, EncoderMixin):
33
-
34
  def __init__(self, out_channels, depth=5, **kwargs):
35
  super().__init__(**kwargs)
36
  self._depth = depth
 
23
  depth = 3 -> number of feature tensors = 4 (one with same resolution as input and 3 downsampled).
24
  """
25
 
 
26
  import torch.nn as nn
27
+ import torchvision
28
 
29
  from ._base import EncoderMixin
30
 
31
 
32
  class MobileNetV2Encoder(torchvision.models.MobileNetV2, EncoderMixin):
 
33
  def __init__(self, out_channels, depth=5, **kwargs):
34
  super().__init__(**kwargs)
35
  self._depth = depth
segmentation_models_pytorch/segmentation_models_pytorch/encoders/resnet.py CHANGED
@@ -25,11 +25,8 @@ Methods:
  from copy import deepcopy

  import torch.nn as nn
-
- from torchvision.models.resnet import ResNet
- from torchvision.models.resnet import BasicBlock
- from torchvision.models.resnet import Bottleneck
  from pretrainedmodels.models.torchvision_models import pretrained_settings
+ from torchvision.models.resnet import BasicBlock, Bottleneck, ResNet

  from ._base import EncoderMixin

@@ -73,11 +70,11 @@ class ResNetEncoder(ResNet, EncoderMixin):
  new_settings = {
  "resnet18": {
  "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth",
- "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth"
+ "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet18-118f1556.pth",
  },
  "resnet50": {
  "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet50-08389792.pth",
- "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth"
+ "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnet50-16a12f1b.pth",
  },
  "resnext50_32x4d": {
  "imagenet": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
@@ -86,7 +83,7 @@ new_settings = {
  },
  "resnext101_32x4d": {
  "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x4-dc43570a.pth",
- "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth"
+ "swsl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x4-3f87e46b.pth",
  },
  "resnext101_32x8d": {
  "imagenet": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
@@ -104,7 +101,7 @@ new_settings = {
  },
  "resnext101_32x48d": {
  "instagram": "https://download.pytorch.org/models/ig_resnext101_32x48-3e41cc8a.pth",
- }
+ },
  }

  pretrained_settings = deepcopy(pretrained_settings)
@@ -115,11 +112,11 @@ for model_name, sources in new_settings.items():
  for source_name, source_url in sources.items():
  pretrained_settings[model_name][source_name] = {
  "url": source_url,
- 'input_size': [3, 224, 224],
- 'input_range': [0, 1],
- 'mean': [0.485, 0.456, 0.406],
- 'std': [0.229, 0.224, 0.225],
- 'num_classes': 1000
+ "input_size": [3, 224, 224],
+ "input_range": [0, 1],
+ "mean": [0.485, 0.456, 0.406],
+ "std": [0.229, 0.224, 0.225],
+ "num_classes": 1000,
  }

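
The resnet.py hunks above are quoting and trailing-comma changes only; the merge loop still stamps the same ImageNet normalization metadata onto every extra weight source. A standalone sketch of what that loop produces (the `pretrained_settings` stand-in and the `setdefault` guard are illustrative, not the package's actual table or code):

from copy import deepcopy

# stand-in for the table imported from pretrainedmodels; only its shape matters here
pretrained_settings = {"resnet18": {"imagenet": {"url": "..."}}}

new_settings = {
    "resnet18": {
        "ssl": "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnet18-d92f0530.pth",
    }
}

pretrained_settings = deepcopy(pretrained_settings)
for model_name, sources in new_settings.items():
    pretrained_settings.setdefault(model_name, {})  # guard added so the sketch runs standalone
    for source_name, source_url in sources.items():
        pretrained_settings[model_name][source_name] = {
            "url": source_url,
            "input_size": [3, 224, 224],
            "input_range": [0, 1],
            "mean": [0.485, 0.456, 0.406],
            "std": [0.229, 0.224, 0.225],
            "num_classes": 1000,
        }

print(sorted(pretrained_settings["resnet18"]))  # -> ['imagenet', 'ssl']
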
 
segmentation_models_pytorch/segmentation_models_pytorch/encoders/senet.py CHANGED
@@ -24,14 +24,14 @@ Methods:
  """

  import torch.nn as nn
-
  from pretrainedmodels.models.senet import (
- SENet,
  SEBottleneck,
+ SENet,
  SEResNetBottleneck,
  SEResNeXtBottleneck,
  pretrained_settings,
  )
+
  from ._base import EncoderMixin


segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_regnet.py CHANGED
@@ -1,6 +1,7 @@
1
- from ._base import EncoderMixin
2
- from timm.models.regnet import RegNet
3
  import torch.nn as nn
 
 
 
4
 
5
 
6
  class RegNetEncoder(RegNet, EncoderMixin):
@@ -39,78 +40,78 @@ class RegNetEncoder(RegNet, EncoderMixin):
39
 
40
 
41
  regnet_weights = {
42
- 'timm-regnetx_002': {
43
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth',
 
 
 
44
  },
45
- 'timm-regnetx_004': {
46
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth',
47
  },
48
- 'timm-regnetx_006': {
49
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth',
50
  },
51
- 'timm-regnetx_008': {
52
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth',
53
  },
54
- 'timm-regnetx_016': {
55
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth',
56
  },
57
- 'timm-regnetx_032': {
58
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth',
59
  },
60
- 'timm-regnetx_040': {
61
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth',
62
  },
63
- 'timm-regnetx_064': {
64
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth',
65
  },
66
- 'timm-regnetx_080': {
67
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth',
68
  },
69
- 'timm-regnetx_120': {
70
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth',
71
  },
72
- 'timm-regnetx_160': {
73
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth',
74
  },
75
- 'timm-regnetx_320': {
76
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth',
77
  },
78
- 'timm-regnety_002': {
79
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth',
80
  },
81
- 'timm-regnety_004': {
82
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth',
83
  },
84
- 'timm-regnety_006': {
85
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth',
86
  },
87
- 'timm-regnety_008': {
88
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth',
89
  },
90
- 'timm-regnety_016': {
91
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth',
92
  },
93
- 'timm-regnety_032': {
94
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth'
95
  },
96
- 'timm-regnety_040': {
97
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth'
98
  },
99
- 'timm-regnety_064': {
100
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth'
101
  },
102
- 'timm-regnety_080': {
103
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth',
104
  },
105
- 'timm-regnety_120': {
106
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth',
107
  },
108
- 'timm-regnety_160': {
109
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth',
110
  },
111
- 'timm-regnety_320': {
112
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth'
113
- }
114
  }
115
 
116
  pretrained_settings = {}
@@ -119,214 +120,224 @@ for model_name, sources in regnet_weights.items():
119
  for source_name, source_url in sources.items():
120
  pretrained_settings[model_name][source_name] = {
121
  "url": source_url,
122
- 'input_size': [3, 224, 224],
123
- 'input_range': [0, 1],
124
- 'mean': [0.485, 0.456, 0.406],
125
- 'std': [0.229, 0.224, 0.225],
126
- 'num_classes': 1000
127
  }
128
 
129
  # at this point I am too lazy to copy configs, so I just used the same configs from timm's repo
130
 
131
 
132
  def _mcfg(**kwargs):
133
- cfg = dict(se_ratio=0., bottle_ratio=1., stem_width=32)
134
  cfg.update(**kwargs)
135
  return cfg
136
 
137
 
138
  timm_regnet_encoders = {
139
- 'timm-regnetx_002': {
140
- 'encoder': RegNetEncoder,
141
  "pretrained_settings": pretrained_settings["timm-regnetx_002"],
142
- 'params': {
143
- 'out_channels': (3, 32, 24, 56, 152, 368),
144
- 'cfg': _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13)
145
  },
146
  },
147
- 'timm-regnetx_004': {
148
- 'encoder': RegNetEncoder,
149
  "pretrained_settings": pretrained_settings["timm-regnetx_004"],
150
- 'params': {
151
- 'out_channels': (3, 32, 32, 64, 160, 384),
152
- 'cfg': _mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22)
153
  },
154
  },
155
- 'timm-regnetx_006': {
156
- 'encoder': RegNetEncoder,
157
  "pretrained_settings": pretrained_settings["timm-regnetx_006"],
158
- 'params': {
159
- 'out_channels': (3, 32, 48, 96, 240, 528),
160
- 'cfg': _mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16)
161
  },
162
  },
163
- 'timm-regnetx_008': {
164
- 'encoder': RegNetEncoder,
165
  "pretrained_settings": pretrained_settings["timm-regnetx_008"],
166
- 'params': {
167
- 'out_channels': (3, 32, 64, 128, 288, 672),
168
- 'cfg': _mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16)
169
  },
170
  },
171
- 'timm-regnetx_016': {
172
- 'encoder': RegNetEncoder,
173
  "pretrained_settings": pretrained_settings["timm-regnetx_016"],
174
- 'params': {
175
- 'out_channels': (3, 32, 72, 168, 408, 912),
176
- 'cfg': _mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18)
177
  },
178
  },
179
- 'timm-regnetx_032': {
180
- 'encoder': RegNetEncoder,
181
  "pretrained_settings": pretrained_settings["timm-regnetx_032"],
182
- 'params': {
183
- 'out_channels': (3, 32, 96, 192, 432, 1008),
184
- 'cfg': _mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25)
185
  },
186
  },
187
- 'timm-regnetx_040': {
188
- 'encoder': RegNetEncoder,
189
  "pretrained_settings": pretrained_settings["timm-regnetx_040"],
190
- 'params': {
191
- 'out_channels': (3, 32, 80, 240, 560, 1360),
192
- 'cfg': _mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23)
193
  },
194
  },
195
- 'timm-regnetx_064': {
196
- 'encoder': RegNetEncoder,
197
  "pretrained_settings": pretrained_settings["timm-regnetx_064"],
198
- 'params': {
199
- 'out_channels': (3, 32, 168, 392, 784, 1624),
200
- 'cfg': _mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17)
201
  },
202
  },
203
- 'timm-regnetx_080': {
204
- 'encoder': RegNetEncoder,
205
  "pretrained_settings": pretrained_settings["timm-regnetx_080"],
206
- 'params': {
207
- 'out_channels': (3, 32, 80, 240, 720, 1920),
208
- 'cfg': _mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23)
209
  },
210
  },
211
- 'timm-regnetx_120': {
212
- 'encoder': RegNetEncoder,
213
  "pretrained_settings": pretrained_settings["timm-regnetx_120"],
214
- 'params': {
215
- 'out_channels': (3, 32, 224, 448, 896, 2240),
216
- 'cfg': _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19)
217
  },
218
  },
219
- 'timm-regnetx_160': {
220
- 'encoder': RegNetEncoder,
221
  "pretrained_settings": pretrained_settings["timm-regnetx_160"],
222
- 'params': {
223
- 'out_channels': (3, 32, 256, 512, 896, 2048),
224
- 'cfg': _mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22)
225
  },
226
  },
227
- 'timm-regnetx_320': {
228
- 'encoder': RegNetEncoder,
229
  "pretrained_settings": pretrained_settings["timm-regnetx_320"],
230
- 'params': {
231
- 'out_channels': (3, 32, 336, 672, 1344, 2520),
232
- 'cfg': _mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23)
233
  },
234
  },
235
- #regnety
236
- 'timm-regnety_002': {
237
- 'encoder': RegNetEncoder,
238
  "pretrained_settings": pretrained_settings["timm-regnety_002"],
239
- 'params': {
240
- 'out_channels': (3, 32, 24, 56, 152, 368),
241
- 'cfg': _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25)
242
  },
243
  },
244
- 'timm-regnety_004': {
245
- 'encoder': RegNetEncoder,
246
  "pretrained_settings": pretrained_settings["timm-regnety_004"],
247
- 'params': {
248
- 'out_channels': (3, 32, 48, 104, 208, 440),
249
- 'cfg': _mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, se_ratio=0.25)
250
  },
251
  },
252
- 'timm-regnety_006': {
253
- 'encoder': RegNetEncoder,
254
  "pretrained_settings": pretrained_settings["timm-regnety_006"],
255
- 'params': {
256
- 'out_channels': (3, 32, 48, 112, 256, 608),
257
- 'cfg': _mcfg(w0=48, wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25)
258
  },
259
  },
260
- 'timm-regnety_008': {
261
- 'encoder': RegNetEncoder,
262
  "pretrained_settings": pretrained_settings["timm-regnety_008"],
263
- 'params': {
264
- 'out_channels': (3, 32, 64, 128, 320, 768),
265
- 'cfg': _mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25)
266
  },
267
  },
268
- 'timm-regnety_016': {
269
- 'encoder': RegNetEncoder,
270
  "pretrained_settings": pretrained_settings["timm-regnety_016"],
271
- 'params': {
272
- 'out_channels': (3, 32, 48, 120, 336, 888),
273
- 'cfg': _mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25)
274
  },
275
  },
276
- 'timm-regnety_032': {
277
- 'encoder': RegNetEncoder,
278
  "pretrained_settings": pretrained_settings["timm-regnety_032"],
279
- 'params': {
280
- 'out_channels': (3, 32, 72, 216, 576, 1512),
281
- 'cfg': _mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25)
282
  },
283
  },
284
- 'timm-regnety_040': {
285
- 'encoder': RegNetEncoder,
286
  "pretrained_settings": pretrained_settings["timm-regnety_040"],
287
- 'params': {
288
- 'out_channels': (3, 32, 128, 192, 512, 1088),
289
- 'cfg': _mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25)
290
  },
291
  },
292
- 'timm-regnety_064': {
293
- 'encoder': RegNetEncoder,
294
  "pretrained_settings": pretrained_settings["timm-regnety_064"],
295
- 'params': {
296
- 'out_channels': (3, 32, 144, 288, 576, 1296),
297
- 'cfg': _mcfg(w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25)
 
 
298
  },
299
  },
300
- 'timm-regnety_080': {
301
- 'encoder': RegNetEncoder,
302
  "pretrained_settings": pretrained_settings["timm-regnety_080"],
303
- 'params': {
304
- 'out_channels': (3, 32, 168, 448, 896, 2016),
305
- 'cfg': _mcfg(w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25)
 
 
306
  },
307
  },
308
- 'timm-regnety_120': {
309
- 'encoder': RegNetEncoder,
310
  "pretrained_settings": pretrained_settings["timm-regnety_120"],
311
- 'params': {
312
- 'out_channels': (3, 32, 224, 448, 896, 2240),
313
- 'cfg': _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25)
 
 
314
  },
315
  },
316
- 'timm-regnety_160': {
317
- 'encoder': RegNetEncoder,
318
  "pretrained_settings": pretrained_settings["timm-regnety_160"],
319
- 'params': {
320
- 'out_channels': (3, 32, 224, 448, 1232, 3024),
321
- 'cfg': _mcfg(w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25)
 
 
322
  },
323
  },
324
- 'timm-regnety_320': {
325
- 'encoder': RegNetEncoder,
326
  "pretrained_settings": pretrained_settings["timm-regnety_320"],
327
- 'params': {
328
- 'out_channels': (3, 32, 232, 696, 1392, 3712),
329
- 'cfg': _mcfg(w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25)
 
 
330
  },
331
  },
332
  }
 
 
 
1
  import torch.nn as nn
2
+ from timm.models.regnet import RegNet
3
+
4
+ from ._base import EncoderMixin
5
 
6
 
7
  class RegNetEncoder(RegNet, EncoderMixin):
 
40
 
41
 
42
  regnet_weights = {
43
+ "timm-regnetx_002": {
44
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_002-e7e85e5c.pth",
45
+ },
46
+ "timm-regnetx_004": {
47
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_004-7d0e9424.pth",
48
  },
49
+ "timm-regnetx_006": {
50
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_006-85ec1baa.pth",
51
  },
52
+ "timm-regnetx_008": {
53
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_008-d8b470eb.pth",
54
  },
55
+ "timm-regnetx_016": {
56
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_016-65ca972a.pth",
57
  },
58
+ "timm-regnetx_032": {
59
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_032-ed0c7f7e.pth",
60
  },
61
+ "timm-regnetx_040": {
62
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_040-73c2a654.pth",
63
  },
64
+ "timm-regnetx_064": {
65
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_064-29278baa.pth",
66
  },
67
+ "timm-regnetx_080": {
68
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_080-7c7fcab1.pth",
69
  },
70
+ "timm-regnetx_120": {
71
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_120-65d5521e.pth",
72
  },
73
+ "timm-regnetx_160": {
74
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_160-c98c4112.pth",
75
  },
76
+ "timm-regnetx_320": {
77
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnetx_320-8ea38b93.pth",
78
  },
79
+ "timm-regnety_002": {
80
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_002-e68ca334.pth",
81
  },
82
+ "timm-regnety_004": {
83
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_004-0db870e6.pth",
84
  },
85
+ "timm-regnety_006": {
86
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_006-c67e57ec.pth",
87
  },
88
+ "timm-regnety_008": {
89
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_008-dc900dbe.pth",
90
  },
91
+ "timm-regnety_016": {
92
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_016-54367f74.pth",
93
  },
94
+ "timm-regnety_032": {
95
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/regnety_032_ra-7f2439f9.pth"
96
  },
97
+ "timm-regnety_040": {
98
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth"
99
  },
100
+ "timm-regnety_064": {
101
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth"
102
  },
103
+ "timm-regnety_080": {
104
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_080-e7f3eb93.pth",
105
  },
106
+ "timm-regnety_120": {
107
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_120-721ba79a.pth",
108
  },
109
+ "timm-regnety_160": {
110
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_160-d64013cd.pth",
111
  },
112
+ "timm-regnety_320": {
113
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_320-ba464b29.pth"
114
  },
 
 
 
115
  }
116
 
117
  pretrained_settings = {}
 
120
  for source_name, source_url in sources.items():
121
  pretrained_settings[model_name][source_name] = {
122
  "url": source_url,
123
+ "input_size": [3, 224, 224],
124
+ "input_range": [0, 1],
125
+ "mean": [0.485, 0.456, 0.406],
126
+ "std": [0.229, 0.224, 0.225],
127
+ "num_classes": 1000,
128
  }
129
 
130
  # at this point I am too lazy to copy configs, so I just used the same configs from timm's repo
131
 
132
 
133
  def _mcfg(**kwargs):
134
+ cfg = dict(se_ratio=0.0, bottle_ratio=1.0, stem_width=32)
135
  cfg.update(**kwargs)
136
  return cfg
137
 
138
 
139
  timm_regnet_encoders = {
140
+ "timm-regnetx_002": {
141
+ "encoder": RegNetEncoder,
142
  "pretrained_settings": pretrained_settings["timm-regnetx_002"],
143
+ "params": {
144
+ "out_channels": (3, 32, 24, 56, 152, 368),
145
+ "cfg": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13),
146
  },
147
  },
148
+ "timm-regnetx_004": {
149
+ "encoder": RegNetEncoder,
150
  "pretrained_settings": pretrained_settings["timm-regnetx_004"],
151
+ "params": {
152
+ "out_channels": (3, 32, 32, 64, 160, 384),
153
+ "cfg": _mcfg(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22),
154
  },
155
  },
156
+ "timm-regnetx_006": {
157
+ "encoder": RegNetEncoder,
158
  "pretrained_settings": pretrained_settings["timm-regnetx_006"],
159
+ "params": {
160
+ "out_channels": (3, 32, 48, 96, 240, 528),
161
+ "cfg": _mcfg(w0=48, wa=36.97, wm=2.24, group_w=24, depth=16),
162
  },
163
  },
164
+ "timm-regnetx_008": {
165
+ "encoder": RegNetEncoder,
166
  "pretrained_settings": pretrained_settings["timm-regnetx_008"],
167
+ "params": {
168
+ "out_channels": (3, 32, 64, 128, 288, 672),
169
+ "cfg": _mcfg(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16),
170
  },
171
  },
172
+ "timm-regnetx_016": {
173
+ "encoder": RegNetEncoder,
174
  "pretrained_settings": pretrained_settings["timm-regnetx_016"],
175
+ "params": {
176
+ "out_channels": (3, 32, 72, 168, 408, 912),
177
+ "cfg": _mcfg(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18),
178
  },
179
  },
180
+ "timm-regnetx_032": {
181
+ "encoder": RegNetEncoder,
182
  "pretrained_settings": pretrained_settings["timm-regnetx_032"],
183
+ "params": {
184
+ "out_channels": (3, 32, 96, 192, 432, 1008),
185
+ "cfg": _mcfg(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25),
186
  },
187
  },
188
+ "timm-regnetx_040": {
189
+ "encoder": RegNetEncoder,
190
  "pretrained_settings": pretrained_settings["timm-regnetx_040"],
191
+ "params": {
192
+ "out_channels": (3, 32, 80, 240, 560, 1360),
193
+ "cfg": _mcfg(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23),
194
  },
195
  },
196
+ "timm-regnetx_064": {
197
+ "encoder": RegNetEncoder,
198
  "pretrained_settings": pretrained_settings["timm-regnetx_064"],
199
+ "params": {
200
+ "out_channels": (3, 32, 168, 392, 784, 1624),
201
+ "cfg": _mcfg(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17),
202
  },
203
  },
204
+ "timm-regnetx_080": {
205
+ "encoder": RegNetEncoder,
206
  "pretrained_settings": pretrained_settings["timm-regnetx_080"],
207
+ "params": {
208
+ "out_channels": (3, 32, 80, 240, 720, 1920),
209
+ "cfg": _mcfg(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23),
210
  },
211
  },
212
+ "timm-regnetx_120": {
213
+ "encoder": RegNetEncoder,
214
  "pretrained_settings": pretrained_settings["timm-regnetx_120"],
215
+ "params": {
216
+ "out_channels": (3, 32, 224, 448, 896, 2240),
217
+ "cfg": _mcfg(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19),
218
  },
219
  },
220
+ "timm-regnetx_160": {
221
+ "encoder": RegNetEncoder,
222
  "pretrained_settings": pretrained_settings["timm-regnetx_160"],
223
+ "params": {
224
+ "out_channels": (3, 32, 256, 512, 896, 2048),
225
+ "cfg": _mcfg(w0=216, wa=55.59, wm=2.1, group_w=128, depth=22),
226
  },
227
  },
228
+ "timm-regnetx_320": {
229
+ "encoder": RegNetEncoder,
230
  "pretrained_settings": pretrained_settings["timm-regnetx_320"],
231
+ "params": {
232
+ "out_channels": (3, 32, 336, 672, 1344, 2520),
233
+ "cfg": _mcfg(w0=320, wa=69.86, wm=2.0, group_w=168, depth=23),
234
  },
235
  },
236
+ # regnety
237
+ "timm-regnety_002": {
238
+ "encoder": RegNetEncoder,
239
  "pretrained_settings": pretrained_settings["timm-regnety_002"],
240
+ "params": {
241
+ "out_channels": (3, 32, 24, 56, 152, 368),
242
+ "cfg": _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25),
243
  },
244
  },
245
+ "timm-regnety_004": {
246
+ "encoder": RegNetEncoder,
247
  "pretrained_settings": pretrained_settings["timm-regnety_004"],
248
+ "params": {
249
+ "out_channels": (3, 32, 48, 104, 208, 440),
250
+ "cfg": _mcfg(w0=48, wa=27.89, wm=2.09, group_w=8, depth=16, se_ratio=0.25),
251
  },
252
  },
253
+ "timm-regnety_006": {
254
+ "encoder": RegNetEncoder,
255
  "pretrained_settings": pretrained_settings["timm-regnety_006"],
256
+ "params": {
257
+ "out_channels": (3, 32, 48, 112, 256, 608),
258
+ "cfg": _mcfg(w0=48, wa=32.54, wm=2.32, group_w=16, depth=15, se_ratio=0.25),
259
  },
260
  },
261
+ "timm-regnety_008": {
262
+ "encoder": RegNetEncoder,
263
  "pretrained_settings": pretrained_settings["timm-regnety_008"],
264
+ "params": {
265
+ "out_channels": (3, 32, 64, 128, 320, 768),
266
+ "cfg": _mcfg(w0=56, wa=38.84, wm=2.4, group_w=16, depth=14, se_ratio=0.25),
267
  },
268
  },
269
+ "timm-regnety_016": {
270
+ "encoder": RegNetEncoder,
271
  "pretrained_settings": pretrained_settings["timm-regnety_016"],
272
+ "params": {
273
+ "out_channels": (3, 32, 48, 120, 336, 888),
274
+ "cfg": _mcfg(w0=48, wa=20.71, wm=2.65, group_w=24, depth=27, se_ratio=0.25),
275
  },
276
  },
277
+ "timm-regnety_032": {
278
+ "encoder": RegNetEncoder,
279
  "pretrained_settings": pretrained_settings["timm-regnety_032"],
280
+ "params": {
281
+ "out_channels": (3, 32, 72, 216, 576, 1512),
282
+ "cfg": _mcfg(w0=80, wa=42.63, wm=2.66, group_w=24, depth=21, se_ratio=0.25),
283
  },
284
  },
285
+ "timm-regnety_040": {
286
+ "encoder": RegNetEncoder,
287
  "pretrained_settings": pretrained_settings["timm-regnety_040"],
288
+ "params": {
289
+ "out_channels": (3, 32, 128, 192, 512, 1088),
290
+ "cfg": _mcfg(w0=96, wa=31.41, wm=2.24, group_w=64, depth=22, se_ratio=0.25),
291
  },
292
  },
293
+ "timm-regnety_064": {
294
+ "encoder": RegNetEncoder,
295
  "pretrained_settings": pretrained_settings["timm-regnety_064"],
296
+ "params": {
297
+ "out_channels": (3, 32, 144, 288, 576, 1296),
298
+ "cfg": _mcfg(
299
+ w0=112, wa=33.22, wm=2.27, group_w=72, depth=25, se_ratio=0.25
300
+ ),
301
  },
302
  },
303
+ "timm-regnety_080": {
304
+ "encoder": RegNetEncoder,
305
  "pretrained_settings": pretrained_settings["timm-regnety_080"],
306
+ "params": {
307
+ "out_channels": (3, 32, 168, 448, 896, 2016),
308
+ "cfg": _mcfg(
309
+ w0=192, wa=76.82, wm=2.19, group_w=56, depth=17, se_ratio=0.25
310
+ ),
311
  },
312
  },
313
+ "timm-regnety_120": {
314
+ "encoder": RegNetEncoder,
315
  "pretrained_settings": pretrained_settings["timm-regnety_120"],
316
+ "params": {
317
+ "out_channels": (3, 32, 224, 448, 896, 2240),
318
+ "cfg": _mcfg(
319
+ w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, se_ratio=0.25
320
+ ),
321
  },
322
  },
323
+ "timm-regnety_160": {
324
+ "encoder": RegNetEncoder,
325
  "pretrained_settings": pretrained_settings["timm-regnety_160"],
326
+ "params": {
327
+ "out_channels": (3, 32, 224, 448, 1232, 3024),
328
+ "cfg": _mcfg(
329
+ w0=200, wa=106.23, wm=2.48, group_w=112, depth=18, se_ratio=0.25
330
+ ),
331
  },
332
  },
333
+ "timm-regnety_320": {
334
+ "encoder": RegNetEncoder,
335
  "pretrained_settings": pretrained_settings["timm-regnety_320"],
336
+ "params": {
337
+ "out_channels": (3, 32, 232, 696, 1392, 3712),
338
+ "cfg": _mcfg(
339
+ w0=232, wa=115.89, wm=2.53, group_w=232, depth=20, se_ratio=0.25
340
+ ),
341
  },
342
  },
343
  }
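
Only quoting and line-wrapping changes in timm_regnet.py as well; `_mcfg` still just overlays per-variant overrides on shared RegNet defaults. A standalone check, reusing the function body and the `timm-regnety_002` values shown in the diff:

def _mcfg(**kwargs):
    # shared RegNet defaults, overridden per variant
    cfg = dict(se_ratio=0.0, bottle_ratio=1.0, stem_width=32)
    cfg.update(**kwargs)
    return cfg

cfg = _mcfg(w0=24, wa=36.44, wm=2.49, group_w=8, depth=13, se_ratio=0.25)
print(cfg)
# {'se_ratio': 0.25, 'bottle_ratio': 1.0, 'stem_width': 32, 'w0': 24,
#  'wa': 36.44, 'wm': 2.49, 'group_w': 8, 'depth': 13}
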
segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_res2net.py CHANGED
@@ -1,7 +1,8 @@
1
- from ._base import EncoderMixin
2
- from timm.models.resnet import ResNet
3
- from timm.models.res2net import Bottle2neck
4
  import torch.nn as nn
 
 
 
 
5
 
6
 
7
  class Res2NetEncoder(ResNet, EncoderMixin):
@@ -44,27 +45,27 @@ class Res2NetEncoder(ResNet, EncoderMixin):
44
 
45
 
46
  res2net_weights = {
47
- 'timm-res2net50_26w_4s': {
48
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_4s-06e79181.pth'
49
  },
50
- 'timm-res2net50_48w_2s': {
51
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_48w_2s-afed724a.pth'
52
  },
53
- 'timm-res2net50_14w_8s': {
54
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_14w_8s-6527dddc.pth',
55
  },
56
- 'timm-res2net50_26w_6s': {
57
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_6s-19041792.pth',
58
  },
59
- 'timm-res2net50_26w_8s': {
60
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_8s-2c7c9f12.pth',
61
  },
62
- 'timm-res2net101_26w_4s': {
63
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net101_26w_4s-02a759a1.pth',
 
 
 
64
  },
65
- 'timm-res2next50': {
66
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2next50_4s-6ef7e7bf.pth',
67
- }
68
  }
69
 
70
  pretrained_settings = {}
@@ -73,91 +74,91 @@ for model_name, sources in res2net_weights.items():
73
  for source_name, source_url in sources.items():
74
  pretrained_settings[model_name][source_name] = {
75
  "url": source_url,
76
- 'input_size': [3, 224, 224],
77
- 'input_range': [0, 1],
78
- 'mean': [0.485, 0.456, 0.406],
79
- 'std': [0.229, 0.224, 0.225],
80
- 'num_classes': 1000
81
  }
82
 
83
 
84
  timm_res2net_encoders = {
85
- 'timm-res2net50_26w_4s': {
86
- 'encoder': Res2NetEncoder,
87
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_4s"],
88
- 'params': {
89
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
90
- 'block': Bottle2neck,
91
- 'layers': [3, 4, 6, 3],
92
- 'base_width': 26,
93
- 'block_args': {'scale': 4}
94
  },
95
  },
96
- 'timm-res2net101_26w_4s': {
97
- 'encoder': Res2NetEncoder,
98
  "pretrained_settings": pretrained_settings["timm-res2net101_26w_4s"],
99
- 'params': {
100
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
101
- 'block': Bottle2neck,
102
- 'layers': [3, 4, 23, 3],
103
- 'base_width': 26,
104
- 'block_args': {'scale': 4}
105
  },
106
  },
107
- 'timm-res2net50_26w_6s': {
108
- 'encoder': Res2NetEncoder,
109
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_6s"],
110
- 'params': {
111
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
112
- 'block': Bottle2neck,
113
- 'layers': [3, 4, 6, 3],
114
- 'base_width': 26,
115
- 'block_args': {'scale': 6}
116
  },
117
  },
118
- 'timm-res2net50_26w_8s': {
119
- 'encoder': Res2NetEncoder,
120
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_8s"],
121
- 'params': {
122
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
123
- 'block': Bottle2neck,
124
- 'layers': [3, 4, 6, 3],
125
- 'base_width': 26,
126
- 'block_args': {'scale': 8}
127
  },
128
  },
129
- 'timm-res2net50_48w_2s': {
130
- 'encoder': Res2NetEncoder,
131
  "pretrained_settings": pretrained_settings["timm-res2net50_48w_2s"],
132
- 'params': {
133
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
134
- 'block': Bottle2neck,
135
- 'layers': [3, 4, 6, 3],
136
- 'base_width': 48,
137
- 'block_args': {'scale': 2}
138
  },
139
  },
140
- 'timm-res2net50_14w_8s': {
141
- 'encoder': Res2NetEncoder,
142
  "pretrained_settings": pretrained_settings["timm-res2net50_14w_8s"],
143
- 'params': {
144
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
145
- 'block': Bottle2neck,
146
- 'layers': [3, 4, 6, 3],
147
- 'base_width': 14,
148
- 'block_args': {'scale': 8}
149
  },
150
  },
151
- 'timm-res2next50': {
152
- 'encoder': Res2NetEncoder,
153
  "pretrained_settings": pretrained_settings["timm-res2next50"],
154
- 'params': {
155
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
156
- 'block': Bottle2neck,
157
- 'layers': [3, 4, 6, 3],
158
- 'base_width': 4,
159
- 'cardinality': 8,
160
- 'block_args': {'scale': 4}
161
  },
162
- }
163
  }
 
 
 
 
1
  import torch.nn as nn
2
+ from timm.models.res2net import Bottle2neck
3
+ from timm.models.resnet import ResNet
4
+
5
+ from ._base import EncoderMixin
6
 
7
 
8
  class Res2NetEncoder(ResNet, EncoderMixin):
 
45
 
46
 
47
  res2net_weights = {
48
+ "timm-res2net50_26w_4s": {
49
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_4s-06e79181.pth"
50
  },
51
+ "timm-res2net50_48w_2s": {
52
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_48w_2s-afed724a.pth"
53
  },
54
+ "timm-res2net50_14w_8s": {
55
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_14w_8s-6527dddc.pth",
56
  },
57
+ "timm-res2net50_26w_6s": {
58
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_6s-19041792.pth",
59
  },
60
+ "timm-res2net50_26w_8s": {
61
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net50_26w_8s-2c7c9f12.pth",
62
  },
63
+ "timm-res2net101_26w_4s": {
64
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2net101_26w_4s-02a759a1.pth",
65
+ },
66
+ "timm-res2next50": {
67
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-res2net/res2next50_4s-6ef7e7bf.pth",
68
  },
 
 
 
69
  }
70
 
71
  pretrained_settings = {}
 
74
  for source_name, source_url in sources.items():
75
  pretrained_settings[model_name][source_name] = {
76
  "url": source_url,
77
+ "input_size": [3, 224, 224],
78
+ "input_range": [0, 1],
79
+ "mean": [0.485, 0.456, 0.406],
80
+ "std": [0.229, 0.224, 0.225],
81
+ "num_classes": 1000,
82
  }
83
 
84
 
85
  timm_res2net_encoders = {
86
+ "timm-res2net50_26w_4s": {
87
+ "encoder": Res2NetEncoder,
88
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_4s"],
89
+ "params": {
90
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
91
+ "block": Bottle2neck,
92
+ "layers": [3, 4, 6, 3],
93
+ "base_width": 26,
94
+ "block_args": {"scale": 4},
95
  },
96
  },
97
+ "timm-res2net101_26w_4s": {
98
+ "encoder": Res2NetEncoder,
99
  "pretrained_settings": pretrained_settings["timm-res2net101_26w_4s"],
100
+ "params": {
101
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
102
+ "block": Bottle2neck,
103
+ "layers": [3, 4, 23, 3],
104
+ "base_width": 26,
105
+ "block_args": {"scale": 4},
106
  },
107
  },
108
+ "timm-res2net50_26w_6s": {
109
+ "encoder": Res2NetEncoder,
110
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_6s"],
111
+ "params": {
112
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
113
+ "block": Bottle2neck,
114
+ "layers": [3, 4, 6, 3],
115
+ "base_width": 26,
116
+ "block_args": {"scale": 6},
117
  },
118
  },
119
+ "timm-res2net50_26w_8s": {
120
+ "encoder": Res2NetEncoder,
121
  "pretrained_settings": pretrained_settings["timm-res2net50_26w_8s"],
122
+ "params": {
123
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
124
+ "block": Bottle2neck,
125
+ "layers": [3, 4, 6, 3],
126
+ "base_width": 26,
127
+ "block_args": {"scale": 8},
128
  },
129
  },
130
+ "timm-res2net50_48w_2s": {
131
+ "encoder": Res2NetEncoder,
132
  "pretrained_settings": pretrained_settings["timm-res2net50_48w_2s"],
133
+ "params": {
134
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
135
+ "block": Bottle2neck,
136
+ "layers": [3, 4, 6, 3],
137
+ "base_width": 48,
138
+ "block_args": {"scale": 2},
139
  },
140
  },
141
+ "timm-res2net50_14w_8s": {
142
+ "encoder": Res2NetEncoder,
143
  "pretrained_settings": pretrained_settings["timm-res2net50_14w_8s"],
144
+ "params": {
145
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
146
+ "block": Bottle2neck,
147
+ "layers": [3, 4, 6, 3],
148
+ "base_width": 14,
149
+ "block_args": {"scale": 8},
150
  },
151
  },
152
+ "timm-res2next50": {
153
+ "encoder": Res2NetEncoder,
154
  "pretrained_settings": pretrained_settings["timm-res2next50"],
155
+ "params": {
156
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
157
+ "block": Bottle2neck,
158
+ "layers": [3, 4, 6, 3],
159
+ "base_width": 4,
160
+ "cardinality": 8,
161
+ "block_args": {"scale": 4},
162
  },
163
+ },
164
  }
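
The registry dicts reformatted above (here `timm_res2net_encoders`, and likewise for the other timm families) all share the same shape: an encoder class, its pretrained settings, and constructor params. A hypothetical sketch of how such an entry might be consumed; the library's real factory is not part of this diff, so the toy registry and `build_encoder` below are assumptions for illustration only:

import torch.nn as nn


class DummyEncoder(nn.Module):  # stand-in for Res2NetEncoder and friends
    def __init__(self, out_channels, depth=5, **kwargs):
        super().__init__()
        self.out_channels = out_channels
        self.depth = depth


toy_registry = {
    "toy-encoder": {
        "encoder": DummyEncoder,
        "pretrained_settings": {"imagenet": {"url": "https://example.invalid/w.pth"}},
        "params": {"out_channels": (3, 64, 256, 512, 1024, 2048)},
    }
}


def build_encoder(name, registry, depth=5):
    entry = registry[name]
    params = dict(entry["params"])  # out_channels, block, layers, ...
    params["depth"] = depth
    return entry["encoder"](**params), entry["pretrained_settings"]


encoder, settings = build_encoder("toy-encoder", toy_registry)
print(encoder.out_channels, settings["imagenet"]["url"])
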
segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_resnest.py CHANGED
@@ -1,7 +1,8 @@
1
- from ._base import EncoderMixin
2
- from timm.models.resnet import ResNet
3
- from timm.models.resnest import ResNestBottleneck
4
  import torch.nn as nn
 
 
 
 
5
 
6
 
7
  class ResNestEncoder(ResNet, EncoderMixin):
@@ -44,30 +45,30 @@ class ResNestEncoder(ResNet, EncoderMixin):
44
 
45
 
46
  resnest_weights = {
47
- 'timm-resnest14d': {
48
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest14-9c8fe254.pth'
 
 
 
49
  },
50
- 'timm-resnest26d': {
51
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest26-50eb607c.pth'
52
  },
53
- 'timm-resnest50d': {
54
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50-528c19ca.pth',
55
  },
56
- 'timm-resnest101e': {
57
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest101-22405ba7.pth',
58
  },
59
- 'timm-resnest200e': {
60
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest200-75117900.pth',
61
  },
62
- 'timm-resnest269e': {
63
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest269-0cc87c48.pth',
64
  },
65
- 'timm-resnest50d_4s2x40d': {
66
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_4s2x40d-41d14ed0.pth',
67
  },
68
- 'timm-resnest50d_1s4x24d': {
69
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_1s4x24d-d4a4f76f.pth',
70
- }
71
  }
72
 
73
  pretrained_settings = {}
@@ -76,133 +77,133 @@ for model_name, sources in resnest_weights.items():
76
  for source_name, source_url in sources.items():
77
  pretrained_settings[model_name][source_name] = {
78
  "url": source_url,
79
- 'input_size': [3, 224, 224],
80
- 'input_range': [0, 1],
81
- 'mean': [0.485, 0.456, 0.406],
82
- 'std': [0.229, 0.224, 0.225],
83
- 'num_classes': 1000
84
  }
85
 
86
 
87
  timm_resnest_encoders = {
88
- 'timm-resnest14d': {
89
- 'encoder': ResNestEncoder,
90
  "pretrained_settings": pretrained_settings["timm-resnest14d"],
91
- 'params': {
92
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
93
- 'block': ResNestBottleneck,
94
- 'layers': [1, 1, 1, 1],
95
- 'stem_type': 'deep',
96
- 'stem_width': 32,
97
- 'avg_down': True,
98
- 'base_width': 64,
99
- 'cardinality': 1,
100
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
101
- }
102
  },
103
- 'timm-resnest26d': {
104
- 'encoder': ResNestEncoder,
105
  "pretrained_settings": pretrained_settings["timm-resnest26d"],
106
- 'params': {
107
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
108
- 'block': ResNestBottleneck,
109
- 'layers': [2, 2, 2, 2],
110
- 'stem_type': 'deep',
111
- 'stem_width': 32,
112
- 'avg_down': True,
113
- 'base_width': 64,
114
- 'cardinality': 1,
115
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
116
- }
117
  },
118
- 'timm-resnest50d': {
119
- 'encoder': ResNestEncoder,
120
  "pretrained_settings": pretrained_settings["timm-resnest50d"],
121
- 'params': {
122
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
123
- 'block': ResNestBottleneck,
124
- 'layers': [3, 4, 6, 3],
125
- 'stem_type': 'deep',
126
- 'stem_width': 32,
127
- 'avg_down': True,
128
- 'base_width': 64,
129
- 'cardinality': 1,
130
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
131
- }
132
  },
133
- 'timm-resnest101e': {
134
- 'encoder': ResNestEncoder,
135
  "pretrained_settings": pretrained_settings["timm-resnest101e"],
136
- 'params': {
137
- 'out_channels': (3, 128, 256, 512, 1024, 2048),
138
- 'block': ResNestBottleneck,
139
- 'layers': [3, 4, 23, 3],
140
- 'stem_type': 'deep',
141
- 'stem_width': 64,
142
- 'avg_down': True,
143
- 'base_width': 64,
144
- 'cardinality': 1,
145
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
146
- }
147
  },
148
- 'timm-resnest200e': {
149
- 'encoder': ResNestEncoder,
150
  "pretrained_settings": pretrained_settings["timm-resnest200e"],
151
- 'params': {
152
- 'out_channels': (3, 128, 256, 512, 1024, 2048),
153
- 'block': ResNestBottleneck,
154
- 'layers': [3, 24, 36, 3],
155
- 'stem_type': 'deep',
156
- 'stem_width': 64,
157
- 'avg_down': True,
158
- 'base_width': 64,
159
- 'cardinality': 1,
160
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
161
- }
162
  },
163
- 'timm-resnest269e': {
164
- 'encoder': ResNestEncoder,
165
  "pretrained_settings": pretrained_settings["timm-resnest269e"],
166
- 'params': {
167
- 'out_channels': (3, 128, 256, 512, 1024, 2048),
168
- 'block': ResNestBottleneck,
169
- 'layers': [3, 30, 48, 8],
170
- 'stem_type': 'deep',
171
- 'stem_width': 64,
172
- 'avg_down': True,
173
- 'base_width': 64,
174
- 'cardinality': 1,
175
- 'block_args': {'radix': 2, 'avd': True, 'avd_first': False}
176
  },
177
  },
178
- 'timm-resnest50d_4s2x40d': {
179
- 'encoder': ResNestEncoder,
180
  "pretrained_settings": pretrained_settings["timm-resnest50d_4s2x40d"],
181
- 'params': {
182
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
183
- 'block': ResNestBottleneck,
184
- 'layers': [3, 4, 6, 3],
185
- 'stem_type': 'deep',
186
- 'stem_width': 32,
187
- 'avg_down': True,
188
- 'base_width': 40,
189
- 'cardinality': 2,
190
- 'block_args': {'radix': 4, 'avd': True, 'avd_first': True}
191
- }
192
  },
193
- 'timm-resnest50d_1s4x24d': {
194
- 'encoder': ResNestEncoder,
195
  "pretrained_settings": pretrained_settings["timm-resnest50d_1s4x24d"],
196
- 'params': {
197
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
198
- 'block': ResNestBottleneck,
199
- 'layers': [3, 4, 6, 3],
200
- 'stem_type': 'deep',
201
- 'stem_width': 32,
202
- 'avg_down': True,
203
- 'base_width': 24,
204
- 'cardinality': 4,
205
- 'block_args': {'radix': 1, 'avd': True, 'avd_first': True}
206
- }
207
- }
208
  }
 
 
 
 
1
  import torch.nn as nn
2
+ from timm.models.resnest import ResNestBottleneck
3
+ from timm.models.resnet import ResNet
4
+
5
+ from ._base import EncoderMixin
6
 
7
 
8
  class ResNestEncoder(ResNet, EncoderMixin):
 
45
 
46
 
47
  resnest_weights = {
48
+ "timm-resnest14d": {
49
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest14-9c8fe254.pth"
50
+ },
51
+ "timm-resnest26d": {
52
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gluon_resnest26-50eb607c.pth"
53
  },
54
+ "timm-resnest50d": {
55
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50-528c19ca.pth",
56
  },
57
+ "timm-resnest101e": {
58
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest101-22405ba7.pth",
59
  },
60
+ "timm-resnest200e": {
61
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest200-75117900.pth",
62
  },
63
+ "timm-resnest269e": {
64
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest269-0cc87c48.pth",
65
  },
66
+ "timm-resnest50d_4s2x40d": {
67
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_4s2x40d-41d14ed0.pth",
68
  },
69
+ "timm-resnest50d_1s4x24d": {
70
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-resnest/resnest50_fast_1s4x24d-d4a4f76f.pth",
71
  },
 
 
 
72
  }
73
 
74
  pretrained_settings = {}
 
77
  for source_name, source_url in sources.items():
78
  pretrained_settings[model_name][source_name] = {
79
  "url": source_url,
80
+ "input_size": [3, 224, 224],
81
+ "input_range": [0, 1],
82
+ "mean": [0.485, 0.456, 0.406],
83
+ "std": [0.229, 0.224, 0.225],
84
+ "num_classes": 1000,
85
  }
86
 
87
 
88
  timm_resnest_encoders = {
89
+ "timm-resnest14d": {
90
+ "encoder": ResNestEncoder,
91
  "pretrained_settings": pretrained_settings["timm-resnest14d"],
92
+ "params": {
93
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
94
+ "block": ResNestBottleneck,
95
+ "layers": [1, 1, 1, 1],
96
+ "stem_type": "deep",
97
+ "stem_width": 32,
98
+ "avg_down": True,
99
+ "base_width": 64,
100
+ "cardinality": 1,
101
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
102
+ },
103
  },
104
+ "timm-resnest26d": {
105
+ "encoder": ResNestEncoder,
106
  "pretrained_settings": pretrained_settings["timm-resnest26d"],
107
+ "params": {
108
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
109
+ "block": ResNestBottleneck,
110
+ "layers": [2, 2, 2, 2],
111
+ "stem_type": "deep",
112
+ "stem_width": 32,
113
+ "avg_down": True,
114
+ "base_width": 64,
115
+ "cardinality": 1,
116
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
117
+ },
118
  },
119
+ "timm-resnest50d": {
120
+ "encoder": ResNestEncoder,
121
  "pretrained_settings": pretrained_settings["timm-resnest50d"],
122
+ "params": {
123
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
124
+ "block": ResNestBottleneck,
125
+ "layers": [3, 4, 6, 3],
126
+ "stem_type": "deep",
127
+ "stem_width": 32,
128
+ "avg_down": True,
129
+ "base_width": 64,
130
+ "cardinality": 1,
131
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
132
+ },
133
  },
134
+ "timm-resnest101e": {
135
+ "encoder": ResNestEncoder,
136
  "pretrained_settings": pretrained_settings["timm-resnest101e"],
137
+ "params": {
138
+ "out_channels": (3, 128, 256, 512, 1024, 2048),
139
+ "block": ResNestBottleneck,
140
+ "layers": [3, 4, 23, 3],
141
+ "stem_type": "deep",
142
+ "stem_width": 64,
143
+ "avg_down": True,
144
+ "base_width": 64,
145
+ "cardinality": 1,
146
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
147
+ },
148
  },
149
+ "timm-resnest200e": {
150
+ "encoder": ResNestEncoder,
151
  "pretrained_settings": pretrained_settings["timm-resnest200e"],
152
+ "params": {
153
+ "out_channels": (3, 128, 256, 512, 1024, 2048),
154
+ "block": ResNestBottleneck,
155
+ "layers": [3, 24, 36, 3],
156
+ "stem_type": "deep",
157
+ "stem_width": 64,
158
+ "avg_down": True,
159
+ "base_width": 64,
160
+ "cardinality": 1,
161
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
162
+ },
163
  },
164
+ "timm-resnest269e": {
165
+ "encoder": ResNestEncoder,
166
  "pretrained_settings": pretrained_settings["timm-resnest269e"],
167
+ "params": {
168
+ "out_channels": (3, 128, 256, 512, 1024, 2048),
169
+ "block": ResNestBottleneck,
170
+ "layers": [3, 30, 48, 8],
171
+ "stem_type": "deep",
172
+ "stem_width": 64,
173
+ "avg_down": True,
174
+ "base_width": 64,
175
+ "cardinality": 1,
176
+ "block_args": {"radix": 2, "avd": True, "avd_first": False},
177
  },
178
  },
179
+ "timm-resnest50d_4s2x40d": {
180
+ "encoder": ResNestEncoder,
181
  "pretrained_settings": pretrained_settings["timm-resnest50d_4s2x40d"],
182
+ "params": {
183
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
184
+ "block": ResNestBottleneck,
185
+ "layers": [3, 4, 6, 3],
186
+ "stem_type": "deep",
187
+ "stem_width": 32,
188
+ "avg_down": True,
189
+ "base_width": 40,
190
+ "cardinality": 2,
191
+ "block_args": {"radix": 4, "avd": True, "avd_first": True},
192
+ },
193
  },
194
+ "timm-resnest50d_1s4x24d": {
195
+ "encoder": ResNestEncoder,
196
  "pretrained_settings": pretrained_settings["timm-resnest50d_1s4x24d"],
197
+ "params": {
198
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
199
+ "block": ResNestBottleneck,
200
+ "layers": [3, 4, 6, 3],
201
+ "stem_type": "deep",
202
+ "stem_width": 32,
203
+ "avg_down": True,
204
+ "base_width": 24,
205
+ "cardinality": 4,
206
+ "block_args": {"radix": 1, "avd": True, "avd_first": True},
207
+ },
208
+ },
209
  }
segmentation_models_pytorch/segmentation_models_pytorch/encoders/timm_sknet.py CHANGED
@@ -1,7 +1,8 @@
1
- from ._base import EncoderMixin
2
- from timm.models.resnet import ResNet
3
- from timm.models.sknet import SelectiveKernelBottleneck, SelectiveKernelBasic
4
  import torch.nn as nn
 
 
 
 
5
 
6
 
7
  class SkNetEncoder(ResNet, EncoderMixin):
@@ -41,15 +42,15 @@ class SkNetEncoder(ResNet, EncoderMixin):
41
 
42
 
43
  sknet_weights = {
44
- 'timm-skresnet18': {
45
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth'
46
  },
47
- 'timm-skresnet34': {
48
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth'
 
 
 
49
  },
50
- 'timm-skresnext50_32x4d': {
51
- 'imagenet': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth',
52
- }
53
  }
54
 
55
  pretrained_settings = {}
@@ -58,46 +59,58 @@ for model_name, sources in sknet_weights.items():
58
  for source_name, source_url in sources.items():
59
  pretrained_settings[model_name][source_name] = {
60
  "url": source_url,
61
- 'input_size': [3, 224, 224],
62
- 'input_range': [0, 1],
63
- 'mean': [0.485, 0.456, 0.406],
64
- 'std': [0.229, 0.224, 0.225],
65
- 'num_classes': 1000
66
  }
67
 
68
  timm_sknet_encoders = {
69
- 'timm-skresnet18': {
70
- 'encoder': SkNetEncoder,
71
  "pretrained_settings": pretrained_settings["timm-skresnet18"],
72
- 'params': {
73
- 'out_channels': (3, 64, 64, 128, 256, 512),
74
- 'block': SelectiveKernelBasic,
75
- 'layers': [2, 2, 2, 2],
76
- 'zero_init_last_bn': False,
77
- 'block_args': {'sk_kwargs': {'min_attn_channels': 16, 'attn_reduction': 8, 'split_input': True}}
78
- }
 
 
 
 
 
 
79
  },
80
- 'timm-skresnet34': {
81
- 'encoder': SkNetEncoder,
82
  "pretrained_settings": pretrained_settings["timm-skresnet34"],
83
- 'params': {
84
- 'out_channels': (3, 64, 64, 128, 256, 512),
85
- 'block': SelectiveKernelBasic,
86
- 'layers': [3, 4, 6, 3],
87
- 'zero_init_last_bn': False,
88
- 'block_args': {'sk_kwargs': {'min_attn_channels': 16, 'attn_reduction': 8, 'split_input': True}}
89
- }
 
 
 
 
 
 
90
  },
91
- 'timm-skresnext50_32x4d': {
92
- 'encoder': SkNetEncoder,
93
  "pretrained_settings": pretrained_settings["timm-skresnext50_32x4d"],
94
- 'params': {
95
- 'out_channels': (3, 64, 256, 512, 1024, 2048),
96
- 'block': SelectiveKernelBottleneck,
97
- 'layers': [3, 4, 6, 3],
98
- 'zero_init_last_bn': False,
99
- 'cardinality': 32,
100
- 'base_width': 4
101
- }
102
- }
103
  }
 
 
 
 
1
  import torch.nn as nn
2
+ from timm.models.resnet import ResNet
3
+ from timm.models.sknet import SelectiveKernelBasic, SelectiveKernelBottleneck
4
+
5
+ from ._base import EncoderMixin
6
 
7
 
8
  class SkNetEncoder(ResNet, EncoderMixin):
 
42
 
43
 
44
  sknet_weights = {
45
+ "timm-skresnet18": {
46
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet18_ra-4eec2804.pth"
47
  },
48
+ "timm-skresnet34": {
49
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnet34_ra-bdc0ccde.pth"
50
+ },
51
+ "timm-skresnext50_32x4d": {
52
+ "imagenet": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/skresnext50_ra-f40e40bf.pth",
53
  },
 
 
 
54
  }
55
 
56
  pretrained_settings = {}
 
59
  for source_name, source_url in sources.items():
60
  pretrained_settings[model_name][source_name] = {
61
  "url": source_url,
62
+ "input_size": [3, 224, 224],
63
+ "input_range": [0, 1],
64
+ "mean": [0.485, 0.456, 0.406],
65
+ "std": [0.229, 0.224, 0.225],
66
+ "num_classes": 1000,
67
  }
68
 
69
  timm_sknet_encoders = {
70
+ "timm-skresnet18": {
71
+ "encoder": SkNetEncoder,
72
  "pretrained_settings": pretrained_settings["timm-skresnet18"],
73
+ "params": {
74
+ "out_channels": (3, 64, 64, 128, 256, 512),
75
+ "block": SelectiveKernelBasic,
76
+ "layers": [2, 2, 2, 2],
77
+ "zero_init_last_bn": False,
78
+ "block_args": {
79
+ "sk_kwargs": {
80
+ "min_attn_channels": 16,
81
+ "attn_reduction": 8,
82
+ "split_input": True,
83
+ }
84
+ },
85
+ },
86
  },
87
+ "timm-skresnet34": {
88
+ "encoder": SkNetEncoder,
89
  "pretrained_settings": pretrained_settings["timm-skresnet34"],
90
+ "params": {
91
+ "out_channels": (3, 64, 64, 128, 256, 512),
92
+ "block": SelectiveKernelBasic,
93
+ "layers": [3, 4, 6, 3],
94
+ "zero_init_last_bn": False,
95
+ "block_args": {
96
+ "sk_kwargs": {
97
+ "min_attn_channels": 16,
98
+ "attn_reduction": 8,
99
+ "split_input": True,
100
+ }
101
+ },
102
+ },
103
  },
104
+ "timm-skresnext50_32x4d": {
105
+ "encoder": SkNetEncoder,
106
  "pretrained_settings": pretrained_settings["timm-skresnext50_32x4d"],
107
+ "params": {
108
+ "out_channels": (3, 64, 256, 512, 1024, 2048),
109
+ "block": SelectiveKernelBottleneck,
110
+ "layers": [3, 4, 6, 3],
111
+ "zero_init_last_bn": False,
112
+ "cardinality": 32,
113
+ "base_width": 4,
114
+ },
115
+ },
116
  }
segmentation_models_pytorch/segmentation_models_pytorch/encoders/vgg.py CHANGED
@@ -24,9 +24,8 @@ Methods:
  """

  import torch.nn as nn
- from torchvision.models.vgg import VGG
- from torchvision.models.vgg import make_layers
  from pretrainedmodels.models.torchvision_models import pretrained_settings
+ from torchvision.models.vgg import VGG, make_layers

  from ._base import EncoderMixin

@@ -49,8 +48,10 @@ class VGGEncoder(VGG, EncoderMixin):
  del self.classifier

  def make_dilated(self, stage_list, dilation_list):
- raise ValueError("'VGG' models do not support dilated mode due to Max Pooling"
- " operations for downsampling!")
+ raise ValueError(
+ "'VGG' models do not support dilated mode due to Max Pooling"
+ " operations for downsampling!"
+ )

  def get_stages(self):
  stages = []
segmentation_models_pytorch/segmentation_models_pytorch/encoders/xception.py CHANGED
@@ -1,14 +1,12 @@
  import re
- import torch.nn as nn

- from pretrainedmodels.models.xception import pretrained_settings
- from pretrainedmodels.models.xception import Xception
+ import torch.nn as nn
+ from pretrainedmodels.models.xception import Xception, pretrained_settings

  from ._base import EncoderMixin


  class XceptionEncoder(Xception, EncoderMixin):
-
      def __init__(self, out_channels, *args, depth=5, **kwargs):
          super().__init__(*args, **kwargs)

@@ -23,18 +21,33 @@ class XceptionEncoder(Xception, EncoderMixin):
          del self.fc

      def make_dilated(self, stage_list, dilation_list):
-         raise ValueError("Xception encoder does not support dilated mode "
-                          "due to pooling operation for downsampling!")
+         raise ValueError(
+             "Xception encoder does not support dilated mode "
+             "due to pooling operation for downsampling!"
+         )

      def get_stages(self):
          return [
              nn.Identity(),
-             nn.Sequential(self.conv1, self.bn1, self.relu, self.conv2, self.bn2, self.relu),
+             nn.Sequential(
+                 self.conv1, self.bn1, self.relu, self.conv2, self.bn2, self.relu
+             ),
              self.block1,
              self.block2,
-             nn.Sequential(self.block3, self.block4, self.block5, self.block6, self.block7,
-                           self.block8, self.block9, self.block10, self.block11),
-             nn.Sequential(self.block12, self.conv3, self.bn3, self.relu, self.conv4, self.bn4),
+             nn.Sequential(
+                 self.block3,
+                 self.block4,
+                 self.block5,
+                 self.block6,
+                 self.block7,
+                 self.block8,
+                 self.block9,
+                 self.block10,
+                 self.block11,
+             ),
+             nn.Sequential(
+                 self.block12, self.conv3, self.bn3, self.relu, self.conv4, self.bn4
+             ),
          ]

      def forward(self, x):

@@ -49,18 +62,18 @@ class XceptionEncoder(Xception, EncoderMixin):

      def load_state_dict(self, state_dict):
          # remove linear
-         state_dict.pop('fc.bias')
-         state_dict.pop('fc.weight')
+         state_dict.pop("fc.bias")
+         state_dict.pop("fc.weight")

          super().load_state_dict(state_dict)


  xception_encoders = {
-     'xception': {
-         'encoder': XceptionEncoder,
-         'pretrained_settings': pretrained_settings['xception'],
-         'params': {
-             'out_channels': (3, 64, 128, 256, 728, 2048),
-         }
+     "xception": {
+         "encoder": XceptionEncoder,
+         "pretrained_settings": pretrained_settings["xception"],
+         "params": {
+             "out_channels": (3, 64, 128, 256, 728, 2048),
+         },
      },
  }
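
The `xception_encoders` registry maps an encoder name to its class, its pretrained weights, and the per-stage `out_channels`, while `get_stages` splits the backbone into one module per resolution level. A hedged sketch of how such stage outputs are typically collected into multi-scale features (the repository's actual base-encoder `forward` may differ in detail):

# Hedged sketch: iterate over get_stages() and keep one feature map per stage.
import torch


def extract_features(encoder, x: torch.Tensor, depth: int = 5):
    features = []
    for stage in encoder.get_stages()[: depth + 1]:
        x = stage(x)        # each stage after the first roughly halves the resolution
        features.append(x)  # channel counts follow out_channels, e.g. (3, 64, 128, 256, 728, 2048)
    return features

The decoder then consumes this list, usually deepest feature first.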
segmentation_models_pytorch/segmentation_models_pytorch/fpn/__init__.py CHANGED
@@ -1 +1 @@
- from .model import FPN
+ from .model import FPN
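
`FPN` re-exported here is the public entry point for this architecture. Based on the constructor arguments documented in `fpn/model.py` below, a typical usage looks roughly like the following hedged sketch (argument defaults may differ between library versions):

# Hedged usage sketch; argument names follow the FPN docstring in fpn/model.py below.
import torch
import segmentation_models_pytorch as smp

model = smp.FPN(
    encoder_name="resnet34",      # backbone used to extract multi-scale features
    encoder_weights="imagenet",   # or None for random initialization
    classes=1,                    # number of output mask channels
    activation=None,              # return raw logits
    aux_params={"classes": 1, "pooling": "avg"},  # optional classification head
)

# With aux_params set, the model returns a (mask, label) pair.
mask, label = model(torch.randn(1, 3, 256, 256))  # mask: (1, 1, 256, 256)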
segmentation_models_pytorch/segmentation_models_pytorch/fpn/decoder.py CHANGED
@@ -55,51 +55,63 @@ class MergeBlock(nn.Module):
          super().__init__()
          if policy not in ["add", "cat"]:
              raise ValueError(
-                 "`merge_policy` must be one of: ['add', 'cat'], got {}".format(
-                     policy
-                 )
+                 "`merge_policy` must be one of: ['add', 'cat'], got {}".format(policy)
              )
          self.policy = policy

      def forward(self, x):
-         if self.policy == 'add':
+         if self.policy == "add":
              return sum(x)
-         elif self.policy == 'cat':
+         elif self.policy == "cat":
              return torch.cat(x, dim=1)
          else:
              raise ValueError(
-                 "`merge_policy` must be one of: ['add', 'cat'], got {}".format(self.policy)
+                 "`merge_policy` must be one of: ['add', 'cat'], got {}".format(
+                     self.policy
+                 )
              )


  class FPNDecoder(nn.Module):
      def __init__(
-             self,
-             encoder_channels,
-             encoder_depth=5,
-             pyramid_channels=256,
-             segmentation_channels=128,
-             dropout=0.2,
-             merge_policy="add",
+         self,
+         encoder_channels,
+         encoder_depth=5,
+         pyramid_channels=256,
+         segmentation_channels=128,
+         dropout=0.2,
+         merge_policy="add",
      ):
          super().__init__()

-         self.out_channels = segmentation_channels if merge_policy == "add" else segmentation_channels * 4
+         self.out_channels = (
+             segmentation_channels
+             if merge_policy == "add"
+             else segmentation_channels * 4
+         )
          if encoder_depth < 3:
-             raise ValueError("Encoder depth for FPN decoder cannot be less than 3, got {}.".format(encoder_depth))
+             raise ValueError(
+                 "Encoder depth for FPN decoder cannot be less than 3, got {}.".format(
+                     encoder_depth
+                 )
+             )

          encoder_channels = encoder_channels[::-1]
-         encoder_channels = encoder_channels[:encoder_depth + 1]
+         encoder_channels = encoder_channels[: encoder_depth + 1]

          self.p5 = nn.Conv2d(encoder_channels[0], pyramid_channels, kernel_size=1)
          self.p4 = FPNBlock(pyramid_channels, encoder_channels[1])
          self.p3 = FPNBlock(pyramid_channels, encoder_channels[2])
          self.p2 = FPNBlock(pyramid_channels, encoder_channels[3])

-         self.seg_blocks = nn.ModuleList([
-             SegmentationBlock(pyramid_channels, segmentation_channels, n_upsamples=n_upsamples)
-             for n_upsamples in [3, 2, 1, 0]
-         ])
+         self.seg_blocks = nn.ModuleList(
+             [
+                 SegmentationBlock(
+                     pyramid_channels, segmentation_channels, n_upsamples=n_upsamples
+                 )
+                 for n_upsamples in [3, 2, 1, 0]
+             ]
+         )

          self.merge = MergeBlock(merge_policy)
          self.dropout = nn.Dropout2d(p=dropout, inplace=True)

@@ -112,7 +124,9 @@ class FPNDecoder(nn.Module):
          p3 = self.p3(p4, c3)
          p2 = self.p2(p3, c2)

-         feature_pyramid = [seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2])]
+         feature_pyramid = [
+             seg_block(p) for seg_block, p in zip(self.seg_blocks, [p5, p4, p3, p2])
+         ]
          x = self.merge(feature_pyramid)
          x = self.dropout(x)
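
The decoder's `out_channels` above follows directly from the merge policy: "add" sums the four segmentation-block outputs, so the channel count stays at `segmentation_channels`, while "cat" concatenates them along the channel dimension and quadruples it. A small illustration of the two `MergeBlock` policies (shapes chosen only for the demo):

# Demo of the two merge policies; tensor sizes are arbitrary.
import torch

pyramid = [torch.randn(2, 128, 64, 64) for _ in range(4)]  # four pyramid levels, 128 channels each

merged_add = sum(pyramid)               # "add" -> shape (2, 128, 64, 64)
merged_cat = torch.cat(pyramid, dim=1)  # "cat" -> shape (2, 512, 64, 64), i.e. 4 * 128 channels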
 
segmentation_models_pytorch/segmentation_models_pytorch/fpn/model.py CHANGED
@@ -1,7 +1,8 @@
  from typing import Optional, Union
- from .decoder import FPNDecoder
- from ..base import SegmentationModel, SegmentationHead, ClassificationHead
+
+ from ..base import ClassificationHead, SegmentationHead, SegmentationModel
  from ..encoders import get_encoder
+ from .decoder import FPNDecoder


  class FPN(SegmentationModel):

@@ -10,11 +11,11 @@ class FPN(SegmentationModel):
      Args:
          encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
              to extract features of different spatial resolution
-         encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
+         encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
              two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
              with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
              Default is 5
-         encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
+         encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
              other pretrained weights (see table with available weights for each encoder_name)
          decoder_pyramid_channels: A number of convolution filters in Feature Pyramid of FPN_
          decoder_segmentation_channels: A number of convolution filters in segmentation blocks of FPN_

@@ -26,7 +27,7 @@ class FPN(SegmentationModel):
              Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
              Default is **None**
          upsampling: Final upsampling factor. Default is 4 to preserve input-output spatial shape identity
-         aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
+         aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
              on top of encoder if **aux_params** is not **None** (default). Supported params:
              - classes (int): A number of classes
              - pooling (str): One of "max", "avg". Default is "avg"