G2Net Basic audio data augmentation inference

COLAB = False

if COLAB == True:
    from google.colab import drive
    drive.mount('/content/drive')
    %cd '/content/drive/MyDrive/Colab Notebooks/kaggle/G2Net2022/code'
! pip3 install timm -q
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import h5py
import timm
import torch
import torch.nn as nn
import torchaudio
import torchvision.transforms as TF


from tqdm.auto import tqdm
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from timm.scheduler import CosineLRScheduler

device = torch.device('cuda')
criterion = nn.BCEWithLogitsLoss()

# Train metadata
di = '../input/g2net-detecting-continuous-gravitational-waves'
df = pd.read_csv(di + '/train_labels.csv')
df = df[df.target >= 0]  # Remove 3 unknowns (target = -1)

Dataset

transforms_time_mask = nn.Sequential(
                torchaudio.transforms.TimeMasking(time_mask_param=10),
            )

transforms_freq_mask = nn.Sequential(
                torchaudio.transforms.FrequencyMasking(freq_mask_param=10),
            )

flip_rate = 0.0 # probability of applying the horizontal flip and vertical flip 
fre_shift_rate = 0.0 # probability of applying the vertical shift

time_mask_num = 0 # number of time masking
freq_mask_num = 0 # number of frequency masking
class Dataset(torch.utils.data.Dataset):
    """
    dataset = Dataset(data_type, df)

    img, y = dataset[i]
      img (np.float32): 2 x 360 x 128
      y (np.float32): label 0 or 1
    """
    def __init__(self, data_type, df, tfms=False):
        self.data_type = data_type
        self.df = df
        self.tfms = tfms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """
        i (int): get ith data
        """
        r = self.df.iloc[i]
        y = np.float32(r.target)
        file_id = r.id

        img = np.empty((2, 360, 128), dtype=np.float32)

        filename = '%s/%s/%s.hdf5' % (di, self.data_type, file_id)
        with h5py.File(filename, 'r') as f:
            g = f[file_id]

            for ch, s in enumerate(['H1', 'L1']):
                a = g[s]['SFTs'][:, :4096] * 1e22  # Fourier coefficient complex64

                p = a.real**2 + a.imag**2  # power
                p /= np.mean(p)  # normalize
                p = np.mean(p.reshape(360, 128, 32), axis=2)  # compress 4096 -> 128
                img[ch] = p

        if self.tfms:
            if np.random.rand() <= flip_rate: # flip along the frequency axis (vertical flip)
                img = np.flip(img, axis=1).copy()
            if np.random.rand() <= flip_rate: # flip along the time axis (horizontal flip)
                img = np.flip(img, axis=2).copy()
            if np.random.rand() <= fre_shift_rate: # vertical (frequency) shift
                img = np.roll(img, np.random.randint(low=0, high=img.shape[1]), axis=1)
            
            img = torch.from_numpy(img)

            for _ in range(time_mask_num): # time masking
                img = transforms_time_mask(img)
            for _ in range(freq_mask_num): # frequency masking
                img = transforms_freq_mask(img)
        
        else:
            img = torch.from_numpy(img)
                
        return img, y

Audio Data Augmentation

  • horizontal flip
  • vertical flip
  • vertical shift
  • time masking*
  • frequency masking*

*Reference: SpecAugment, https://arxiv.org/abs/1904.08779
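
For reference, the two SpecAugment-style transforms defined above act on the trailing tensor dimensions: TimeMasking zeroes a band along the last axis (time) and FrequencyMasking zeroes a band along the second-to-last axis (frequency). A minimal standalone check on a dummy spectrogram (illustrative only, not part of the pipeline):

demo = torch.rand(2, 360, 128)  # dummy (channel, frequency, time) spectrogram
tm = torchaudio.transforms.TimeMasking(time_mask_param=10)       # masks up to 10 consecutive time bins
fm = torchaudio.transforms.FrequencyMasking(freq_mask_param=10)  # masks up to 10 consecutive frequency bins
print(tm(demo).shape, fm(demo).shape)  # shapes are unchanged: torch.Size([2, 360, 128]) each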

Horizontal flip and Vertical flip

dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]


plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()


flip_rate = 1.0 # probability of applying the horizontal flip and vertical flip 

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()

Vertical shift

dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]


plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()


flip_rate = 0.0 # probability of applying the horizontal flip and vertical flip 
fre_shift_rate = 1.0 # probability of applying the vertical shift

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()

Time masking

dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]


plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()


flip_rate = 0.0 # probability of applying the horizontal flip and vertical flip 
fre_shift_rate = 0.0 # probability of applying the vertical shift
time_mask_num = 3 # number of time masking

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()

Frequency masking

dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]


plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()


flip_rate = 0.0 # probability of applying the horizontal flip and vertical flip 
fre_shift_rate = 0.0 # probability of applying the vertical shift
time_mask_num = 0 # number of time masking
freq_mask_num = 3 # number of frequency masking

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()

Model

class Model(nn.Module):
    def __init__(self, name, *, pretrained=False):
        """
        name (str): timm model name, e.g. tf_efficientnet_b2_ns
        """
        super().__init__()

        # Use timm
        model = timm.create_model(name, pretrained=pretrained, in_chans=2)

        clsf = model.default_cfg['classifier']
        n_features = model._modules[clsf].in_features
        model._modules[clsf] = nn.Identity()

        self.fc = nn.Linear(n_features, 1)
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = self.fc(x)
        return x
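
A quick shape check of the wrapper on a dummy batch (illustrative only; the small b0 backbone is used here just so the check runs fast, it is not the backbone trained below):

m = Model('tf_efficientnet_b0_ns', pretrained=False)
x = torch.rand(4, 2, 360, 128)  # batch of 4 two-channel 360 x 128 spectrograms
print(m(x).shape)               # torch.Size([4, 1]) -> one logit per example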

Predict and evaluate

def evaluate(model, loader_val, *, compute_score=True, pbar=None):
    """
    Predict and compute loss and score
    """
    tb = time.time()
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    n_sum = 0
    y_all = []
    y_pred_all = []

    if pbar is not None:
        pbar = tqdm(desc='Predict', ncols=78, total=pbar)

    for img, y in loader_val:
        n = y.size(0)
        img = img.to(device)
        y = y.to(device)

        with torch.no_grad():
            y_pred = model(img)  # img is already on the device

        loss = criterion(y_pred.view(-1), y)

        n_sum += n
        loss_sum += n * loss.item()

        y_all.append(y.cpu().detach().numpy())
        y_pred_all.append(y_pred.sigmoid().view(-1).cpu().detach().numpy())  # view(-1) keeps 1-D even if the last batch has size 1

        if pbar is not None:
            pbar.update(len(img))
        
        del loss, y_pred, img, y

    loss_val = loss_sum / n_sum

    y = np.concatenate(y_all)
    y_pred = np.concatenate(y_pred_all)

    score = roc_auc_score(y, y_pred) if compute_score else None

    ret = {'loss': loss_val,
           'score': score,
           'y': y,
           'y_pred': y_pred,
           'time': time.time() - tb}
    
    model.train(was_training)  # back to train from eval if necessary

    return ret

Train

model_name = 'tf_efficientnet_b7_ns'
nfold = 5
kfold = KFold(n_splits=nfold, random_state=42, shuffle=True)

epochs = 25
batch_size = 16
num_workers = 2
weight_decay = 1e-6
max_grad_norm = 1000

lr_max = 4e-4
epochs_warmup = 1.0


## Settings for audio data augmentation
flip_rate = 0.5 # probability of applying the horizontal flip and vertical flip 
fre_shift_rate = 1.0 # probability of applying the vertical shift
time_mask_num = 1 # number of time masking
freq_mask_num = 2 # number of frequency masking
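
The training cells themselves are not included in this export. Below is a minimal sketch of how these settings could be wired together for the per-fold training; the optimizer choice, the lr_min value, and the checkpoint filenames are assumptions rather than the original code.

for ifold, (idx_train, idx_val) in enumerate(kfold.split(df)):
    dataset_train = Dataset('train', df.iloc[idx_train], tfms=True)   # augmentation on
    dataset_val = Dataset('train', df.iloc[idx_val], tfms=False)      # no augmentation for validation
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True,
                                               num_workers=num_workers, pin_memory=True, drop_last=True)
    loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=64,
                                             num_workers=num_workers, pin_memory=True)

    model = Model(model_name, pretrained=True)
    model.to(device)
    model.train()

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr_max, weight_decay=weight_decay)
    scheduler = CosineLRScheduler(optimizer, t_initial=epochs, warmup_t=epochs_warmup,
                                  warmup_lr_init=0.0, lr_min=1e-6)  # lr_min is an assumption

    for epoch in range(epochs):
        for img, y in tqdm(loader_train, desc=f'Fold {ifold} epoch {epoch}', ncols=78):
            img, y = img.to(device), y.to(device)

            optimizer.zero_grad()
            y_pred = model(img)
            loss = criterion(y_pred.view(-1), y)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()

        scheduler.step(epoch + 1)  # timm schedulers are stepped with the epoch index

        val = evaluate(model, loader_val)
        print('Fold %d epoch %d: val loss %.4f  AUC %.4f' % (ifold, epoch, val['loss'], val['score']))

    torch.save(model.state_dict(), f'model{ifold}.pytorch')  # filename pattern assumed from the inference cell below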

Predict and submit

# Prepare the submission frame and the test loader once
submit = pd.read_csv(di + '/sample_submission.csv')
submit['target'] = 0.0  # start from zero before accumulating the fold-averaged predictions

dataset_test = Dataset('test', submit)
loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=64,
                                          num_workers=num_workers, pin_memory=True)

if COLAB == False:
    # Load each trained fold model and average its predictions into the submission
    for i in range(nfold):
        model = Model(model_name, pretrained=False)
        filename = f'../input/g2net-b7-aug/model{i}.pytorch'
        model.to(device)
        model.load_state_dict(torch.load(filename, map_location=device))
        model.eval()

        # Predict
        test = evaluate(model, loader_test, compute_score=False, pbar=len(submit))

        # Accumulate the fold-averaged prediction
        submit['target'] += test['y_pred'] / nfold

submit.to_csv('submission.csv', index=False)
print('target range [%.2f, %.2f]' % (submit['target'].min(), submit['target'].max()))
{"version_major":2,"version_minor":0,"model_id":"6a88bc6d8df7432b89c77446ff32c9d9"}

Comments

Popular posts from this blog

Does Autocorrect Make Life Better?

Neural Networks from Scratch For Beginner and Also For Experts