G2Net Basic audio data augmentation inference
# Environment setup.
# The notebook magics (`%cd`, `! pip3 ...`) are written as plain Python so the
# cell also parses outside IPython.
import os
import subprocess
import sys

COLAB = False  # set to True when running on Google Colab

if COLAB:
    from google.colab import drive

    drive.mount('/content/drive')
    # NOTE(review): the working-directory change is assumed to belong to the
    # Colab branch (the path lives on the mounted drive) - confirm.
    os.chdir('/content/drive/MyDrive/Colab Notebooks/kaggle/G2Net2022/code')

# Best-effort install: like the original shell escape, a failing pip does not
# abort the script (check=False).
subprocess.run([sys.executable, '-m', 'pip', 'install', 'timm', '-q'], check=False)
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import h5py
import timm
import torch
import torch.nn as nn
import torchaudio
import torchvision.transforms as TF
from tqdm.auto import tqdm
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from timm.scheduler import CosineLRScheduler
# Run on the GPU when one is present; fall back to the CPU so the script still
# starts on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.BCEWithLogitsLoss()

# Train metadata
di = '../input/g2net-detecting-continuous-gravitational-waves'
df = pd.read_csv(di + '/train_labels.csv')
df = df[df.target >= 0]  # Remove 3 unknowns (target = -1)
Dataset
# SpecAugment-style masking modules applied by Dataset when tfms=True.
transforms_time_mask = nn.Sequential(
    torchaudio.transforms.TimeMasking(time_mask_param=10),
)

transforms_freq_mask = nn.Sequential(
    torchaudio.transforms.FrequencyMasking(freq_mask_param=10),
)

# Augmentation settings. Dataset.__getitem__ reads these module-level names at
# call time, so re-assigning them later changes the augmentation applied.
flip_rate = 0.0  # probability of applying the horizontal flip and vertical flip
fre_shift_rate = 0.0  # probability of applying the vertical shift

time_mask_num = 0  # number of time masking
freq_mask_num = 0  # number of frequency masking
class Dataset(torch.utils.data.Dataset):
    """
    Normalised power spectrograms read from per-sample HDF5 files.

    dataset = Dataset(data_type, df)

    img, y = dataset[i]
      img (torch.Tensor, float32): 2 x 360 x 128
      y (np.float32): value of the `target` column (label 0 or 1 for train)

    The file location is built from the module-level `di`, and the
    augmentation settings (`flip_rate`, `fre_shift_rate`, `time_mask_num`,
    `freq_mask_num`, `transforms_time_mask`, `transforms_freq_mask`) are read
    from module level on every call.
    """

    def __init__(self, data_type, df, tfms=False):
        """
        data_type (str): sub-directory under `di`, e.g. 'train' or 'test'
        df (pd.DataFrame): rows with `id` and `target` columns
        tfms (bool): apply the random augmentations when True
        """
        self.data_type = data_type
        self.df = df
        self.tfms = tfms

    def __len__(self):
        """Return the number of rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, i):
        """
        i (int): get ith data
        """
        r = self.df.iloc[i]
        y = np.float32(r.target)
        file_id = r.id

        img = np.empty((2, 360, 128), dtype=np.float32)

        filename = '%s/%s/%s.hdf5' % (di, self.data_type, file_id)
        with h5py.File(filename, 'r') as f:
            g = f[file_id]

            for ch, s in enumerate(['H1', 'L1']):
                a = g[s]['SFTs'][:, :4096] * 1e22  # Fourier coefficient complex64

                p = a.real**2 + a.imag**2  # power
                p /= np.mean(p)  # normalize
                p = np.mean(p.reshape(360, 128, 32), axis=2)  # compress 4096 -> 128

                img[ch] = p

        if self.tfms:
            # .copy() after np.flip: torch.from_numpy cannot wrap the
            # negative-stride view that np.flip returns.
            if np.random.rand() <= flip_rate:  # horizontal flip (axis 1, length 360)
                img = np.flip(img, axis=1).copy()
            if np.random.rand() <= flip_rate:  # vertical flip (axis 2, length 128)
                img = np.flip(img, axis=2).copy()
            if np.random.rand() <= fre_shift_rate:  # vertical shift (cyclic, axis 1)
                img = np.roll(img, np.random.randint(low=0, high=img.shape[1]), axis=1)

        img = torch.from_numpy(img)

        if self.tfms:
            for _ in range(time_mask_num):  # time masking
                img = transforms_time_mask(img)
            for _ in range(freq_mask_num):  # frequency masking
                img = transforms_freq_mask(img)

        return img, y
Audio Data Augmentation
- horizontal flip
- vertical flip
- vertical shift
- time masking*
- frequency masking*
*Reference
SpecAugment
https://arxiv.org/abs/1904.08779
Horizontal flip and Vertical flip
# Reference picture: sample 10 without augmentation.
dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])  # first channel, all 360 rows
plt.colorbar()
plt.show()

# Same sample with both flips always applied.
flip_rate = 1.0  # probability of applying the horizontal flip and vertical flip

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()
Vertical shift
# Reference picture: sample 10 without augmentation.
dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])  # first channel, all 360 rows
plt.colorbar()
plt.show()

# Same sample with flips disabled and the cyclic shift always applied.
flip_rate = 0.0  # probability of applying the horizontal flip and vertical flip
fre_shift_rate = 1.0  # probability of applying the vertical shift

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()
Time masking
# Reference picture: sample 10 without augmentation.
dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])  # first channel, all 360 rows
plt.colorbar()
plt.show()

# Same sample with only time masking enabled (three masks).
flip_rate = 0.0  # probability of applying the horizontal flip and vertical flip
fre_shift_rate = 0.0  # probability of applying the vertical shift
time_mask_num = 3  # number of time masking

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()
Frequency masking
# Reference picture: sample 10 without augmentation.
dataset = Dataset('train', df, tfms=False)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])  # first channel, all 360 rows
plt.colorbar()
plt.show()

# Same sample with only frequency masking enabled (three masks).
flip_rate = 0.0  # probability of applying the horizontal flip and vertical flip
fre_shift_rate = 0.0  # probability of applying the vertical shift
time_mask_num = 0  # number of time masking
freq_mask_num = 3  # number of frequency masking

dataset = Dataset('train', df, tfms=True)
img, y = dataset[10]

plt.figure(figsize=(8, 3))
plt.title('Spectrogram')
plt.xlabel('time')
plt.ylabel('frequency')
plt.imshow(img[0, 0:360])
plt.colorbar()
plt.show()
Model
class Model(nn.Module):
    """
    timm backbone with two input channels and a single-logit linear head.
    """

    def __init__(self, name, *, pretrained=False):
        """
        name (str): timm model name, e.g. tf_efficientnet_b2_ns
        pretrained (bool): passed through to timm.create_model
        """
        super().__init__()

        # Use timm
        model = timm.create_model(name, pretrained=pretrained, in_chans=2)

        # Replace the backbone's own classifier with an identity so that the
        # backbone outputs features, then attach a fresh 1-output head.
        clsf = model.default_cfg['classifier']
        n_features = model._modules[clsf].in_features
        model._modules[clsf] = nn.Identity()

        self.fc = nn.Linear(n_features, 1)
        self.model = model

    def forward(self, x):
        """Run the backbone, then the linear head; returns the head output."""
        x = self.model(x)
        x = self.fc(x)
        return x
Predict and evaluate
def evaluate(model, loader_val, *, compute_score=True, pbar=None):
    """
    Predict and compute loss and score

    model: module called as model(img); its output is flattened to one
      logit per sample
    loader_val: iterable of (img, y) batches
    compute_score (bool): compute ROC AUC with roc_auc_score when True
    pbar (int or None): total for a tqdm progress bar; None disables it

    Returns a dict with
      'loss': sample-weighted mean of the module-level `criterion`
      'score': ROC AUC, or None when compute_score is False
      'y': concatenated targets (np.ndarray)
      'y_pred': concatenated sigmoid outputs (np.ndarray)
      'time': elapsed seconds

    Uses the module-level `device` and `criterion`. The model is switched to
    eval mode for the duration and put back in its previous mode afterwards.
    """
    tb = time.time()
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    n_sum = 0
    y_all = []
    y_pred_all = []

    if pbar is not None:
        pbar = tqdm(desc='Predict', nrows=78, total=pbar)

    try:
        for img, y in loader_val:
            n = y.size(0)
            img = img.to(device)
            y = y.to(device)

            with torch.no_grad():
                y_pred = model(img)
                loss = criterion(y_pred.view(-1), y)

            n_sum += n
            loss_sum += n * loss.item()

            y_all.append(y.cpu().detach().numpy())
            # view(-1) rather than squeeze(): a batch holding one sample must
            # stay 1-D, otherwise np.concatenate rejects the 0-d array.
            y_pred_all.append(y_pred.sigmoid().view(-1).cpu().detach().numpy())

            if pbar is not None:
                pbar.update(len(img))

            del loss, y_pred, img, y
    finally:
        if pbar is not None:
            pbar.close()
        # back to train from eval if necessary
        model.train(was_training)

    loss_val = loss_sum / n_sum

    y = np.concatenate(y_all)
    y_pred = np.concatenate(y_pred_all)

    score = roc_auc_score(y, y_pred) if compute_score else None

    ret = {'loss': loss_val,
           'score': score,
           'y': y,
           'y_pred': y_pred,
           'time': time.time() - tb}

    return ret
Train
# Model and cross-validation settings
model_name = 'tf_efficientnet_b7_ns'
nfold = 5
kfold = KFold(n_splits=nfold, random_state=42, shuffle=True)

# Optimisation settings
epochs = 25
batch_size = 16
num_workers = 2
weight_decay = 1e-6
max_grad_norm = 1000

lr_max = 4e-4
epochs_warmup = 1.0

## setting of audio data augmentation
flip_rate = 0.5  # probability of applying the horizontal flip and vertical flip
fre_shift_rate = 1.0  # probability of applying the vertical shift
time_mask_num = 1  # number of time masking
freq_mask_num = 2  # number of frequency masking
Predict and submit
# Load model (if necessary)
submit = pd.read_csv(di + '/sample_submission.csv')
model = Model(model_name, pretrained=False)
model.to(device)

if not COLAB:
    n_models = 5  # checkpoints model0 .. model4

    # The test loader does not depend on the checkpoint, so build it once.
    dataset_test = Dataset('test', submit)
    loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=64,
                                              num_workers=num_workers, pin_memory=True)

    # NOTE(review): the extracted source re-read sample_submission.csv right
    # before every prediction, which would discard the running sum and keep
    # only the last checkpoint's contribution. The `/ 5` points at a
    # 5-checkpoint average, so the frame is kept across iterations and the
    # target starts from zero - confirm against the original notebook.
    submit['target'] = 0.0

    for i in range(n_models):
        # Load model (if necessary)
        model = Model(model_name, pretrained=False)
        filename = f'../input/g2net-b7-aug/model{i}.pytorch'
        model.to(device)
        model.load_state_dict(torch.load(filename, map_location=device))
        model.eval()

        # Predict
        test = evaluate(model, loader_test, compute_score=False, pbar=len(submit))

        # Write prediction
        submit['target'] += test['y_pred'] / n_models

submit.to_csv('submission.csv', index=False)
print('target range [%.2f, %.2f]' % (submit['target'].min(), submit['target'].max()))
{"version_major":2,"version_minor":0,"model_id":"6a88bc6d8df7432b89c77446ff32c9d9"}
Comments
Post a Comment