题目
这个比赛主要是做孟加拉语识别,孟加拉语是一种非常奇特的文字。每个字有三种属性,相当于一个多标签分类问题。
标签间的耦合性相对较低,所以我使用三个不同的模型分别训练每个标签,然后再组装。简单来说就是对三个标签分别
训练:用三个densenet模型,分别做168分类、11分类和7分类,同一张图片分别输入到三个模型,得到三个结果。
最终名次不高,10000美刀毛都没搞到,又白忙活了一场。
数据
读取parquet格式数据需要指定一个引擎(这里使用pyarrow)
import torch
import torchvision
from PIL import Image
import os
import pandas as pd
import numpy as np
# Unpack one test parquet shard into individual JPEG files under ./test_data/.
# The pyarrow engine has to be named explicitly for this file to load.
df = pd.read_parquet('./test_image_data_3.parquet', engine='pyarrow')

# Split by column position: column 0 is the image name, every later column is
# one pixel of the flattened picture.  Slicing the frame *before* calling
# .values keeps the name column out of the pixel matrix, so pandas does not
# have to build a single array spanning both kinds of column.
image_name = df.iloc[:, 0].values
image = df.iloc[:, 1:].values

# Image.save() does not create missing directories, so make sure it exists.
os.makedirs('./test_data', exist_ok=True)

for name, pixels in zip(image_name, image):
    # Each row becomes a 137 x 236 array of unsigned bytes, saved as a picture.
    image_TR = Image.fromarray(np.array(pixels, np.uint8).reshape(137, 236))
    image_TR.save('./test_data/{}.jpg'.format(name))
当数据量较大的时候,我一次只读入一个parquet文件,然后逐行保存成图片
from tqdm import tqdm
import joblib
import glob
# Convert training parquet shards into one JPEG per image under ./train_data/.
files = glob.glob("./train_image_data_2.parquet")  # start with a single shard

# Image.save() does not create missing directories; create it up front so the
# run does not fail only after a whole shard has been loaded.
os.makedirs("./train_data", exist_ok=True)

for f in files:
    # The whole shard is loaded at once; its rows are then written one by one.
    df = pd.read_parquet(f, engine='pyarrow')
    image_ids = df.image_id.values
    df = df.drop("image_id", axis=1)  # leave only the pixel columns
    image_array = df.values
    for image_id, pixels in tqdm(zip(image_ids, image_array), total=len(image_ids)):
        # Each row becomes a 137 x 236 array of unsigned bytes.
        picture = np.array(pixels, np.uint8).reshape(137, 236)
        Image.fromarray(picture).save(f"./train_data/{image_id}.jpg")
训练
我使用了densenet121模型,直接用torchvision里已经定义好的模型来构建
生成训练数据
import torch
import torchvision
from PIL import Image
import os
import pandas as pd
import numpy as np
# Directory that holds the extracted training JPEGs (note the trailing slash).
root = "./train_data/"

# Label table for the training images, loaded from CSV.
train_data = pd.read_csv("./train.csv")
class data_set(torch.utils.data.Dataset):
    """Single-label view over the training images.

    Each item pairs one image with the value of one label column, so a
    separate dataset can be built for each label.

    Args:
        train_data: Label table (a pandas DataFrame). Column 0 is read as the
            image name and column ``x`` as the target.
        x: Positional index of the target column. Defaults to 3.
        image_root: Directory containing ``<image name>.jpg`` files. When
            omitted, the module-level ``root`` is looked up at item-access
            time, which is what the class did before this parameter existed.
    """

    def __init__(self, train_data, x=3, image_root=None):
        table = train_data.values
        self.train_image_name = table[:, 0]
        self.train_image_target = table[:, x]
        self.image_root = image_root

    def __getitem__(self, ind):
        """Return ``(image, target)`` for sample ``ind``.

        ``image`` is a float32 tensor with a leading channel axis added.
        ``target`` is the raw value from the label column; it is returned as a
        single value because the cross-entropy loss takes one target per
        sample.
        """
        folder = root if self.image_root is None else self.image_root
        image_path = os.path.join(folder, self.train_image_name[ind] + '.jpg')
        # Read the pixels inside a context manager so the file handle is
        # released deterministically.
        with Image.open(image_path) as picture:
            pixels = np.array(picture, np.float32)
        image = torch.from_numpy(pixels).unsqueeze(0)  # add the channel axis
        target = self.train_image_target[ind]
        return image, target

    def __len__(self):
        """Return the number of samples (one per label row)."""
        return len(self.train_image_target)
# Wrap the label table in the dataset (default target column) and serve it in
# shuffled mini-batches of 32 samples.
train_data_set = data_set(train_data)
train_data_set_loader = torch.utils.data.DataLoader(
    train_data_set,
    batch_size=32,
    shuffle=True,
)
创建模型
# Start from torchvision's DenseNet-121 with its pretrained weights.
model = torchvision.models.densenet121(pretrained=True)

# Swap in a new first convolution that takes a single input channel.  The new
# layer is freshly initialised, so it does not reuse the pretrained weights.
model.features.conv0 = torch.nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
)

# Swap in a new classification head: 1024 input features -> 7 classes.
model.classifier = torch.nn.Linear(in_features=1024, out_features=7)
创建损失和优化器
# Cross-entropy criterion.  It must be bound to ``Loss_f`` because that is the
# name the training loop calls; the earlier spelling ``oss_f`` left ``Loss_f``
# undefined and made the first training step raise NameError.
Loss_f = torch.nn.CrossEntropyLoss()

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)
训练
使用我电脑的1060显卡,需要12小时左右
# Train for 10 epochs on the GPU, log every 100 batches, and write a
# checkpoint of the whole model after each epoch.
os.makedirs('./mode_zoo', exist_ok=True)  # torch.save() does not create directories
model.cuda()
for i in range(10):
    for k, (img, tar) in enumerate(train_data_set_loader, start=1):
        optimizer.zero_grad()
        out = model(img.cuda())
        # Compute the loss on the GPU: move the small target batch to the
        # device instead of copying the logits back to the CPU every step.
        Loss = Loss_f(out, tar.cuda())
        Loss.backward()
        optimizer.step()
        if k % 100 == 0:
            # .item() yields a plain float rather than the tensor repr.
            print("第 {} epoch第 {} batch的loss={}".format(i, k, Loss.item()))
    # NOTE(review): the original indentation was lost; a checkpoint per epoch
    # is assumed because the file name carries the epoch index - confirm.
    # Loss.item() keeps the tensor repr out of the file name.
    torch.save(model, './mode_zoo/mode_7分类{}loass-{}.pth'.format(i, Loss.item()))
预测
导入模型
# Build a DenseNet-121 with a single-channel first convolution and a
# 168-class head.
model = torchvision.models.densenet121()
model.features.conv0 = torch.nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
)
model.classifier = torch.nn.Linear(in_features=1024, out_features=168)

# Load the saved 168-class checkpoint.  torch.load unpickles the file, so only
# load checkpoints you trust.
# NOTE(review): ``model`` built above is not passed to this call; the loaded
# object is bound to ``mode_168`` on its own.
mode_168 = torch.load('./mode_zoo/mode1loass-0.8475236892700195.pth')
预测结果
def _predict_class(net, image_path):
    """Return the arg-max class index that ``net`` predicts for one image file."""
    with Image.open(image_path) as picture:
        pixels = np.array(picture, np.float32)
    # Add two leading axes (batch and channel) before feeding the network.
    batch = torch.from_numpy(pixels).unsqueeze(0).unsqueeze(0)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        scores = net(batch.cuda())
    return int(scores.argmax().item())


# Which network answers which kind of row.  The key is the third "_"-separated
# field of the row id.
_component_models = {
    'consonant': mode_7,
    'grapheme': mode_168,
    'vowel': mode_11,
}

# Switch every network to evaluation mode once, instead of once per row.
for _net in _component_models.values():
    _net.eval()

for i, row in enumerate(test_csv_np):
    name_imge = row[0].split('_')
    net = _component_models.get(name_imge[2])
    if net is None:
        # Rows naming any other component are left untouched, as before.
        continue
    # The second field of the row id selects the image file.
    image_path = './test_data/' + 'Test_' + name_imge[1] + '.jpg'
    test_csv_np[i][1] = _predict_class(net, image_path)
生成预测文件
# Assemble the predictions into a two-column table and write it out.
ss_data = pd.DataFrame(test_csv_np, columns=['row_id', 'target'])
# index=False keeps the pandas row index out of the file, so the CSV holds
# exactly the two declared columns rather than an extra unnamed first column.
ss_data.to_csv('submission.csv', index=False)