项目 GitHub:https://github.com/taisuii/ClassificationCaptchaOcr
某验验证码识别:算法部分不放了,识别的教程和代码如下。
import
requests
from
PIL
import
Image, ImageFont, ImageDraw, ImageOps
from
io
import
BytesIO
def crop_image(image_bytes, coordinates):
    """Cut the requested cells out of an image treated as a 3x3 grid.

    Args:
        image_bytes: Encoded image data that ``PIL.Image.open`` can read.
        coordinates: Iterable of ``(row, column)`` pairs, both 1-based,
            naming the grid cells to extract.

    Returns:
        A list of cropped images, one per coordinate, in the same order as
        ``coordinates``.
    """
    source = Image.open(BytesIO(image_bytes))
    full_width, full_height = source.size
    # Floor division: leftover pixels on the right/bottom edge are dropped.
    cell_w, cell_h = full_width // 3, full_height // 3

    def cell_box(row, col):
        # (left, upper, right, lower) of the 1-based cell (row, col).
        x0 = (col - 1) * cell_w
        y0 = (row - 1) * cell_h
        return (x0, y0, x0 + cell_w, y0 + cell_h)

    return [source.crop(cell_box(row, col)) for row, col in coordinates]
import os

# Every cell of the 3x3 captcha grid as 1-based [row, column] pairs,
# listed row by row starting from the top-left corner.
coordinates = [[row, col] for row in range(1, 4) for col in range(1, 4)]

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being handed to the
# image decoder as if it were a JPEG.
response = requests.get(
    "https://static.geetest.com/captcha_v4/policy/3d0936b11a2c4a65bbb53635e656c780/nine/110394/2024-09-06T00/ed02acd0ac294a41b880d9106240f12a.jpg",
    timeout=10,
)
response.raise_for_status()
bg_img = response.content

cropped_images = crop_image(bg_img, coordinates)

# Saving does not create missing directories, so make sure the output
# folder exists before writing the nine tiles.
os.makedirs("./test_crop", exist_ok=True)
for j, img_crop in enumerate(cropped_images):
    img_crop.save(f"./test_crop/bg{j}.jpg")
import
requests
from
PIL
import
Image, ImageFont, ImageDraw, ImageOps
from
io
import
BytesIO
def crop_image(image_bytes, coordinates):
    """Split an image into a 3x3 grid and return the selected tiles.

    Args:
        image_bytes: Raw bytes of an encoded image readable by
            ``PIL.Image.open``.
        coordinates: Iterable of 1-based ``(row, column)`` pairs selecting
            which tiles to return.

    Returns:
        List of cropped images in the order the coordinates were given.
    """
    picture = Image.open(BytesIO(image_bytes))
    # Integer tile size; any remainder pixels at the far edges are ignored.
    tile_width = picture.size[0] // 3
    tile_height = picture.size[1] // 3

    tiles = []
    for row, column in coordinates:
        top = (row - 1) * tile_height
        left = (column - 1) * tile_width
        bounds = (left, top, left + tile_width, top + tile_height)
        tiles.append(picture.crop(bounds))
    return tiles
import os

# All nine cells of the 3x3 grid as 1-based [row, column] pairs, in
# row-major order (top-left first, bottom-right last).
coordinates = [[row, col] for row in range(1, 4) for col in range(1, 4)]

# Bound the download with a timeout and fail loudly on a non-2xx status,
# rather than passing an error body on to the image decoder.
response = requests.get(
    "https://static.geetest.com/captcha_v4/policy/3d0936b11a2c4a65bbb53635e656c780/nine/110394/2024-09-06T00/ed02acd0ac294a41b880d9106240f12a.jpg",
    timeout=10,
)
response.raise_for_status()
bg_img = response.content

cropped_images = crop_image(bg_img, coordinates)

# The output directory is not created by save(); create it up front so the
# loop below does not fail on a fresh checkout.
os.makedirs("./test_crop", exist_ok=True)
for j, img_crop in enumerate(cropped_images):
    img_crop.save(f"./test_crop/bg{j}.jpg")
import
torchvision.transforms as transforms
from
torchvision.datasets
import
ImageFolder
from
tqdm
import
tqdm
import
torch
import
torchvision
import
torch.nn as nn
from
torch.utils.data
import
DataLoader
import
numpy as np
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels with a fixed mean/std.
# The mean/std values match the ImageNet statistics that torchvision
# documents for its pretrained models.
data_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])
class CustomDataset:
    """Map-style dataset backed by a torchvision ``ImageFolder``.

    Samples are read from ``data_dir`` and passed through the module-level
    ``data_transform`` pipeline.
    """

    def __init__(self, data_dir: str):
        """Build the underlying ``ImageFolder`` rooted at ``data_dir``."""
        self.dataset = ImageFolder(data_dir, transform=data_transform)

    def __len__(self) -> int:
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        sample, target = self.dataset[idx]
        return sample, target
class MyResNet18(torch.nn.Module):
    """ResNet-18 with pretrained weights and a replaced classification head.

    The final fully connected layer is swapped for a fresh ``nn.Linear``
    producing ``num_classes`` outputs.
    """

    def __init__(self, num_classes):
        """Create the backbone and attach a ``num_classes``-way head."""
        super().__init__()
        backbone = torchvision.models.resnet18(pretrained=True)
        # 512 is the input width of the fully connected layer being replaced.
        backbone.fc = nn.Linear(512, num_classes)
        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the network and return its output."""
        logits = self.resnet(x)
        return logits
def train(epoch, *, data_dir="dataset", model_dir="model", batch_size=64,
          num_classes=91):
    """Fine-tune ``MyResNet18`` on an image-folder dataset and checkpoint it.

    Args:
        epoch: Number of passes to make over the dataset.
        data_dir: Root directory handed to ``CustomDataset``.
        model_dir: Directory that receives the ``.pth`` checkpoints; it is
            created if it does not exist.
        batch_size: Number of samples per optimisation step.
        num_classes: Size of the classifier output layer.

    Side effects:
        Writes one ``my_resnet18_<average loss>.pth`` state dict into
        ``model_dir`` per epoch and prints a completion message.
    """
    import os

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    custom_dataset = CustomDataset(data_dir)
    data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

    model = MyResNet18(num_classes=num_classes)
    model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # torch.save() does not create directories; without this the first
    # checkpoint would fail only after a whole epoch of training.
    os.makedirs(model_dir, exist_ok=True)

    for i in range(epoch):
        losses = []
        data_loader_tqdm = tqdm(data_loader)
        for inputs, labels in data_loader_tqdm:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Read the scalar once instead of calling .item() twice per batch.
            batch_loss = loss.item()
            losses.append(batch_loss)
            epoch_loss = np.mean(losses)
            data_loader_tqdm.set_description(
                f"This epoch is {i} and it's loss is {batch_loss}, average loss {epoch_loss}"
            )
            loss.backward()
            optimizer.step()
        # NOTE(review): the original listing lost its indentation; the
        # checkpoint is assumed to be written once per epoch -- confirm.
        torch.save(model.state_dict(), f"{model_dir}/my_resnet18_{epoch_loss}.pth")
    print("completed. Model saved.")
# Script entry point: run 50 training epochs when executed directly.
if __name__ == "__main__":
    train(epoch=50)
import
torchvision.transforms as transforms
from
torchvision.datasets
import
ImageFolder
from
tqdm
import
tqdm
import
torch
import
torchvision
import
torch.nn as nn
from
torch.utils.data
import
DataLoader
import
numpy as np
# Image preprocessing pipeline, applied in order:
#   1. resize to a fixed 224x224 input,
#   2. convert the image to a tensor,
#   3. normalise the three channels with the given mean and std (the
#      ImageNet statistics documented for torchvision's pretrained models).
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)
class CustomDataset:
    """Thin wrapper around ``ImageFolder`` that applies ``data_transform``.

    It exposes ``__len__`` and ``__getitem__`` by delegating to the wrapped
    dataset.
    """

    def __init__(self, data_dir: str):
        """Load the image folder located at ``data_dir``."""
        self.dataset = ImageFolder(transform=data_transform, root=data_dir)

    def __len__(self) -> int:
        """Number of samples available."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Fetch the ``(image, label)`` pair at index ``idx``."""
        picture, class_index = self.dataset[idx]
        return picture, class_index
class MyResNet18(torch.nn.Module):
    """Pretrained torchvision ResNet-18 with a new final linear layer."""

    def __init__(self, num_classes):
        """Load the pretrained network and resize its output layer.

        Args:
            num_classes: Number of outputs of the new final layer.
        """
        super().__init__()
        self.resnet = torchvision.models.resnet18(pretrained=True)
        # Replace the stock head with one sized for this task.
        self.resnet.fc = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        """Return the wrapped network's output for input ``x``."""
        return self.resnet(x)
def
train(epoch):
device
=
torch.device(
"cuda"
if
torch.cuda.is_available()
else
"cpu"
)
data_dir
=
"dataset"
custom_dataset
=
CustomDataset(data_dir)
batch_size
=
64
data_loader
=
DataLoader(custom_dataset, batch_size
=
batch_size, shuffle
=
True
)
model
=
MyResNet18(num_classes
=
91
)
model.to(device)
[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课