JKになりたい

何か書きたいことを書きます。主にWeb方面の技術系記事が多いかも。

ハイブリッドOCR

これまでの流れを汲んでOCRかけるクラスを完成させるで。
まだまだ改良の余地はあるけど、いつまでもこんなことばっかりやってられへんから一旦これでOCRは完結や。

流れは

1)白紙のセル判定 ->白紙なら終了 2)文字の部分だけ切り取る 3)tesseractにかける 4)数字ならそれで終了,数字以外ならvisionAPIにかける

簡単に実装するとこんな感じやで。

import requests
import base64
import json
import cv2
import re
import numpy as np
from PIL import Image
import sys
import pyocr
import pyocr.builders
from enum import Enum
from collections import namedtuple

# Kinds of cell content the OCR pipeline can produce (empty=1, string=2, number=3).
CellKind = Enum('CellKind', ['empty', 'string', 'number'])
# Result record: the detected kind plus the recognized text/value.
DetectText = namedtuple('DetectText', 'kind text')

class HybridOCR:
    """OCR pipeline that tries Tesseract first and falls back to the
    Google Cloud Vision API for non-numeric text.

    Flow: blank-cell check -> crop to the text region -> Tesseract ->
    if the result parses as a number, return it; otherwise send the
    original image to the Vision API.
    """

    def __init__(self):
        # Pick the first locally available OCR backend (expected: Tesseract).
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]

    def _isBlankCell(self, img_gray):
        """Return True when every pixel of the grayscale image is pure white (255)."""
        # Vectorized check instead of flattening the array into a Python list.
        return bool(np.all(img_gray == 255))

    def _getEndPoint(self, img_gray):
        """Return (leftmost, rightmost) column indices containing a non-white pixel.

        For an all-white image returns (sys.maxsize, 0), matching the loop
        version's no-hit result; callers are expected to blank-check first.
        """
        cols = np.where((img_gray != 255).any(axis=0))[0]
        if cols.size == 0:
            return (sys.maxsize, 0)
        return (int(cols[0]), int(cols[-1]))

    def _cutOut(self, points, img, margin=20):
        """Crop the image horizontally to [left - margin, right + margin].

        The left bound is clamped at 0: a negative slice start would wrap
        around to the right edge and produce a wrong (often empty) crop.
        """
        left = max(points[0] - margin, 0)
        return img[:, left:points[1] + margin]

    def _tesseract_ocr(self, img):
        """Run Tesseract on a PIL image.

        Returns a DetectText of kind `number` when the recognized text parses
        as a number, otherwise None (signalling the caller to fall back to
        the Vision API).
        """
        txt = self.tool.image_to_string(
            img,
            lang="jpn+eng",
            builder=pyocr.builders.TextBuilder(tesseract_layout=6)
        )
        # Strip thousands separators and all whitespace before parsing.
        txt = re.sub(r',|\s', "", txt)
        try:
            # float() accepts plain integers too, so one parse covers both.
            res = float(txt)
        except ValueError:
            return None
        return DetectText(kind=CellKind.number, text=res)

    def _visionapi_ocr(self, img_path):
        """Send the image to the Google Cloud Vision API (TEXT_DETECTION).

        Returns a DetectText of kind `string` with newlines stripped, or of
        kind `empty` when the API found no text or returned an error payload.
        """
        # Context manager guarantees the file handle is closed.
        with open(img_path, 'rb') as f:
            img = f.read()
        req_body = json.dumps({
            'requests': [{
                'image': {
                    'content': base64.b64encode(img).decode('UTF-8')
                },
                'features': [{
                    'type': 'TEXT_DETECTION',
                    'maxResults': 10,
                }]
            }]
        })
        # NOTE: <API_KEY> is a placeholder; substitute a real key before use.
        response = requests.post(
            url='https://vision.googleapis.com/v1/images:annotate?key=<API_KEY>',
            data=req_body,
            headers={'Content-Type': 'application/json'}
        )

        res = json.loads(response.text)
        try:
            txt = res['responses'][0]['textAnnotations'][0]['description'].replace('\n', '')
        except (KeyError, IndexError):
            # No annotations (or an error payload): treat as an empty cell.
            return DetectText(kind=CellKind.empty, text="")
        return DetectText(kind=CellKind.string, text=txt)

    def recognize(self, file_path):
        """Recognize the content of a single cell image file.

        Returns a DetectText whose kind is `empty`, `number`, or `string`.
        """
        image = cv2.imread(file_path, 1)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        if self._isBlankCell(img_gray):
            return DetectText(kind=CellKind.empty, text="")

        cut_image = self._cutOut(self._getEndPoint(img_gray), image)
        try:
            pil_image = Image.fromarray(cut_image)
        except Exception:
            # Fall back to the uncropped image if the crop cannot be converted.
            pil_image = Image.fromarray(image)

        detect = self._tesseract_ocr(pil_image)
        if detect is not None:
            return detect
        return self._visionapi_ocr(file_path)

(input)
f:id:deeptoneworks:20160930050552p:plain
(output)
DetectText(kind=<CellKind.number: 3>, text=42.1)

(input)
f:id:deeptoneworks:20160930050600p:plain
(output)
DetectText(kind=<CellKind.string: 2>, text='住宅の所有の関係(6区分)')

(input)
f:id:deeptoneworks:20160930050606p:plain

(output)
DetectText(kind=<CellKind.empty: 1>, text='')

ええ感じやな!次は素性作成器の実装や!