Merge pull request #120349 from lukegb/debug-release-2009
nixos/test-driver: use a variety of different Tesseract settings for OCR
This commit is contained in:
commit
6e4f8b06f5
@ -186,6 +186,25 @@ start_all()
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
<varlistentry>
|
||||||
|
<term>
|
||||||
|
<methodname>get_screen_text_variants</methodname>
|
||||||
|
</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Return a list of different interpretations of what is currently visible
|
||||||
|
on the machine's screen using optical character recognition. The number
|
||||||
|
and order of the interpretations are not specified and are subject to
|
||||||
|
change, but if no exception is raised at least one will be returned.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
This requires passing <option>enableOCR</option> to the test attribute
|
||||||
|
set.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term>
|
<term>
|
||||||
<methodname>get_screen_text</methodname>
|
<methodname>get_screen_text</methodname>
|
||||||
@ -350,7 +369,8 @@ start_all()
|
|||||||
<para>
|
<para>
|
||||||
Wait until the supplied regular expression matches the textual contents
|
Wait until the supplied regular expression matches the textual contents
|
||||||
of the screen by using optical character recognition (see
|
of the screen by using optical character recognition (see
|
||||||
<methodname>get_screen_text</methodname>).
|
<methodname>get_screen_text</methodname> and
|
||||||
|
<methodname>get_screen_text_variants</methodname>).
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#! /somewhere/python3
|
#! /somewhere/python3
|
||||||
from contextlib import contextmanager, _GeneratorContextManager
|
from contextlib import contextmanager, _GeneratorContextManager
|
||||||
from queue import Queue, Empty
|
from queue import Queue, Empty
|
||||||
from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List
|
from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List, Iterable
|
||||||
from xml.sax.saxutils import XMLGenerator
|
from xml.sax.saxutils import XMLGenerator
|
||||||
import queue
|
import queue
|
||||||
import io
|
import io
|
||||||
@ -205,6 +205,37 @@ class Logger:
|
|||||||
self.xml.endElement("nest")
|
self.xml.endElement("nest")
|
||||||
|
|
||||||
|
|
||||||
|
def _perform_ocr_on_screenshot(
    screenshot_path: str, model_ids: Iterable[int]
) -> List[str]:
    """Run Tesseract over a screenshot once per requested engine mode.

    The screenshot is first preprocessed with ImageMagick's ``convert`` into
    ``<screenshot_path>.tiff`` (written next to the input file). That TIFF is
    then passed to ``tesseract`` once for every entry of ``model_ids``.

    Args:
        screenshot_path: Path of the image file to recognise.
        model_ids: Values handed to Tesseract's ``--oem`` option; one OCR run
            is performed per value.

    Returns:
        The UTF-8 decoded standard output of each Tesseract run, in the same
        order as ``model_ids``.

    Raises:
        Exception: If ``tesseract`` cannot be found on ``PATH``, or if the
            conversion or any OCR run exits with a non-zero status. A missing
            ``convert`` binary surfaces as ``FileNotFoundError``, which is
            itself an ``Exception``.
    """
    if shutil.which("tesseract") is None:
        raise Exception("OCR requested but enableOCR is false")

    magick_args = (
        "-filter Catrom -density 72 -resample 300 "
        "-contrast -normalize -despeckle -type grayscale "
        "-sharpen 1 -posterize 3 -negate -gamma 100 "
        "-blur 1x65535"
    ).split()

    tess_args = ["-c", "debug_file=/dev/null", "--psm", "11"]

    tiff_path = f"{screenshot_path}.tiff"

    # Commands are passed as argument lists and run without a shell, so a
    # screenshot path containing spaces or shell metacharacters reaches the
    # tools verbatim instead of being re-parsed by /bin/sh.
    convert_cmd = ["convert", *magick_args, screenshot_path, f"tiff:{tiff_path}"]
    ret = subprocess.run(convert_cmd, capture_output=True)
    if ret.returncode != 0:
        details = ret.stderr.decode("utf-8", errors="replace").strip()
        message = f"TIFF conversion failed with exit code {ret.returncode}"
        # Keep the tool's own diagnostics; the exit code alone rarely explains
        # why the conversion failed.
        raise Exception(f"{message}: {details}" if details else message)

    model_results = []
    for model_id in model_ids:
        ocr_cmd = ["tesseract", tiff_path, "-", *tess_args, "--oem", str(model_id)]
        ret = subprocess.run(ocr_cmd, capture_output=True)
        if ret.returncode != 0:
            details = ret.stderr.decode("utf-8", errors="replace").strip()
            message = f"OCR failed with exit code {ret.returncode}"
            raise Exception(f"{message}: {details}" if details else message)
        model_results.append(ret.stdout.decode("utf-8"))

    return model_results
|
||||||
|
|
||||||
|
|
||||||
class Machine:
|
class Machine:
|
||||||
def __init__(self, args: Dict[str, Any]) -> None:
|
def __init__(self, args: Dict[str, Any]) -> None:
|
||||||
if "name" in args:
|
if "name" in args:
|
||||||
@ -637,43 +668,29 @@ class Machine:
|
|||||||
"""Debugging: Dump the contents of the TTY<n>"""
|
"""Debugging: Dump the contents of the TTY<n>"""
|
||||||
self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty))
|
self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty))
|
||||||
|
|
||||||
|
def _get_screen_text_variants(self, model_ids: Iterable[int]) -> List[str]:
    """Dump the machine's screen to a temporary file and OCR it.

    A ``screendump`` monitor command writes the screen into a file named
    ``ppm`` inside a fresh temporary directory. That file is then handed to
    ``_perform_ocr_on_screenshot`` together with ``model_ids``. The temporary
    directory is removed before returning.

    Returns the list produced by ``_perform_ocr_on_screenshot``.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        dump_file = os.path.join(scratch_dir, "ppm")
        self.send_monitor_command(f"screendump {dump_file}")
        recognised = _perform_ocr_on_screenshot(dump_file, model_ids)
    return recognised
|
||||||
|
|
||||||
|
def get_screen_text_variants(self) -> List[str]:
    """Return the OCR readings of the current screen for model ids 0, 1 and 2.

    Thin wrapper around ``_get_screen_text_variants``; the result holds one
    interpretation per model id.
    """
    every_model_id = [0, 1, 2]
    return self._get_screen_text_variants(every_model_id)
|
||||||
|
|
||||||
def get_screen_text(self) -> str:
|
def get_screen_text(self) -> str:
|
||||||
if shutil.which("tesseract") is None:
|
return self._get_screen_text_variants([2])[0]
|
||||||
raise Exception("get_screen_text used but enableOCR is false")
|
|
||||||
|
|
||||||
magick_args = (
|
|
||||||
"-filter Catrom -density 72 -resample 300 "
|
|
||||||
+ "-contrast -normalize -despeckle -type grayscale "
|
|
||||||
+ "-sharpen 1 -posterize 3 -negate -gamma 100 "
|
|
||||||
+ "-blur 1x65535"
|
|
||||||
)
|
|
||||||
|
|
||||||
tess_args = "-c debug_file=/dev/null --psm 11 --oem 2"
|
|
||||||
|
|
||||||
with self.nested("performing optical character recognition"):
|
|
||||||
with tempfile.NamedTemporaryFile() as tmpin:
|
|
||||||
self.send_monitor_command("screendump {}".format(tmpin.name))
|
|
||||||
|
|
||||||
cmd = "convert {} {} tiff:- | tesseract - - {}".format(
|
|
||||||
magick_args, tmpin.name, tess_args
|
|
||||||
)
|
|
||||||
ret = subprocess.run(cmd, shell=True, capture_output=True)
|
|
||||||
if ret.returncode != 0:
|
|
||||||
raise Exception(
|
|
||||||
"OCR failed with exit code {}".format(ret.returncode)
|
|
||||||
)
|
|
||||||
|
|
||||||
return ret.stdout.decode("utf-8")
|
|
||||||
|
|
||||||
def wait_for_text(self, regex: str) -> None:
|
def wait_for_text(self, regex: str) -> None:
|
||||||
def screen_matches(last: bool) -> bool:
|
def screen_matches(last: bool) -> bool:
|
||||||
text = self.get_screen_text()
|
variants = self.get_screen_text_variants()
|
||||||
matches = re.search(regex, text) is not None
|
for text in variants:
|
||||||
|
if re.search(regex, text) is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
if last and not matches:
|
if last:
|
||||||
self.log("Last OCR attempt failed. Text was: {}".format(text))
|
self.log("Last OCR attempt failed. Text was: {}".format(variants))
|
||||||
|
|
||||||
return matches
|
return False
|
||||||
|
|
||||||
with self.nested("waiting for {} to appear on screen".format(regex)):
|
with self.nested("waiting for {} to appear on screen".format(regex)):
|
||||||
retry(screen_matches)
|
retry(screen_matches)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user