qw-gallery-scenery/tools/yandex_rs.py

78 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
from bs4 import BeautifulSoup
import requests
import imagesize
from os import listdir
from tqdm import tqdm
import cv2
import io
import math
import magic
RESIZE_THRESHOLD=3000*3000
def resize_img_to_threshold(img, height, width):
k = math.sqrt(height*width/(RESIZE_THRESHOLD))
img = cv2.resize(img, (round(width/k), round(height/k)),interpolation=cv2.INTER_AREA)
return img
def yandex_reverse_search(filePath=None, image_buffer=None):
searchUrl = 'https://yandex.ru/images/search'
if filePath:
image_buffer = open(filePath, 'rb')
files = {'upfile': ('blob', image_buffer, 'image/jpeg')}
params = {'rpt': 'imageview', 'format': 'json','request': '{"blocks":[{"block":"b-page_type_search-by-image__link"}]}'}
response = requests.post(searchUrl, params=params, files=files)
query_string = json.loads(response.content)['blocks'][0]['params']['url']
img_search_url = searchUrl + '?' + query_string
search_page_text = requests.get(img_search_url).text
search_page_soup = BeautifulSoup(search_page_text, 'html.parser')
search_page_dim_div = search_page_soup.find("div", class_="Tags Tags_type_simple Tags_view_buttons")
if search_page_dim_div:
links = search_page_dim_div.find_all("a")
dimensions = links[0].getText().split("×")
pixels = int(dimensions[0])*int(dimensions[1])
return (img_search_url, pixels, dimensions)
return (0, 0, 0)
IMAGE_PATH="./../../import/images"
file_names=listdir(IMAGE_PATH)
for file_name in tqdm(file_names):
io_buf=None
img_path = f'{IMAGE_PATH}/{file_name}'
width, height = imagesize.get(img_path)
if width*height > RESIZE_THRESHOLD:
mime_type = magic.from_buffer(open(img_path, "rb").read(2048), mime=True)
if mime_type == "image/jpeg":
ext = ".jpg"
if mime_type == "image/png":
ext = ".png"
img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
img = resize_img_to_threshold(img, height, width)
is_success, buffer = cv2.imencode(ext, img)
if is_success:
io_buf = io.BytesIO(buffer)
try:
if io_buf:
img_search_url, pixels, dimensions = yandex_reverse_search(image_buffer=io_buf)
else:
img_search_url, pixels, dimensions = yandex_reverse_search(img_path)
except Exception as e:
print(e)
print(f'yandex_reverse_search error. {file_name}')
continue
if width*height < pixels:
print(img_path)
print(f"original resolution - {width, height}")
print(f"new resolution {dimensions}")
print(img_search_url)