qw-gallery-scenery/tools/yandex_rs.py
qwertyforce 24d0cd0734 scenery is working again
Renovated to support new version of ambience. Deleted some unused functionality (may bring it back later)
2022-08-30 22:22:35 +03:00

78 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from bs4 import BeautifulSoup
import requests
import imagesize
from os import listdir
from tqdm import tqdm
import cv2
import io
import math
import magic
# Pixel-count limit (width * height): images above it are downscaled
# before being uploaded for the reverse search.
RESIZE_THRESHOLD=3000*3000
def resize_img_to_threshold(img, height, width):
    """Downscale ``img`` so that its pixel count is about ``RESIZE_THRESHOLD``.

    Both sides are divided by the same factor, so the aspect ratio is kept
    up to rounding to whole pixels.

    Args:
        img: Image in a form accepted by ``cv2.resize``.
        height: Current height of ``img`` in pixels.
        width: Current width of ``img`` in pixels.

    Returns:
        The image returned by ``cv2.resize`` using ``INTER_AREA``
        interpolation.
    """
    # Linear scale factor is the square root of the area ratio.
    scale = math.sqrt(height * width / RESIZE_THRESHOLD)
    # cv2.resize takes the target size as (width, height).
    target_size = (round(width / scale), round(height / scale))
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
def yandex_reverse_search(filePath=None, image_buffer=None):
    """Upload an image to Yandex image search and read the first size tag.

    The image is posted to the search endpoint, the returned query string is
    used to fetch the results page, and the first link inside the tags block
    of that page is parsed as ``<width>×<height>``.

    Args:
        filePath: Path of an image file to upload. When truthy it takes
            precedence over ``image_buffer``.
        image_buffer: Image data to upload when ``filePath`` is not given
            (e.g. an ``io.BytesIO``).

    Returns:
        A tuple ``(img_search_url, pixels, dimensions)`` where ``dimensions``
        is the list of strings obtained by splitting the first tag's text on
        "×" and ``pixels`` is the product of its first two entries.
        ``(0, 0, 0)`` is returned when the results page has no tags block.

    Raises:
        ValueError: If neither ``filePath`` nor ``image_buffer`` is supplied.
        Exception: Network, JSON and parsing errors from the underlying
            calls propagate unchanged.
    """
    if not filePath and image_buffer is None:
        # Without this guard the request would be sent with no image at all
        # and fail later with an unrelated parsing error.
        raise ValueError('either filePath or image_buffer must be provided')

    searchUrl = 'https://yandex.ru/images/search'
    params = {'rpt': 'imageview', 'format': 'json','request': '{"blocks":[{"block":"b-page_type_search-by-image__link"}]}'}

    # Only a file opened here is owned (and therefore closed) by this
    # function; a caller-supplied buffer is left untouched.
    opened_file = open(filePath, 'rb') if filePath else None
    try:
        upload = opened_file if opened_file is not None else image_buffer
        files = {'upfile': ('blob', upload, 'image/jpeg')}
        response = requests.post(searchUrl, params=params, files=files)
    finally:
        if opened_file is not None:
            opened_file.close()

    query_string = json.loads(response.content)['blocks'][0]['params']['url']
    img_search_url = searchUrl + '?' + query_string

    search_page_text = requests.get(img_search_url).text
    search_page_soup = BeautifulSoup(search_page_text, 'html.parser')
    search_page_dim_div = search_page_soup.find("div", class_="Tags Tags_type_simple Tags_view_buttons")
    if not search_page_dim_div:
        return (0, 0, 0)

    links = search_page_dim_div.find_all("a")
    # The separator is the multiplication sign "×" (U+00D7), not the letter x.
    dimensions = links[0].getText().split("×")
    pixels = int(dimensions[0]) * int(dimensions[1])
    return (img_search_url, pixels, dimensions)
# Directory whose files are checked against Yandex reverse image search.
IMAGE_PATH = "./../../import/images"

# Extension handed to cv2.imencode, keyed by the sniffed MIME type.
_ENCODE_EXT_BY_MIME = {"image/jpeg": ".jpg", "image/png": ".png"}


def _downscaled_buffer(img_path, width, height):
    """Return an in-memory downscaled copy of the image, or ``None``.

    ``None`` is returned when the file is neither JPEG nor PNG, when OpenCV
    cannot decode it, or when re-encoding fails; the caller then uploads the
    original file instead.
    """
    # Sniff the type from the first 2048 bytes; close the handle right away.
    with open(img_path, "rb") as image_file:
        mime_type = magic.from_buffer(image_file.read(2048), mime=True)
    ext = _ENCODE_EXT_BY_MIME.get(mime_type)
    if ext is None:
        return None
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if img is None:  # cv2.imread signals a decode failure by returning None
        return None
    img = resize_img_to_threshold(img, height, width)
    is_success, buffer = cv2.imencode(ext, img)
    return io.BytesIO(buffer) if is_success else None


file_names = listdir(IMAGE_PATH)
for file_name in tqdm(file_names):
    img_path = f'{IMAGE_PATH}/{file_name}'
    width, height = imagesize.get(img_path)

    # Oversized images are shrunk in memory before upload when possible.
    io_buf = None
    if width * height > RESIZE_THRESHOLD:
        io_buf = _downscaled_buffer(img_path, width, height)

    try:
        if io_buf is not None:
            img_search_url, pixels, dimensions = yandex_reverse_search(image_buffer=io_buf)
        else:
            img_search_url, pixels, dimensions = yandex_reverse_search(img_path)
    except Exception as e:
        # Best effort: report the failure and move on to the next file.
        print(e)
        print(f'yandex_reverse_search error. {file_name}')
        continue

    # Report only when the search found a copy larger than the local file.
    if width * height < pixels:
        print(img_path)
        print(f"original resolution - {width, height}")
        print(f"new resolution {dimensions}")
        print(img_search_url)