78 lines
2.6 KiB
Python
78 lines
2.6 KiB
Python
import json
|
||
from bs4 import BeautifulSoup
|
||
import requests
|
||
import imagesize
|
||
from os import listdir
|
||
from tqdm import tqdm
|
||
import cv2
|
||
import io
|
||
import math
|
||
import magic
|
||
|
||
RESIZE_THRESHOLD=3000*3000
|
||
|
||
|
||
def resize_img_to_threshold(img, height, width):
|
||
k = math.sqrt(height*width/(RESIZE_THRESHOLD))
|
||
img = cv2.resize(img, (round(width/k), round(height/k)),interpolation=cv2.INTER_AREA)
|
||
return img
|
||
|
||
|
||
def yandex_reverse_search(filePath=None, image_buffer=None):
|
||
searchUrl = 'https://yandex.ru/images/search'
|
||
if filePath:
|
||
image_buffer = open(filePath, 'rb')
|
||
|
||
files = {'upfile': ('blob', image_buffer, 'image/jpeg')}
|
||
params = {'rpt': 'imageview', 'format': 'json','request': '{"blocks":[{"block":"b-page_type_search-by-image__link"}]}'}
|
||
response = requests.post(searchUrl, params=params, files=files)
|
||
query_string = json.loads(response.content)['blocks'][0]['params']['url']
|
||
img_search_url = searchUrl + '?' + query_string
|
||
search_page_text = requests.get(img_search_url).text
|
||
search_page_soup = BeautifulSoup(search_page_text, 'html.parser')
|
||
search_page_dim_div = search_page_soup.find("div", class_="Tags Tags_type_simple Tags_view_buttons")
|
||
if search_page_dim_div:
|
||
links = search_page_dim_div.find_all("a")
|
||
dimensions = links[0].getText().split("×")
|
||
pixels = int(dimensions[0])*int(dimensions[1])
|
||
return (img_search_url, pixels, dimensions)
|
||
return (0, 0, 0)
|
||
|
||
|
||
IMAGE_PATH="./../../import/images"
|
||
file_names=listdir(IMAGE_PATH)
|
||
|
||
for file_name in tqdm(file_names):
|
||
io_buf=None
|
||
img_path = f'{IMAGE_PATH}/{file_name}'
|
||
width, height = imagesize.get(img_path)
|
||
|
||
if width*height > RESIZE_THRESHOLD:
|
||
mime_type = magic.from_buffer(open(img_path, "rb").read(2048), mime=True)
|
||
if mime_type == "image/jpeg":
|
||
ext = ".jpg"
|
||
if mime_type == "image/png":
|
||
ext = ".png"
|
||
img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
|
||
img = resize_img_to_threshold(img, height, width)
|
||
is_success, buffer = cv2.imencode(ext, img)
|
||
if is_success:
|
||
io_buf = io.BytesIO(buffer)
|
||
|
||
try:
|
||
if io_buf:
|
||
img_search_url, pixels, dimensions = yandex_reverse_search(image_buffer=io_buf)
|
||
else:
|
||
img_search_url, pixels, dimensions = yandex_reverse_search(img_path)
|
||
|
||
except Exception as e:
|
||
print(e)
|
||
print(f'yandex_reverse_search error. {file_name}')
|
||
continue
|
||
|
||
if width*height < pixels:
|
||
print(img_path)
|
||
print(f"original resolution - {width, height}")
|
||
print(f"new resolution {dimensions}")
|
||
print(img_search_url)
|