


from google.colab import drive
!pip install pdf2image
!apt-get install poppler-utils  # Install utility required by pdf2image
!pip install pdf2image

from pdf2image import convert_from_path
import cv2
import numpy as np
from PIL import Image
from google.colab import drive


# Rendering resolution used both for the page geometry below and for
# rasterising the PDF. The pixel constants that follow were computed for this
# value, so change them together with DPI.
DPI = 96

# A4 size in pixels at 96 dpi
A4_WIDTH = 793
A4_HEIGHT = 1122
BORDER_HEIGHT = 19  # 0.5 cm in pixels at 96 dpi

# Convert PDF to list of images
# NOTE(review): nothing in the visible code calls drive.mount(); this path
# presumably relies on Drive already being mounted at /content/drive - confirm.
pdf_path = '/content/drive/My Drive/hi.pdf'  # Replace with your PDF path
pages = convert_from_path(pdf_path, dpi=DPI)

# Function to crop an image to its edges
def crop_to_edges(img):
    """Crop *img* to the bounding box of its Canny edge pixels.

    Args:
        img: PIL image to crop. It is converted to 8-bit grayscale for edge
            detection; the crop itself is taken from the original image.

    Returns:
        The cropped image. If no edge pixels are detected (for example on a
        blank page) the original image object is returned unchanged.
    """
    img_np = np.array(img.convert('L'))
    edges = cv2.Canny(img_np, 50, 150)
    y_indices, x_indices = np.nonzero(edges)

    if x_indices.size == 0:
        # Nothing to bound: min()/max() on an empty sequence would raise
        # ValueError, so fall back to the uncropped image.
        return img

    left, right = int(x_indices.min()), int(x_indices.max())
    upper, lower = int(y_indices.min()), int(y_indices.max())
    # PIL crop boxes exclude the right/lower coordinate, so add 1 to keep the
    # last row and column that contain edge pixels.
    return img.crop((left, upper, right + 1, lower + 1))

# Loop over pairs of pages
import os

output_dir = '/content/drive/My Drive/sodai'
# Image.save() does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Vertical space left for content once the top and bottom borders are removed.
usable_height = A4_HEIGHT - 2 * BORDER_HEIGHT

for i in range(0, len(pages), 2):
    # Take two pages and crop to edges. With an odd page count the final
    # sheet has no second page, so img2 is None instead of indexing past the
    # end of the list.
    img1 = crop_to_edges(pages[i])
    img2 = crop_to_edges(pages[i + 1]) if i + 1 < len(pages) else None
    images = [img for img in (img1, img2) if img is not None]

    # Maintain aspect ratio when resizing: one common scale, limited by the
    # widest image (not just the first one) and by the combined height.
    scale = min(
        A4_WIDTH / max(img.width for img in images),
        usable_height / sum(img.height for img in images),
    )
    # max(1, ...) guards against a zero-sized target for very thin crops.
    images = [
        img.resize((max(1, int(img.width * scale)), max(1, int(img.height * scale))))
        for img in images
    ]

    result = Image.new('RGB', (A4_WIDTH, A4_HEIGHT), 'white')

    # First image: horizontally centred, flush with the top border.
    img1 = images[0]
    result.paste(img1, ((A4_WIDTH - img1.width) // 2, BORDER_HEIGHT))

    # Second image (if any): horizontally centred, flush with the bottom border.
    if len(images) == 2:
        img2 = images[1]
        result.paste(
            img2,
            ((A4_WIDTH - img2.width) // 2, A4_HEIGHT - BORDER_HEIGHT - img2.height),
        )

    # Save the combined image
    result.save(f'{output_dir}/output_{i//2 + 1}.jpg')






!apt-get install poppler-utils  # Install utility required by pdf2image
!pip install pdf2image

from pdf2image import convert_from_path
import cv2
import numpy as np
from PIL import Image
from google.colab import drive


# Rendering resolution used both for the page geometry below and for
# rasterising the PDF. The pixel constants that follow were computed for this
# value, so change them together with DPI.
DPI = 96

# A4 size in pixels at 96 dpi
A4_WIDTH = 793
A4_HEIGHT = 1122
BORDER_HEIGHT = 19  # 0.5 cm in pixels at 96 dpi

# Convert PDF to list of images
# NOTE(review): nothing in the visible code calls drive.mount(); this path
# presumably relies on Drive already being mounted at /content/drive - confirm.
pdf_path = '/content/drive/My Drive/hi.pdf'  # Replace with your PDF path
pages = convert_from_path(pdf_path, dpi=DPI)

# Function to crop an image to its edges and get center
def crop_to_edges_and_center(img):
    """Crop *img* to the bounding box of its Canny edges and report its centre.

    Args:
        img: PIL image to crop. It is converted to 8-bit grayscale for edge
            detection; the crop itself is taken from the original image.

    Returns:
        A ``(cropped, center_x)`` tuple. ``center_x`` is the horizontal
        midpoint of the edge bounding box measured in the coordinates of the
        *uncropped* input image. If no edge pixels are detected (for example
        on a blank page) the original image and its own horizontal midpoint
        are returned.
    """
    img_np = np.array(img.convert('L'))
    edges = cv2.Canny(img_np, 50, 150)
    y_indices, x_indices = np.nonzero(edges)

    if x_indices.size == 0:
        # Nothing to bound: min()/max() on an empty sequence would raise
        # ValueError, so fall back to the uncropped image.
        return img, img.width // 2

    left, right = int(x_indices.min()), int(x_indices.max())
    upper, lower = int(y_indices.min()), int(y_indices.max())
    center_x = (left + right) // 2
    # PIL crop boxes exclude the right/lower coordinate, so add 1 to keep the
    # last row and column that contain edge pixels.
    return img.crop((left, upper, right + 1, lower + 1)), center_x

# Loop over pairs of pages
import os

output_dir = '/content/drive/My Drive/sodai'
# Image.save() does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

# Vertical space left for content once the top and bottom borders are removed.
usable_height = A4_HEIGHT - 2 * BORDER_HEIGHT

# Candidate scales are the same for every sheet, so build them once.
scales = np.linspace(0.1, 1.0, 10)  # Try scales from 0.1 to 1.0

for i in range(0, len(pages), 2):
    # Take up to two pages and crop to edges; slicing tolerates an odd page
    # count (the last sheet then carries a single page).
    # The centre returned by crop_to_edges_and_center is in uncropped page
    # coordinates; using it as a paste offset for the already-cropped image
    # shifted the content left by its crop margin, so only the image is kept
    # and centring is done on the cropped width below.
    images = [crop_to_edges_and_center(page)[0] for page in pages[i:i + 2]]

    # Score each candidate scale from the dark-pixel counts of the resized
    # images: |mean count - mean over all scales| + variance between images.
    # NOTE(review): heuristic kept exactly as originally written.
    areas = []
    for scale in scales:
        counts = []
        for img in images:
            resized = img.resize(
                (max(1, int(img.width * scale)), max(1, int(img.height * scale)))
            )
            # Count black pixels
            counts.append(np.sum(np.array(resized.convert('L')) < 128))
        areas.append(counts)
    areas = np.array(areas)
    mean_areas = np.mean(areas, axis=1)
    var_areas = np.var(areas, axis=1)
    best_scale = scales[np.argmin(np.abs(mean_areas - np.mean(mean_areas)) + var_areas)]

    # Never exceed the scale at which the images still fit on the sheet;
    # otherwise they would overlap each other or be clipped at the page edge.
    fit_scale = min(
        A4_WIDTH / max(img.width for img in images),
        usable_height / sum(img.height for img in images),
    )
    best_scale = min(best_scale, fit_scale)

    # Resize images to best scale
    images = [
        img.resize(
            (max(1, int(img.width * best_scale)), max(1, int(img.height * best_scale)))
        )
        for img in images
    ]

    result = Image.new('RGB', (A4_WIDTH, A4_HEIGHT), 'white')

    # First image: horizontally centred, flush with the top border.
    img1 = images[0]
    result.paste(img1, ((A4_WIDTH - img1.width) // 2, BORDER_HEIGHT))

    # Second image (if any): horizontally centred, flush with the bottom border.
    if len(images) == 2:
        img2 = images[1]
        result.paste(
            img2,
            ((A4_WIDTH - img2.width) // 2, A4_HEIGHT - BORDER_HEIGHT - img2.height),
        )

    # Save the combined image
    result.save(f'{output_dir}/output_{i//2 + 1}.jpg')
