Commit 59809846 authored by mitshel's avatar mitshel
Browse files

Разработан новый метод поиска дубликатов (альфа версия)

parent 62f42950
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ root_lib = /mnt/SATA1TB-2/КНИГИ/BOOKS
#root_lib   = /mnt/SATA1TB-2/КНИГИ/TEST

#Список форматов, которые будут включаться в каталог
formats    = .pdf .djvu .fb2 .txt
formats    = .pdf .djvu .fb2

#Некоторым OPDS-клиентам требуется полный путь в HTTP-запросе
#таким образом cgi_path и cover_path - задают такие пути
@@ -24,7 +24,10 @@ wsgi_path=/opds/py/sopds.wsgi
# Количество выдаваемых строк на одну страницу
maxitems   = 60

# dublicates_find = yes, приводит к поиску дубликатов на основании полей title, format и filesize
# dublicates_find - управляет способом поиска дубликатов книг в базе данных
# dublicates_find = no     - поиск дубликатов не производится
# dublicates_find = yes    - производится поиск дубликатов на основании списка авторов и названия произведения
# dublicates_find = strong - производится поиск дубликатов на основании полей title, format и filesize
# при этом дубликат помещается в базу, но помечается как дубликат и может исключаться из выдачи
# при поиске по наименованиям и по авторам. В случае поиска по каталогам, книги, помеченные как дубликаты все равно будут показываться
dublicates_find = yes
+67 −0
Original line number Diff line number Diff line
@@ -112,8 +112,75 @@ commit;

-- Remove any existing definitions of these routines (no error if one is absent).
DROP PROCEDURE IF EXISTS sp_update_dbl;
DROP PROCEDURE IF EXISTS sp_newinfo;
DROP FUNCTION IF EXISTS BOOK_CMPSTR;
DROP PROCEDURE IF EXISTS sp_mark_dbl;
-- Routine bodies contain semicolons, so switch the client statement delimiter while defining them.
DELIMITER //

-- BOOK_CMPSTR: builds the comparison key used to group books that are
-- considered duplicates of each other.
--   id       - book_id of the book the key is built for
--   cmp_type - 1: key is TITLE:filesize:format
--              2: key is AUTHORS:TITLE
--              any other value: empty string
-- TITLE is books.title with spaces removed and converted to upper case.
-- AUTHORS is the ':'-separated list of the book's distinct author ids in
-- ascending order (empty string when the book has no rows in bauthors).
CREATE FUNCTION BOOK_CMPSTR(id INT, cmp_type INT)
RETURNS VARCHAR(512)
BEGIN
  DECLARE T VARCHAR(256) DEFAULT '';
  DECLARE fmt VARCHAR(8) DEFAULT '';
  DECLARE fsize INT DEFAULT 0;
  DECLARE AUTHORS VARCHAR(256) DEFAULT '';
  DECLARE RESULT VARCHAR(512);

  SELECT GROUP_CONCAT(DISTINCT author_id ORDER BY author_id SEPARATOR ':')
    INTO AUTHORS
    FROM bauthors
   WHERE book_id=id;
  -- GROUP_CONCAT yields NULL when there are no rows. A comparison written as
  -- "= NULL" is never true, so the test has to be IS NULL.
  IF AUTHORS IS NULL THEN
     SET AUTHORS='';
  END IF;

  SELECT UPPER(TRIM(REPLACE(title,' ',''))),format,filesize
    INTO T,fmt,fsize
    FROM books
   WHERE book_id=id;
  IF T IS NULL THEN
     SET T='';
  END IF;

  IF cmp_type=1 THEN
     -- Use the fetched local values: the column names format and filesize
     -- are not visible in a SET statement, which has no table in scope.
     SET RESULT=CONCAT_WS(':',T,fsize,fmt);
  ELSEIF cmp_type=2 THEN
     SET RESULT=CONCAT_WS(':',AUTHORS,T);
  ELSE
     SET RESULT='';
  END IF;

  RETURN RESULT;
END //

-- sp_mark_dbl: marks duplicate books in the books table.
--   cmp_type - comparison mode passed through to BOOK_CMPSTR
-- Available books (avail<>0) are grouped by BOOK_CMPSTR(book_id,cmp_type).
-- Every group that has more than one book and does not have exactly one
-- book with doublicat=0 is re-marked: the book with the largest filesize
-- (lowest book_id on ties) gets doublicat=0 and every other book in the
-- group gets doublicat set to that book's id.
-- NOTE(review): a group that has shrunk to a single book which is still
-- marked as a duplicate is not reset, because of the COUNT(*)>1 condition.
-- Confirm whether that is intended.
CREATE PROCEDURE sp_mark_dbl(cmp_type INT)
BEGIN
  DECLARE done INT DEFAULT 0;
  DECLARE idx,id_count,cur_id,orig_id INT;
  -- Sized to the default group_concat_max_len (1024). Groups whose id list
  -- is longer than that are still truncated by GROUP_CONCAT itself.
  DECLARE ids VARCHAR(1024);
  DECLARE cur CURSOR FOR
    SELECT GROUP_CONCAT(DISTINCT book_id ORDER BY filesize DESC, book_id SEPARATOR ':') AS ids
      FROM books
     WHERE avail<>0
     GROUP BY BOOK_CMPSTR(book_id,cmp_type)
    HAVING COUNT(*)>1
       AND SUM(CASE WHEN (doublicat=0) THEN 1 ELSE 0 END)<>1;
  DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

  OPEN cur;

  group_loop: LOOP
    FETCH cur INTO ids;
    IF done<>0 THEN
       LEAVE group_loop;
    END IF;

    -- Number of ids in the list is the number of separators plus one.
    SET id_count=LENGTH(ids)-LENGTH(REPLACE(ids,':',''))+1;
    SET idx=1;
    SET orig_id=0;
    WHILE idx<=id_count DO
       -- Extract the idx-th id from the ':'-separated list.
       SET cur_id=CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(ids,':',idx),':',-1) AS SIGNED);
       -- The first id is the original (doublicat=0), the rest point to it.
       UPDATE books SET doublicat=orig_id WHERE book_id=cur_id;
       IF orig_id=0 THEN
          SET orig_id=cur_id;
       END IF;
       SET idx=idx+1;
    END WHILE;
  END LOOP;

  CLOSE cur;
END //

CREATE PROCEDURE sp_update_dbl()
BEGIN
  DECLARE done INT DEFAULT 0;
+76 −0
Original line number Diff line number Diff line
-- Record the schema version in dbver.
update dbver set ver="0.21";
-- Remove any existing definitions of these routines (no error if one is absent).
DROP PROCEDURE IF EXISTS sp_update_dbl;
DROP FUNCTION IF EXISTS BOOK_CMPSTR;
DROP PROCEDURE IF EXISTS sp_mark_dbl;
-- Routine bodies contain semicolons, so switch the client statement delimiter while defining them.
DELIMITER //

-- BOOK_CMPSTR: builds the comparison key used to group books that are
-- considered duplicates of each other.
--   id       - book_id of the book the key is built for
--   cmp_type - 1: key is TITLE:filesize:format
--              2: key is AUTHORS:TITLE
--              any other value: empty string
-- TITLE is books.title with spaces removed and converted to upper case.
-- AUTHORS is the ':'-separated list of the book's distinct author ids in
-- ascending order (empty string when the book has no rows in bauthors).
CREATE FUNCTION BOOK_CMPSTR(id INT, cmp_type INT)
RETURNS VARCHAR(512)
BEGIN
  DECLARE T VARCHAR(256) DEFAULT '';
  DECLARE fmt VARCHAR(8) DEFAULT '';
  DECLARE fsize INT DEFAULT 0;
  DECLARE AUTHORS VARCHAR(256) DEFAULT '';
  DECLARE RESULT VARCHAR(512);

  SELECT GROUP_CONCAT(DISTINCT author_id ORDER BY author_id SEPARATOR ':')
    INTO AUTHORS
    FROM bauthors
   WHERE book_id=id;
  -- GROUP_CONCAT yields NULL when there are no rows. A comparison written as
  -- "= NULL" is never true, so the test has to be IS NULL.
  IF AUTHORS IS NULL THEN
     SET AUTHORS='';
  END IF;

  SELECT UPPER(TRIM(REPLACE(title,' ',''))),format,filesize
    INTO T,fmt,fsize
    FROM books
   WHERE book_id=id;
  IF T IS NULL THEN
     SET T='';
  END IF;

  IF cmp_type=1 THEN
     -- Use the fetched local values: the column names format and filesize
     -- are not visible in a SET statement, which has no table in scope.
     SET RESULT=CONCAT_WS(':',T,fsize,fmt);
  ELSEIF cmp_type=2 THEN
     SET RESULT=CONCAT_WS(':',AUTHORS,T);
  ELSE
     SET RESULT='';
  END IF;

  RETURN RESULT;
END //

-- sp_mark_dbl: marks duplicate books in the books table.
--   cmp_type - comparison mode passed through to BOOK_CMPSTR
-- Available books (avail<>0) are grouped by BOOK_CMPSTR(book_id,cmp_type).
-- Every group that has more than one book and does not have exactly one
-- book with doublicat=0 is re-marked: the book with the largest filesize
-- (lowest book_id on ties) gets doublicat=0 and every other book in the
-- group gets doublicat set to that book's id.
-- NOTE(review): a group that has shrunk to a single book which is still
-- marked as a duplicate is not reset, because of the COUNT(*)>1 condition.
-- Confirm whether that is intended.
CREATE PROCEDURE sp_mark_dbl(cmp_type INT)
BEGIN
  DECLARE done INT DEFAULT 0;
  DECLARE idx,id_count,cur_id,orig_id INT;
  -- Sized to the default group_concat_max_len (1024). Groups whose id list
  -- is longer than that are still truncated by GROUP_CONCAT itself.
  DECLARE ids VARCHAR(1024);
  DECLARE cur CURSOR FOR
    SELECT GROUP_CONCAT(DISTINCT book_id ORDER BY filesize DESC, book_id SEPARATOR ':') AS ids
      FROM books
     WHERE avail<>0
     GROUP BY BOOK_CMPSTR(book_id,cmp_type)
    HAVING COUNT(*)>1
       AND SUM(CASE WHEN (doublicat=0) THEN 1 ELSE 0 END)<>1;
  DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

  OPEN cur;

  group_loop: LOOP
    FETCH cur INTO ids;
    IF done<>0 THEN
       LEAVE group_loop;
    END IF;

    -- Number of ids in the list is the number of separators plus one.
    SET id_count=LENGTH(ids)-LENGTH(REPLACE(ids,':',''))+1;
    SET idx=1;
    SET orig_id=0;
    WHILE idx<=id_count DO
       -- Extract the idx-th id from the ':'-separated list.
       SET cur_id=CAST(SUBSTRING_INDEX(SUBSTRING_INDEX(ids,':',idx),':',-1) AS SIGNED);
       -- The first id is the original (doublicat=0), the rest point to it.
       UPDATE books SET doublicat=orig_id WHERE book_id=cur_id;
       IF orig_id=0 THEN
          SET orig_id=cur_id;
       END IF;
       SET idx=idx+1;
    END WHILE;
  END LOOP;

  CLOSE cur;
END //

-- Restore the default statement delimiter now that the routines are defined.
DELIMITER ;
commit;


+1.29 KiB (42.2 KiB)

File changed.

No diff preview for this file type.

+3 −1
Original line number Diff line number Diff line
@@ -85,7 +85,9 @@ class opdsScanner:
           self.books_deleted=self.opdsdb.books_del_logical()
        else:
           self.books_deleted=self.opdsdb.books_del_phisical()
        self.opdsdb.update_double()
#        self.opdsdb.update_double()
        if self.cfg.DUBLICATES_FIND!=0:
           self.opdsdb.mark_double(self.cfg.DUBLICATES_FIND)
        self.opdsdb.closeDB()
        self.opdsdb=None

Loading