6
6
7
7
TODO: Add support for multiple database backends?
8
8
"""
9
+ import os
9
10
from pathlib import Path
10
11
from typing import Iterable
11
12
12
- from playhouse import sqlite_ext
13
+ from peewee import SQL , fn
14
+ from playhouse import postgres_ext as ext
13
15
14
- db_proxy = sqlite_ext . DatabaseProxy ()
16
+ from atsphinx . sqlite3fts . playhouse import TSVectorFieldPlus
15
17
18
+ db_proxy = ext .DatabaseProxy ()
16
19
17
class Document(ext.Model):
    """Document main model.

    One row per page, holding the page value and its title.
    """

    # Required and unique: no two documents may share the same page value.
    page = ext.TextField(null=False, unique=True)
    # Required title; ``store_document`` falls back to it when a section
    # has no title of its own.
    title = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # The concrete database is supplied later via ``db_proxy.initialize``.
        database = db_proxy
25
29
26
30
27
class Section(ext.Model):
    """Section unit of document.

    Every section belongs to exactly one ``Document`` and carries the text
    that is indexed for searching.
    """

    # Owning document.
    document = ext.ForeignKeyField(Document)
    # Defaults to False.
    # NOTE(review): presumably flags the top-level section of a page - confirm.
    root = ext.BooleanField(default=False, null=False)
    # NOTE(review): presumably the link target/anchor of the section - confirm.
    ref = ext.TextField(null=False)
    # Section heading and text; both are required.
    title = ext.TextField(null=False)
    body = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # The concrete database is supplied later via ``db_proxy.initialize``.
        database = db_proxy
38
42
39
43
40
class Content(ext.Model):
    """Searching model.

    Holds the search vectors for one ``Section``; queries join this table
    back to ``Section`` through ``rowid``.
    """

    # Stores ``Section.id`` of the indexed section (see ``store_document``).
    # NOTE(review): this is a plain integer column with no index or unique
    # constraint declared here - consider adding one, since searches join on it.
    rowid = ext.IntegerField()
    # Search vectors; ``store_document`` fills them with ``to_tsvector(...)``.
    title = TSVectorFieldPlus()
    body = TSVectorFieldPlus()

    class Meta:  # noqa: D106
        # The concrete database is supplied later via ``db_proxy.initialize``.
        database = db_proxy
        # TODO: This is an option from SQLite, it does not work on other DBMS.
        # options = {"tokenize": "trigram"}
50
55
51
56
52
57
def store_document (document : Document , sections : Iterable [Section ]):
@@ -58,32 +63,68 @@ def store_document(document: Document, sections: Iterable[Section]):
58
63
Content .insert (
59
64
{
60
65
Content .rowid : section .id ,
61
- Content .title : section .title or document .title ,
62
- Content .body : section .body ,
66
+ Content .title : fn . to_tsvector ( section .title or document .title ) ,
67
+ Content .body : fn . to_tsvector ( section .body ) ,
63
68
}
64
69
).execute ()
65
70
66
71
67
72
def search_documents(keyword: str) -> Iterable[Section]:
    """Search documents from keyword by full-text-search.

    Returns a query of ``Section`` rows whose indexed title or body matches
    ``keyword``, ordered by title rank and then body rank (best first).
    Each row also carries the computed ``rank_title`` and ``rank_body``.
    """
    # NOTE(review): this query uses PostgreSQL-only functions, so it cannot
    # run on a SQLite binding. The earlier SQLite FTS5 version filtered with
    # ``Content.match(keyword)`` and ordered by ``Content.bm25()``.
    #
    # PostgreSQL.
    # https://www.postgresql.org/docs/current/textsearch-controls.html
    # https://stackoverflow.com/questions/25033184/postgresql-full-text-search-performance-not-acceptable-when-ordering-by-ts-rank/25245291#25245291
    ts_query = fn.websearch_to_tsquery(keyword)
    # Normalization flag 32 scales each rank as rank / (rank + 1).
    title_rank = fn.ts_rank_cd(Content.title, ts_query, 32).alias("rank_title")
    body_rank = fn.ts_rank_cd(Content.body, ts_query, 32).alias("rank_body")

    title_hit = Content.title.match(keyword, web=True)
    body_hit = Content.body.match(keyword, web=True)

    query = Section.select(Section, title_rank, body_rank)
    query = query.join(Content, on=(Section.id == Content.rowid))
    query = query.where(title_hit | body_hit)
    return query.order_by(
        SQL("rank_title").desc(),
        SQL("rank_body").desc(),
    )
75
107
76
108
77
def bind(db_type: str, db_path: Path):
    """Bind connection.

    This only sets the database on the proxy; it does not create tables.

    :param db_type: Backend to use, either ``"sqlite"`` or ``"postgresql"``.
    :param db_path: Value passed as the first argument to the backend's
        database class.
    :raises ValueError: If ``db_type`` is not one of the supported backends.
    """
    if db_type == "sqlite":
        # ``ext`` is ``playhouse.postgres_ext``, which does not provide
        # ``SqliteExtDatabase``; import it from the module that defines it.
        from playhouse.sqlite_ext import SqliteExtDatabase

        db = SqliteExtDatabase(db_path)
    elif db_type == "postgresql":
        db = ext.PostgresqlExtDatabase(db_path)
        log_statement = os.environ.get("POSTGRES_LOG_STATEMENT")
        if log_statement is not None:
            # Pass the value as a bound parameter instead of formatting it
            # into the SQL text. ``set_config(..., false)`` applies to the
            # session, like a plain ``SET``.
            db.execute_sql(
                "SELECT set_config('log_statement', %s, false);",
                (log_statement,),
            )
    else:
        raise ValueError(f"Unknown database type: {db_type}")
    db_proxy.initialize(db)
84
125
85
126
86
def initialize(db_type: str, db_path: Path):
    """Bind connection and create tables.

    Binds the database selected by ``db_type`` / ``db_path`` via ``bind``,
    then creates the tables for all models of this module.
    """
    bind(db_type, db_path)
    tables = [Document, Section, Content]
    db_proxy.create_tables(tables)
0 commit comments