6
6
7
7
TODO: Add support for multiple database backends?
8
8
"""
9
+ import os
9
10
from pathlib import Path
10
11
from typing import Iterable
11
12
12
- from playhouse import sqlite_ext
13
+ from peewee import fn , SQL
14
+ from playhouse import postgres_ext as ext
13
15
14
- db_proxy = sqlite_ext . DatabaseProxy ()
16
+ from atsphinx . sqlite3fts . playhouse import TSVectorFieldPlus
15
17
18
# Placeholder database shared by every model's ``Meta.database``; the concrete
# backend is attached later through ``db_proxy.initialize(...)``.
db_proxy = ext.DatabaseProxy()
16
19
17
class Document(ext.Model):
    """Document main model."""

    # Required text value, unique across all documents.
    page = ext.TextField(null=False, unique=True)
    # Required title text of the document.
    title = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # Bound through the proxy so the real database can be chosen at runtime.
        database = db_proxy
25
29
26
30
27
class Section(ext.Model):
    """Section unit of document."""

    # Owning document of this section.
    document = ext.ForeignKeyField(Document)
    # Required flag, False unless set explicitly.
    # NOTE(review): presumably marks the top-level section of a document - confirm.
    root = ext.BooleanField(default=False, null=False)
    # Required reference text.
    # NOTE(review): presumably an anchor/identifier inside the page - confirm.
    ref = ext.TextField(null=False)
    # Required title text of the section.
    title = ext.TextField(null=False)
    # Required body text of the section.
    body = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # Bound through the proxy so the real database can be chosen at runtime.
        database = db_proxy
38
42
39
43
40
class Content(ext.Model):
    """Searching model."""

    # Holds the ``Section.id`` that this search row belongs to; searches join
    # ``Section`` and ``Content`` on this column.
    rowid = ext.IntegerField()
    # Text-search vectors for the title and the body of the section.
    title = TSVectorFieldPlus()
    body = TSVectorFieldPlus()

    class Meta:  # noqa: D106
        # Bound through the proxy so the real database can be chosen at runtime.
        database = db_proxy
        # TODO: This is an option from SQLite, it does not work on other DBMS.
        # options = {"tokenize": "trigram"}
50
55
51
56
52
57
def store_document (document : Document , sections : Iterable [Section ]):
@@ -58,32 +63,69 @@ def store_document(document: Document, sections: Iterable[Section]):
58
63
Content .insert (
59
64
{
60
65
Content .rowid : section .id ,
61
- Content .title : section .title or document .title ,
62
- Content .body : section .body ,
66
+ Content .title : fn . to_tsvector ( section .title or document .title ) ,
67
+ Content .body : fn . to_tsvector ( section .body ) ,
63
68
}
64
69
).execute ()
65
70
66
71
67
72
def search_documents(keyword: str) -> Iterable[Section]:
    """Search documents from keyword by full-text-search.

    Builds a PostgreSQL text-search query over ``Content.title`` and
    ``Content.body`` and returns the matching sections, best match first.

    :param keyword: Search text, interpreted with web-search syntax
        (``websearch_to_tsquery``).
    :returns: Select query of ``Section`` rows. Each row also carries
        ``rank_title`` and ``rank_body``; results are ordered by title rank,
        then by body rank, both descending.
    """
    # The previous SQLite FTS5 implementation filtered with
    # ``Content.match(keyword)`` and ordered by ``Content.bm25()``; those
    # helpers are specific to SQLite and are not used here.
    #
    # PostgreSQL.
    # https://www.postgresql.org/docs/current/textsearch-controls.html
    # https://stackoverflow.com/questions/25033184/postgresql-full-text-search-performance-not-acceptable-when-ordering-by-ts-rank/25245291#25245291
    ts_query = fn.websearch_to_tsquery(keyword)
    # Normalization flag 32 is passed to ts_rank_cd for both columns; per the
    # PostgreSQL documentation it scales the rank as rank / (rank + 1).
    normalization = 32
    return (
        Section.select(
            Section,
            fn.ts_rank_cd(Content.title, ts_query, normalization).alias("rank_title"),
            fn.ts_rank_cd(Content.body, ts_query, normalization).alias("rank_body"),
        )
        .join(Content, on=(Section.id == Content.rowid))
        .where(
            Content.title.match(keyword, web=True)
            | Content.body.match(keyword, web=True)
        )
        .order_by(
            SQL("rank_title").desc(),
            SQL("rank_body").desc(),
        )
    )
75
108
76
109
77
def bind(db_type: str, db_path: Path):
    """Bind connection.

    This works only set db into proxy, not included creating tables.

    :param db_type: Backend selector, either ``"sqlite"`` or ``"postgresql"``.
    :param db_path: Value handed to the database class as its first argument
        (database file for SQLite, database identifier for PostgreSQL).
    :raises ValueError: If ``db_type`` is not one of the supported backends.
    """
    if db_type == "sqlite":
        # ``ext`` is ``playhouse.postgres_ext``, which does not provide
        # ``SqliteExtDatabase``; take it from its own module instead.
        from playhouse.sqlite_ext import SqliteExtDatabase

        db = SqliteExtDatabase(db_path)
    elif db_type == "postgresql":
        db = ext.PostgresqlExtDatabase(db_path)
        log_statement = os.environ.get("POSTGRES_LOG_STATEMENT")
        if log_statement is not None:
            # Pass the value as a bound parameter rather than formatting it
            # into the SQL text; ``set_config(..., false)`` applies the setting
            # to the current session, like a plain ``SET``.
            db.execute_sql(
                "SELECT set_config('log_statement', %s, false)",
                (log_statement,),
            )
    else:
        raise ValueError(f"Unknown database type: {db_type}")
    db_proxy.initialize(db)
84
126
85
127
86
def initialize(db_type: str, db_path: Path):
    """Bind connection and create tables.

    :param db_type: Backend selector forwarded to ``bind``.
    :param db_path: Database location forwarded to ``bind``.
    """
    bind(db_type, db_path)
    # Create the tables for all three models on the freshly bound database.
    db_proxy.create_tables([Document, Section, Content])
0 commit comments