1: <?php
2:
3: /**
4: * search_class.php
5: *
6: * PHP version 8
7: *
8: * LICENSE: This program is free software: you can redistribute it and/or modify
9: * it under the terms of the GNU Affero General Public License as
10: * published by the Free Software Foundation, either version 3 of the
11: * License, or (at your option) any later version.
12: * This program is distributed in the hope that it will be useful,
13: * but WITHOUT ANY WARRANTY; without even the implied warranty of
14: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: * GNU Affero General Public License for more details.
16: * You should have received a copy of the GNU Affero General Public License
17: * along with this program. If not, see <http://www.gnu.org/licenses/>.
18: *
19: * @category Homeopathy
20: * @package Search
21: * @author Henri Schumacher <henri.hulski@gazeta.pl>
22: * @copyright 2007-2014 Henri Schumacher
23: * @license http://www.gnu.org/licenses/agpl.html GNU Affero General Public License v3
24: * @version 1.0
25: * @link https://research.openhomeo.info/download/OpenHomeopath_1.0.2.tar.gz
26: */
27:
28: /**
29: * The Search class is responsible for building the WHERE part of the symptoms-search query
30: *
31: * @category Homeopathy
32: * @package Search
33: * @author Henri Schumacher <henri.hulski@gazeta.pl>
34: * @copyright 2007-2014 Henri Schumacher
35: * @license http://www.gnu.org/licenses/agpl.html GNU Affero General Public License v3
36: */
class Search {

    /**
     * Escape character that stands in for quotes while the search string is parsed
     */
    const esc_chr = '@';


    /**
     * The requested search string. While build_search() runs it temporarily holds an array
     * with the search words, afterwards it holds the finished SQL condition.
     * @var string|array
     * @access public
     */
    public $search;

    /**
     * If true we're searching for whole words with boolean fulltext search.
     * If false we're searching for parts of words with regular expressions.
     * @var boolean
     * @access private
     */
    private $whole_word;

    /**
     * 'AND'|'OR': If 'AND' the search result should contain all requested words/phrases, if 'OR' any requested words/phrases.
     * Always one of these two literals, because the value is interpolated into the SQL condition.
     * @var string
     * @access private
     */
    private $and_or;

    /**
     * Symptoms table name as returned by $db->get_custom_table("symptoms")
     * @var string
     * @access private
     */
    private $symptoms_tbl;

    /**
     * The phrases from the search request, first as array, later as string prepared for the SQL-query
     * @var array|string
     * @access private
     */
    private $search_phrase;

    /**
     * The words which the search result must not contain, first as array, later as string prepared for the SQL-query
     * @var array|string
     * @access private
     */
    private $search_not;

    /**
     * 'boolean'|'regexp': 'boolean' if we're using the boolean fulltext search, 'regexp' for a search with regular expressions
     * @var string
     * @access private
     */
    private $mode;

    /**
     * SQL conditions (regexp-mode only) for the requested words that were duplicated with ss or ß.
     * Filled by merge_dublicate_ss() and emptied again by the next query builder.
     * @var array
     * @access private
     */
    private $dublicated_ss = array();

    /**
     * Class constructor
     *
     * Reads 'search', 'whole_word' and 'and_or' from $_REQUEST and the symptoms table name
     * from the global $db object.
     *
     * @access public
     */
    public function __construct() {

        global $db;

        // A missing or array-valued parameter is treated as an empty search instead of
        // leaking null/array into the string functions below.
        $search = $_REQUEST['search'] ?? '';
        $this->search = is_string($search) ? $search : '';
        // Every PCRE call in this class uses the /u modifier, which fails on malformed UTF-8.
        // Reject such input once, up front.
        if (preg_match('//u', $this->search) !== 1) {
            $this->search = '';
        }

        $whole_word = $_REQUEST['whole_word'] ?? '';
        $this->whole_word = !(empty($whole_word) || $whole_word === 'false');

        // and_or ends up verbatim inside the SQL condition, so only the two known operators
        // are accepted; anything else falls back to the default 'AND'.
        $and_or = $_REQUEST['and_or'] ?? '';
        $this->and_or = (is_string($and_or) && strtoupper(trim($and_or)) === 'OR') ? 'OR' : 'AND';

        $this->symptoms_tbl = $db->get_custom_table("symptoms");
    }


    /**
     * build_search is the central function of the Search class that builds the search query
     *
     * On return $this->search contains the SQL condition: a chain of REGEXP comparisons in
     * regexp-mode (possibly an empty string if nothing was requested) or a
     * MATCH ... AGAINST (... IN BOOLEAN MODE) expression in boolean-mode.
     *
     * @return void
     * @access public
     */
    public function build_search() {
        if ($this->whole_word === false) {
            $this->build_regexp_search();
        } else {
            $this->build_boolean_search();
        }
    }

    /**
     * build_regexp_search builds the condition for searching parts of words with regular expressions
     *
     * @return void
     * @access private
     */
    private function build_regexp_search(): void {
        $this->mode = 'regexp';
        $this->delete_punctuation();
        $this->encode_quotes();
        $this->clean_whitespace();
        $this->extract_phrases();
        $this->build_phrases_search_query();
        $this->extract_search_not();
        $this->build_search_not_query();

        // Cutting out phrases and excluded words can leave nothing but whitespace behind.
        // Trim again so that such a remainder is not mistaken for search words.
        $this->clean_whitespace();
        if ($this->search !== '') {
            $this->extract_search_words();
            $this->build_search_query($this->search);
        }

        // words and phrases are combined with the requested operator ...
        $positive = array();
        foreach (array($this->search, $this->search_phrase) as $condition) {
            if ($condition !== '') {
                $positive[] = $condition;
            }
        }
        $conditions = array();
        if (!empty($positive)) {
            $conditions[] = '(' . implode(" {$this->and_or} ", $positive) . ')';
        }
        // ... while excluded words always have to hold in addition
        if ($this->search_not !== '') {
            $conditions[] = $this->search_not;
        }
        $this->search = implode(' AND ', $conditions);

        $this->decode_quotes();
        $this->clean_whitespace();
    }

    /**
     * build_boolean_search builds the condition for searching whole words with boolean fulltext search
     *
     * @return void
     * @access private
     */
    private function build_boolean_search(): void {
        $this->mode = 'boolean';
        $this->encode_quotes();
        $this->extract_phrases();
        $this->extract_search_not();
        $this->extract_search_words();
        $this->build_boolean_query('+', $this->search);
        $this->build_boolean_query('+', $this->search_phrase);
        $this->build_boolean_query('-', $this->search_not);
        $this->search .= $this->search_phrase . $this->search_not;
        $this->clean_whitespace();
        $this->decode_quotes();
        $this->search = "MATCH ({$this->symptoms_tbl}.symptom) AGAINST ('" . $this->search . "' IN BOOLEAN MODE)";
    }

    /**
     * clean_whitespace removes whitespace at the beginning and end of the search string and double whitespace inside the search string
     *
     * @return void
     * @access private
     */
    private function clean_whitespace(): void {
        $this->search = (string) preg_replace('/\s\s+/u', ' ', $this->search);
        $this->search = trim($this->search);
    }

    /**
     * delete_punctuation replaces punctuation inside the search string with a space
     *
     * @return void
     * @access private
     */
    private function delete_punctuation(): void {
        $punctuation = array('.', ',', ';', '!', ':', '@', '/', '*', '$', '^', '#');
        $this->search = str_replace($punctuation, ' ', $this->search);
    }

    /**
     * encode_quotes replaces quotes and escaped quotes in the search string with the escape character constant esc_chr
     *
     * All remaining backslashes are removed afterwards.
     *
     * @return void
     * @access private
     */
    private function encode_quotes(): void {
        // A literal escape character typed by the user would later be decoded as a quote,
        // so it is turned into a space before the real quotes are encoded.
        $this->search = str_replace(self::esc_chr, ' ', $this->search);
        // escaped quotes first, so that their backslash disappears together with the quote
        $quotes = array("\\'", '\\"', "'", '"');
        $this->search = str_replace($quotes, self::esc_chr, $this->search);
        // delete "\"
        $this->search = str_replace('\\', '', $this->search);
    }

    /**
     * decode_quotes replaces the escape character constant esc_chr in the search string
     * with a double quote (boolean-mode) or removes it (regexp-mode)
     *
     * @return void
     * @access private
     */
    private function decode_quotes(): void {
        $quote = ($this->mode === 'regexp') ? '' : '"';
        $this->search = str_replace(self::esc_chr, $quote, $this->search);
    }

    /**
     * extract_phrases pulls quoted phrases from the search string and stores them in an array ($this->search_phrase)
     *
     * @return void
     * @access private
     */
    private function extract_phrases(): void {
        $esc = preg_quote(self::esc_chr, '/');
        $phrase_pattern = '/' . $esc . '[^' . $esc . ']+' . $esc . '/u';
        // copy phrases in quotes to an array
        preg_match_all($phrase_pattern, $this->search, $matches);
        // delete phrases in quotes from search
        $this->search = (string) preg_replace($phrase_pattern, '', $this->search);
        $this->search_phrase = $matches[0] ?? array();
        // if we are in regexp-mode append after every word '[[:punct:][:space:]]*' so we ignore punctuation and whitespaces between words in a phrase
        if (!empty($this->search_phrase) && $this->mode === 'regexp') {
            $this->search_phrase = preg_replace('/([\s' . $esc . '][\wßäöüÄÖÜ]+)/u', '\1[[:punct:][:space:]]*', $this->search_phrase);
        }
        $this->dublicate_ss('search_phrase');
    }

    /**
     * extract_search_not pulls not desired words with a preceding '-' from the search string and stores them in an array ($this->search_not)
     *
     * @return void
     * @access private
     */
    private function extract_search_not(): void {
        // copy words beginning with "-" to an array
        preg_match_all('/-([\wßäöüÄÖÜ]+)/u', $this->search, $matches);
        // delete words beginning with "-"
        $this->search = (string) preg_replace('/-[\wßäöüÄÖÜ]+/u', '', $this->search);
        $this->search_not = $matches[1] ?? array();
        $this->dublicate_ss('search_not');
    }

    /**
     * extract_search_words extracts the remaining search words from the search string and stores them in an array ($this->search)
     *
     * @return void
     * @access private
     */
    private function extract_search_words(): void {
        // words to array, split at whitespace and commas
        $this->search = preg_split('/[\s\,]+/', $this->search, -1, PREG_SPLIT_NO_EMPTY);
        $this->dublicate_ss('search');
    }

    /**
     * build_phrases_search_query builds the SQL search query for the phrases search in regexp-mode
     *
     * @return void
     * @access private
     */
    private function build_phrases_search_query(): void {
        $this->build_search_query($this->search_phrase);
    }

    /**
     * build_search_not_query builds the SQL search query for not desired words in regexp-mode
     *
     * Turns $this->search_not from an array into a string of NOT REGEXP conditions joined
     * with AND (an empty string if there is nothing to exclude) and consumes the pending
     * ss/ß conditions.
     *
     * @return void
     * @access private
     */
    private function build_search_not_query(): void {
        $column = "{$this->symptoms_tbl}.symptom";
        $conditions = array();
        // array to string
        if (!empty($this->search_not)) {
            $conditions[] = "$column NOT REGEXP '" . implode("' AND $column NOT REGEXP '", $this->search_not) . "'";
        }
        if (!empty($this->dublicated_ss)) {
            $conditions[] = implode(" AND ", $this->dublicated_ss);
            // reset instead of unset so the declared property keeps existing
            $this->dublicated_ss = array();
        }
        $this->search_not = implode(" AND ", $conditions);
    }

    /**
     * build_search_query builds the SQL search query in regexp-mode
     *
     * Consumes the pending ss/ß conditions as well.
     *
     * @param array|string &$search receiving an array containing the search strings,
     *                               returning a string with the SQL-query, which is empty
     *                               if there was nothing to search for
     *                               ($this->search|$this->search_phrase)
     * @return void
     * @access private
     */
    private function build_search_query(&$search): void {
        $column = "{$this->symptoms_tbl}.symptom";
        $conditions = array();
        if (!empty($search)) {
            $conditions[] = "$column REGEXP '" . implode("' {$this->and_or} $column REGEXP '", $search) . "'";
        }
        if (!empty($this->dublicated_ss)) {
            $conditions[] = implode(" {$this->and_or} ", $this->dublicated_ss);
            // reset instead of unset so the declared property keeps existing
            $this->dublicated_ss = array();
        }
        $search = implode(" {$this->and_or} ", $conditions);
    }

    /**
     * build_boolean_query builds the SQL search query in boolean-mode
     *
     * @param string $operator '+'|'-': If '+' the result has to contain this string if '-' it must not
     * @param array|string &$search receiving an array containing the search strings,
     *                               returning a string with the SQL-query
     *                               ($this->search|$this->search_phrase|$this->search_not)
     * @return void
     * @access private
     */
    private function build_boolean_query(string $operator, &$search): void {
        if (empty($search)) {
            $search = '';
            return;
        }
        // with 'OR' the terms are optional, so they get no leading '+'
        if ($operator === '+' && $this->and_or === 'OR') {
            $operator = '';
        }
        $search = " $operator" . implode(" $operator", $search);
    }

    /**
     * dublicate_ss searches for 'ss' or 'ß' in the search strings and duplicates them with the counterpart
     *
     * This function closes a bug in the German repertories, where some words sometimes are written
     * with ss and sometimes with ß.
     *
     * Every string containing 'ss' is duplicated and the 'ss' is replaced by 'ß'.
     * Also every string containing 'ß' is duplicated and the 'ß' is replaced by 'ss'.
     * A string containing both gets both duplicates, merged in one single step.
     *
     * @param string $search_ar_name The variable name of the array containing the search strings ('search'|'search_phrase'|'search_not').
     * @return void
     * @access private
     */
    private function dublicate_ss(string $search_ar_name): void {
        // iterate over a copy, because merge_dublicate_ss() modifies the property itself
        $search_ar = $this->$search_ar_name;
        if (empty($search_ar)) {
            return;
        }
        foreach ($search_ar as $key => $search_string) {
            $duplicates = array();
            if (str_contains($search_string, 'ss')) {
                $duplicates[] = str_replace('ss', 'ß', $search_string);
            }
            if (str_contains($search_string, 'ß')) {
                $duplicates[] = str_replace('ß', 'ss', $search_string);
            }
            if (!empty($duplicates)) {
                $this->merge_dublicate_ss($search_ar_name, $duplicates, $key);
            }
        }
    }

    /**
     * merge_dublicate_ss creates the SQL query string for the duplicated search strings containing 'ss' or 'ß'.
     *
     * In boolean-mode the array entry is replaced by a group "(original duplicate ...)".
     * In regexp-mode the entry is removed from the array and a combined condition is queued
     * in $this->dublicated_ss instead.
     *
     * @param string $search_ar_name The variable name of the array containing the search strings ('search'|'search_phrase'|'search_not').
     * @param array $duplicates The duplicated search strings with the replaced 'ss'|'ß'.
     * @param integer|string $key The array key of the duplicated search string in the search strings array.
     * @return void
     * @access private
     */
    private function merge_dublicate_ss(string $search_ar_name, array $duplicates, $key): void {
        $search_ar = &$this->$search_ar_name;
        $original = $search_ar[$key];

        if ($this->mode === 'boolean') {
            $search_ar[$key] = "(" . $original . " " . implode(" ", $duplicates) . ")";
            return;
        }

        // An excluded word must match none of its spellings, a requested word any of them.
        $is_negated = ($search_ar_name === 'search_not');
        $regexp = $is_negated ? 'NOT REGEXP' : 'REGEXP';
        $and_or = $is_negated ? 'AND' : 'OR';

        $alternatives = array();
        foreach (array_merge(array($original), $duplicates) as $pattern) {
            $alternatives[] = "{$this->symptoms_tbl}.symptom $regexp '$pattern'";
        }
        $this->dublicated_ss[] = "(" . implode(" $and_or ", $alternatives) . ")";
        unset($search_ar[$key]);
    }

}
406: