1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26:
27:
28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38:
/**
 * Parses an "express script" and stages its content in temporary tables.
 *
 * An express script is a text in which every record sits on its own line and
 * has the form "keyword-or-symptom: payload". The keywords "alias", "source"
 * and "ref" are handled specially, every other record is treated as a symptom
 * followed by its remedies. Parsed records are written to the temporary
 * tables express_symptoms, express_sym_rem, express_alias and express_source;
 * records that cannot be processed are collected in {@see Express::$script}.
 *
 * The class relies on the global database wrapper $db (send_query,
 * db_fetch_row, free_result, db_insert_id, escape_string, archive_table_row).
 */
class Express {

    /**
     * Records that were rejected or deferred, one per line, in script syntax.
     * @var string
     */
    public $script = "";

    /**
     * Source id taken from $_POST['src_id'] (empty when none was posted).
     * @var string
     */
    public $src_id = "";

    /**
     * Language id of the source, read from sources.lang_id.
     * @var string
     */
    public $src_lang = "";

    /**
     * Main rubric id taken from $_POST['rubric_id']; -1 means "no main rubric".
     * @var integer
     */
    public $rubric_id = -1;

    /**
     * Name of the main rubric in the source language.
     * @var string
     */
    public $rubric_name = "";

    /**
     * Reference ids used in remedies that do not exist in the sources table.
     * @var array
     */
    public $ref_not_found_ar = array();

    /**
     * Remedies that could not be resolved, indexed as
     * [symptom]['classic'|'nonclassic'][remedy abbreviation] = original token.
     * @var array
     */
    public $rem_error_ar = array();

    /**
     * Counters describing what happened during parsing and importing.
     * @var array
     */
    public $count_ar = array();

    /**
     * Reads the posted source/rubric, creates the temporary tables and parses
     * the script.
     *
     * @param string $sym_rem the express script
     */
    public function __construct($sym_rem) {
        global $db;

        $this->count_ar = array(
            'sym'         => array('in' => 0, 'ex' => 0, 'sim' => 0, 'sim_in' => 0, 'nonclassic_in' => 0),
            'symrem'      => array('in' => 0, 'ex' => 0),
            'rem'         => array('noex' => 0, 'alias' => 0),
            'grade_ch'    => 0,
            'status_ch'   => 0,
            'kuenzli_ch'  => 0,
            'rec'         => array(
                'all'     => 0,
                'nocolon' => 0,
                'alias'   => array('all' => 0, 'noequal' => 0),
                'src'     => 0,
            ),
            'alias'       => array('in' => 0, 'ex' => 0, 'noex' => 0),
            'main_noex'   => 0,
            'parent_noex' => 0,
            'src'         => array('in' => 0, 'ex' => 0, 'err' => 0),
            'ref_noex'    => 0,
            'no_src'      => 0,
            'no_main'     => 0,
        );

        if (!empty($_POST['src_id']) && is_scalar($_POST['src_id'])) {
            $this->src_id = $_POST['src_id'];
            $src_id_sql = $db->escape_string($this->src_id);
            $lang = $this->fetch_single_value("SELECT lang_id FROM sources WHERE src_id = '$src_id_sql'");
            $this->src_lang = ($lang === null) ? "" : $lang;

            // Only a purely numeric rubric id is accepted; anything else keeps
            // the "no main rubric" marker -1 instead of ending up in SQL.
            if (!empty($_POST['rubric_id']) && is_scalar($_POST['rubric_id'])
                && preg_match('/^\d+\z/', (string) $_POST['rubric_id'])) {
                $this->rubric_id = (int) $_POST['rubric_id'];
                $column = $this->rubric_column();
                if ($column !== null) {
                    $name = $this->fetch_single_value(
                        "SELECT " . $column . " FROM main_rubrics WHERE rubric_id = " . $this->rubric_id
                    );
                    $this->rubric_name = ($name === null) ? "" : $name;
                }
            }
        }
        $this->create_temporary_express_tables();
        $this->parse_express_script($sym_rem);
    }

    /**
     * Runs a query and returns its first row.
     *
     * @param string $query
     * @return array|null the first row, or null when the query returned none
     */
    private function fetch_first_row($query) {
        global $db;

        $db->send_query($query);
        $row = $db->db_fetch_row();
        $db->free_result();
        return is_array($row) ? $row : null;
    }

    /**
     * Runs a query and returns the first column of its first row.
     *
     * @param string $query
     * @return mixed|null the value, or null when there is no row
     */
    private function fetch_single_value($query) {
        $row = $this->fetch_first_row($query);
        return ($row !== null && isset($row[0])) ? $row[0] : null;
    }

    /**
     * Builds the language dependent column name of main_rubrics.
     *
     * @return string|null "rubric_<lang>", or null when the source language is
     *                     not a plain identifier and must not be put into SQL
     */
    private function rubric_column() {
        if (is_string($this->src_lang) && preg_match('/^[A-Za-z0-9_]+\z/', $this->src_lang)) {
            return "rubric_" . $this->src_lang;
        }
        return null;
    }

    /**
     * (Re)creates the four temporary staging tables.
     */
    private function create_temporary_express_tables() {
        global $db;

        $queries = array(
            "DROP TEMPORARY TABLE IF EXISTS express_symptoms, express_sym_rem, express_alias, express_source",
            "CREATE TEMPORARY TABLE express_symptoms (
                sympt_id mediumint(8) unsigned NOT NULL AUTO_INCREMENT,
                symptom text NOT NULL,
                rubric_id tinyint(3) unsigned NOT NULL,
                page smallint(5) unsigned NOT NULL,
                kuenzli tinyint(1) NOT NULL,
                extra text NOT NULL,
                backup text NOT NULL,
                PRIMARY KEY(sympt_id)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8",
            // "ref" stores all reference ids of a remedy joined by "#", so it
            // has to be wider than a single 12 character source id.
            "CREATE TEMPORARY TABLE express_sym_rem (
                sympt_id mediumint(8) unsigned NOT NULL,
                remedy varchar(255) NOT NULL,
                wert tinyint(1) unsigned NOT NULL,
                status tinyint(3) unsigned NOT NULL,
                kuenzli tinyint(1) NOT NULL,
                ref varchar(255) NOT NULL,
                nonclassic tinyint(1) NOT NULL,
                backup text NOT NULL,
                KEY sympt_id (sympt_id)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8",
            "CREATE TEMPORARY TABLE express_alias (
                remedy varchar(255) NOT NULL,
                aliase varchar(255) NOT NULL
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8",
            "CREATE TEMPORARY TABLE express_source (
                src_id varchar(12) NOT NULL,
                author tinytext NOT NULL,
                title varchar(200) NOT NULL,
                year varchar(9) NOT NULL,
                lang varchar(6) NOT NULL,
                grade_max tinyint(1) unsigned NOT NULL,
                src_type varchar(30) NOT NULL,
                primary_src tinyint(1) NOT NULL
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8",
        );
        foreach ($queries as $query) {
            $db->send_query($query);
        }
    }

    /**
     * Splits the script into records and dispatches each one.
     *
     * Records without a colon (or without anything in front of it) are copied
     * to $script and counted in count_ar['rec']['nocolon']. Blank lines are
     * ignored.
     *
     * @param string $sym_rem the express script
     */
    private function parse_express_script($sym_rem) {
        // extract_symptom() keeps the parent symptom in this global. It has to
        // be declared here as well, otherwise the resets below would only
        // touch a local variable and stale parents would survive.
        global $prev_symptom;

        $prev_symptom = "";
        $sym_rem = str_replace("\r", "\n", (string) $sym_rem);
        foreach (explode("\n", $sym_rem) as $line) {
            $value = preg_replace('/\s+/u', ' ', $line);
            if ($value === null) {
                // The /u pattern fails on invalid UTF-8; retry in byte mode.
                $value = preg_replace('/\s+/', ' ', $line);
            }
            if (trim($value) === "") {
                continue;
            }

            $symptom = "";
            $remedy = "";
            if (strpos($value, ":") !== false) {
                list($symptom, $remedy) = explode(":", $value, 2);
                $symptom = trim($symptom);
            }
            if ($symptom === "") {
                $this->script .= $value . "\n";
                $prev_symptom = "";
                $this->count_ar['rec']['nocolon']++;
                continue;
            }

            if ($symptom === "alias") {
                $this->extract_alias($remedy);
            } elseif ($symptom === "source" || $symptom === "ref") {
                $this->extract_source($remedy, ($symptom === "source") ? 1 : 0);
            } elseif (empty($this->src_id)) {
                // Symptoms cannot be assigned without a source.
                $this->script .= $symptom . ": " . $remedy . "\n";
                $this->count_ar['no_src']++;
            } else {
                $sympt_id = $this->extract_symptom($symptom);
                if ($sympt_id != 0) {
                    // Remedies inside {...} are the non-classic ones.
                    if (preg_match('/\{[\s,]*(.+)\}[\s,]*/u', $remedy, $matches)) {
                        $this->extract_remedies($matches[1], $sympt_id, 1);
                    }
                    $remedy = preg_replace('/\s*\{.*\}\s*/u', "", $remedy);
                    if ($remedy != "") {
                        $this->extract_remedies($remedy, $sympt_id, 0);
                    }
                } else {
                    $this->script .= $symptom . ": " . $remedy . "\n";
                }
            }
        }
    }

    /**
     * Parses the symptom part of a record and stores it in express_symptoms.
     *
     * Recognised markers: "s. N" / "p. N" (page), "@" (kuenzli flag),
     * "(...)" (extra text), leading ">" characters (reference to the previous
     * symptom as parent) and "Rubric >> symptom" (explicit main rubric).
     *
     * @param string $symptom_string text in front of the colon
     * @return integer id of the inserted row, 0 when the symptom was rejected
     */
    private function extract_symptom($symptom_string) {
        global $db, $prev_symptom;

        $rubric_id = $this->rubric_id;
        $page = 0;
        $kuenzli = 0;
        $extra = "";
        $rubric_name = "";
        $custom_rubric = false;
        $backref = 0;
        $symptom_backup = $symptom_string;

        // The page is only taken over when a number really follows "s."/"p.".
        if (preg_match('/[sp]\.\s*(\d+)/iu', $symptom_string, $matches)) {
            $page = (int) $matches[1];
            $symptom_string = preg_replace('/\s*[sp]\.\s*\d+\s*/iu', "", $symptom_string);
        }
        if (strpos($symptom_string, "@") !== false) {
            $kuenzli = 1;
            $symptom_string = preg_replace('/\s*\@\s*/u', "", $symptom_string);
        }
        if (preg_match('/\((.*)\)/u', $symptom_string, $matches)) {
            $extra = $matches[1];
            $symptom_string = preg_replace('/\(.*\)/u', "", $symptom_string);
        }
        $symptom_string = preg_replace('/\s*,\s*$/u', "", $symptom_string);

        if (preg_match('/^(>+)/u', $symptom_string, $arrows)) {
            $backref = strlen($arrows[1]);
            $symptom_string = preg_replace('/^>+\s*/u', "", $symptom_string);
        }
        if ($backref != 0) {
            if ($prev_symptom != "" && $backref != 1) {
                // Every additional ">" climbs one level up in the parent path.
                $prev_symptom_ar = explode(" > ", $prev_symptom);
                array_splice($prev_symptom_ar, -($backref - 1));
                $prev_symptom = implode(" > ", $prev_symptom_ar);
            }
            if ($prev_symptom == "") {
                $this->count_ar['parent_noex']++;
                return 0;
            }
            $symptom_string = $prev_symptom . " > " . $symptom_string;
        }

        $rubric_pos = strpos($symptom_string, ">>");
        if ($rubric_pos !== false && $rubric_pos > 0
            && preg_match('/^(.+)>>/u', $symptom_string, $matches)) {
            $rubric_name = trim($matches[1]);
            $symptom_string = preg_replace('/^.+>>\s*/u', "", $symptom_string);
            $custom_rubric_id = null;
            $column = $this->rubric_column();
            if ($column !== null) {
                $custom_rubric_id = $this->fetch_single_value(
                    "SELECT rubric_id FROM main_rubrics WHERE " . $column . " = '"
                    . $db->escape_string($rubric_name) . "'"
                );
            }
            if (empty($custom_rubric_id)) {
                $this->count_ar['main_noex']++;
                $prev_symptom = "";
                return 0;
            }
            $rubric_id = $custom_rubric_id;
            $custom_rubric = true;
        }
        if ($rubric_id == -1) {
            $this->count_ar['no_main']++;
            $prev_symptom = "";
            return 0;
        }

        $symptom_string = preg_replace('/\s*>\s*/u', " > ", $symptom_string);
        // Remember the full path so that following records can refer to it.
        $prev_symptom = $symptom_string;
        if ($custom_rubric === true) {
            $prev_symptom = $rubric_name . " >> " . $prev_symptom;
        }

        $query = "INSERT INTO express_symptoms (symptom, rubric_id, page, kuenzli, extra, backup) VALUES ('"
            . $db->escape_string($symptom_string) . "', " . (int) $rubric_id . ", " . (int) $page . ", "
            . (int) $kuenzli . ", '" . $db->escape_string($extra) . "', '"
            . $db->escape_string($symptom_backup) . "')";
        $db->send_query($query);
        return $db->db_insert_id();
    }

    /**
     * Parses a comma separated remedy list and stores it in express_sym_rem.
     *
     * Per remedy the following markers are evaluated: "@" (kuenzli flag), a
     * status symbol from sym_status, "#ref" suffixes (reference sources) and
     * a trailing "-N" with N in 1..5 (grade, default 1).
     *
     * @param string  $rem_string remedy list
     * @param integer $symt_id    id of the symptom in express_symptoms
     * @param integer $nonclassic 1 for remedies taken from {...}, otherwise 0
     */
    private function extract_remedies($rem_string, $symt_id, $nonclassic) {
        global $db;

        $rem_string = (string) preg_replace('/\s+/u', "", $rem_string);
        $rem_string = (string) preg_replace('/,+/u', ",", $rem_string);
        $rem_string = trim($rem_string, ",");
        if ($rem_string === "") {
            return;
        }

        // The status symbols do not change while the list is processed, so
        // they are read once instead of once per remedy.
        $status_ar = array();
        $db->send_query("SELECT status_id, status_symbol FROM sym_status");
        while ($status = $db->db_fetch_row()) {
            if (!empty($status[1])) {
                $status_ar[] = array($status[0], $status[1]);
            }
        }
        $db->free_result();

        foreach (explode(",", $rem_string) as $remedy) {
            $kuenzli = 0;
            $status_id = 0;
            $grade = 1;
            $ref_ar = array();
            $rem_backup = $remedy;

            if (strpos($remedy, "@") !== false) {
                $kuenzli = 1;
                $remedy = str_replace("@", "", $remedy);
            }
            foreach ($status_ar as $status) {
                if (strpos($remedy, $status[1]) !== false) {
                    $status_id = $status[0];
                    $remedy = str_replace($status[1], "", $remedy);
                    break;
                }
            }
            if (strpos($remedy, "#") !== false) {
                $remedy = rtrim(preg_replace('/#+/u', "#", $remedy), "#");
                $candidates = explode("#", $remedy);
                $remedy = array_shift($candidates);
                foreach ($candidates as $ref) {
                    $ref_count = $this->fetch_single_value(
                        "SELECT COUNT(*) FROM sources WHERE src_id = '" . $db->escape_string($ref) . "'"
                    );
                    if ($ref_count == 0) {
                        $this->ref_not_found_ar[] = $ref;
                        $this->count_ar['ref_noex']++;
                    } else {
                        $ref_ar[] = $ref;
                    }
                }
            }
            $remedy = str_replace(".", "", $remedy);
            if (strpos($remedy, "-") !== false) {
                if (preg_match('/-([1-5])$/u', $remedy, $matches)) {
                    $grade = (int) $matches[1];
                }
                $remedy = preg_replace('/-[1-5]?$/u', "", $remedy);
            }
            $refs = implode("#", $ref_ar);

            $query = "INSERT INTO express_sym_rem (sympt_id, remedy, wert, status, kuenzli, ref, nonclassic, backup) VALUES ("
                . (int) $symt_id . ", '" . $db->escape_string($remedy) . "', " . (int) $grade . ", "
                . (int) $status_id . ", " . (int) $kuenzli . ", '" . $db->escape_string($refs) . "', "
                . (int) $nonclassic . ", '" . $db->escape_string($rem_backup) . "')";
            $db->send_query($query);
        }
    }

    /**
     * Parses an "alias: remedy = alias1, alias2" record into express_alias.
     *
     * A record without "remedy =" is copied to $script and counted in
     * count_ar['rec']['alias']['noequal'].
     *
     * @param string $alias_string text behind "alias:"
     */
    public function extract_alias($alias_string) {
        global $db;

        $compact = (string) preg_replace('/\s+/u', "", $alias_string);
        $equal_pos = strpos($compact, "=");
        if ($equal_pos === false || $equal_pos === 0) {
            // Give the rejected record back unchanged so it can be corrected.
            $this->script .= "alias: " . trim($alias_string) . "\n";
            $this->count_ar['rec']['alias']['noequal']++;
            return;
        }

        list($remedy, $aliase) = explode("=", $compact, 2);
        $remedy = str_replace(".", "", $remedy);
        $aliase = rtrim(preg_replace('/,+/u', ", ", $aliase), ", ");
        $query = "INSERT INTO express_alias (remedy, aliase) VALUES ('"
            . $db->escape_string($remedy) . "', '" . $db->escape_string($aliase) . "')";
        $db->send_query($query);
    }

    /**
     * Parses a "source:" / "ref:" record into express_source.
     *
     * Expected fields, separated by "#": id, author, title, year, language,
     * maximum grade and optionally the source type (default "Repertorium").
     * Invalid records are copied to $script and counted in
     * count_ar['src']['err'].
     *
     * @param string  $source_string text behind the keyword
     * @param integer $primary_src   1 for "source", 0 for "ref"
     */
    private function extract_source($source_string, $primary_src) {
        global $db;

        $source_string = trim((string) preg_replace('/\s*#[\s#]*/u', "#", $source_string));
        if (substr($source_string, -1) === "#") {
            $source_string = (string) substr($source_string, 0, -1);
        }
        $source_ar = array_map('trim', explode("#", $source_string));
        $field_count = count($source_ar);

        $src_type = "Repertorium";
        $error = ($field_count != 6 && $field_count != 7);
        if (!$error) {
            list($src_id, $author, $title, $year, $lang, $grade_max) = $source_ar;
            if ($field_count == 7) {
                $src_type = $source_ar[6];
            }
            $author = ucfirst($author);
            $title = ucfirst($title);
            $year = preg_replace('/\s+/u', "", $year);
            $lang = strtolower($lang);
            $src_type = ucwords(strtolower($src_type));

            if ($src_id === "" || strlen($src_id) > 12) {
                $error = true;
            }
            // Accepted: "1900", "1900-" and "1900-1910".
            if (!preg_match('/^\d\d\d\d-?$/u', $year) && !preg_match('/^\d\d\d\d-\d\d\d\d$/u', $year)) {
                $error = true;
            }
            if (!preg_match('/^[1-5]$/u', $grade_max)) {
                $error = true;
            }
            $found_lang = $this->fetch_single_value(
                "SELECT COUNT(*) FROM languages WHERE lang_id = '" . $db->escape_string($lang) . "'"
            );
            if ($found_lang == 0) {
                $error = true;
            }
        }

        if ($error) {
            $keyword = ($primary_src == 1) ? "source" : "ref";
            $this->script .= "$keyword: " . implode("#", $source_ar) . "\n";
            $this->count_ar['src']['err']++;
            return;
        }

        $query = "INSERT INTO express_source (src_id, author, title, year, lang, grade_max, src_type, primary_src) VALUES ('"
            . $db->escape_string($src_id) . "', '" . $db->escape_string($author) . "', '"
            . $db->escape_string($title) . "', '" . $db->escape_string($year) . "', '"
            . $db->escape_string($lang) . "', " . (int) $grade_max . ", '"
            . $db->escape_string($src_type) . "', " . (int) $primary_src . ")";
        $db->send_query($query);
    }

    /**
     * Looks up a remedy id by its abbreviation, with or without trailing dot,
     * first in remedies and then in rem_alias.
     *
     * @param string $rem_short remedy abbreviation
     * @return mixed remedy id, or an empty value when the remedy is unknown
     */
    private function find_remedy_id($rem_short) {
        global $db;

        $rem_sql = $db->escape_string($rem_short);
        $rem_id = $this->fetch_single_value(
            "SELECT rem_id FROM remedies WHERE rem_short = '{$rem_sql}.' OR rem_short = '{$rem_sql}'"
        );
        if (empty($rem_id)) {
            $rem_id = $this->fetch_single_value(
                "SELECT rem_id FROM rem_alias WHERE alias_short = '{$rem_sql}.' OR alias_short = '{$rem_sql}'"
            );
            if (!empty($rem_id)) {
                $this->count_ar['rem']['alias']++;
            }
        }
        return $rem_id;
    }

    /**
     * Transfers the staged remedies of one symptom into sym_rem.
     *
     * For a symptom that is not flagged as duplicate each remedy is resolved
     * and the symptom-remedy relation is inserted, or - when it already
     * exists and belongs to the current user - updated after archiving the
     * old row. References are written to sym_rem_refs. Unknown remedies are
     * collected in $rem_error_ar. For a duplicated symptom nothing is written
     * to the database; the original remedy tokens are appended to $script.
     *
     * @param integer $sympt_id              id in express_symptoms
     * @param integer $sym_id                id in symptoms
     * @param string  $current_user          username of the importing user
     * @param integer $is_duplicated_symptom 1 when the symptom is a possible duplicate
     */
    public function insert_remedy($sympt_id, $sym_id, $current_user, $is_duplicated_symptom) {
        global $db;

        $duplicated_ar = array();
        $sym_id_sql = $db->escape_string($sym_id);
        $user_sql = $db->escape_string($current_user);
        $src_id_sql = $db->escape_string($this->src_id);

        $query = "SELECT remedy, wert, status, kuenzli, ref, nonclassic, backup FROM express_sym_rem WHERE sympt_id = '"
            . $db->escape_string($sympt_id) . "'";
        $result_sym_rem = $db->send_query($query);
        while ($row = $db->db_fetch_row($result_sym_rem)) {
            list($rem_short, $grade, $status_id, $kuenzli, $refs, $nonclassic, $backup) = $row;
            $class_key = ($nonclassic == 1) ? "nonclassic" : "classic";

            if ($is_duplicated_symptom == 1) {
                $duplicated_ar[$class_key][] = $backup;
                continue;
            }

            $rem_id = $this->find_remedy_id($rem_short);
            if (empty($rem_id)) {
                $symptom = $this->fetch_single_value("SELECT symptom FROM symptoms WHERE sym_id = '$sym_id_sql'");
                $this->rem_error_ar[$symptom][$class_key][$rem_short] = $backup;
                $this->count_ar['rem']['noex']++;
                continue;
            }
            $rem_id_sql = $db->escape_string($rem_id);

            $relation = $this->fetch_first_row(
                "SELECT rel_id, grade, status_id, kuenzli, username FROM sym_rem WHERE sym_id = '$sym_id_sql' AND rem_id = '$rem_id_sql' AND src_id = '$src_id_sql'"
            );
            $rel_id = ($relation !== null) ? $relation[0] : null;
            if (!empty($rel_id)) {
                // Only the owner of a relation may change it.
                if ($relation[4] == $current_user) {
                    $changes = array();
                    if ($relation[1] != $grade) {
                        $changes[] = "grade = " . (int) $grade;
                        $this->count_ar['grade_ch']++;
                    }
                    if ($relation[2] != $status_id) {
                        $changes[] = "status_id = " . (int) $status_id;
                        $this->count_ar['status_ch']++;
                    }
                    if ($relation[3] != $kuenzli) {
                        $changes[] = "kuenzli = " . (int) $kuenzli;
                        $this->count_ar['kuenzli_ch']++;
                    }
                    // Archive and update only when something really differs.
                    if (!empty($changes)) {
                        $where = "rel_id = " . (int) $rel_id;
                        $db->archive_table_row("sym_rem", $where, "express_update");
                        $db->send_query("UPDATE sym_rem SET " . implode(", ", $changes) . " WHERE " . $where);
                    }
                }
                $this->count_ar['symrem']['ex']++;
            } else {
                $query = "INSERT INTO sym_rem (sym_id, rem_id, grade, src_id, status_id, kuenzli, username) VALUES ('"
                    . $sym_id_sql . "', '" . $rem_id_sql . "', '" . (int) $grade . "', '" . $src_id_sql . "', '"
                    . (int) $status_id . "', '" . (int) $kuenzli . "', '" . $user_sql . "')";
                $db->send_query($query);
                $rel_id = $db->db_insert_id();
                $this->count_ar['symrem']['in']++;
                if ($nonclassic == 1) {
                    $this->count_ar['sym']['nonclassic_in']++;
                }
            }

            // explode() on an empty string yields array(""), which would
            // create a reference with an empty source id.
            $ref_ar = ($refs === null || $refs === "") ? array() : explode("#", $refs);
            if (empty($ref_ar) && $nonclassic == 1) {
                // A non-classic remedy without explicit reference refers to
                // the source itself.
                $ref_ar = array($this->src_id);
            }
            $rel_id_sql = $db->escape_string($rel_id);
            $nonclassic_sql = (int) $nonclassic;
            foreach ($ref_ar as $ref) {
                $ref_sql = $db->escape_string($ref);
                $ref_count = $this->fetch_single_value(
                    "SELECT COUNT(*) FROM sym_rem_refs WHERE rel_id = '$rel_id_sql' AND src_id = '$ref_sql' AND nonclassic = '$nonclassic_sql'"
                );
                if ($ref_count == 0) {
                    $db->send_query(
                        "INSERT INTO sym_rem_refs (rel_id,src_id,nonclassic,username) VALUES ('$rel_id_sql', '$ref_sql', '$nonclassic_sql', '$user_sql')"
                    );
                }
            }
        }
        $db->free_result($result_sym_rem);

        if (!empty($duplicated_ar)) {
            // Rebuild the remedy list in script syntax: classic remedies
            // first, the non-classic ones in curly braces behind them.
            $parts = empty($duplicated_ar['classic']) ? array() : $duplicated_ar['classic'];
            if (!empty($duplicated_ar['nonclassic'])) {
                $parts[] = "{" . implode(", ", $duplicated_ar['nonclassic']) . "}";
            }
            $this->script .= implode(", ", $parts) . "\n";
        }
    }

    /**
     * Searches the main rubric for symptoms similar to the given one.
     *
     * A stored symptom counts as similar when similar_text() reaches the
     * global $percentage_similarity or when similar_words() returns true.
     *
     * @param string $symptom             symptom to check
     * @param array  $symptom1_similar_ar receives $symptom once per hit
     * @param array  $symptom2_similar_ar receives the similar stored symptoms
     * @return string SELECT query for the similar symptoms, "" when none was found
     */
    public function build_select_duplicated_symptoms_query($symptom, &$symptom1_similar_ar, &$symptom2_similar_ar) {
        global $percentage_similarity, $db;

        $query_select_all = "SELECT `sym_id`, `symptom` FROM `symptoms` WHERE `rubric_id` = '"
            . $db->escape_string($this->rubric_id) . "' AND lang_id = '"
            . $db->escape_string($this->src_lang) . "'";
        $db->send_query($query_select_all);

        $conditions = array();
        $needle = strtolower($symptom);
        while ($symptom_row = $db->db_fetch_row()) {
            $candidate = strtolower($symptom_row[1]);
            similar_text($needle, $candidate, $percentage);
            if ($percentage >= $percentage_similarity || $this->similar_words($needle, $candidate) === true) {
                $conditions[] = "`sym_id` = '" . $symptom_row[0] . "'";
                $symptom1_similar_ar[] = $symptom;
                $symptom2_similar_ar[] = $symptom_row[1];
            }
        }
        $db->free_result();

        if (empty($conditions)) {
            return "";
        }
        return "SELECT `symptoms`.`sym_id`, `symptoms`.`symptom`, `rubrics__1`.`rubric_" . $this->src_lang
            . "` AS `rubrics__rubric_" . $this->src_lang . "__1`, `symptoms`.`username` FROM `symptoms` LEFT JOIN `main_rubrics` AS `rubrics__1` ON `symptoms`.`rubric_id` = `rubrics__1`.`rubric_id` WHERE "
            . implode(" OR ", $conditions);
    }

    /**
     * Compares two counts using the global similarity settings.
     *
     * @param integer $count
     * @param integer $compare_count
     * @return boolean in strict mode true when both counts are equal, otherwise
     *                 true when their difference is at most $percentage_similarity
     *                 percent of the larger count
     */
    private function counts_are_similar($count, $compare_count) {
        global $percentage_similarity, $similar_words_strict;

        if ($similar_words_strict) {
            return $count == $compare_count;
        }
        $larger = max($count, $compare_count);
        $smaller = min($count, $compare_count);
        return ($larger - $smaller) <= ($larger * $percentage_similarity / 100);
    }

    /**
     * Checks whether two strings consist of similar sets of significant words.
     *
     * @param string $string
     * @param string $compare_string
     * @return boolean
     */
    private function similar_words($string, $compare_string) {
        $words_ar = $this->build_clean_words_array($string);
        $compare_words_ar = $this->build_clean_words_array($compare_string);
        $word_total = count($words_ar);
        if (!$this->counts_are_similar($word_total, count($compare_words_ar))) {
            return false;
        }

        // Every distinct word is evaluated exactly once; counting repeated
        // words again for each occurrence would inflate the number of hits.
        $word_counts = array_count_values($words_ar);
        $compare_counts = array_count_values($compare_words_ar);
        $matched = 0;
        foreach ($word_counts as $word => $count) {
            $compare_count = isset($compare_counts[$word]) ? $compare_counts[$word] : 0;
            if ($this->counts_are_similar($count, $compare_count)) {
                $matched += $count;
            }
        }
        return $this->counts_are_similar($word_total, $matched);
    }

    /**
     * Extracts the significant words of a string.
     *
     * A word is kept when it has more than three characters and is not on the
     * blacklist, or when it is on the whitelist. The lists depend on the
     * language of the global $session.
     *
     * @param string $string
     * @return array list of words, possibly empty
     */
    private function build_clean_words_array($string) {
        global $session;

        $whitelist = array();
        $blacklist = array();
        $lang = is_object($session) ? $session->lang : "";
        if ($lang == "de") {
            $whitelist = array("rot");
            $blacklist = array("aber", "oder", "wenn", "sind", "einer", "eine", "eines", "beim", "durch", "nach", "während", "gegen");
        } elseif ($lang == "en") {
            $whitelist = array("red");
            $blacklist = array("also");
        }

        // str_word_count() breaks words at non-ASCII letters (e.g. umlauts),
        // therefore the words are matched as UTF-8 letter sequences. It is
        // only used as fallback when the string is not valid UTF-8.
        $found = array();
        if (preg_match_all("/\\p{L}[\\p{L}'-]*/u", (string) $string, $found) === false) {
            $words_ar = str_word_count((string) $string, 1);
        } else {
            $words_ar = $found[0];
        }

        $clean_words_ar = array();
        foreach ($words_ar as $word) {
            $length = preg_match_all('/./us', $word, $chars);
            if ($length === false) {
                $length = strlen($word);
            }
            if (($length > 3 && !in_array($word, $blacklist, true)) || in_array($word, $whitelist, true)) {
                $clean_words_ar[] = $word;
            }
        }
        return $clean_words_ar;
    }

    /**
     * Renders the rows of a possible-duplicates result as HTML table rows.
     *
     * @param mixed $result result handle passed to $db->db_fetch_row(); the
     *                      first four columns of each row are printed
     * @return string the <tr> elements
     */
    public function build_possible_duplication_table($result) {
        global $db, $enable_row_highlighting;

        $results_table = "";
        $tr_results_class = 'tr_results_1';

        while ($symptom_row = $db->db_fetch_row($result)) {
            // Alternate the row class; the first row gets tr_results_2.
            $tr_results_class = ($tr_results_class === 'tr_results_1') ? 'tr_results_2' : 'tr_results_1';

            if ($enable_row_highlighting === 1) {
                $results_table .= " <tr class='$tr_results_class' onmouseover=\"if (this.className!='tr_highlighted_onclick'){this.className='tr_highlighted_onmouseover'}\" onmouseout=\"if (this.className!='tr_highlighted_onclick'){this.className='$tr_results_class'}\" onclick=\"if (this.className == 'tr_highlighted_onclick'){ this.className='$tr_results_class';}else{ this.className='tr_highlighted_onclick';}\">";
            } else {
                $results_table .= " <tr class='" . $tr_results_class . "'>";
            }

            // Database text is escaped for HTML. double_encode is off so that
            // entities already stored in the data are not encoded twice.
            // NOTE(review): assumes the stored text is UTF-8 - confirm.
            $cells = array();
            for ($i = 0; $i < 4; $i++) {
                $cell = isset($symptom_row[$i]) ? $symptom_row[$i] : "";
                $cells[] = htmlspecialchars((string) $cell, ENT_QUOTES, 'UTF-8', false);
            }
            $results_table .= " <td>" . $cells[0] . "</td>\n <td>" . $cells[1] . "</td>\n <td>" . $cells[2] . "</td>\n <td>" . $cells[3] . "</td>\n";
            $results_table .= " </tr>\n";
        }

        return $results_table;
    }
}
861: