#!/usr/bin/python # -* encoding: utf-8 *- # Copyright (C) 2008 Jonas Maurus # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . from mod_python import apache from mod_python import util from datetime import datetime import simplejson import MySQLdb import string import re MAX_RESULTS = 8 def _get_date(date): if isinstance(date, datetime): return date.strftime('%m/%d/%Y %H:%M') else: return '' """ this expects the columns to be in a certain order, namely: url, description, notes, posted""" def _convert_row_to_result(row, foundby): return {'url': row[1], 'desc': row[2], 'notes': row[3], 'posted': _get_date(row[4]), 'foundby': foundby} def process_query(query, conn): global MAX_RESULTS st = conn.cursor() query_parts = re.split(',+\s+|,+|\s+', query) if len(query_parts) == 0: return(None, None) # matches[] contains the autocomplete suggestions matches = [] # results[] contains the actual posts that # matched one or more tags results = [] # text_results[] contains the posts that # matched a fulltext query text_results = [] # 1. find all parts of the query that match # existing tags tags = [] tag_ids = [] unmatched = [] for part in query_parts: st.execute("""SELECT id FROM mnet_delicious_tags WHERE tag = %s""", (part.encode('utf-8').replace('%', '%%'),)) if st.rowcount > 0: row = st.fetchone() tag_ids.append(row[0]) tags.append(part.encode('utf-8')) else: unmatched.append(part) # 2. match the last part of the query against # possible tags if the user didn't put a # delimiter at the end if query_parts[-1] != '': st.execute("""SELECT count(post_id) AS count, tag_id, t.tag FROM mnet_delicious_posts_tags, mnet_delicious_tags t WHERE t.id=tag_id AND tag LIKE %s GROUP BY tag_id ORDER BY count DESC""", ((query_parts[-1].replace('%', '%%') + '%').encode('utf-8'),)) if st.rowcount > 0: for row in st: tag = row[2] completion = tag[len(query_parts[-1]):] # don't add duplicates, this can happen if a tag matches in 1. if not (tag in tags): tags.append(tag) # if the user input already matched a full tag in step 1. don't add # a second tag, because step 3 will only match posts that match # ALL tags if not tag_ids: tag_ids.append(row[1]) matches.append({'part': query + completion}) # 3. get all results that contain the identified tags already_found = [] if len(tag_ids) > 0: sql = 'SELECT p.id, p.url, p.description, p.notes, p.posted FROM ' + \ ' mnet_delicious_posts p WHERE ' connector = '' for tag_id in tag_ids: sql = sql + connector + \ ' EXISTS(SELECT * FROM mnet_delicious_posts_tags WHERE ' + \ ' tag_id=' + str(tag_id) + ' AND post_id=p.id) ' connector = ' AND ' sql = sql + ' ORDER BY p.posted DESC' st.execute(sql) for row in st: results.append(_convert_row_to_result(row, 'tag')) already_found.append(row[0]) if len(results) == MAX_RESULTS: break tag_results = len(results) # 4. if we don't have MAX_RESULTS results, yet, we now do a fulltext search # against the posts' descriptions fulltext_query = string.join(query_parts, ' ').encode('utf-8') fulltext_query = fulltext_query.replace('%', '%%') if len(results) < MAX_RESULTS: st.execute("""SELECT id, url, description, notes, posted, MATCH(notes) AGAINST(%s)+MATCH(description) AGAINST(%s) AS relevance FROM mnet_delicious_posts WHERE MATCH(notes) AGAINST(%s) OR MATCH(description) AGAINST(%s) ORDER BY relevance DESC""", (fulltext_query,fulltext_query,fulltext_query,fulltext_query,)) for row in st: if not (row[0] in already_found): # prevent dupes text_results.append(_convert_row_to_result(row, 'fulltext')) while (len(results) < MAX_RESULTS and len(text_results) > 0): results.append(text_results.pop(0)) # now assemble the resulting object num_text_results = MAX_RESULTS if len(text_results) < MAX_RESULTS: num_text_results = len(text_results) return {'tag_results': tag_results, 'fulltext_results': num_text_results, 'query': query, 'fulltext_query': fulltext_query, 'matches': matches, 'results': results, 'recognized_tags': tags} def get_newest_posts(conn): global MAX_RESULTS st = conn.cursor() st.execute("""SELECT id, url, description, notes, posted FROM mnet_delicious_posts ORDER BY posted DESC LIMIT %s""" % (MAX_RESULTS,)) results = [] for row in st: results.append(_convert_row_to_result(row, 'date')) return {'tag_results': st.rowcount, 'fulltext_results': 0, 'query': '', 'fulltext_query': '', 'matches': '', 'results': results, 'recognized_tags': []} def handler(req): # prepare response headers req.content_type = "text/javascript; charset=utf-8" req.headers_out['Pragma'] = 'no-cache' req.headers_out['Cache-Control'] = 'no-cache' req.headers_out['Expires'] = '-1' # initialize database connection conn = MySQLdb.connect(user='USER', db='DB', passwd='PASSWORD') # get request data data = util.FieldStorage(req, keep_blank_values=1) if data.has_key('query'): ret = process_query(data['query'], conn) req.write(simplejson.dumps(ret)) else: ret = get_newest_posts(conn) req.write(simplejson.dumps(ret)) conn.close() return apache.OK