mirror of https://github.com/oxen-io/session-ios
				
				
				
			
			You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			252 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Swift
		
	
			
		
		
	
	
			252 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Swift
		
	
| //
 | |
| //  Copyright (c) 2019 Open Whisper Systems. All rights reserved.
 | |
| //
 | |
| 
 | |
| import Foundation
 | |
| 
 | |
/// Produces the searchable index text for objects of type `T`.
///
/// The block supplied at construction extracts raw text from an item; the
/// indexer then runs that text through `FullTextSearchFinder.normalize(text:)`
/// before handing it back.
public class SearchIndexer<T> {

    /// Extracts the raw (un-normalized) text to index for an item.
    private let extractText: (T, YapDatabaseReadTransaction) -> String

    /// - Parameter indexBlock: Returns the raw text that should be indexed for
    ///   an item, reading from the given transaction if it needs to.
    public init(indexBlock: @escaping (T, YapDatabaseReadTransaction) -> String) {
        extractText = indexBlock
    }

    /// Returns the normalized index text for `item`.
    ///
    /// - Parameters:
    ///   - item: The object to index.
    ///   - transaction: Read transaction forwarded to the extraction block.
    /// - Returns: The extracted text after normalization.
    public func index(_ item: T, transaction: YapDatabaseReadTransaction) -> String {
        let rawText = extractText(item, transaction)
        return FullTextSearchFinder.normalize(text: rawText)
    }
}
 | |
| 
 | |
/// Builds full-text-search queries, normalizes text for indexing, and
/// registers/queries the YapDatabase full-text-search extension that indexes
/// group threads, contact threads and messages.
@objc
public class FullTextSearchFinder: NSObject {

    // MARK: - Dependencies

    private static var tsAccountManager: TSAccountManager {
        return TSAccountManager.sharedInstance()
    }

    // MARK: - Querying

    /// Converts user-entered search text into a full-text-search match expression.
    ///
    /// We want to match by prefix for "search as you type" functionality.
    /// SQLite does not support suffix or contains matches.
    ///
    /// The text is normalized, split into non-numeric terms, and all of its
    /// digits are combined into one additional numeric term. Terms are
    /// de-duplicated, sorted, quoted and given a trailing `*` so each matches
    /// by prefix. For example, normalized text `alice 555 1234` yields
    /// `"5551234"* "alice"*`.
    ///
    /// - Parameter searchText: Raw text typed by the user.
    /// - Returns: The query string; empty if no term survives normalization.
    public class func query(searchText: String) -> String {
        // 1. Normalize the search text.
        //
        // TODO: We could arguably convert to lowercase since the search
        // is case-insensitive.
        let normalizedSearchText = FullTextSearchFinder.normalize(text: searchText)

        // 2. In a single pass, blank out every digit in the non-numeric copy
        //    of the text and collect the digits into a numeric-only term.
        let decimalDigits = CharacterSet.decimalDigits
        var nonNumericScalars = String.UnicodeScalarView()
        var digitScalars = String.UnicodeScalarView()
        for scalar in normalizedSearchText.unicodeScalars {
            if decimalDigits.contains(scalar) {
                nonNumericScalars.append(" ")
                digitScalars.append(scalar)
            } else {
                nonNumericScalars.append(scalar)
            }
        }

        // 3. Split the non-numeric text into query terms (or tokens) and add
        //    the numeric-only term. Using a set drops duplicate terms, which
        //    are redundant.
        var uniqueTerms = Set(String(nonNumericScalars).split(separator: " ").map(String.init))
        uniqueTerms.insert(String(digitScalars))

        // 4. Ignore empty terms (the numeric term is empty when the text has
        //    no digits), then sort. Sorting makes the output of this method
        //    deterministic and easier to test, and the order won't affect the
        //    search results.
        let sortedTerms = uniqueTerms.filter { !$0.isEmpty }.sorted()

        // 5. Allow partial match of each term.
        //
        // Note that we use double-quotes to enclose each search term.
        // Quoted search terms can include a few more characters than
        // "bareword" (non-quoted) search terms.  This shouldn't matter,
        // since we're filtering all of the affected characters, but
        // quoting protects us from any bugs in that logic.
        let prefixTerms = sortedTerms.map { "\"\($0)\"*" }

        // 6. Join terms into query string.
        return prefixTerms.joined(separator: " ")
    }

    /// Runs a full-text search and hands each hit to `block`.
    ///
    /// Does nothing if the full-text-search extension is not available on the
    /// transaction, or if `searchText` contains no searchable terms.
    ///
    /// - Parameters:
    ///   - searchText: Raw text typed by the user; converted with `query(searchText:)`.
    ///   - maxSearchResults: Upper bound on the number of hits delivered.
    ///     Defaults to 500 when `nil`.
    ///   - transaction: Read transaction used to reach the extension.
    ///   - block: Called once per hit with the matched object and a snippet
    ///     of the matching text.
    public func enumerateObjects(searchText: String, maxSearchResults: Int? = nil, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) {
        guard let ftsTransaction = self.ext(transaction: transaction) else {
            return
        }

        let query = FullTextSearchFinder.query(searchText: searchText)
        guard !query.isEmpty else {
            // Nothing searchable survived normalization, so there is nothing
            // to match; skip handing an empty match expression to the
            // extension.
            return
        }

        let resultLimit = maxSearchResults ?? 500
        var deliveredCount = 0

        // Snippets carry no highlight markers and span at most 5 tokens.
        let snippetOptions = YapDatabaseFullTextSearchSnippetOptions()
        snippetOptions.startMatchText = ""
        snippetOptions.endMatchText = ""
        snippetOptions.numberOfTokens = 5

        ftsTransaction.enumerateKeysAndObjects(matching: query, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in
            // Covers a non-positive limit, where no result may be delivered.
            guard deliveredCount < resultLimit else {
                stop.pointee = true
                return
            }
            deliveredCount += 1

            block(object, snippet)

            // Stop as soon as the limit is reached rather than waiting for
            // one more row to be produced.
            if deliveredCount >= resultLimit {
                stop.pointee = true
            }
        }
    }

    // MARK: - Normalization

    /// Characters stripped from text before it is indexed or searched.
    fileprivate static let charactersToRemove: CharacterSet = {
        // * We want to strip punctuation - and our definition of "punctuation"
        //   is broader than `CharacterSet.punctuationCharacters`.
        // * FTS should be robust to (i.e. ignore) illegal and control characters,
        //   but it's safer if we filter them ourselves as well.
        var charactersToFilter = CharacterSet.punctuationCharacters
        charactersToFilter.formUnion(CharacterSet.illegalCharacters)
        charactersToFilter.formUnion(CharacterSet.controlCharacters)

        // We want to strip all ASCII characters except:
        // * Letters a-z, A-Z
        // * Numerals 0-9
        // * Whitespace
        //
        // 0x0 and 0x80 are valid Unicode scalar values, so the force-unwraps
        // cannot fail.
        var asciiToFilter = CharacterSet(charactersIn: UnicodeScalar(0x0)!..<UnicodeScalar(0x80)!)
        assert(!asciiToFilter.contains(UnicodeScalar(0x80)!))
        asciiToFilter.subtract(CharacterSet.alphanumerics)
        asciiToFilter.subtract(CharacterSet.whitespacesAndNewlines)
        charactersToFilter.formUnion(asciiToFilter)

        return charactersToFilter
    }()

    /// Normalizes text for indexing and for building queries.
    ///
    /// This is a hot method, especially while running large migrations.
    /// Changes to it should go through a profiler to make sure large migrations
    /// aren't adversely affected.
    ///
    /// - Parameter text: Text to normalize.
    /// - Returns: `text` with the characters in `charactersToRemove` removed,
    ///   whitespace and newline characters replaced by a space, and leading
    ///   and trailing whitespace trimmed.
    @objc
    public class func normalize(text: String) -> String {
        // 1. Filter out invalid characters.
        let filtered = text.removeCharacters(characterSet: charactersToRemove)

        // 2. Simplify whitespace.
        let simplified = filtered.replaceCharacters(characterSet: .whitespacesAndNewlines,
                                                    replacement: " ")

        // 3. Strip leading & trailing whitespace last, since we may replace
        // filtered characters with whitespace.
        return simplified.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - Index Building

    /// Indexes a group thread by its group name followed by the index text of
    /// each of its members.
    private static let groupThreadIndexer: SearchIndexer<TSGroupThread> = SearchIndexer { (groupThread: TSGroupThread, transaction: YapDatabaseReadTransaction) in
        let groupName = groupThread.groupModel.groupName ?? ""

        let memberStrings = groupThread.groupModel.groupMemberIds.map { recipientId in
            recipientIndexer.index(recipientId, transaction: transaction)
        }.joined(separator: " ")

        return "\(groupName) \(memberStrings)"
    }

    /// Indexes a contact thread by its recipient's index text, plus the
    /// localized "Note to Self" label when the thread is with the local user.
    private static let contactThreadIndexer: SearchIndexer<TSContactThread> = SearchIndexer { (contactThread: TSContactThread, transaction: YapDatabaseReadTransaction) in
        let recipientId = contactThread.contactSessionID()
        var result = recipientIndexer.index(recipientId, transaction: transaction)

        if IsNoteToSelfEnabled(),
            let localNumber = tsAccountManager.storedOrCachedLocalNumber(transaction),
            localNumber == recipientId {

            let noteToSelfLabel = NSLocalizedString("NOTE_TO_SELF", comment: "Label for 1:1 conversation with yourself.")
            result += " \(noteToSelfLabel)"
        }

        return result
    }

    /// Indexes a recipient by its id, plus the stored contact's name and
    /// nickname when available.
    private static let recipientIndexer: SearchIndexer<String> = SearchIndexer { (recipientId: String, transaction: YapDatabaseReadTransaction) in
        var result = "\(recipientId)"
        // NOTE(review): the contact is looked up through `Storage.shared`
        // rather than the supplied transaction - confirm this is intended.
        if let contact = Storage.shared.getContact(with: recipientId) {
            if let name = contact.name { result += " \(name)" }
            if let nickname = contact.nickname { result += " \(nickname)" }
        }
        return result
    }

    /// Indexes a message by its body text; messages without one index as "".
    private static let messageIndexer: SearchIndexer<TSMessage> = SearchIndexer { (message: TSMessage, transaction: YapDatabaseReadTransaction) in
        return message.bodyText(with: transaction) ?? ""
    }

    /// Returns the text to index for `object`, or `nil` if the object should
    /// not be indexed.
    private class func indexContent(object: Any, transaction: YapDatabaseReadTransaction) -> String? {
        switch object {
        case let groupThread as TSGroupThread:
            return self.groupThreadIndexer.index(groupThread, transaction: transaction)
        case let contactThread as TSContactThread:
            guard contactThread.shouldBeVisible else {
                // If we've never sent/received a message in a TSContactThread,
                // then we want it to appear in the "Other Contacts" section rather
                // than in the "Conversations" section.
                return nil
            }
            return self.contactThreadIndexer.index(contactThread, transaction: transaction)
        case let message as TSMessage:
            return self.messageIndexer.index(message, transaction: transaction)
        default:
            return nil
        }
    }

    // MARK: - Extension Registration

    private static let dbExtensionName: String = "FullTextSearchFinderExtension"

    /// Returns the full-text-search extension transaction, or `nil` if the
    /// extension cannot be obtained from `transaction`.
    private func ext(transaction: YapDatabaseReadTransaction) -> YapDatabaseFullTextSearchTransaction? {
        return transaction.ext(FullTextSearchFinder.dbExtensionName) as? YapDatabaseFullTextSearchTransaction
    }

    /// Asynchronously registers the full-text-search extension with `storage`.
    @objc
    public class func asyncRegisterDatabaseExtension(storage: OWSStorage) {
        storage.asyncRegister(dbExtensionConfig, withName: dbExtensionName)
    }

    /// Registers the full-text-search extension with `storage` unless it is
    /// already registered.
    ///
    /// Only for testing.
    public class func ensureDatabaseExtensionRegistered(storage: OWSStorage) {
        guard storage.registeredExtension(dbExtensionName) == nil else {
            return
        }

        storage.register(dbExtensionConfig, withName: dbExtensionName)
    }

    /// Configuration of the full-text-search extension: a single content
    /// column populated by `indexContent(object:transaction:)`.
    private class var dbExtensionConfig: YapDatabaseFullTextSearch {
        let contentColumnName = "content"

        let handler = YapDatabaseFullTextSearchHandler.withObjectBlock { (transaction: YapDatabaseReadTransaction, dict: NSMutableDictionary, _: String, _: String, object: Any) in
            dict[contentColumnName] = indexContent(object: object, transaction: transaction)
        }

        // TODO: update search index on contact name changes?

        // The column list uses the same constant the handler writes to, so
        // the two cannot drift apart.
        return YapDatabaseFullTextSearch(columnNames: [contentColumnName],
                                         options: nil,
                                         handler: handler,
                                         ftsVersion: YapDatabaseFullTextSearchFTS5Version,
                                         versionTag: "2")
    }
}
 |