Query Engine

Grammar

  • <exp> ::= <term> | <term> AND <exp> | <term> OR <exp>
  • <term> ::= <factor> | NOT <factor>
  • <factor> ::= <user> | <tag> | ( expression )

Exp Classes

Exp

public abstract class Exp {

    public String show() {
        return null;
    }

    public List<PostObj> evaluate() {
        return null;
    }
}

UserExp

    public UserExp(String value, StoreEngine se) {
        this.value = value;
        this.list = se.queryUser(this.value);
    }

    @Override
    public List<PostObj> evaluate() {
        return this.list;
    }

TagExp

    public TagExp(String value, StoreEngine se) {
        this.value = value;
        this.list = se.queryTag(this.value);
    }

    @Override
    public List<PostObj> evaluate() {
        return this.list;
    }

AndExp

    public AndExp(Exp term, Exp exp) {
        this.term = term;
        this.exp = exp;
    }

    /**
     * in term and exp
     * set + retain for intersect
     *
     * @return list of post obj
     */
    @Override
    public List<PostObj> evaluate() {
        // AND
        Set<PostObj> s1 = new HashSet<>(term.evaluate());
        s1.retainAll(exp.evaluate());
        return new ArrayList<>(s1);
    }

OrExp

    public OrExp(Exp term, Exp exp) {
        this.term = term;
        this.exp = exp;
    }

    /**
     * in term or exp
     * set + addall for union
     *
     * @return list of post obj
     */
    @Override
    public List<PostObj> evaluate() {
        // OR
        Set<PostObj> s1 = new HashSet<>(term.evaluate());
        s1.addAll(exp.evaluate());
        return new ArrayList<>(s1);
    }

NotExp

    public NotExp(Exp factor, StoreEngine se) {
        this.factor = factor;
        this.list = se.queryPost();
    }

    /**
     * not in exp
     * world - set for not
     *
     * @return list of post obj
     */
    @Override
    public List<PostObj> evaluate() {
        // NOT
        Set<PostObj> s1 = new HashSet<>(this.list);
        s1.removeAll(factor.evaluate());
        return new ArrayList<>(s1);
    }

Tokenizer.java

public class Tokenizer {
    private String buffer;          // String to be transformed into tokens each time next() is called.
    private Token currentToken;     // The current token. The next token is extracted when next() is called.

    /**
     * Tokenizer class constructor
     * The constructor extracts the first token and save it to currentToken
     * **** please do not modify this part ****
     */
    public Tokenizer(String text) {
        buffer = text;          // save input text (string)
        next();                 // extracts the first token.
    }

    /**
     * This function will find and extract a next token from {@code _buffer} and
     * save the token to {@code currentToken}.
     */
    public void next() {
        buffer = buffer.trim();     // remove whitespace

        if (buffer.isEmpty()) {
            currentToken = null;    // if there's no string left, set currentToken null and return
            return;
        }

        /*
        To help you, we have already written the first few steps in the tokenization process.
        The rest will follow a similar format.
         */
        char firstChar = buffer.charAt(0);
        if (firstChar == '&')
            currentToken = new Token("&", Token.Type.AND);
        if (firstChar == '|')
            currentToken = new Token("|", Token.Type.OR);
        if (firstChar == '!')
            currentToken = new Token("!", Token.Type.NOT);
        if (firstChar == '(')
            currentToken = new Token("(", Token.Type.LBRA);
        if (firstChar == ')')
            currentToken = new Token(")", Token.Type.RBRA);
        if (firstChar == '#') {
            int i = 1;
            while (i < buffer.length()
                    && (Character.isAlphabetic(buffer.charAt(i)) || Character.isDigit(buffer.charAt(i)))) {
                i++;
            }
            currentToken = new Token(buffer.substring(0, i), Token.Type.TAG);
        }
        if (firstChar == '@') {
            int i = 1;
            while (i < buffer.length()
                    && (Character.isAlphabetic(buffer.charAt(i)) || Character.isDigit(buffer.charAt(i)))) {
                i++;
            }
            currentToken = new Token(buffer.substring(0, i), Token.Type.USER);
        }
        if (firstChar != '&'
                && firstChar != '|'
                && firstChar != '!'
                && firstChar != '('
                && firstChar != ')'
                && firstChar != '@'
                && firstChar != '#')
            throw new Token.IllegalTokenException("IllegalToken");

        // Remove the extracted token from buffer
        int tokenLen = currentToken.getToken().length();
        buffer = buffer.substring(tokenLen);
    }

    /**
     * Returns the current token extracted by {@code next()}
     * **** please do not modify this part ****
     *
     * @return type: Token
     */
    public Token current() {
        return currentToken;
    }

    /**
     * Check whether there still exists another tokens in the buffer or not
     * **** please do not modify this part ****
     *
     * @return type: boolean
     */
    public boolean hasNext() {
        return currentToken != null;
    }
}

Parser.java

public class Parser {
    // The tokenizer (class field) this parser will use.
    Tokenizer tokenizer;
    StoreEngine storeEngine;
    int numLBRA = 0;

    /**
     * Parser class constructor
     * Simply sets the tokenizer field.
     */
    public Parser(Tokenizer tokenizer, StoreEngine storeEngine) {
        this.tokenizer = tokenizer;
        this.storeEngine = storeEngine;
    }

    public List<PostObj> evaluate() {
        return this.parseExp().evaluate();
    }

    /**
     * for debug use
     */
    public void printTokenizer() {
        while (tokenizer.hasNext()) {
            System.out.println("TYPE " + tokenizer.current().getType());
            System.out.println("TOKEN " + tokenizer.current().getToken());
            tokenizer.next();
        }
    }

    /**
     * Adheres to the grammar rule:
     * <exp> ::= <term> | <term> AND <exp> | <term> OR <exp>
     *
     * @return type: Exp.
     */
    public Exp parseExp() {

        Exp term = parseTerm();

        if (tokenizer.hasNext()) {
            if (tokenizer.current().getType() == Token.Type.AND) {
                tokenizer.next();
                return new AndExp(term, parseExp());
            } else if (tokenizer.current().getType() == Token.Type.OR) {
                tokenizer.next();
                return new OrExp(term, parseExp());
            } else if (tokenizer.current().getType() == Token.Type.RBRA && numLBRA > 0) {
                return term;
            } else {
                throw new IllegalProductionException("Illegal Exp");
            }
        } else {
            return term;
        }

    }

    /**
     * Adheres to the grammar rule:
     * <term>   ::=  <factor> | NOT <factor>
     *
     * @return type: Exp.
     */
    public Exp parseTerm() {
        if (tokenizer.hasNext()) {
            if (tokenizer.current().getType() == Token.Type.NOT) {
                tokenizer.next();
                return new NotExp(parseFactor(), this.storeEngine);
            } else {
                return parseFactor();
            }
        } else {
            throw new IllegalProductionException("Illegal Term");
        }
    }

    /**
     * Adheres to the grammar rule:
     * <factor> ::= <user> | <tag> | ( expression )
     *
     * @return type: Exp.
     */
    public Exp parseFactor() {

        if (tokenizer.hasNext() && tokenizer.current().getType() == Token.Type.USER) {
            String user = tokenizer.current().getToken();
            tokenizer.next();
            return new UserExp(user, this.storeEngine);
        } else if (tokenizer.hasNext() && tokenizer.current().getType() == Token.Type.TAG) {
            String tag = tokenizer.current().getToken();
            tokenizer.next();
            return new TagExp(tag, this.storeEngine);
        } else if (tokenizer.hasNext() && tokenizer.current().getType() == Token.Type.LBRA) {
            numLBRA++;
            tokenizer.next();
            if (tokenizer.hasNext()) {
                Exp exp = parseExp();
                if (tokenizer.hasNext() && tokenizer.current().getType() == Token.Type.RBRA) {
                    numLBRA--;
                    tokenizer.next();
                    return exp;
                } else {
                    throw new IllegalProductionException("Illegal Factor");
                }
            } else {
                throw new IllegalProductionException("Illegal Factor");
            }
        } else {
            throw new IllegalProductionException("Illegal Factor");
        }

    }

    /**
     * The following exception should be thrown if the parse is faced with series of tokens that do not
     * correlate with any possible production rule.
     */
    public static class IllegalProductionException extends IllegalArgumentException {
        public IllegalProductionException(String errorMessage) {
            super(errorMessage);
        }
    }
}

Query Engine

public class QueryEngine {

    StoreEngine storeEngine;
    Parser parser;

    public QueryEngine(List<PostObj> postObjList) {
        this.storeEngine = new StoreEngine(postObjList);
    }

    public QueryEngine(String jsonStr) {
        this.storeEngine = new StoreEngine(jsonStr);
    }

    /**
     * given query, return matched post obj
     *
     * @param query the query
     * @return list of post obj
     */
    public List<PostObj> queryObject(String query) {
        this.parser = new Parser(new Tokenizer(query), storeEngine);
        return this.parser.evaluate();
    }

    /**
     * given query, return matched text
     *
     * @param query the query
     * @return list of text
     */
    public List<String> queryText(String query) {
        this.parser = new Parser(new Tokenizer(query), storeEngine);
        List<PostObj> list = this.parser.evaluate();
        List<String> returnedList = new ArrayList<>();
        for (PostObj postObj : list) {
            returnedList.add(postObj.text);
        }
        return returnedList;
    }
}

Example Usage:

    private void searchExample() {
        try {
            InputStream is = getAssets().open("allpostdata.json");
            int size = is.available();
            byte[] buffer = new byte[size];
            is.read(buffer);
            is.close();
            String json = new String(buffer, "UTF-8");
            QueryEngine queryEngine = new QueryEngine(json);
            System.out.println("==============================================================");
            List<String> outputs = queryEngine.queryText("#APeoplesJourney");
//            outputs = queryEngine.queryText("#NoTag");
//            outputs = queryEngine.queryText("#NoTag & @acommonname");
//            outputs = queryEngine.queryText("#NoTag | @acommonname");
//            outputs = queryEngine.queryText("!#NoTag");
            outputs = queryEngine.queryText("#APeoplesJourney | (#NoTag & !@acommonname)");
//            outputs = queryEngine.queryText("(#NoTag & !@acommonname) | #APeoplesJourney");
            System.out.println(outputs.size());
            System.out.println("==============================================================");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }