/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "slicer/dex_bytecode.h"
#include "slicer/common.h"

#include <array>
#include <iomanip>
#include <sstream>

namespace dex {

// Returns the opcode of an instruction given its first 16-bit code unit.
// The opcode is taken from the low 8 bits of that unit.
Opcode OpcodeFromBytecode(u2 bytecode) {
  Opcode opcode = Opcode(bytecode & 0xff);
  return opcode;
}

// Table that maps each opcode to its descriptor: verify flags, instruction
// format, index type and opcode flags (in that order). The entries are
// generated from DEX_INSTRUCTION_LIST, one per listed instruction.
static constexpr std::array<InstructionDescriptor, kNumPackedOpcodes>
    gInstructionDescriptors = {{
#define INSTRUCTION_DESCR(o, c, p, format, index, flags, e, vflags) \
  {                                                                 \
      vflags,                                                       \
      format,                                                       \
      index,                                                        \
      flags,                                                        \
  },
#include "export/slicer/dex_instruction_list.h"
        DEX_INSTRUCTION_LIST(INSTRUCTION_DESCR)
#undef DEX_INSTRUCTION_LIST
#undef INSTRUCTION_DESCR
    }};

// Returns the index type recorded for |opcode| in the descriptor table.
InstructionIndexType GetIndexTypeFromOpcode(Opcode opcode) {
  return gInstructionDescriptors[opcode].index_type;
}

// Returns the instruction format recorded for |opcode| in the descriptor
// table.
InstructionFormat GetFormatFromOpcode(Opcode opcode) {
  return gInstructionDescriptors[opcode].format;
}

// Returns the opcode flags recorded for |opcode| in the descriptor table.
OpcodeFlags GetFlagsFromOpcode(Opcode opcode) {
  return gInstructionDescriptors[opcode].flags;
}

// Returns the verify flags recorded for |opcode| in the descriptor table.
VerifyFlags GetVerifyFlagsFromOpcode(Opcode opcode) {
  return gInstructionDescriptors[opcode].verify_flags;
}

// Returns the width, in 16-bit code units, of an instruction encoded with
// the given format. A format value that is not handled below is reported
// through SLICER_FATAL instead of flowing off the end of the function
// (which would be undefined behavior).
size_t GetWidthFromFormat(InstructionFormat format) {
  switch (format) {
    case k10x:
    case k12x:
    case k11n:
    case k11x:
    case k10t:
      return 1;
    case k20t:
    case k20bc:
    case k21c:
    case k22x:
    case k21s:
    case k21t:
    case k21h:
    case k23x:
    case k22b:
    case k22s:
    case k22t:
    case k22c:
    case k22cs:
      return 2;
    case k30t:
    case k31t:
    case k31c:
    case k32x:
    case k31i:
    case k35c:
    case k35ms:
    case k35mi:
    case k3rc:
    case k3rms:
    case k3rmi:
      return 3;
    case k45cc:
    case k4rcc:
      return 4;
    case k51l:
      return 5;
  }

  // Same reporting pattern as DecodeInstruction() uses for a bad format.
  std::stringstream ss;
  ss << "Can't compute the width of unexpected format " << format;
  SLICER_FATAL(ss.str());
}

// Returns the width, in 16-bit code units, of the instruction or payload
// that starts at |bytecode|.
//
// The first code unit is compared against the packed-switch, sparse-switch
// and array-data payload signatures; those payloads have a variable width
// computed from the counts stored in the following code units. Anything
// else is treated as a regular instruction whose width is implied by the
// format of its opcode.
size_t GetWidthFromBytecode(const u2* bytecode) {
  size_t width = 0;
  if (*bytecode == kPackedSwitchSignature) {
    // 4 fixed code units plus 2 code units per entry (count in bytecode[1]).
    width = 4 + bytecode[1] * 2;
  } else if (*bytecode == kSparseSwitchSignature) {
    // 2 fixed code units plus 4 code units per entry (count in bytecode[1]).
    width = 2 + bytecode[1] * 4;
  } else if (*bytecode == kArrayDataSignature) {
    u2 elemWidth = bytecode[1];
    u4 len = bytecode[2] | (((u4)bytecode[3]) << 16);
    // The plus 1 is to round up for odd size and width.
    // The byte count is computed in size_t rather than u4 so that a large
    // element count does not wrap around 32 bits where size_t is wider.
    width = 4 + (size_t(elemWidth) * len + 1) / 2;
  } else {
    width = GetWidthFromFormat(
        GetFormatFromOpcode(OpcodeFromBytecode(bytecode[0])));
  }
  return width;
}

// Dalvik opcode names.
static constexpr std::array<const char*, kNumPackedOpcodes> gOpcodeNames = {
#define INSTRUCTION_NAME(o, c, pname, f, i, a, e, v) pname,
#include "export/slicer/dex_instruction_list.h"
    DEX_INSTRUCTION_LIST(INSTRUCTION_NAME)
#undef DEX_INSTRUCTION_LIST
#undef INSTRUCTION_NAME
};

// Returns the name recorded for |opcode| in the opcode name table.
const char* GetOpcodeName(Opcode opcode) { return gOpcodeNames[opcode]; }

// Helpers for DecodeInstruction()

// Bits 8..11 of the first code unit (the "A" nibble).
static u4 InstA(u2 inst) { return (inst >> 8) & 0x0f; }

// Bits 12..15 of the first code unit (the "B" nibble).
static u4 InstB(u2 inst) { return inst >> 12; }

// Bits 8..15 of the first code unit (the "AA" byte).
static u4 InstAA(u2 inst) { return inst >> 8; }

// Helper for DecodeInstruction()
// Reads a 32-bit value from two consecutive code units, low half first.
static u4 FetchU4(const u2* ptr) { return ptr[0] | (u4(ptr[1]) << 16); }

// Helper for DecodeInstruction()
// Reads a 64-bit value from four consecutive code units, low half first.
static u8 FetchU8(const u2* ptr) {
  return FetchU4(ptr) | (u8(FetchU4(ptr + 2)) << 32);
}

// Decode a Dalvik bytecode and extract the individual fields
//
// |bytecode| points at the first code unit of the instruction. The format
// is looked up from the opcode, and the operand fields defined by that
// format are copied into the returned Instruction (fields that the format
// does not define stay zero-initialized). An unhandled format is reported
// through SLICER_FATAL.
Instruction DecodeInstruction(const u2* bytecode) {
  u2 inst = bytecode[0];
  Opcode opcode = OpcodeFromBytecode(inst);
  InstructionFormat format = GetFormatFromOpcode(opcode);

  Instruction dec = {};
  dec.opcode = opcode;

  switch (format) {
    case k10x:  // op
      return dec;
    case k12x:  // op vA, vB
      dec.vA = InstA(inst);
      dec.vB = InstB(inst);
      return dec;
    case k11n:  // op vA, #+B
      dec.vA = InstA(inst);
      dec.vB = s4(InstB(inst) << 28) >> 28;  // sign extend 4-bit value
      return dec;
    case k11x:  // op vAA
      dec.vA = InstAA(inst);
      return dec;
    case k10t:                    // op +AA
      dec.vA = s1(InstAA(inst));  // sign-extend 8-bit value
      return dec;
    case k20t:                   // op +AAAA
      dec.vA = s2(bytecode[1]);  // sign-extend 16-bit value
      return dec;
    case k20bc:  // [opt] op AA, thing@BBBB
    case k21c:   // op vAA, thing@BBBB
    case k22x:   // op vAA, vBBBB
      dec.vA = InstAA(inst);
      dec.vB = bytecode[1];
      return dec;
    case k21s:  // op vAA, #+BBBB
    case k21t:  // op vAA, +BBBB
      dec.vA = InstAA(inst);
      dec.vB = s2(bytecode[1]);  // sign-extend 16-bit value
      return dec;
    case k21h:  // op vAA, #+BBBB0000[00000000]
      dec.vA = InstAA(inst);
      // The value should be treated as right-zero-extended, but we don't
      // actually do that here. Among other things, we don't know if it's
      // the top bits of a 32- or 64-bit value.
      dec.vB = bytecode[1];
      return dec;
    case k23x:  // op vAA, vBB, vCC
      dec.vA = InstAA(inst);
      dec.vB = bytecode[1] & 0xff;
      dec.vC = bytecode[1] >> 8;
      return dec;
    case k22b:  // op vAA, vBB, #+CC
      dec.vA = InstAA(inst);
      dec.vB = bytecode[1] & 0xff;
      dec.vC = s1(bytecode[1] >> 8);  // sign-extend 8-bit value
      return dec;
    case k22s:  // op vA, vB, #+CCCC
    case k22t:  // op vA, vB, +CCCC
      dec.vA = InstA(inst);
      dec.vB = InstB(inst);
      dec.vC = s2(bytecode[1]);  // sign-extend 16-bit value
      return dec;
    case k22c:   // op vA, vB, thing@CCCC
    case k22cs:  // [opt] op vA, vB, field offset CCCC
      dec.vA = InstA(inst);
      dec.vB = InstB(inst);
      dec.vC = bytecode[1];
      return dec;
    case k30t:  // op +AAAAAAAA
      dec.vA = FetchU4(bytecode + 1);
      return dec;
    case k31t:  // op vAA, +BBBBBBBB
    case k31c:  // op vAA, string@BBBBBBBB
      dec.vA = InstAA(inst);
      dec.vB = FetchU4(bytecode + 1);
      return dec;
    case k32x:  // op vAAAA, vBBBB
      dec.vA = bytecode[1];
      dec.vB = bytecode[2];
      return dec;
    case k31i:  // op vAA, #+BBBBBBBB
      dec.vA = InstAA(inst);
      dec.vB = FetchU4(bytecode + 1);
      return dec;
    case k35c:     // op {vC, vD, vE, vF, vG}, thing@BBBB
    case k35ms:    // [opt] invoke-virtual+super
    case k35mi: {  // [opt] inline invoke
      dec.vA = InstB(inst);  // This is labeled A in the spec.
      dec.vB = bytecode[1];

      u2 regList = bytecode[2];

      // Copy the argument registers into the arg[] array, and
      // also copy the first argument (if any) into vC. (The
      // Instruction structure doesn't have separate
      // fields for {vD, vE, vF, vG}, so there's no need to make
      // copies of those.) Note that cases 5..2 fall through.
      switch (dec.vA) {
        case 5:
          // A fifth arg is verboten for inline invokes
          SLICER_CHECK_NE(format, k35mi);

          // Per note at the top of this format decoder, the
          // fifth argument comes from the A field in the
          // instruction, but it's labeled G in the spec.
          dec.arg[4] = InstA(inst);
          FALLTHROUGH_INTENDED;
        case 4:
          dec.arg[3] = (regList >> 12) & 0x0f;
          FALLTHROUGH_INTENDED;
        case 3:
          dec.arg[2] = (regList >> 8) & 0x0f;
          FALLTHROUGH_INTENDED;
        case 2:
          dec.arg[1] = (regList >> 4) & 0x0f;
          FALLTHROUGH_INTENDED;
        case 1:
          dec.vC = dec.arg[0] = regList & 0x0f;
          FALLTHROUGH_INTENDED;
        case 0:
          // Valid, but no need to do anything
          return dec;
      }
    }
      // Reached only when the argument count (vA) is greater than 5.
      SLICER_CHECK(!"Invalid arg count in 35c/35ms/35mi");
    case k3rc:   // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB
    case k3rms:  // [opt] invoke-virtual+super/range
    case k3rmi:  // [opt] execute-inline/range
      dec.vA = InstAA(inst);
      dec.vB = bytecode[1];
      dec.vC = bytecode[2];
      return dec;
    case k45cc: {  // AG op BBBB FEDC HHHH
      dec.vA = InstB(inst);  // This is labelled A in the spec.
      dec.vB = bytecode[1];  // vB meth@BBBB

      u2 regList = bytecode[2];
      dec.vC = regList & 0xf;
      dec.arg[0] = (regList >> 4) & 0xf;  // vD
      dec.arg[1] = (regList >> 8) & 0xf;  // vE
      dec.arg[2] = (regList >> 12);       // vF
      dec.arg[3] = InstA(inst);           // vG
      dec.arg[4] = bytecode[3];           // vH proto@HHHH
    }
      return dec;
    case k4rcc:  // AA op BBBB CCCC HHHH
      dec.vA = InstAA(inst);
      dec.vB = bytecode[1];
      dec.vC = bytecode[2];
      dec.arg[4] = bytecode[3];  // vH proto@HHHH
      return dec;
    case k51l:  // op vAA, #+BBBBBBBBBBBBBBBB
      dec.vA = InstAA(inst);
      dec.vB_wide = FetchU8(bytecode + 1);
      return dec;
  }

  std::stringstream ss;
  ss << "Can't decode unexpected format " << format << " for " << opcode;
  SLICER_FATAL(ss.str());
}

// Formats |value| as "0x" followed by its hexadecimal digits, zero-padded
// to at least two digits.
static inline std::string HexByte(int value) {
  std::stringstream ss;
  ss << "0x" << std::setw(2) << std::setfill('0') << std::hex << value;
  return ss.str();
}

// Prints an opcode as "[0xNN] name".
std::ostream& operator<<(std::ostream& os, Opcode opcode) {
  return os << "[" << HexByte(opcode) << "] " << gOpcodeNames[opcode];
}

// Prints the name of an instruction format (without the leading 'k'), or
// "[0xNN] Unknown" for a value that is not in DEX_INSTRUCTION_FORMAT_LIST.
std::ostream& operator<<(std::ostream& os, InstructionFormat format) {
  switch (format) {
#define EMIT_INSTRUCTION_FORMAT_NAME(name) \
  case InstructionFormat::k##name:         \
    return os << #name;
#include "export/slicer/dex_instruction_list.h"
    DEX_INSTRUCTION_FORMAT_LIST(EMIT_INSTRUCTION_FORMAT_NAME)
#undef EMIT_INSTRUCTION_FORMAT_NAME
#undef DEX_INSTRUCTION_FORMAT_LIST
#undef DEX_INSTRUCTION_LIST
  }
  return os << "[" << HexByte(format) << "] " << "Unknown";
}

}  // namespace dex