I'm going to practice reverse engineering by using a random game I had a disc for (FF7). The goal is to learn more about reverse engineering and gain some experience by getting my hands dirty. It would be neat to learn what the file formats of the assets used in the game are, so that will be my goal for the project. Understanding how the libraries work under the hood is a goal too.
I've written some tools that some people may find useful. The code is not very good and makes assumptions, but for my purposes, these have worked well. The first step in my plan is to disassemble the main PS Executable and separate the library code from the user code. In this case it's the file "SCUS_941.63" in the root of the CD filesystem, which contains the main() routine and all of the statically linked PSY-Q libraries.
I've loaded the binary into IDA, and turned off FLIRT signatures to avoid misnaming any functions. It looks like the PSYQ libraries have debug statements in them, which will allow me to identify a few functions with certainty:

However, only a small fraction of the library functions have strings like the ones above. Normally you would use FLIRT signatures, and there are some good ones for PSYQ; in my case, however, they often misnamed functions and caused issues, so I decided to match up functions manually. One problem is that I do not know which version of PSYQ the team that wrote this particular game used, but I have a few clues:

Hmm, end of 1996. And there are no DS_XXX functions (which, according to the library reference manual, first appeared in version 4.0). I have version 3.6 installed, and the modification date on library files is near the end of 1996:

I checked my own library's strings:
Code: Select all
st@box:~/gocode/src/psobj$ strings /home/st/lib/libcd.lib | grep bios.c
$Id: bios.c,v 1.80 1996/09/11 07:02:15 ayako Exp $
Code: Select all
st@box:~/gocode/src/psobj$ strings /home/st/LIBCD.LIB | grep bios.c
$Id: bios.c,v 1.77 1996/05/13 06:58:16 suzu Exp $
So I have an idea of the version used. The next step is to compare the code, but first I need to extract it from the libraries. Unfortunately, the library format isn't standard, and the standard tools (ar, objdump) refuse to work with it. Luckily, PSYQ comes with a utility called "PSYLIB" that can extract the object files from a library for us. I wrote a batch script to do this for every library:
Code: Select all
REM Extract the object files of every PSYQ library into a directory named
REM after the library. Run this from the directory that should receive the
REM per-library subdirectories.
REM
REM Step 1: create one output directory per library.
mkdir libapi
mkdir libc
mkdir libc2
mkdir libcard
mkdir libcd
mkdir libcomb
mkdir libetc
mkdir libgpu
mkdir libgs
mkdir libgte
mkdir libgun
mkdir libmath
mkdir libpress
mkdir libsio
mkdir libsn
mkdir libsnd
mkdir libspu
mkdir libtap
REM Step 2: enter each directory in turn and run PSYLIB on the matching
REM library. Presumably "/x" extracts the objects into the current
REM directory, which is why the script changes directory first - confirm
REM against the PSYLIB documentation.
cd libapi
psylib /x C:\PSYQ\LIB\LIBAPI.LIB
cd ..\libc
psylib /x C:\PSYQ\LIB\LIBC.LIB
cd ..\libc2
psylib /x C:\PSYQ\LIB\LIBC2.LIB
cd ..\libcard
psylib /x C:\PSYQ\LIB\LIBCARD.LIB
cd ..\libcd
psylib /x C:\PSYQ\LIB\LIBCD.LIB
cd ..\libcomb
psylib /x C:\PSYQ\LIB\LIBCOMB.LIB
cd ..\libetc
psylib /x C:\PSYQ\LIB\LIBETC.LIB
cd ..\libgpu
psylib /x C:\PSYQ\LIB\LIBGPU.LIB
cd ..\libgs
psylib /x C:\PSYQ\LIB\LIBGS.LIB
cd ..\libgte
psylib /x C:\PSYQ\LIB\LIBGTE.LIB
cd ..\libgun
psylib /x C:\PSYQ\LIB\LIBGUN.LIB
cd ..\libmath
psylib /x C:\PSYQ\LIB\LIBMATH.LIB
cd ..\libpress
psylib /x C:\PSYQ\LIB\LIBPRESS.LIB
cd ..\libsio
psylib /x C:\PSYQ\LIB\LIBSIO.LIB
cd ..\libsn
psylib /x C:\PSYQ\LIB\LIBSN.LIB
cd ..\libsnd
psylib /x C:\PSYQ\LIB\LIBSND.LIB
cd ..\libspu
psylib /x C:\PSYQ\LIB\LIBSPU.LIB
cd ..\libtap
psylib /x C:\PSYQ\LIB\LIBTAP.LIB
REM Return to the directory the script was started from.
cd ..
EDIT: I've updated this to reflect new info on the patch statements I've found. This will correctly decode them. Figuring out what some of them mean is ongoing however.
Code: Select all
package main
import (
"path/filepath"
"fmt"
"io"
"os"
)
// readAmt reads exactly amt bytes from r and returns them in a freshly
// allocated slice.
//
// On failure the returned slice is nil. The error is io.EOF when r ended
// before any byte was read and io.ErrUnexpectedEOF when it ended part-way
// through; any other read error is passed through unchanged.
//
// io.ReadFull is used instead of a hand-written loop because an io.Reader is
// allowed to return the final bytes together with io.EOF (those bytes must
// not be thrown away), and it also guards against readers that keep
// returning (0, nil).
func readAmt(r io.Reader, amt int) ([]byte, error) {
	buf := make([]byte, amt)
	if _, err := io.ReadFull(r, buf); err != nil {
		return nil, err
	}
	return buf, nil
}
// writeAmt writes all of buf to w, calling Write again with the unwritten
// tail whenever a call accepts fewer bytes than it was given. The first
// write error is returned immediately. An empty buf results in no Write
// call at all.
func writeAmt(w io.Writer, buf []byte) error {
	for done := 0; done < len(buf); {
		n, err := w.Write(buf[done:])
		if err != nil {
			return err
		}
		done += n
	}
	return nil
}
// readByte reads a single byte from r. On failure it returns 0 and the
// error reported by readAmt.
func readByte(r io.Reader) (byte, error) {
	raw, err := readAmt(r, 1)
	if err == nil {
		return raw[0], nil
	}
	return 0, err
}
// writeByte writes the single byte b to w.
func writeByte(w io.Writer, b byte) error {
	return writeAmt(w, []byte{b})
}
// readWord reads a 16-bit little-endian value (low byte first) from r.
// On failure it returns 0 and the error reported by readAmt.
func readWord(r io.Reader) (uint16, error) {
	raw, err := readAmt(r, 2)
	if err != nil {
		return 0, err
	}
	lo, hi := uint16(raw[0]), uint16(raw[1])
	return lo | hi<<8, nil
}
// writeWord writes num to w as two bytes, low byte first (little-endian).
func writeWord(w io.Writer, num uint16) error {
	lo := byte(num)
	hi := byte(num >> 8)
	return writeAmt(w, []byte{lo, hi})
}
// writeDWord writes num to w as four bytes, least significant byte first
// (little-endian), mirroring what readDWord decodes.
//
// All four bytes are filled in; previously only the low two were set, so the
// upper 16 bits of num were always written as zero.
func writeDWord(w io.Writer, num uint32) error {
	buf := []byte{
		byte(num),
		byte(num >> 8),
		byte(num >> 16),
		byte(num >> 24),
	}
	return writeAmt(w, buf)
}
// readDWord reads a 32-bit little-endian value (least significant byte
// first) from r. On failure it returns 0 and the error reported by readAmt.
func readDWord(r io.Reader) (uint32, error) {
	raw, err := readAmt(r, 4)
	if err != nil {
		return 0, err
	}
	var v uint32
	// Walk from the most significant byte down so each step shifts in 8 bits.
	for i := 3; i >= 0; i-- {
		v = v<<8 | uint32(raw[i])
	}
	return v, nil
}
// readAscii reads len bytes from r and returns them as a string without any
// further decoding. On failure it returns "" and the error reported by
// readAmt.
func readAscii(r io.Reader, len int) (string, error) {
	raw, err := readAmt(r, len)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
// writeAscii writes the bytes of s to w as-is; no length prefix or
// terminator is added.
func writeAscii(w io.Writer, s string) error {
	return writeAmt(w, []byte(s))
}
// symbol is one symbol record parsed from an object file (local, XDEF, XREF
// or XBSS). Which fields are filled in depends on the record kind.
type symbol struct {
	sect  *section // section the symbol was attached to while parsing
	num   uint16   // symbol number from XDEF/XREF/XBSS records
	name  string   // symbol name as stored in the record
	off   uint16   // offset within sect (local and XDEF records)
	unk   uint16   // word of unknown meaning (local and XDEF records)
	size  uint16   // size from XBSS records
	local bool     // true for local-symbol records
	xref  bool     // true for XREF records
	bss   bool     // true for XBSS records
	code  []byte   // not assigned anywhere in the code shown here
}
// section is one section declared by an object file, together with the data
// and symbols collected for it while parsing.
type section struct {
	name        string    // section name, e.g. ".text"
	num         uint16    // section number from the section record
	groupNum    uint16    // group number from the section record
	align       byte      // alignment byte from the section record
	code        []byte    // bytes taken from code records for this section
	symbols     []*symbol // symbols attached to this section
	uninit_data []byte    // zero bytes standing in for uninitialized-data records
}
// finfo collects what was parsed from a single object file.
type finfo struct {
	name     string     // file name from the file-name record
	num      uint16     // file number from the file-name record
	sections []*section // sections declared by the file, in declaration order
}
var (
	// files lists every object file parsed so far, in processing order.
	files []*finfo
	// curFile is the object file currently being parsed; getSection
	// resolves section numbers against it.
	curFile *finfo
)
// getSection returns the section of curFile whose number is num. When no
// section matches it prints a warning to stdout and returns nil, so callers
// must be prepared for a nil result.
func getSection(num uint16) *section {
	for i := range curFile.sections {
		if candidate := curFile.sections[i]; candidate.num == num {
			return candidate
		}
	}
	fmt.Printf("WARNING: getSection(%04x) failed!\n", num)
	return nil
}
// getNextOffset returns the smallest symbol offset in s that is strictly
// greater than off, or the length of s.code when no such symbol lies below
// that length. It is used to find where the data belonging to the symbol at
// off ends.
func getNextOffset(s *section, off uint16) uint16 {
	next := uint16(len(s.code))
	for _, candidate := range s.symbols {
		// Skip symbols at or before off and those not closer than the
		// best boundary found so far.
		if candidate.off <= off || candidate.off >= next {
			continue
		}
		next = candidate.off
	}
	return next
}
// procType holds the byte read from the most recent processor-type (0x2e)
// record. It is stored while parsing but not read back in the code shown
// here.
var procType byte
// getRightTextSectionName returns the name of the output text section that
// receives the byte at position codeLen of the combined code: ".text1" for
// the first 64000 bytes, ".text2" for the next 64000, and so on.
func getRightTextSectionName(codeLen int) string {
	return fmt.Sprintf(".text%d", codeLen/64000+1)
}
// getRightTextSectionNumber returns the section number of the output text
// section that receives the byte at position codeLen of the combined code:
// 0xf011 for the first 64000 bytes, 0xf012 for the next 64000, and so on.
func getRightTextSectionNumber(codeLen int) uint16 {
	index := codeLen/64000 + 1
	return uint16(0xf010 + index)
}
// codeData is the byte content accumulated for one output text segment.
type codeData []byte
// textCodes holds the bytes accumulated for each of the 80 output text
// segments. Index i is written out under section number 0xf010+i; the
// helper functions in this file only ever fill indices 1 and up.
var textCodes [80]codeData
// getRightTextSectionOffset returns how many bytes have already been stored
// in the text segment selected by codeLen, i.e. the offset at which the next
// byte appended to that segment will land.
func getRightTextSectionOffset(codeLen int) uint16 {
	return uint16(len(textCodes[codeLen/64000+1]))
}
// appendByteToCodeSegment appends b to the text segment selected by codeLen
// (one segment per 64000 bytes of combined code, starting at index 1).
func appendByteToCodeSegment(codeLen int, b byte) {
	idx := codeLen/64000 + 1
	textCodes[idx] = append(textCodes[idx], b)
}
// addPatch1 is an unimplemented placeholder: its body contains only notes
// on the intended behavior and it is not called by the code shown here.
func addPatch1(patchType byte,instructionOffset uint16,sectionNumber uint16,sectOffset uint16) {
// Intended behavior: link the location referenced by the instruction at
// instructionOffset with the symbol at sectOffset in section sectionNumber.
// The patch instruction has to be saved, but with its positions altered to
// reflect where the symbols will be once the combined file is written.
// First check for any symbols at the location;
// if there are none, it is a label jump, a branch or something similar.
}
// addPatch2 is an unimplemented placeholder: its body contains only a note
// on the intended behavior and it is not called by the code shown here.
func addPatch2(patchType byte,instructionOffset uint16,symbolNumber uint16) {
// Intended behavior: link the location referenced by the instruction at
// instructionOffset with the symbol that has number symbolNumber.
}
// addPatch3 is an unimplemented placeholder with an empty body; it is not
// called by the code shown here.
// NOTE(review): the meaning of sectionThing is not documented anywhere in
// this file - confirm before implementing.
func addPatch3(patchType byte,instructionOffset uint16,sectionNumber uint16,sectionThing uint16,sectionOffset uint16) {
}
func processSymbols(f *os.File) {
writeAscii(f,"LNK")
writeByte(f,2)
writeByte(f,0x2e) // processor type 7
writeByte(f,0x7)
// concatenate all .bss sections, bss section number = 1
var symsWritten []string
nextBssNumber := uint16(1)
nextSymNumber := uint16(1)
bssSection := string(".bss")
rdataSection := string(".rdata")
dataSection := string(".data")
sdataSection := string(".sdata")
sbssSection := string(".sbss")
writeByte(f,0x10)
writeWord(f,0xf000)
writeWord(f,0x0)
writeByte(f,8)
writeByte(f,byte(len(bssSection)))
writeAscii(f,bssSection)
writeByte(f,0x10)
writeWord(f,0xf001)
writeWord(f,0x0)
writeByte(f,8)
writeByte(f,byte(len(rdataSection)))
writeAscii(f,rdataSection)
writeByte(f,0x10)
writeWord(f,0xf002)
writeWord(f,0x0)
writeByte(f,8)
writeByte(f,byte(len(dataSection)))
writeAscii(f,dataSection)
writeByte(f,0x10)
writeWord(f,0xf003)
writeWord(f,0x0)
writeByte(f,8)
writeByte(f,byte(len(sdataSection)))
writeAscii(f,sdataSection)
writeByte(f,0x10)
writeWord(f,0xf004)
writeWord(f,0x0)
writeByte(f,8)
writeByte(f,byte(len(sbssSection)))
writeAscii(f,sbssSection)
for i := 0; i < 80; i++ {
writeByte(f,0x10)
writeWord(f,uint16(0xf010 + i))
writeWord(f,0x0)
writeByte(f,8)
s := fmt.Sprintf(".text%d",i)
writeByte(f,byte(len(s)))
writeAscii(f,s)
}
for _,fl := range files {
for _,s := range fl.sections {
for _,sym := range s.symbols {
if sym.bss {
var written bool
written = false
for _,s := range symsWritten {
if s == sym.name {
written = true
break
}
}
if written {
continue
}
writeByte(f,0x30) //xbss
writeWord(f,nextBssNumber)
writeWord(f,0xf000) // bss section number
writeWord(f,sym.size)
writeWord(f,sym.unk)
writeByte(f,byte(len(sym.name)))
writeAscii(f,sym.name)
nextBssNumber++
}
}
}
}
var textCode []byte
for _,fl := range files {
for _,s := range fl.sections {
for _,sym := range s.symbols {
if !sym.bss {
var written bool
written = false
for _,s := range symsWritten {
if s == sym.name {
written = true
break
}
}
if written {
continue
}
if sym.local {
writeByte(f,0x12)
} else {
writeByte(f,0xc) //xdef
writeWord(f,nextSymNumber)
nextSymNumber++
}
sect := getRightTextSectionNumber(len(textCode))
off := getRightTextSectionOffset(len(textCode))
writeWord(f,sect) // text section number
writeWord(f,off)
nextOff := getNextOffset(s,sym.off)
for i := sym.off; i < nextOff;i++ {
appendByteToCodeSegment(len(textCode),s.code[i])
textCode = append(textCode,s.code[i])
}
writeWord(f,sym.unk)
writeByte(f,byte(len(sym.name)))
writeAscii(f,sym.name)
}
}
}
}
for i := 0;i < 80; i++ {
writeByte(f,0x6)
writeWord(f,uint16(0xf010 + i))
writeByte(f,0x2)
cd := textCodes[i]
writeWord(f,uint16(len(cd)))
//fmt.Printf("Actual code size %04x\n",len(cd))
for i := 0; i < len(cd); i++ {
writeByte(f,cd[i])
}
}
writeByte(f,0x0) // EOF
}
// readPatch reads one patch expression from f and prints it to stdout in a
// readable infix form.
//
// The first byte selects the node kind:
//   - 0 is a constant: a 32-bit value follows and is printed as "$<hex>";
//   - the even values 2..24 are operands: a 16-bit value follows and is
//     printed with the matching name, e.g. "sectbase(<hex>)";
//   - anything else is treated as a binary operator: "(" is printed, the
//     left operand is read recursively, the operator text is printed (nothing
//     for unrecognized codes), the right operand is read recursively and ")"
//     closes the group.
//
// Read errors are ignored; a failed read yields zero, so an exhausted input
// decodes as the constant "$0" and ends the recursion.
func readPatch(f io.Reader) {
	// Printf formats for the operand opcodes; unused slots stay empty.
	operands := [...]string{
		2:  "[%x]",
		4:  "sectbase(%x)",
		6:  "bank(%x)",
		8:  "sectof(%x)",
		10: "offs(%x)",
		12: "sectstart(%x)",
		14: "groupstart(%x)",
		16: "groupof(%x)",
		18: "seg(%x)",
		20: "grouporg(%x)",
		22: "sectend(%x)",
		24: "groupend(%x)",
	}
	// Text printed between the two operands of each operator opcode.
	operators := [...]string{
		32: "=",
		34: "<>",
		36: "<=",
		38: "<",
		40: ">=",
		42: ">",
		44: "+",
		46: "-",
		48: "*",
		50: "/",
		52: "&",
		54: "!",
		56: "^",
		58: "<<",
		60: ">>",
		62: "%",
		64: "---",
		66: "-revword-",
		68: "-check0-",
		70: "-check1-",
		72: "-bitrange-",
		74: "-arshift_chk-",
	}

	op, _ := readByte(f)
	if op == 0 {
		val, _ := readDWord(f)
		fmt.Printf("$%x", val)
		return
	}
	if int(op) < len(operands) && operands[op] != "" {
		val, _ := readWord(f)
		fmt.Printf(operands[op], val)
		return
	}

	fmt.Printf("(")
	readPatch(f)
	if int(op) < len(operators) {
		fmt.Print(operators[op])
	}
	readPatch(f)
	fmt.Printf(")")
}
// processObjectFile parses the object file objFileName and appends what it
// finds (sections, symbols, code) to the package-level files list as a new
// finfo, which also becomes curFile.
//
// The file must start with "LNK" followed by version byte 2; otherwise a
// message is printed and nothing is recorded. After the header the file is
// a stream of records, each introduced by a type byte. Parsing stops at the
// end-of-file record (0) or when the next type byte cannot be read, in which
// case the read error is printed. Unknown type bytes produce a warning and
// parsing continues with the next byte.
//
// Records that refer to a section that was never declared are skipped
// (getSection has already printed a warning) instead of dereferencing a nil
// section.
//
// Progress and diagnostics are printed to stdout; nothing is returned.
func processObjectFile(objFileName string) {
	f, err := os.Open(objFileName)
	if err != nil {
		fmt.Printf("Could not open file: %s\n", err)
		return
	}
	defer f.Close()

	hdr, err := readAscii(f, 3)
	if err != nil || hdr != "LNK" {
		fmt.Printf("Not a PSX Library!\n")
		return
	}
	ver, err := readByte(f)
	if err != nil || ver != 2 {
		fmt.Printf("I can only handle version 2, you have version %d.\n", ver)
		return
	}

	currentSection := uint16(0)
	currentTextSection := uint16(0)
	curFile = new(finfo)
	files = append(files, curFile)

	for {
		t, err := readByte(f)
		if err != nil {
			fmt.Printf("Error: %s\n", err)
			return
		}
		//fmt.Printf("Type Byte: %02x\n",t)

		// Errors on the field reads below are deliberately ignored: a
		// truncated record leaves zero values in its fields and the failure
		// surfaces when the next type byte is read at the top of the loop.
		switch t {
		case 0x2e: // Processor Type
			procType, _ = readByte(f)

		case 0x10: // Section Number
			symNum, _ := readWord(f)
			groupNum, _ := readWord(f)
			align, _ := readByte(f)
			symNameLen, _ := readByte(f)
			symName, _ := readAscii(f, int(symNameLen))
			s := &section{
				name:     symName,
				num:      symNum,
				align:    align,
				groupNum: groupNum,
			}
			if s.name == ".text" {
				// XREF symbols are attached to the most recent .text section.
				currentTextSection = s.num
			}
			curFile.sections = append(curFile.sections, s)
			fmt.Printf("Section '%s' %04x\n", symName, symNum)

		case 0x12: // Local Symbol
			sectionNumber, _ := readWord(f)
			offset, _ := readWord(f)
			unk, _ := readWord(f) // unknown bytes
			nameLen, _ := readByte(f)
			name, _ := readAscii(f, int(nameLen))
			fmt.Printf("Local sym '%s'\n", name)
			s := getSection(sectionNumber)
			if s == nil {
				continue
			}
			s.symbols = append(s.symbols, &symbol{
				sect:  s,
				off:   offset,
				unk:   unk,
				name:  name,
				local: true,
			})

		case 0xc: // Exported Symbol XDEF
			symbolNumber, _ := readWord(f)
			sectionNumber, _ := readWord(f)
			offset, _ := readWord(f)
			unknown, _ := readWord(f)
			nameLen, _ := readByte(f)
			name, _ := readAscii(f, int(nameLen))
			fmt.Printf("XDEF sect %04x name '%s'\n", sectionNumber, name)
			s := getSection(sectionNumber)
			if s == nil {
				continue
			}
			s.symbols = append(s.symbols, &symbol{
				sect: s,
				off:  offset,
				name: name,
				unk:  unknown,
				num:  symbolNumber,
			})

		case 0xe: // Exported Symbol XREF
			symbolNumber, _ := readWord(f)
			nameLen, _ := readByte(f)
			name, _ := readAscii(f, int(nameLen))
			fmt.Printf("XREF %x '%s'\n", symbolNumber, name)
			s := getSection(currentTextSection)
			if s == nil {
				continue
			}
			s.symbols = append(s.symbols, &symbol{
				sect: s,
				xref: true,
				num:  symbolNumber,
				name: name,
			})

		case 0x30: // XBSS
			symbolNumber, _ := readWord(f)
			sectionNumber, _ := readWord(f)
			size, _ := readWord(f)
			readWord(f) // unknown
			nameLen, _ := readByte(f)
			name, _ := readAscii(f, int(nameLen))
			fmt.Printf("XBSS %x '%s'\n", symbolNumber, name)
			s := getSection(sectionNumber)
			if s == nil {
				continue
			}
			s.symbols = append(s.symbols, &symbol{
				sect: s,
				bss:  true,
				size: size,
				num:  symbolNumber,
				name: name,
			})

		case 0x8: // Uninitialized data
			size, _ := readWord(f)
			readWord(f)
			//data,_ := readAmt(f,int(size))
			s := getSection(currentSection)
			if s == nil {
				continue
			}
			// The record carries no payload; reserve size zero bytes.
			s.uninit_data = append(s.uninit_data, make([]byte, int(size))...)

		case 0x6: // Switch to Section
			num, _ := readWord(f)
			currentSection = num
			fmt.Printf("SWITCH TO SECTION %04x\n", num)

		case 0x2: // Code
			codeLen, _ := readWord(f)
			code, _ := readAmt(f, int(codeLen))
			fmt.Printf("CODE %d bytes\n", codeLen)
			s := getSection(currentSection)
			if s == nil {
				continue
			}
			// A section may receive several code records; append so that
			// earlier chunks are kept rather than overwritten.
			s.code = append(s.code, code...)

		case 0x1c: // File Name
			fileNumber, _ := readWord(f)
			fileNameLength, _ := readByte(f)
			fileName, _ := readAscii(f, int(fileNameLength))
			curFile.name = fileName
			curFile.num = fileNumber
			fmt.Printf("File num %d '%s'\n", fileNumber, fileName)

		case 0x0a: // Patch
			patchType, _ := readByte(f)
			instructionOffset, _ := readWord(f)
			fmt.Printf("PATCH %02x %04x ", patchType, instructionOffset)
			readPatch(f)
			fmt.Printf("\n")

		case 0x0: // End of File
			return

		default:
			fmt.Printf("WARNING: UNKNOWN BYTE CODE %02x!\n", t)
		}
	}
}
// main expects two arguments, <dir> and <output>. It creates the output
// file, parses every regular file found under <dir> as an object file, and
// then writes the combined object to the output file.
//
// Errors are reported on stdout. A failure to close the output file is
// reported as well, because a failed close can mean the written data never
// reached the disk.
func main() {
	args := os.Args[1:]
	if len(args) != 2 {
		fmt.Printf("I need: %s <dir> <output>\n", os.Args[0])
		return
	}
	dir, outName := args[0], args[1]

	f, err := os.Create(outName)
	if err != nil {
		fmt.Printf("Error opening output file: %s\n", err)
		return
	}
	defer func() {
		if cerr := f.Close(); cerr != nil {
			fmt.Printf("Error closing output file: %s\n", cerr)
		}
	}()

	// scan for object files
	err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		fmt.Printf("Processing file '%s'...\n", path)
		processObjectFile(path)
		return nil
	})
	if err != nil {
		fmt.Printf("Error walking directory %q: %v\n", dir, err)
		return
	}
	fmt.Printf("Done reading object files.\n")

	// write out our ridiculous file
	processSymbols(f)
}
Here is what a normal library object looks like when opened and then our test object:
It works! The code above is written in Go: point it at a directory tree containing library objects, give it an output file name, and it will pack every library object it finds into one file. IDA even recognizes it as a PSX object file, but isn't too thrilled about it having 9000 segments:

I should be able to map out library code and library constants fairly easily now. It feels good to get one task accomplished. If anyone needs to know how the object files are laid out, let me know.