#!/usr/bin/env python #--------------------------------------------------- # probabilityOfId - command line version for Python # Copyright (C) 2005-2006 Brant C. Faircloth. # # This program is free software; you can redistribute # it and/or modify it under the terms of the GNU # General Public License as published by the Free # Software Foundation; either version 2 of # the License, or (at your option) any later version. #--------------------------------------------------- from numpy import * import copy, math class input: """functions for data acquisition""" #------------------------------------------------------------------------------------------------------------------ # note: this is ugly - by necessity. We need double dictionary and column formatted data for readability. sucks. # i also didn't feel like thinking about it any more. so, this is the result. #------------------------------------------------------------------------------------------------------------------ def getFileContents(self, file): file = open(file,'r') cleanData = {} header = file.readline() #print header header = header.strip('\r\n').split('\t') headerItems = {} dataHolder = {} for item in header: if item not in headerItems.keys(): headerItems[header.index(item)] = item dataHolder[item] = {} text = file.readlines() for line in text: tempLine = line.strip('\r\n').split('\t') for element in headerItems.keys(): try: if tempLine[element] != '': dataHolder[headerItems[element]][tempLine[element]]=float(tempLine[element+1]) except: pass file.close() return dataHolder class computations: """computations of various P(ID) measures""" def __init__(self, dataHolder): self.dataHolder = dataHolder self.regularList = copy.copy(dataHolder.keys()) def pRandId(self): """computs the probability of randomly drawing 2 indivs. from a pop. in HWE and getting same genotype""" #------------------------------------------------------- # from Paetkau and Strobeck (1994), Mol. Ecol. 3:489-495 #------------------------------------------------------- modList = copy.copy(self.regularList) probabilityList = [] for value in self.regularList: for item in modList: if value == item: prob = (self.dataHolder[item] * self.dataHolder[value])**2 else: prob = (2*(self.dataHolder[item] * self.dataHolder[value]))**2 probabilityList.append(prob) modList.remove(value) return sum(probabilityList) def pSibId(self): """computes probability of randomly drawing 2 sibs from a pop. in HWE and getting same genotype""" #--------------------------------------------------------------------- # from Evett and Weir (1998): Interpreting DNA Evidence: Statistical # Genetics for Forensic Scientists. Sinauer, Sunderland. #--------------------------------------------------------------------- pSubISquare = [] pSubIFour = [] for item in self.regularList: pSubISquare.append(self.dataHolder[item]**2) pSubIFour.append(self.dataHolder[item]**4) prob = 0.25 + (0.5*float(sum(pSubISquare))) + (0.5*float(sum(pSubISquare)**2)) - (0.25*float(sum(pSubIFour))) return prob def pOffId(self): """computes probability that offspring will be drawn from pop. in HWE and getting same genotype as parent""" #--------------------------------------------------------------------------------------------------------------------------------------------------- # Lazily taken from Curtis Strobeck's class BIOL 380 website here: # http://www.biology.ualberta.ca/courses/biol380/uploads/winter03/lecture/b1/curt_strobeck/public/lectures/Lecture_14_Probability_of_Identity.pdf #--------------------------------------------------------------------------------------------------------------------------------------------------- modList = copy.copy(self.regularList) probabilityList = [] for value in self.regularList: for item in modList: if value == item: prob = (self.dataHolder[item])**2 else: prob = ((self.dataHolder[item] * self.dataHolder[value])/2)**2 probabilityList.append(prob) modList.remove(value) return sum(probabilityList) def prettyStuff(num, precision): #---------------------------------------------------------------------------------------------------------------- # blatantly stolen from David S. Harrison snippet here: http://www.faqts.com/knowledge_base/view.phtml/aid/4450 #---------------------------------------------------------------------------------------------------------------- accpow = math.floor(math.log10(precision)) if num < 0: digits = int(math.fabs(num/pow(10,accpow)-0.5)) else: digits = int(math.fabs(num/pow(10,accpow)+0.5)) result = '' if digits > 0: for i in range(0,int(accpow)): if (i % 3)==0 and i>0: result = '0,' + result else: result = '0' + result curpow = int(accpow) while digits > 0: adigit = chr((digits % 10) + ord('0')) if (curpow % 3)==0 and curpow!=0 and len(result)>0: if curpow < 0: result = adigit + ' ' + result else: result = adigit + ',' + result elif curpow==0 and len(result)>0: result = adigit + '.' + result else: result = adigit + result digits = digits/10 curpow = curpow + 1 for i in range(curpow,0): if (i % 3)==0 and i!=0: result = '0 ' + result else: result = '0' + result if curpow <= 0: result = "0." + result if num < 0: result = '-' + result else: result = "0" return result if __name__=="__main__": """main loop""" #------------------------------------ # change filename here for the moment #------------------------------------ file = 'brant_test_freqs.txt' #--------------------------------------------------- # don't change any more stuff or the program will die #--------------------------------------------------- dataDict = input().getFileContents(file) #--------------------------------------------------------- # create some lists to hold data which will be summarized. #--------------------------------------------------------- pRandId = [] pSibId = [] pOffId = [] #------------------------------------------------ # run the methods in the computations class above #------------------------------------------------ for item in dataDict.keys(): data = computations(dataDict[item]) pRandId.append(data.pRandId()) pSibId.append(data.pSibId()) pOffId.append(data.pOffId()) #------------------------------------ # get product of each respective list #------------------------------------ pRandIdProd = product(pRandId) pSibIdProd = product(pSibId) pOffIdProd = product(pOffId) #--------------------------- # get each product's inverse #--------------------------- pRandIdProdInv = 1/pRandIdProd pSibIdProdInv = 1/pSibIdProd pOffIdProdInv = 1/pOffIdProd #---------------------------------------------- # make some pretty output for the folks at home #---------------------------------------------- print (('\nData from %s loci\n') % (len(dataDict.keys()))) print (('The theoretical probability of identity [P(ID); Paetkau and Strobeck (1994)] is: %.2e') % (pRandIdProd)) print (('This means there is a 1 in %s chance of whatever\n') % (prettyStuff(pRandIdProdInv, 100))) print (('The theoretical probability of parents and sibs sharing genotype [P(ID)off] is: %.2e') % (pOffIdProd)) print (('This means there is a 1 in %s chance of whatever\n') % (prettyStuff(pOffIdProdInv, 100))) print (('The theoretical probability of identity among Sibs [P(ID)sib] (AKA conservative P(ID); Evett and Weir (1998)) is: %.2e') % (pSibIdProd)) print (('This means there is a 1 in %s chance of whatever\n') % (prettyStuff(pSibIdProdInv, 100))) #--------------- # end of program #---------------