Commit 87b7ac85 authored by Jonathan Mattingly
Browse files

initial set up

parents
Loading
Loading
Loading
Loading

Atlas.py

0 → 100644
+84 −0
Original line number Diff line number Diff line
import json
import gzip
import os


class Atlas:
    """Header information of an atlas file plus the open file handle.

    Instances are created and filled in by ``openAtlas``:

    * ``description``, ``date``, ``atlasParamType`` and ``mapParamType`` are
      copied from the header line of the file.
    * ``atlasParam`` holds the parsed JSON of the line following the header.
    * ``fp`` is the open file object; the remaining lines are read from it
      one map at a time.
    """

    # Immutable defaults stay at class level so a fresh ``Atlas()`` has every
    # attribute defined before it is populated.
    description = ""
    date = ""
    atlasParamType = ""
    mapParamType = ""
    fp = None

    def __init__(self):
        # A dict defined at class level would be one object shared by every
        # Atlas; create it per instance so mutating one atlas's parameters
        # cannot leak into another.
        self.atlasParam = {}

    def __repr__(self):
        return "Atlas()"

    def __str__(self):
        # format() instead of "+" so a non-string description/date coming
        # from the JSON header does not raise TypeError.
        return "atlas\ndescription: {}\ndate: {}\natlasParam: {}".format(
            self.description, self.date, self.atlasParam)


class Map:
    """One map (districting plan) read from an atlas file.

    ``nextMap`` fills in the attributes from one JSON line of the file:

    * ``name`` and ``weight`` are copied from the record.
    * ``districting`` is a flat dict merged from the record's list of
      single districting dicts.
    * ``data`` is the record's "data" entry, stored unchanged.
    """

    # Immutable defaults can safely live on the class.
    name = ""
    weight = 1

    def __init__(self):
        # Dicts defined at class level would be shared by every Map; give
        # each instance its own so in-place edits stay local to that map.
        self.data = {}
        self.districting = {}

    def __repr__(self):
        return "Map()"

    def __str__(self):
        # format() instead of "+" so a non-string name does not raise
        # TypeError.
        return "map\nname: {}\nweight: {}\ndistricting: {}".format(
            self.name, self.weight, self.districting)


def openAtlas(fileName):
    """Open an atlas file and read its header.

    The file is read line by line: the first line is discarded, the second
    is the JSON header (keys "description", "date", "atlasParamType" and
    "mapParamType"), and the third is a JSON value stored as
    ``atlas.atlasParam``. The file is left open, positioned just after
    these three lines, so that ``nextMap`` can read the maps that follow.

    Args:
        fileName: path of the atlas file; a ".gz" extension selects gzip
            decompression.

    Returns:
        An ``Atlas`` whose ``fp`` is the open file. The caller is
        responsible for closing it (see ``closeAtlas``).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError, KeyError: if the header lines are malformed;
            the file is closed before the error propagates.
    """
    # Open both variants in text mode with an explicit encoding so every
    # reader gets ``str`` lines regardless of compression or locale.
    if os.path.splitext(fileName)[1] == '.gz':
        fp = gzip.open(fileName, "rt", encoding="utf-8")
    else:
        fp = open(fileName, "r", encoding="utf-8")

    try:
        fp.readline()  # drop first line
        atlasHeader = json.loads(fp.readline())

        atlas = Atlas()
        atlas.fp = fp
        atlas.description = atlasHeader['description']
        atlas.date = atlasHeader["date"]
        atlas.atlasParamType = atlasHeader["atlasParamType"]
        atlas.mapParamType = atlasHeader["mapParamType"]
        atlas.atlasParam = json.loads(fp.readline())
    except Exception:
        # Nobody else holds the handle yet, so release it before re-raising
        # to avoid leaking an open file on a malformed header.
        fp.close()
        raise

    return atlas


def closeAtlas(atlas):
    """Close the file object stored in ``atlas.fp``."""
    handle = atlas.fp
    handle.close()


def nextMap(atlas):
    """Read the next map from an open atlas.

    One line is read from ``atlas.fp`` and parsed as JSON. The record's
    "districting" entry is a list of dicts; they are merged into a single
    dict, with later entries overriding earlier ones on duplicate keys.

    Returns:
        A ``Map`` built from the record, or ``None`` once the file has no
        more lines.
    """
    raw = atlas.fp.readline()
    if not raw:
        # readline() returns an empty value only at end of file.
        return None
    record = json.loads(raw)

    result = Map()
    result.name = record["name"]
    result.weight = record["weight"]
    result.districting = {
        key: district
        for piece in record["districting"]
        for key, district in piece.items()
    }
    result.data = record["data"]
    return result






    

example_atlas.py

0 → 100644
+18 −0
Original line number Diff line number Diff line
import Atlas

    
# Minimal example: open an atlas, print its header, then print every map.
atlas = Atlas.openAtlas("./test.jsonl.gz")
print(atlas)
print("\n")
try:
    while True:
        # nextMap returns None once the file is exhausted; stop there
        # instead of printing the terminal None.
        current_map = Atlas.nextMap(atlas)
        if current_map is None:
            break
        print(current_map)
        print("\n")
finally:
    # Release the file handle even if reading a map fails.
    Atlas.closeAtlas(atlas)






    
+64 −0
Original line number Diff line number Diff line
import Atlas
import helper_functions as hf
import json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the precinct data; pctData["nodes"] is the list of precinct records.
with open("pct21_20cen_wMCD.json", encoding="utf-8") as pctDataF:
    pctData = json.load(pctDataF)
dataElection = pctData['nodes']

atlas = Atlas.openAtlas("./truncated_nc_multiscale.jsonl")
print(atlas)
print("\n")
electionName = "G16_USS"  # This is the name of the election we will consider.

# For every precinct id: map name -> votes of the district that contains the
# precinct under that map.
pctToDistVotes = {node["id"]: {} for node in dataElection}

try:
    while True:  # This loops through all of the maps in the atlas
        try:
            district_map = Atlas.nextMap(atlas)  # Get the next map in the atlas
            if district_map is None:  # end of the atlas file
                break
            print(district_map.name)
            # The maps are multiscale in the sense that a county that is kept
            # whole is assigned as a unit; the following function builds the
            # precinct -> district mapping out of the multiscale assignment.
            node_to_dist = hf.get_node_to_district(district_map.districting,
                                                   dataElection)
            # The next function sums up the election for this districting
            # (defined by the map).
            distVoteR, distVoteD, distVoteT = hf.sumElection(electionName,
                                                             node_to_dist,
                                                             dataElection)
            for node_id, votes_by_map in pctToDistVotes.items():
                dist = node_to_dist[node_id]
                votes_by_map[district_map.name] = {
                    "Dem": distVoteD[dist],
                    "Rep": distVoteR[dist],  # was read from distVoteD by mistake
                    "Total": distVoteT[dist],
                }
        except Exception as err:
            # Best effort: a malformed or incomplete record ends the scan,
            # but say so instead of stopping silently.
            print("Stopped reading maps early:", repr(err))
            break
finally:
    Atlas.closeAtlas(atlas)


# now do something with the data
precinct = 100  # Choose precinct by its position in dataElection
print(dataElection[precinct]["county"], dataElection[precinct]["prec_id"])

# or look the precinct up by county and precinct id
county = "BEAUFORT"
prec_id = "BLCK"
precinct = [pos for pos, node in enumerate(dataElection)
            if node["county"] == county
            and node["prec_id"] == prec_id][0]
print(precinct)

# NOTE(review): pctToDistVotes is keyed by node["id"] while `precinct` is a
# position in dataElection; this assumes the two coincide - confirm.
df = pd.DataFrame(pctToDistVotes[precinct]).T
df["Dem %"] = df["Dem"]/df["Total"]
print(df)

sns.displot(data=df, x="Dem %", bins=10, stat="density")
plt.show()
+73 −0
Original line number Diff line number Diff line
import Atlas
import helper_functions as hf
import json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns




# Load the precinct data; pctData["nodes"] is the list of precinct records.
with open("pct21_20cen_wMCD.json", encoding="utf-8") as pctDataF:
    pctData = json.load(pctDataF)
dataElection = pctData['nodes']

atlas = Atlas.openAtlas("./truncated_nc_multiscale.jsonl")
print(atlas)
print("\n")
electionName = "G16_USS"  # This is the name of the election we will consider.

# For every precinct id: map name -> votes of the district that contains the
# precinct under that map.
pctToDistVotes = {node["id"]: {} for node in dataElection}

try:
    while True:  # This loops through all of the maps in the atlas
        try:
            district_map = Atlas.nextMap(atlas)  # Get the next map in the atlas
            if district_map is None:  # end of the atlas file
                break
            print(district_map.name)
            # The maps are multiscale in the sense that a county that is kept
            # whole is assigned as a unit; the following function builds the
            # precinct -> district mapping out of the multiscale assignment.
            node_to_dist = hf.get_node_to_district(district_map.districting,
                                                   dataElection)
            # The next function sums up the election for this districting
            # (defined by the map).
            distVoteR, distVoteD, distVoteT = hf.sumElection(electionName,
                                                             node_to_dist,
                                                             dataElection)
            for node_id, votes_by_map in pctToDistVotes.items():
                dist = node_to_dist[node_id]
                votes_by_map[district_map.name] = {
                    "Dem": distVoteD[dist],
                    "Rep": distVoteR[dist],  # was read from distVoteD by mistake
                    "Total": distVoteT[dist],
                }
        except Exception as err:
            # Best effort: a malformed or incomplete record ends the scan,
            # but say so instead of stopping silently.
            print("Stopped reading maps early:", repr(err))
            break
finally:
    Atlas.closeAtlas(atlas)

# This section adds uniform-swing versions of the elections listed in
# swingElections to dataElection, one per target fraction in swings.
# The new columns are added after the loop above, so that loop does not
# use them.
swings = [.47 + i/200 for i in range(13)]
swingElections = ["G16_AG", "G12_PR", "G08_USS"]
hf.addUniformSwings(swings, swingElections, dataElection)


# now do something with the data
precinct = 100  # Choose precinct by its position in dataElection
print(dataElection[precinct]["county"], dataElection[precinct]["prec_id"])

# or look the precinct up by county and precinct id
county = "BEAUFORT"
prec_id = "BLCK"
precinct = [pos for pos, node in enumerate(dataElection)
            if node["county"] == county
            and node["prec_id"] == prec_id][0]
print(precinct)

# NOTE(review): pctToDistVotes is keyed by node["id"] while `precinct` is a
# position in dataElection; this assumes the two coincide - confirm.
df = pd.DataFrame(pctToDistVotes[precinct]).T
df["Dem %"] = df["Dem"]/df["Total"]
print(df)

sns.displot(data=df, x="Dem %", bins=10, stat="density")
plt.show()

helper_functions.py

0 → 100644
+57 −0
Original line number Diff line number Diff line
def get_node_to_district(districting, nodes):
    """Rebuild the precinct-id -> district mapping of a multiscale map.

    ``districting`` is keyed by strings of the form '["COUNTY"]' (whole
    county) or '["COUNTY", "PRECINCT"]' (single precinct). For each node
    the whole-county key is tried first and the county/precinct key second.
    Nodes matching neither are printed for inspection and left out of the
    result.

    Args:
        districting: dict from key string to district.
        nodes: iterable of dicts with "id", "county" and "prec_id" entries.

    Returns:
        dict mapping node["id"] to the district found for that node.
    """
    assignment = {}
    for node in nodes:
        county = node["county"]
        whole_county_key = "".join(('["', county, '"]'))
        if whole_county_key in districting:
            assignment[node["id"]] = districting[whole_county_key]
        else:
            precinct = node["prec_id"]
            split_key = "".join(('["', county, '", "', precinct, '"]'))
            if split_key not in districting:
                print(node, county, precinct, whole_county_key, split_key)
            else:
                assignment[node["id"]] = districting[split_key]
    return assignment


def sumElection(electionName, node_to_dist,  data):
    """Total an election's votes per district.

    For every node id in ``node_to_dist`` the values stored in ``data[id]``
    under ``electionName + "_R"``, ``"_D"`` and ``"_T"`` are added to the
    running totals of the node's district.

    Args:
        electionName: election key prefix, e.g. "G16_USS".
        node_to_dist: dict from node id to district.
        data: container indexed by node id whose items hold the vote counts.

    Returns:
        Three dicts keyed by district: (R totals, D totals, T totals).
    """
    totalsR, totalsD, totalsT = {}, {}, {}
    for node_id, dist in node_to_dist.items():
        row = data[node_id]
        if dist not in totalsT:
            # First precinct seen for this district starts the totals.
            totalsR[dist] = row[electionName + "_R"]
            totalsD[dist] = row[electionName + "_D"]
            totalsT[dist] = row[electionName + "_T"]
        else:
            totalsR[dist] += row[electionName + "_R"]
            totalsD[dist] += row[electionName + "_D"]
            totalsT[dist] += row[electionName + "_T"]
    return totalsR, totalsD, totalsT

def stateWideVotes(electionName, dataElection):
    """Sum an election's votes over all records in ``dataElection``.

    Returns:
        dict with keys "Total", "Rep" and "Dem" holding the sums of the
        ``electionName + "_T"``, ``"_R"`` and ``"_D"`` entries respectively.
    """
    labelled_suffixes = (("Total", "_T"), ("Rep", "_R"), ("Dem", "_D"))
    return {
        label: sum(record[electionName + suffix] for record in dataElection)
        for label, suffix in labelled_suffixes
    }


def addUniformSwings(targetDemFractions, electionNames, dataElection):
    """Add swung versions of elections to every record, in place.

    For each election and each target fraction ``t``, three entries named
    ``<election>_USF<t>_D``, ``_R`` and ``_T`` are written into every record
    of ``dataElection``:

    * D votes are scaled by ``t / f``,
    * R votes are scaled by ``(1 - t) / (1 - f)``,
    * T votes are copied unchanged,

    where ``f`` is the election's statewide Dem/Total fraction.

    Args:
        targetDemFractions: target statewide Dem fractions.
        electionNames: election key prefixes to swing.
        dataElection: list of record dicts; modified in place.
    """
    for electionName in electionNames:
        statewide = stateWideVotes(electionName, dataElection)
        demShare = statewide["Dem"] / statewide["Total"]
        for target in targetDemFractions:
            swingName = electionName + '_USF' + str(target)
            for record in dataElection:
                record[swingName + '_D'] = (
                    record[electionName + '_D'] * target / demShare)
                record[swingName + '_R'] = (
                    record[electionName + '_R'] * (1.0 - target)
                    / (1.0 - demShare))
                record[swingName + '_T'] = record[electionName + '_T']


def listElections(dataElection, prefix="G",
                  exluded=frozenset({'id', 'prec_id', 'pop2020cen', 'MCD',
                                     'area', 'border_length'}),
                  idx=0):
    """List the election names present in one record.

    An election is recognised by its total-votes key, i.e. a key ending in
    "_T"; the returned name is that key with the "_T" suffix removed.

    Args:
        dataElection: sequence of record dicts.
        prefix: accepted for backward compatibility; not used for filtering.
            NOTE(review): presumably meant to restrict the result to keys
            starting with this prefix - confirm before enabling.
        exluded: keys to skip even if they look like a total-votes key.
        idx: index of the record whose keys are inspected.

    Returns:
        list of election names in the record's key order.

    Raises:
        IndexError: if ``dataElection`` has no record at ``idx``.
    """
    # Honour the caller's exclusion set and require the full "_T" suffix:
    # the two characters stripped below are that suffix, so a key that
    # merely ends in "T" is not an election total.
    return [key[:-2] for key in dataElection[idx]
            if key not in exluded and key.endswith('_T')]