Commit 083f66f6 authored by Gregory Herschlag's avatar Gregory Herschlag
Browse files

update

parent 8ae5b710
Loading
Loading
Loading
Loading

.gitignore

0 → 100644
+1 −0
Original line number Diff line number Diff line
**/__pycache__/
+33 −0
Original line number Diff line number Diff line
import numpy as np

def getArticulationPoints_cdef(nghb_list):
    """Return the set of articulation (cut) vertices of the graph.

    nghb_list is an adjacency list (node index -> iterable of neighbor
    indices).  Runs a single DFS rooted at node 0 (Hopcroft–Tarjan), so
    the graph is presumably connected — otherwise only node 0's
    component is examined (TODO confirm with callers).
    """
    node_count = len(nghb_list)
    cut_vertices = set()
    # DFS bookkeeping arrays, all indexed by node id.
    visited = np.zeros(node_count)
    depth = np.zeros(node_count)
    low = np.zeros(node_count)
    parent = -np.ones(node_count)   # -1 marks the DFS root
    getArticulationPoints_in_c(0, 0, cut_vertices,
                               visited, depth, low, parent, nghb_list)
    return cut_vertices

cdef set getArticulationPoints_in_c(int i, int d, articulation_points, 
                                    visited, depth, low, parent, nghb_list):
    # Recursive DFS step of the Hopcroft–Tarjan articulation-point
    # algorithm.  Visits node i at DFS depth d; mutates the shared
    # bookkeeping arrays (visited/depth/low/parent) and adds any cut
    # vertex it discovers to `articulation_points`.
    visited[i] = 1
    depth[i]   = d
    low[i]     = d          # low-link starts at own depth
    cdef int  childCount = 0
    cdef int isArticulation = 0
    for j in range(len(nghb_list[i])):
        neighborID = nghb_list[i][j]
        if not visited[neighborID]:
            # Tree edge: recurse, then pull the child's low-link up.
            parent[neighborID] = i
            getArticulationPoints_in_c(neighborID, d+1, articulation_points, visited,
                                       depth, low, parent, nghb_list)
            childCount = childCount + 1
            # No back edge from the child's subtree climbs above i,
            # so removing i would disconnect that subtree.
            if low[neighborID] >= depth[i]:
                isArticulation = 1
            low[i] = min(low[i], low[neighborID])
        elif neighborID != parent[i]:
            # Back edge (not to the DFS parent): may lower our low-link.
            low[i] = min(low[i], depth[neighborID])
    # Non-root: cut vertex iff some child subtree cannot bypass i.
    # Root (parent == -1): cut vertex iff it has more than one DFS child.
    if ((parent[i]!=-1 and isArticulation) or (parent[i]==-1 and childCount>1)):
        articulation_points.add(i)
 No newline at end of file
+144 −0
Original line number Diff line number Diff line
import geopandas as gpd
import numpy as np
import os

from tqdm.auto import tqdm

###############################################################################


def readCBListData(cbToData, file, dataToExtract, update, path):
    """Read one tab-separated block-level data file and fold it into cbToData.

    Parameters
    ----------
    cbToData : dict
        Maps census-block key -> ``np.ndarray`` of int counts; updated in
        place.
    file : str
        Base name of the data file (without the ``_Block.tab`` suffix).
    dataToExtract : object
        Must expose ``filesToFields`` (file name -> list of column names to
        pull) and ``cbDataKeyToInd`` (column name -> slot in the output
        array).
    update : bool
        If False, (re)initialize each block's array to zeros before
        filling; if True, write into arrays created by an earlier call.
    path : str
        Root directory containing the ``voting_data`` folder.

    Returns
    -------
    dict
        The same ``cbToData`` mapping, for chaining.
    """
    fields = dataToExtract.filesToFields[file]
    filePath = os.path.join(path, "voting_data", file + "_Block.tab")
    cbDataKeyToInd = dataToExtract.cbDataKeyToInd

    # Fix: the original did `file = open(filePath)` with no close — that
    # leaked the handle and rebound the `file` parameter; use a context
    # manager and a distinct name instead.
    with open(filePath) as dataFile:
        # Header row gives this file's column order; the export wraps
        # values in double quotes, so strip those before splitting.
        keyLine = dataFile.readline().rstrip().replace('"', '').split("\t")
        fieldToKeyInd = {fld: keyLine.index(fld) for fld in fields}

        for line in dataFile:
            splitline = line.rstrip().replace('"', '').split("\t")
            blockKey = splitline[0]  # first column is the census-block key
            if not update:
                cbToData[blockKey] = np.zeros(len(cbDataKeyToInd), dtype=int)
            for fld, oldInd in fieldToKeyInd.items():
                newInd = cbDataKeyToInd[fld]
                cbToData[blockKey][newInd] = int(splitline[oldInd])

    return cbToData


###############################################################################


def matchVotesToRegion(cblockShapefile, dataToExtract, path=".."):
    """Attach block-level voting data as new columns on the census-block
    shapefile, joined through its 'geoID' column.

    Reads every file listed in ``dataToExtract.filesToFields`` into one
    block-key -> array table, then adds one column per field.  Returns a
    fresh GeoDataFrame with the shapefile's original CRS preserved.
    """
    crs = cblockShapefile.crs

    # Accumulate all files into a single key -> array table; the first
    # file initializes the arrays, subsequent files update them in place.
    cbToData = {}
    firstFile = True
    for fname in dataToExtract.filesToFields:
        cbToData = readCBListData(cbToData, fname, dataToExtract,
                                  not firstFile, path)
        firstFile = False

    # The arrays are indexed by field in filesToFields iteration order;
    # walk that same order and add one mapped column per field.
    colInd = 0
    for fname, fields in dataToExtract.filesToFields.items():
        for fld in fields:
            blockToVal = {key: arr[colInd] for key, arr in cbToData.items()}
            cblockShapefile[fld] = cblockShapefile['geoID'].map(blockToVal)
            colInd += 1

    # Re-wrap to guarantee a GeoDataFrame with the original CRS.
    return gpd.GeoDataFrame(cblockShapefile, geometry="geometry", crs=crs)

###############################################################################


def interpolateData(gdfTo, gdfFrom, columns, common_part_name):
    """Area-weighted interpolation of `columns` from gdfFrom onto gdfTo,
    restricted to matching partitions (e.g. the same county) named by the
    `common_part_name` column.  Mutates gdfTo in place; returns None.

    For each source geometry, its column values are split across the
    intersecting target geometries proportionally to intersection area;
    a source geometry that intersects nothing is assigned wholesale to
    the nearest target geometry.
    """
    common_frame = gdfTo[common_part_name]
    common_partitions = set(common_frame)

    # Initialize every output column to zero so we can accumulate with +=.
    for column in columns:
        gdfTo[column] = np.zeros(len(gdfTo))

    for common_partition in tqdm(common_partitions):
        # Work within one shared partition at a time so the spatial index
        # stays small and cross-partition matches are impossible.
        subToDF = gdfTo[gdfTo[common_part_name] == common_partition]
        subFromDF = gdfFrom[gdfFrom[common_part_name] == common_partition]
        sIndex = subToDF.sindex
        print(common_partition, len(subFromDF.index))
        for ind in tqdm(subFromDF.index):
            # print("from ", subFromDF.loc[ind, "geoID"])
            from_geom = subFromDF.loc[ind]["geometry"]
            # Bounding-box candidates first, then exact intersection areas.
            # NOTE: sindex.intersection returns positional indices, hence
            # the .iloc lookup below.
            possiblePctMatches = list(sIndex.intersection(from_geom.bounds))
            possibleMatches = subToDF.iloc[possiblePctMatches]
            intersectingAreas = possibleMatches.intersection(from_geom).area
            intersectingAreas/= from_geom.area
            maxIntersection = intersectingAreas.max()

            if maxIntersection > 0:
                # Distribute the source values by intersection-area share.
                totalIntersectingArea = sum(intersectingAreas)
                for j_ind, area in zip(intersectingAreas.index, intersectingAreas):
                    frac = area/totalIntersectingArea
                    if frac == 0:
                        continue
                    for col in columns:
                        # print(col, frac, gdfTo.loc[j_ind, "geoID"], gdfFrom.loc[ind, col])
                        gdfTo.loc[j_ind, col] += frac*gdfFrom.loc[ind, col]
            else:
                # No overlap at all: dump everything onto the nearest
                # target geometry in this partition.
                geoms = subToDF['geometry']
                dists = geoms.distance(from_geom)
                j_ind = dists.idxmin()
                # print(gdfTo.loc[j_ind, "geoID"])
                for col in columns:
                    gdfTo.loc[j_ind, col] += gdfFrom.loc[ind, col]
                # break
        # break
    return
    # small_gdf[large_name] = small_gdf.index.map(lambda x: smIndexToLg[x])
    # return small_gdf
    # return cblockShapefile
    # small_gdf[large_name] = small_gdf.index.map(lambda x: smIndexToLg[x])
    # return small_gdf
    # return cblockShapefile

###############################################################################


def determinePlacePopulationData(gdfSF, placeSF, popField, placeNameField,
                                 newplaceNameField=None, gdfSFIndex=None, 
                                 tol=0.01):
    """For each geometry in gdfSF, record which 'places' overlap it and the
    overlapping population share.  Adds a column `newplaceNameField`
    (defaults to `placeNameField`) holding a dict {place name: value}.
    Mutates gdfSF in place; returns None.

    NOTE(review): `tol` is applied twice — first to the area fraction,
    then again to the fraction*population product — so it acts as both a
    geometric threshold and a minimum-population threshold; confirm that
    the second filter is intentional.
    """
    if gdfSFIndex is None:
        gdfSFIndex = gdfSF.sindex
    if newplaceNameField is None:
        newplaceNameField = placeNameField

    # One dict per gdfSF row, accumulated across all place geometries.
    indexToPlaceData = {ind: {} for ind in gdfSF.index}
    
    for ind in tqdm(placeSF.index):
        place_geom = placeSF.loc[ind, "geometry"]
        # Bounding-box candidates (positional indices), then exact overlap.
        possibleMatches = list(gdfSFIndex.intersection(place_geom.bounds))
        possibleMatches = gdfSF.iloc[possibleMatches].intersection(place_geom)
        matches = possibleMatches[possibleMatches.area != 0].area
        # Fraction of each gdfSF geometry covered by this place.
        matches /= matches.index.map(lambda i: gdfSF.loc[i, "geometry"].area)
        matches = matches[matches > tol]
        # Scale the surviving fractions by the row's population field.
        matches *= matches.index.map(lambda i: gdfSF.loc[i, popField])
        matches = matches[matches > tol]
        #     tmpDict = dict(zip(matches.index, matches))
        tmpDict = dict(zip(matches.index, 
                       [{placeSF.loc[ind, placeNameField]: match} 
                        for match in matches]))
        for ii in tmpDict:
            indexToPlaceData[ii].update(tmpDict[ii])
        # print(tmpDict.keys())
        # break
    gdfSF[newplaceNameField] = gdfSF.index.map(lambda ii: indexToPlaceData[ii])


###############################################################################
+152 −0
Original line number Diff line number Diff line
# local libraries 
from importlib import reload

import countyDataToExtract as cnDExt
import dataPaths as dP
reload(cnDExt)
reload(dP)
from countyDataToExtract import *
from dataPaths import *

################################################################################

def getCountyGranularity():
    """Map county name -> granularity code.

    Every county from the FIPS list starts at "C" (county level); any
    county flagged with a "P" token in a House or Senate cluster
    directory name (NAME_P_NAME_C_... pattern) is upgraded to "P"
    (precinct level).
    """
    granularity = {}

    # Baseline: every county in the state is county-level.
    fipPath = os.path.join(NCDataPath, "StateData",
                           "CountyFIPsCodes.txt")
    with open(fipPath) as fipFile:
        for line in fipFile:
            name = line.rstrip().split("\t")[0].upper().replace(" ", "")
            granularity[name] = "C"

    # Gather cluster directory names from both chambers (House names are
    # filtered for stray dotted files; Senate names are taken as-is).
    houseDir = os.path.join(clusterPath, "ClusterHouse/")
    allClusters = [c for c in os.listdir(houseDir) if "." not in c]
    senateDir = os.path.join(clusterPath, "ClusterSenate")
    allClusters += os.listdir(senateDir)

    # Cluster names alternate COUNTY_LEVEL_COUNTY_LEVEL_...; only "P"
    # overrides the default.
    for clusterName in allClusters:
        tokens = clusterName.split("_")
        for pos in range(0, len(tokens) - 1, 2):
            if tokens[pos + 1] == "P":
                granularity[tokens[pos].upper()] = tokens[pos + 1]
    return granularity

################################################################################

def buildIdMap(clusterDir):
    """Build the FID <-> geoID lookup tables for one cluster.

    Reads ``<cluster>_GEOIDS.txt`` inside clusterDir, where each line is
    "<fid>\\t<geoID>", and returns [geoIDToFID, fidToGeoID].
    """
    clusterName = clusterDir.split(os.sep)[-1]
    mapPath = os.path.join(clusterDir, clusterName + "_GEOIDS.txt")

    geoIDToFID, fidToGeoID = {}, {}
    with open(mapPath) as mapFile:
        for record in mapFile:
            parts = record.rstrip().split("\t")
            fid, geoID = int(parts[0]), parts[1]
            geoIDToFID[geoID] = fid
            fidToGeoID[fid] = geoID
    return [geoIDToFID, fidToGeoID]

################################################################################

def getGeoIDToData(dataDesc, countyToLevel, warn = True):
    """Collect geoID -> list-of-field-strings for every county.

    Each county's file is read from
    ``dataOutPath/<level>/<COUNTY>/Votes_<dataDesc>.txt`` where `level`
    comes from countyToLevel.  When `warn` is true, rows whose first
    data value starts with "0" are flagged on stdout (presumably a
    suspect/missing-data marker — TODO confirm).
    """
    geoIDToData = {}
    for county, level in countyToLevel.items():
        votesPath = os.path.join(dataOutPath, level, county.upper(),
                                 "Votes_" + dataDesc + ".txt")
        with open(votesPath) as votesFile:
            for record in votesFile:
                parts = record.rstrip().split("\t")
                geoID, data = parts[0], parts[1:]
                geoIDToData[geoID] = data
                if data[0][0] == "0" and warn:
                    print("WARNING:: ", dataDesc, county, level, geoID)
    return geoIDToData

################################################################################

def writeData(dataDesc, fidToGeoID, geoIDToData, clusterDir):
    """Write one line per FID — "<fid>\\t<field>\\t<field>..." — into
    ``<cluster>_<dataDesc>.txt`` inside clusterDir.

    Parameters
    ----------
    dataDesc : str       Label used in the output file name.
    fidToGeoID : dict    FID -> geoID, iterated in insertion order.
    geoIDToData : dict   geoID -> list of field strings; not modified.
    clusterDir : str     Cluster directory; its basename is the cluster name.

    A FID whose geoID has no entry in geoIDToData gets a row of "0"s of
    the same width as the other rows, and a warning is printed.
    """
    cluster = clusterDir.split(os.sep)[-1]
    outPath = os.path.join(clusterDir, cluster + "_" + dataDesc + ".txt")

    with open(outPath, "w") as outFile:
        for fid, geoID in fidToGeoID.items():
            try:
                data = geoIDToData[geoID]
            # Fix: catch only the expected KeyError (the original bare
            # `except:` swallowed everything, including KeyboardInterrupt).
            except KeyError:
                print("WARNING:: no data for geoID", geoID, "in cluster",
                      cluster)
                # Fix: build a fresh zero row.  The original zeroed an
                # arbitrary geoIDToData entry *in place*, silently
                # corrupting the caller's data for later lookups.
                width = len(next(iter(geoIDToData.values())))
                data = ["0"] * width
            outFile.write(str(fid) + "\t" + "\t".join(data) + "\n")


################################################################################

def extractDataPrecintLevel(eD):
    """Extract precinct/county-level data for every cluster under
    ``clusterPath/eD``.

    For each cluster directory: build the FID<->geoID maps, decode the
    directory name (alternating COUNTY_LEVEL tokens, "P" -> Precinct,
    anything else -> County), then read and write every data description
    in the global ``dataDescs``.  (NOTE: "Precint" in the function name
    is a historical typo kept for caller compatibility.)
    """
    clusterDir = os.path.join(clusterPath, eD)
    visible = [c for c in os.listdir(clusterDir) if not c.startswith(".")]
    for cluster in visible:
        print(cluster)
        curClusterPath = os.path.join(clusterDir, cluster)
        geoIDToFID, fidToGeoID = buildIdMap(curClusterPath)

        # Decode COUNTY_LEVEL_COUNTY_LEVEL_... pairs from the name.
        tokens = cluster.split("_")
        countyToLevel = {}
        for pos in range(0, len(tokens) - 1, 2):
            level = "Precinct" if tokens[pos + 1] == "P" else "County"
            countyToLevel[tokens[pos]] = level

        for dataDesc in dataDescs:
            # Drop a single trailing underscore from the description.
            desc = dataDesc[:-1] if dataDesc[-1] == "_" else dataDesc
            geoIDToData = getGeoIDToData(desc, countyToLevel)
            writeData(desc, fidToGeoID, geoIDToData, curClusterPath)

################################################################################

def extractDataCBlockLevel(eD):
    """Extract census-block/county-level data for every cluster under
    ``clusterPath/<eD>CB``.

    Directory names use the verbose "CensusBlock"/"County" spelling; they
    are normalized to "_CB_"/"_C_" tokens, then decoded as alternating
    COUNTY_LEVEL pairs ("CB" -> CBlock, anything else -> County) before
    reading and writing every data description in the global ``dataDescs``.
    """
    clusterDir = os.path.join(clusterPath, eD + "CB")
    visible = [c for c in os.listdir(clusterDir) if not c.startswith(".")]
    for cluster in visible:
        # Normalize the verbose name into underscore-delimited tokens.
        clusterParsed = (cluster.replace("CensusBlock", "_CB_")
                                .replace("County", "_C_"))
        print(clusterParsed)
        curClusterPath = os.path.join(clusterDir, cluster)
        geoIDToFID, fidToGeoID = buildIdMap(curClusterPath)

        tokens = clusterParsed.split("_")
        countyToLevel = {}
        for pos in range(0, len(tokens) - 1, 2):
            level = "CBlock" if tokens[pos + 1] == "CB" else "County"
            countyToLevel[tokens[pos]] = level

        for dataDesc in dataDescs:
            # Drop a single trailing underscore from the description.
            desc = dataDesc[:-1] if dataDesc[-1] == "_" else dataDesc
            geoIDToData = getGeoIDToData(desc, countyToLevel, warn = False)
            writeData(desc, fidToGeoID, geoIDToData, curClusterPath)

################################################################################

lib/coi_extraction.py

0 → 100644
+23 −0
Original line number Diff line number Diff line
import fiona
import geopandas as gpd
import networkx as nx
import networkx.algorithms as nx_alg
import numpy as np
import pandas as pd
import shapely

from tqdm.auto import tqdm

# local libraries 
from importlib import reload

import countyFunctions as cnF
import shapefileToGraph
reload(cnF)
reload(shapefileToGraph)

import warnings; warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)

###############################################################################

Loading