Commit 9945d351 authored by Jonathan Mattingly's avatar Jonathan Mattingly
Browse files

add readme for shapefiles and CT script

parent 006ab2d1
Loading
Loading
Loading
Loading
+87 −2
Original line number Diff line number Diff line
%% Cell type:code id: tags:

``` python
import geopandas as gpd
import numpy as np
import pandas as pd
import os
import shapely
import sys
sys.path.append("../lib")

from tqdm import tqdm

############ local libraries #################
from importlib import reload

import censusBlockFunctions as cbF
import countyFunctions as cnF
import hierarchy
import reader
import shapefileToGraph
import writer
# Re-import the local modules (found via the "../lib" path entry added above)
# so that edits made to them take effect without restarting the kernel.
reload(cbF)
reload(cnF)
reload(hierarchy)
reload(reader)
reload(shapefileToGraph)
reload(writer)
##############################################
```

%% Output

    <module 'writer' from '/Users/g/Desktop/mapprocessing/scripts/../lib/writer.py'>
    /Users/jonm/Git/Greg/MapProcessing/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
      from .autonotebook import tqdm as notebook_tqdm

    <module 'writer' from '/Users/jonm/Git/Greg/MapProcessing/scripts/../lib/writer.py'>

%% Cell type:code id: tags:

``` python
stateAbv = "CT"
# Number of districts; used further down to compute the average district population.
dists = 5

# Precinct boundaries with 2020 election results (VEST), downloaded from
# https://redistrictingdatahub.org/data/download-data/
precinctShapefile = gpd.read_file("../shapefiles/CT/ct_vest_20")
# 2020 census tabulation blocks, downloaded from
# https://www2.census.gov/geo/tiger/TIGER2025/TABBLOCK20/tl_2025_09_tabblock20.zip
# (The earlier read of "tl_rd22_09_tabblock20" was removed: that path is not a
# readable dataset and its result was overwritten by this read anyway.)
cblockShapefile = gpd.read_file("../shapefiles/CT/tl_2025_09_tabblock20")

# Build the county-ID lookup tables, then attach a COUNTY name column to both
# layers from their county FIPS code ("09" is the state ID passed for CT).
reload(cnF)
cnF.determineCountyIDMaps("data", path="..")
cblockShapefile["COUNTY"] = cblockShapefile["COUNTYFP20"].apply(lambda g: cnF.countyName(g, stateID="09"))
precinctShapefile["COUNTY"] = precinctShapefile["COUNTYFP20"].apply(lambda g: cnF.countyName(g, stateID="09"))
```

%% Output

    ---------------------------------------------------------------------------
    DataSourceError                           Traceback (most recent call last)
    Cell In[2], line 5
          2 dists = 5
          4 precinctShapefile = gpd.read_file("../shapefiles/CT/ct_vest_20") # https://redistrictingdatahub.org/data/download-data/
    ----> 5 cblockShapefile = gpd.read_file("../shapefiles/CT/tl_rd22_09_tabblock20") # https://www2.census.gov/geo/tiger/TIGER2024/TABBLOCK20/
          7 reload(cnF)
          8 cnF.determineCountyIDMaps("data", path="..")
    File ~/Git/Greg/MapProcessing/.venv/lib/python3.13/site-packages/geopandas/io/file.py:316, in _read_file(filename, bbox, mask, columns, rows, engine, **kwargs)
        313             filename = response.read()
        315 if engine == "pyogrio":
    --> 316     return _read_file_pyogrio(
        317         filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
        318     )
        320 elif engine == "fiona":
        321     if pd.api.types.is_file_like(filename):
    File ~/Git/Greg/MapProcessing/.venv/lib/python3.13/site-packages/geopandas/io/file.py:576, in _read_file_pyogrio(path_or_bytes, bbox, mask, rows, **kwargs)
        567     warnings.warn(
        568         "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
        569         "will be removed in a future release. You can use the 'columns' keyword "
       (...)    572         stacklevel=3,
        573     )
        574     kwargs["columns"] = kwargs.pop("include_fields")
    --> 576 return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
    File ~/Git/Greg/MapProcessing/.venv/lib/python3.13/site-packages/pyogrio/geopandas.py:275, in read_dataframe(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)
        270 if not use_arrow:
        271     # For arrow, datetimes are read as is.
        272     # For numpy IO, datetimes are read as string values to preserve timezone info
        273     # as numpy does not directly support timezones.
        274     kwargs["datetime_as_string"] = True
    --> 275 result = read_func(
        276     path_or_buffer,
        277     layer=layer,
        278     encoding=encoding,
        279     columns=columns,
        280     read_geometry=read_geometry,
        281     force_2d=gdal_force_2d,
        282     skip_features=skip_features,
        283     max_features=max_features,
        284     where=where,
        285     bbox=bbox,
        286     mask=mask,
        287     fids=fids,
        288     sql=sql,
        289     sql_dialect=sql_dialect,
        290     return_fids=fid_as_index,
        291     **kwargs,
        292 )
        294 if use_arrow:
        295     import pyarrow as pa
    File ~/Git/Greg/MapProcessing/.venv/lib/python3.13/site-packages/pyogrio/raw.py:198, in read(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)
         59 """Read OGR data source into numpy arrays.
         60
         61 IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
       (...)    194
        195 """
        196 dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
    --> 198 return ogr_read(
        199     get_vsi_path_or_buffer(path_or_buffer),
        200     layer=layer,
        201     encoding=encoding,
        202     columns=columns,
        203     read_geometry=read_geometry,
        204     force_2d=force_2d,
        205     skip_features=skip_features,
        206     max_features=max_features or 0,
        207     where=where,
        208     bbox=bbox,
        209     mask=_mask_to_wkb(mask),
        210     fids=fids,
        211     sql=sql,
        212     sql_dialect=sql_dialect,
        213     return_fids=return_fids,
        214     dataset_kwargs=dataset_kwargs,
        215     datetime_as_string=datetime_as_string,
        216 )
    File pyogrio/_io.pyx:1313, in pyogrio._io.ogr_read()
    File pyogrio/_io.pyx:227, in pyogrio._io.ogr_open()
    DataSourceError: '../shapefiles/CT/tl_rd22_09_tabblock20' not recognized as being in a supported file format.; It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.

%% Cell type:code id: tags:

``` python
# Put both layers into the same CRS before the spatial processing below.
# NOTE(review): ESRI:102656 is presumably a Connecticut state-plane
# projection in feet -- confirm.
cblockShapefile, precinctShapefile = (
    layer.to_crs("ESRI:102656")
    for layer in (cblockShapefile, precinctShapefile)
)
```

%% Cell type:markdown id: tags:

## Assign census blocks to precincts

%% Cell type:code id: tags:

``` python
# Suffix each precinct name with its county name (presumably to keep names
# distinct across counties) and expose the result as the NAME column.
precinctShapefile["NAME20"] = precinctShapefile["NAME20"] + "_" + precinctShapefile["COUNTY"]
precinctShapefile["NAME"] = precinctShapefile["NAME20"]
# precinctShapefile["PCTID"] = precinctShapefile.GEOID20
# Upper-case the county names; this runs after the suffix above was built.
precinctShapefile["COUNTY"] = precinctShapefile["COUNTY"].map(lambda county: county.upper())
```

%% Cell type:code id: tags:

``` python
# Name of the column that identifies a precinct; passed to the hierarchy
# helpers and used as a dissolve key in the cells below.
pctField = "NAME"
```

%% Cell type:code id: tags:

``` python
reload(hierarchy)
# Reuse a previously derived block-to-precinct assignment if one is on disk;
# otherwise compute it from the block and precinct layers.
cblockFile = "cblocksWPct2020_" + stateAbv
filePath = os.path.join("..", "shapefiles", "derived", cblockFile + ".shp")
if not os.path.exists(filePath):
    cblockShapefile = hierarchy.smallUnitsToPartition(
        cblockShapefile, precinctShapefile, pctField, "COUNTY"
    )
    # The freshly computed result is currently not written back to disk:
    # writer.writeShapefile(cblockShapefile, filePath)
else:
    cblockShapefile = gpd.read_file(filePath)
```

%% Output

    100%|██████████| 8/8 [00:41<00:00,  5.19s/it]

%% Cell type:markdown id: tags:

## Clean precinct noise/small discontinuities

%% Cell type:code id: tags:

``` python
# Clean small discontinuities in the (county, precinct) partition of the
# blocks. "POP20" is passed as the third argument -- presumably the population
# column used to judge what counts as small; see the helper for the exact rule.
cblockShapefile = hierarchy.cleanSmallPartitionDiscontinuity(
    cblockShapefile, ["COUNTY", pctField], "POP20"
)
```

%% Output

    100%|██████████| 43/43 [00:00<00:00, 53.85it/s]

%% Cell type:code id: tags:

``` python
# cblockFile = "cblocksDenoise_" + stateAbv
# filePath = os.path.join("..", "shapefiles", "derived", cblockFile + ".shp")
# if os.path.exists(filePath):
#     cblockShapefile = gpd.read_file(filePath)
# else:
#     writer.writeShapefile(cblockShapefile, filePath)
```

%% Cell type:markdown id: tags:

## Merge or split multipolygonal precincts

%% Cell type:code id: tags:

``` python
# cblockShapefile.NAME = cblockShapefile.NAME.map(lambda n: n if "_Split" not in n
#                                                             else "_".join(n.split("_")[:-1]))
```

%% Cell type:code id: tags:

``` python
reload(hierarchy)
# Two ways of handling precincts that are still discontinuous:
#
# (a) split the discontinuous precincts (disabled):
# cblockShapefile = hierarchy.splitSignificantPartitionDiscontinuity(cblockShapefile, ["COUNTY", "NAME"],
#                                                                    "POP20")
#
# (b) merge the discontinuous precincts with their neighbors (used here):
cblockShapefile = hierarchy.mergeSignificantPartitionDiscontinuity(
    cblockShapefile, ["COUNTY", pctField], "POP20"
)
```

%% Output

    100%|██████████| 739/739 [00:03<00:00, 195.45it/s]
    100%|██████████| 42/42 [00:00<00:00, 54.89it/s]

%% Cell type:code id: tags:

``` python
# Collapse the blocks into one row per (COUNTY, precinct), aggregating the
# remaining columns with 'sum'.
pctSF = cblockShapefile.dissolve(by=["COUNTY", pctField], aggfunc='sum').reset_index()

# After the merge step no precinct may consist of several disjoint pieces.
split_regions = pctSF[
    [isinstance(geom, shapely.geometry.multipolygon.MultiPolygon)
     for geom in pctSF.geometry]
]
assert len(split_regions) == 0
# The same contiguity check at county level (disabled):
# cntySF = pctSF.dissolve(by = "COUNTY", aggfunc = "sum")
# cntySF = cntySF.reset_index()
# split_regions = cntySF[cntySF.geometry.map(lambda g: isinstance(g,
#                                            shapely.geometry.multipolygon.MultiPolygon))]
# assert len(split_regions) == 0
```

%% Cell type:code id: tags:

``` python
# Count the precincts whose name contains a comma. The precinct column is
# addressed through pctField, as in the other cells, and the comma is matched
# literally rather than as a regular expression.
len(pctSF[pctSF[pctField].str.contains(",", regex=False)])
```

%% Output

    32

%% Cell type:code id: tags:

``` python
# Number of rows in the dissolved precinct frame vs. the original precinct shapefile.
len(pctSF), len(precinctShapefile)
```

%% Output

    (696, 741)

%% Cell type:code id: tags:

``` python
# Largest precinct POP20, total POP20 divided by the number of districts,
# and the number of precincts.
(
    max(pctSF["POP20"]),
    sum(pctSF["POP20"]) / dists,
    len(pctSF),
)
```

%% Output

    (26970, 721188.8, 696)

%% Cell type:code id: tags:

``` python
# Persist the merged block layer the first time through; on later runs load
# the saved copy instead of the in-memory result.
cblockFile = "cblocksMergeMultiPcts_" + stateAbv
filePath = os.path.join("..", "shapefiles", "derived", cblockFile + ".shp")
if not os.path.exists(filePath):
    writer.writeShapefile(cblockShapefile, filePath)
else:
    cblockShapefile = gpd.read_file(filePath)
```

%% Cell type:markdown id: tags:

## Find biconnected components and merge

%% Cell type:code id: tags:

``` python
import pyximport; pyximport.install(reload_support=True, language_level=3)
import biconnectedComponents
```

%% Output

    ld: warning: duplicate -rpath '/Users/g/anaconda3/envs/gis/lib' ignored

%% Cell type:code id: tags:

``` python
import pyximport; pyximport.install(reload_support=True, language_level=3)
import biconnectedComponents

# Adjacency between the dissolved precincts.
adjacency = shapefileToGraph.findAdjacency(pctSF)
# Keep only the edges that do not involve node -1
# (presumably the marker for the outer border -- confirm in shapefileToGraph).
adjacency_nobdr = [edge for edge in adjacency if -1 not in edge]
# Neighbor lists for the remaining edges, then the articulation points of that graph.
nbrList = shapefileToGraph.neighborList(pctSF, adjacency_nobdr)
articulationPoints = biconnectedComponents.getArticulationPoints_cdef(nbrList)
```

%% Output

    100%|██████████| 696/696 [00:03<00:00, 192.79it/s]

%% Cell type:code id: tags:

``` python
# Show the articulation points computed in the previous cell.
articulationPoints
```

%% Output

    {77}

%% Cell type:code id: tags:

``` python
reload(hierarchy)
# Merge away the articulation points found above. The full adjacency
# (including the -1 edges) is what gets passed to the helper here.
pctSF = hierarchy.mergeArticulationDict(
    pctSF,
    adjacency,
    articulationPoints,
    ["COUNTY", pctField],
    "POP20",
)
```

%% Output

    100%|██████████| 1/1 [00:00<00:00, 376.68it/s]

%% Cell type:code id: tags:

``` python
# Dissolve again on (COUNTY, precinct) so the rows merged in the previous step
# collapse into single rows, then report the new precinct count.
# NOTE(review): aggfunc='sum' also concatenates the string columns (see the
# STATEFP20/GEOID20 values in the frame shown at the end of the notebook).
pctSF = pctSF.dissolve(by=["COUNTY", pctField], aggfunc='sum').reset_index()
len(pctSF)
```

%% Output

    695

%% Cell type:code id: tags:

``` python
# Recompute adjacency on the merged precincts (again ignoring edges that
# involve node -1) and confirm that no articulation point is left.
adjacency_check = [
    edge
    for edge in shapefileToGraph.findAdjacency(pctSF)
    if -1 not in edge
]
nbrList_check = shapefileToGraph.neighborList(pctSF, adjacency_check)
articulationPoints = biconnectedComponents.getArticulationPoints_cdef(nbrList_check)
assert len(articulationPoints) == 0
```

%% Output

    100%|██████████| 695/695 [00:03<00:00, 181.33it/s]

%% Cell type:markdown id: tags:

## Add votes

%% Cell type:code id: tags:

``` python
# List the columns of the precinct shapefile (the vote fields are read from it below).
precinctShapefile.columns
```

%% Output

    Index(['STATEFP20', 'COUNTYFP20', 'NAME20', 'G20PREDBID', 'G20PRERTRU',
           'G20PRELJOR', 'G20PREGHAW', 'G20PREOWRI', 'geometry', 'COUNTY', 'NAME'],
          dtype='object')

%% Cell type:code id: tags:

``` python
# Vote counts keyed by precinct name: dr feeds the G20PREREP column and dd the
# G20PREDEM column below.
# NOTE(review): if two rows of precinctShapefile share a name, only the last
# row's count is kept -- confirm that the names are unique.
dr = {
    name: votes
    for name, votes in zip(precinctShapefile[pctField], precinctShapefile.G20PRERTRU)
}
dd = {
    name: votes
    for name, votes in zip(precinctShapefile[pctField], precinctShapefile.G20PREDBID)
}

def getv(name, votedict):
    """Return the summed votes for a comma-separated precinct name.

    ``name`` is split on "," and the entry of ``votedict`` for each part is
    added up, so a name made of several joined precinct names yields the
    combined count. Raises ``KeyError`` if any part is missing from
    ``votedict``.
    """
    total = 0
    for part in name.split(","):
        total += votedict[part]
    return total


# Attach the vote totals to every precinct row, summing over the
# comma-separated parts of its name via getv.
pctSF["G20PREDEM"] = [getv(name, dd) for name in pctSF[pctField]]
pctSF["G20PREREP"] = [getv(name, dr) for name in pctSF[pctField]]
```

%% Cell type:code id: tags:

``` python
# List the columns now present on the dissolved precinct frame.
pctSF.columns
```

%% Output

    Index(['COUNTY', 'NAME', 'geometry', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20',
           'BLOCKCE20', 'GEOID20', 'NAME20', 'MTFCC20', 'UR20', 'UACE20',
           'UATYPE20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20',
           'INTPTLON20', 'HOUSING20', 'POP20', 'G20PREDEM', 'G20PREREP'],
          dtype='object')

%% Cell type:code id: tags:

``` python
# Remove the land/water area and housing columns from the precinct frame, in place.
pctSF.drop(columns=["ALAND20", "AWATER20", "HOUSING20"], inplace=True)
```

%% Cell type:code id: tags:

``` python
# pctSF.to_file("../shapefiles/derived/pctSF_"+ stateAbv)
```

%% Cell type:markdown id: tags:

## Extract precinct graphs in various formats

%% Cell type:code id: tags:

``` python
reload(writer)
# Store each precinct's area (in the units of the current CRS) as a column
# before exporting.
pctSF["area"] = pctSF.geometry.area
# Write the precinct frame to ../graph/<state>_pct20.json via the local writer module.
outPath = os.path.join("..", "graph", stateAbv+"_pct20.json")
writer.saveGerrychainJSON(pctSF, outPath, stateAbv)
```

%% Output

    100%|██████████| 695/695 [00:03<00:00, 189.34it/s]

%% Cell type:code id: tags:

``` python
import networkx as nx

# Build a graph from the precinct adjacency edges (skipping any edge that
# involves node -1) and require it to be connected.
# NOTE(review): nodes enter G only through edges, so a precinct without any
# neighbor would not appear in G and would not make this assertion fail.
G = nx.Graph()
G.add_edges_from(tuple(edge) for edge in adjacency_check if -1 not in edge)
assert nx.is_connected(G)
```

%% Cell type:code id: tags:

``` python
# Display the final precinct frame.
pctSF
```

%% Output

                   COUNTY                                               NAME  \
    0    FAIRFIELD COUNTY                     Bethel 001-00_FAIRFIELD COUNTY
    1    FAIRFIELD COUNTY  Bethel 002-00_FAIRFIELD COUNTY,Bethel 005-00_F...
    2    FAIRFIELD COUNTY                     Bethel 003-00_FAIRFIELD COUNTY
    3    FAIRFIELD COUNTY                     Bethel 004-00_FAIRFIELD COUNTY
    4    FAIRFIELD COUNTY                 Bridgeport 124-01_FAIRFIELD COUNTY
    ..                ...                                                ...
    690    WINDHAM COUNTY                      Windham 005-00_WINDHAM COUNTY
    691    WINDHAM COUNTY                      Windham 006-00_WINDHAM COUNTY
    692    WINDHAM COUNTY                      Windham 007-00_WINDHAM COUNTY
    693    WINDHAM COUNTY                      Windham 008-00_WINDHAM COUNTY
    694    WINDHAM COUNTY                    Woodstock 001-00_WINDHAM COUNTY
    
                                                  geometry  \
    0    POLYGON ((818410.486 696684.981, 818364.897 69...
    1    POLYGON ((821641.904 703498.760, 821691.266 70...
    2    POLYGON ((821515.450 691432.947, 821385.981 69...
    3    POLYGON ((813155.775 691075.367, 812865.915 69...
    4    POLYGON ((881253.707 633360.851, 881224.418 63...
    ..                                                 ...
    690  POLYGON ((1144857.467 820824.848, 1144709.740 ...
    691  POLYGON ((1146143.052 819308.868, 1146145.577 ...
    692  POLYGON ((1161096.102 801575.944, 1160371.613 ...
    693  POLYGON ((1140236.769 820508.328, 1140009.976 ...
    694  POLYGON ((1177534.849 898932.087, 1177512.613 ...
    
                                                 STATEFP20  \
    0             0909090909090909090909090909090909090909
    1    0909090909090909090909090909090909090909090909...
    2    0909090909090909090909090909090909090909090909...
    3    0909090909090909090909090909090909090909090909...
    4                           09090909090909090909090909
    ..                                                 ...
    690  0909090909090909090909090909090909090909090909...
    691  0909090909090909090909090909090909090909090909...
    692  0909090909090909090909090909090909090909090909...
    693       09090909090909090909090909090909090909090909
    694  0909090909090909090909090909090909090909090909...
    
                                                COUNTYFP20  \
    0    0010010010010010010010010010010010010010010010...
    1    0010010010010010010010010010010010010010010010...
    2    0010010010010010010010010010010010010010010010...
    3    0010010010010010010010010010010010010010010010...
    4              001001001001001001001001001001001001001
    ..                                                 ...
    690  0150150150150150150150150150150150150150150150...
    691  0150150150150150150150150150150150150150150150...
    692  0150150150150150150150150150150150150150150150...
    693  0150150150150150150150150150150150150150150150...
    694  0150150150150150150150150150150150150150150150...
    
                                                 TRACTCE20  \
    0    2003012002002002002002002002002002002002002003...
    1    2003012003012003012003012003012003012003012003...
    2    2003022003022003022002002003022003022003022003...
    3    2003022001002001002001002002002001002002002001...
    4    0734000734000734000735000734000734000734000734...
    ..                                                 ...
    690  8006008003008003008007008007008007008003008003...
    691  8007008004008006008004008007008004008007008004...
    692  8005028005028005028005028005028005018005028005...
    693  8003008004008004008003008003008003008003008003...
    694  9011019011029011029011029011029011029011019011...
    
                                                 BLOCKCE20  \
    0    2019100710001010100510124001201340031006100310...
    1    2018300210253008101110212005102830142020100010...
    2    1007400240043005100210034006101210083006101340...
    3    2000100920151005400410034000200010101002100710...
    4    2007200120001002200320022005200810001001200620...
    ..                                                 ...
    690  3008200620031007200230063004300330091011302110...
    691  3013100620113006101910071016100410053003101230...
    692  3016200120033025101910042007100110002002302120...
    693  1008300430011018202220141005100610073016300720...
    694  1030104530121016103130041017202030091039105610...
    
                                                   GEOID20  \
    0    0900120030120190900120020010070900120020010000...
    1    0900120030120180900120030130020900120030110250...
    2    0900120030210070900120030240020900120030240040...
    3    0900120030220000900120010010090900120010020150...
    4    0900107340020070900107340020010900107340020000...
    ..                                                 ...
    690  0901580060030080901580030020060901580030020030...
    691  0901580070030130901580040010060901580060020110...
    692  0901580050230160901580050220010901580050220030...
    693  0901580030010080901580040030040901580040030010...
    694  0901590110110300901590110210450901590110230120...
    
                                                    NAME20  \
    0    Block 2019Block 1007Block 1000Block 1010Block ...
    1    Block 2018Block 3002Block 1025Block 3008Block ...
    2    Block 1007Block 4002Block 4004Block 3005Block ...
    3    Block 2000Block 1009Block 2015Block 1005Block ...
    4    Block 2007Block 2001Block 2000Block 1002Block ...
    ..                                                 ...
    690  Block 3008Block 2006Block 2003Block 1007Block ...
    691  Block 3013Block 1006Block 2011Block 3006Block ...
    692  Block 3016Block 2001Block 2003Block 3025Block ...
    693  Block 1008Block 3004Block 3001Block 1018Block ...
    694  Block 1030Block 1045Block 3012Block 1016Block ...
    
                                                   MTFCC20  \
    0    G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    1    G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    2    G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    3    G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    4    G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    ..                                                 ...
    690  G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    691  G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    692  G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    693  G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    694  G5040G5040G5040G5040G5040G5040G5040G5040G5040G...
    
                                                      UR20  \
    0                                 UUUUUUUUUUUUUUUUUUUU
    1    UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    2    URRUUURRUURRUUUUUUUUUUUUUUUUURUUUUURUURUUUUUUU...
    3    UURUUUUUUUUUUUUUUUUUUUUUUUURURUUUUUUUUUUUUUUUU...
    4                                        UUUUUUUUUUUUU
    ..                                                 ...
    690  UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    691  UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    692  RRURUUURUURUURRRUURRRURRUUURRURRRRRURRRRRURURR...
    693                             UUUUURURRRUUUUUUURRRRU
    694  RRURRRRRRRRRRRRRRRRRRRURRRRRRRRRRRRRRRRRRRRURR...
    
                                                    UACE20  \
    0    2209622096220962209622096220962209622096220962...
    1    2209622096220962209622096220962209622096220962...
    2    2209622096220962209622096220962209622096220962...
    3    2209622096220962209622096220962209622096220962...
    4    1016210162101621016210162101621016210162101621...
    ..                                                 ...
    690  9559095590955909559095590955909559095590955909...
    691  9559095590955909559095590955909559095590955909...
    692  9559095590955909559095590955909559095590955909...
    693  9559095590955909559095590955909559095590955909...
    694  7286872868728687286872868728687286872868728687...
    
                                                  UATYPE20  \
    0                                 UUUUUUUUUUUUUUUUUUUU
    1    UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    2           UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
    3    UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    4                                        UUUUUUUUUUUUU
    ..                                                 ...
    690  UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    691  UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU...
    692                   UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
    693                                     UUUUUUUUUUUUUU
    694                                    UUUUUUUUUUUUUUU
    
                                                FUNCSTAT20  \
    0                                 SSSSSSSSSSSSSSSSSSSS
    1    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    2    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    3    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    4                                        SSSSSSSSSSSSS
    ..                                                 ...
    690  SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    691  SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    692  SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    693                             SSSSSSSSSSSSSSSSSSSSSS
    694  SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS...
    
                                                INTPTLAT20  \
    0    +41.3908647+41.3878371+41.3932749+41.3838596+4...
    1    +41.3910143+41.4154880+41.4179660+41.4057285+4...
    2    +41.3608019+41.3607929+41.3606207+41.3724732+4...
    3    +41.3602695+41.3770989+41.3581148+41.3791469+4...
    4    +41.2002753+41.2022561+41.2056116+41.1984986+4...
    ..                                                 ...
    690  +41.7155620+41.7164764+41.7159123+41.7274243+4...
    691  +41.7164456+41.7081731+41.7107981+41.7089858+4...
    692  +41.6886249+41.7401999+41.7519934+41.6663504+4...
    693  +41.7251206+41.7087923+41.7109068+41.7200344+4...
    694  +41.9455386+41.9565782+41.9260304+41.9784916+4...
    
                                                INTPTLON20  POP20  G20PREDEM  \
    0    -073.4038721-073.4159745-073.4090007-073.41683...   3077        920
    1    -073.3948466-073.3929670-073.3993009-073.39730...   5221       1613
    2    -073.3834793-073.3636509-073.3785212-073.40645...   6293       2064
    3    -073.4139666-073.4314266-073.4293577-073.42678...   5767       1673
    4    -073.1826051-073.1801577-073.1814102-073.18216...   1580        332
    ..                                                 ...    ...        ...
    690  -072.2140220-072.2223063-072.2284299-072.20770...   8519       1456
    691  -072.2108746-072.2021979-072.2083136-072.21785...   7165       1692
    692  -072.1481711-072.1374199-072.1507972-072.13319...   3950       1243
    693  -072.2459741-072.2295604-072.2251597-072.24319...   1282        339
    694  -072.0754769-071.9953641-071.9306718-072.05957...   8221       2548
    
         G20PREREP          area
    0          520  3.626240e+07
    1         1425  1.003939e+08
    2         1656  2.185647e+08
    3         1133  1.191771e+08
    4           87  3.534818e+06
    ..         ...           ...
    690        563  4.096040e+07
    691        745  4.738276e+07
    692       1025  4.988012e+08
    693        130  3.733801e+07
    694       2520  1.724802e+09
    
    [695 rows x 20 columns]

%% Cell type:code id: tags:

``` python
```
+1 −1
Original line number Diff line number Diff line
# Download and Unzip Shapefile

You can download the shapefile  `ct_vest_20.zip` from the following URL:
[https://redistrictingdatahub.org/data/download-data/](https://redistrictingdatahub.org/data/download-data/)
[ct_vest_20.zip](https://redistrictingdatahub.org/dataset/vest-2020-connecticut-precinct-and-election-results/)


Unzip the archive and place its contents in this directory. The directory should contain the following files:
+11 −0
Original line number Diff line number Diff line
# Download and Unzip Shapefile

Download the shapefile `tl_2025_09_tabblock20.zip` from the following URL:
[tl_2025_09_tabblock20.zip](https://www2.census.gov/geo/tiger/TIGER2025/TABBLOCK20/tl_2025_09_tabblock20.zip)


Unzip the archive `tl_2025_09_tabblock20.zip` and place its contents in this directory. The directory should contain the following files:

```{.sh}

```
 No newline at end of file
+1 −1
Original line number Diff line number Diff line
# Download and Unzip Shapefile

You can download the shapefile  `tx_vest_20.zip` from the following URL:
[https://www2.census.gov/geo/tiger/TIGER2024/TABBLOCK20/](https://www2.census.gov/geo/tiger/TIGER2024/TABBLOCK20/)
[tx_vest_20.zip](https://redistrictingdatahub.org/dataset/vest-2020-texas-precinct-boundaries-and-election-results/)


Unzip the archive and place its contents in this directory. The directory should contain the following files: