Invocation notes for some patterns I find useful.
– Local system utilities –
Password generation
openssl rand -base64 $numBytes   # argument is a byte count; the base64 output is roughly 4/3 as long
Split a newline-delimited file into gzipped chunks
zcat $VAL.gz | split -l 1500000 --filter='gzip > $FILE.gz' - $VAL.gz.part.   # requires GNU split (--filter)
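To reassemble later, the gzipped parts can simply be concatenated, since concatenated gzip streams are themselves valid gzip:
cat $VAL.gz.part.*.gz > $VAL.recombined.gz
zcat $VAL.recombined.gz | wc -l   # sanity-check the line count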
Sum across file size for all files matching a glob pattern
find . -name "*.normed.gz" -ls | awk '{total += $7} END {print total}'
Count lines in gzipped file
gzcat filename.gz | wc -l
Local machine activity
top -o cpu
top -o mem
nettop
Converting images
From .avif to .png:
magick mogrify -format png -depth 10 -define heic:speed=2 *.avif
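For a one-off conversion of a single file (hypothetical filenames):
magick input.avif output.png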
– Google Cloud –
vim ~/.config/gcloud   # gcloud config directory; named configurations live under configurations/
gcloud config configurations list
gcloud config configurations activate $acct
gcloud compute ssh $machine
gcloud compute scp localfile $machine:~
gsutil cp gs://path/to/file local.file
And for GCP auth debugging:
import google.auth
creds, project = google.auth.default()
print(creds.service_account_email)
Bigtable
cbt ls
cbt ls $table
cbt -project $p -instance $i count $table
cbt -project $p -instance $i read $table prefix='' count=10
BigQuery
ARRAY(SELECT JSON_VALUE(singleton, '$.name') FROM UNNEST(JSON_QUERY_ARRAY(jsonBlob, '$')) singleton) AS fieldValuesWithinRepeatedJsonSchema
ARRAY(SELECT value FROM UNNEST(keyValuePairs) WHERE key = "Date" ORDER BY value ASC)[SAFE_OFFSET(0)] AS earliestDate
ARRAY(SELECT x FROM UNNEST([itemThatMightBeNull, otherItemThatMightBeNull]) x WHERE x is NOT NULL)
ARRAY(SELECT s.path.to.field FROM UNNEST(elements) s WHERE s.property IS NOT NULL and s.otherProperty = "value")
NET.IP_FROM_STRING, NET.IPV4_TO_INT64, NET.IP_TRUNC, NET.PUBLIC_SUFFIX, NET.REG_DOMAIN, NET.HOST
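A sketch of the NET functions in use (made-up values), run here through the bq CLI:
bq query --use_legacy_sql=false '
SELECT
  NET.HOST("https://sub.example.co.uk/path") AS host,
  NET.REG_DOMAIN("https://sub.example.co.uk/path") AS regDomain,
  NET.PUBLIC_SUFFIX("https://sub.example.co.uk/path") AS publicSuffix,
  NET.IPV4_TO_INT64(NET.IP_FROM_STRING("192.0.2.1")) AS ipAsInt,
  NET.IP_TRUNC(NET.IP_FROM_STRING("192.0.2.1"), 24) AS slash24'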
– Docker –
docker run -e env_var=$value \
  -v /Users/path/to/Documents/some/folder/to/mount:/user \
  --publish $hostLocalPort:$containerizedServicePort \
  --name $yourChoice $imageIdentifier
docker run -it --entrypoint /bin/bash $imageName:$tag
To mount GCP credentials:
-v /path/to/svc-acct.json:/mnt/workspace/gcloud_creds.json -e GOOGLE_APPLICATION_CREDENTIALS=/mnt/workspace/gcloud_creds.json
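Putting it together (a sketch with hypothetical paths):
docker run \
  -v /path/to/svc-acct.json:/mnt/workspace/gcloud_creds.json \
  -e GOOGLE_APPLICATION_CREDENTIALS=/mnt/workspace/gcloud_creds.json \
  $imageIdentifier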
To access a service on the host that is listening on localhost:1234, use http://host.docker.internal:1234 from inside the container.
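For example, to check it from inside a running container (assuming curl is available in the image):
docker exec -it $containerName curl http://host.docker.internal:1234/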
Access a database that is port-forwarded to localhost on the host, from within a container:
postgres://$USER:$PASS@docker.for.mac.host.internal:$localPort/$dbName
Kill Docker:
pkill -SIGHUP -f /Applications/Docker.app/ 'docker serve'
– Kubernetes –
Port forwarding:
kubectl --namespace=$namespace port-forward svc/$serviceName $localPort:$remotePort
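To find the service name and ports first, and to check the forward once it is running (the curl target is just an example):
kubectl --namespace=$namespace get svc
curl http://localhost:$localPort/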
– .ssh/config –
Host $location-or-*
    User $name
    HostName $ip
    LocalForward $localPort fully.qualified.domain.name:$remotePort
    IdentityFile ~/.ssh/id_rsa
    IgnoreUnknown UseKeychain AddKeysToAgent
    UseKeychain yes
    AddKeysToAgent yes
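With a LocalForward entry like the above, opening the connection also opens the tunnel; -N keeps it open without starting a remote shell (sketch, using the Host alias from this config):
ssh -N $location &
curl http://localhost:$localPort/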
– Python –
pyenv
pyenv install 3.11
pyenv versions
pyenv local|global|shell 3.11
Environment management
| | poetry | pipenv | isolated conda env with pip |
|---|---|---|---|
| Initialize | poetry init | (pipenv install any library to create a Pipfile) | conda create -n $name python=3.$version --yes |
| Add dependency | poetry add $library [--group $group] | pipenv install $library | pip install $library |
| Add dependency allowing up to next major version | poetry add $library@^2.0.5 | pipenv install $library~=2.0.5 | N/A (no ability to specify) |
| Update dependencies | poetry update | pipenv install | N/A (cannot update all in one command) |
| Update a dependency | poetry update $library | pipenv update $library (side effect: updates all) | pip install $library --upgrade |
| Create a lockfile | poetry lock [--no-update] | pipenv lock | N/A |
| Start shell | poetry shell | pipenv shell | conda activate $name |
| Run in environment | poetry run python $script.py | pipenv run python $script.py | N/A |
| Install from lockfile | poetry install | pipenv sync | N/A |
| Install in editable (development) mode | N/A (included in poetry install) | pipenv install -e .[all] | pip install -e .[all] |
New project configuration
To create a set of “dev-only” dependencies in poetry (all group dependencies get installed unless they are explicitly marked optional):
poetry add --group dev $packageName
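Installation of groups can then be controlled at install time (standard flags in Poetry 1.2+):
poetry install --with dev      # include the dev group
poetry install --without dev   # skip the dev group
poetry install --only main     # install only the main dependencies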
Makefile
ROOTDIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

.PHONY: format
format:
	cd ${ROOTDIR}; python -m isort . --profile black
	cd ${ROOTDIR}; python -m black --target-version py39 .
	cd ${ROOTDIR}; nbstripout notebook/*

.PHONY: lint
lint:
	cd ${ROOTDIR}; python -m mypy --install-types --non-interactive --check-untyped-defs .
	cd ${ROOTDIR}; python -m isort * --check-only --profile black
	cd ${ROOTDIR}; python -m flake8 .
	cd ${ROOTDIR}; python -m black --check .

.PHONY: test
test:
	# one of the following
	cd ${ROOTDIR}; pytest
	cd ${ROOTDIR}; python -m unittest discover

.PHONY: nbclean
nbclean:
	# run nbstripout on all .ipynb files at any depth within `notebooks` directory that are NOT in hidden directories
	cd ${ROOTDIR}; find notebooks -type f -name "*.ipynb" -not -path '*/\.*' -exec poetry run nbstripout {} \;
.pre-commit-config.yaml
repos:
  - repo: local
    hooks:
      - id: format
        name: Format
        stages: [commit, push]
        language: system
        entry: make format
      - id: lint
        name: Lint
        stages: [commit, push]
        language: system
        entry: make lint
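To activate the hooks defined above and run them once over the whole repo:
pre-commit install --hook-type pre-commit --hook-type pre-push
pre-commit run --all-files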
setup.cfg
[metadata]
description-file = README.md
[flake8]
ignore = E121,E203,E251,E261,E266,E302,E303,E305,E402,E501,F841,W503,E741,W605
exclude =
    .git,
    .eggs,
    .tox,
    build,
    dist,
    data,
    *.egg-info,
    notebooks,
    .mypy*,
    *.db
# Flake8 Ignored Context
# Codes: http://flake8.pycqa.org/en/latest/user/error-codes.html
# E121: continuation line under-indented for hanging indent
# E203: black enforces white space around slice operators
# E251: unexpected spaces around keyword / parameter equals
# E261: at least two spaces before inline comment
# E266: too many leading '#' for block comment; we might want blocks starting with #####
# E302: expected 2 blank lines (found fewer)
# E303: too many blank lines
# E305: expected 2 blank lines after class or function definition
# E402: module level import not at top of file; useful for putting docstrings at top of module before imports
# E501: line too long
# F841: local variable is assigned to but never used; we might want the pandas syntax `df.query('blah = @variable')`
# W503: line break before binary operator
# E741: ambiguous variable name
# W605: invalid escape sequence; triggers on valid regex expression like re.search('\W')
Ignoring missing mypy types
pyproject.toml:
[[tool.mypy.overrides]]
module = [
"tqdm"
]
ignore_missing_imports = true
Configuring poetry for a custom binary repo
pyproject.toml:
[[tool.poetry.source]]
name = "$customBinaryRepository"
url = "https://path.to.custom.binary.repository/pip-private/simple"
default = true # disables PyPI
secondary = false
[[tool.poetry.source]]
name = "pypi_secondary" # `pypi` is reserved for default PyPI source
url = "https://pypi.org/simple"
default = false
secondary = true
Configuration (see https://python-poetry.org/docs/repositories/):
poetry config http-basic.$customBinaryRepository <USERNAME_WITH_@> <PASSWORD_API_KEY>
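In CI, the same credentials can be supplied through Poetry's environment-variable mapping instead of poetry config (the source name is upper-cased in the variable name):
export POETRY_HTTP_BASIC_<SOURCENAME>_USERNAME=$username
export POETRY_HTTP_BASIC_<SOURCENAME>_PASSWORD=$apiKey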
Jupyter notebook setup
%load_ext autoreload
%autoreload 2
import logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(name)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    force=True,
)
LOGGER = logging.getLogger(__name__)
Testing
python -m unittest test_package.test_module.TestClass.test_method
pytest path/to/test_file.py::test_method
t-SNE
Derived from code by Chris Potts (Stanford).
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
def tsne_viz(df, colors=None, output_filename=None, figsize=(40, 50), random_state=42):
    """
    2d plot of `df` using t-SNE, with the points labeled by `df.index`,
    aligned with `colors` (defaults to all black).

    Source: https://github.com/cgpotts/cs224u/blob/afd64b41f845b0f444b152d0f7acf2a45228349a/vsm.py#L188
    """
    # Colors:
    vocab = df.index
    if not colors:
        colors = ['black' for i in vocab]
    # Recommended reduction via PCA or similar:
    n_components = 50 if df.shape[1] >= 50 else df.shape[1]
    dimreduce = PCA(n_components=n_components, random_state=random_state)
    X = dimreduce.fit_transform(df)
    print(f"Explained variance ratio: {np.round(dimreduce.explained_variance_ratio_, 2)}")
    # t-SNE:
    tsne = TSNE(n_components=2, random_state=random_state)
    tsnemat = tsne.fit_transform(X)
    # Plot values:
    xvals = tsnemat[:, 0]
    yvals = tsnemat[:, 1]
    # Plotting:
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    ax.plot(xvals, yvals, marker='', linestyle='')
    # Text labels:
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        ax.annotate(word, (x, y), fontsize=8, color=color)
    plt.axis('off')
    # Output:
    if output_filename:
        plt.savefig(output_filename, bbox_inches='tight')
    else:
        plt.show()
Credentials
Storing a credential without pasting it into the REPL:
from getpass import getpass
api_key = getpass("Enter your API key: ")
GCP credentials
Using default and/or service account credentials with GCP auth:
import google.auth as gauth
from google.auth.credentials import Credentials
from google.auth.transport.requests import Request
target_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
creds, project = gauth.default(
    # Must explicitly set scopes even in default to avoid "Invalid OAuth scope or ID token
    # audience provided" in some environments.
    scopes=target_scopes
)
# According to GCP documentation, `service_account_email` isn't guaranteed to be set until
# `refresh()` is called -- before this call, the value will be `default`.
creds.refresh(request=Request())
creds_email = (
    creds.service_account_email
    if hasattr(creds, "service_account_email")
    else "UNKNOWN"
)
LOGGER.info(f"Trying to use {creds_email}")
– User stories –
“As a <role>, I want to be able to <what> so that <why>.”
For instance: As a Platform and Application Developer, I want to be able to roll back my service to the previous version in the event that it does not function as I expect, so that I can keep my system up for its consumers.